... | ... |
@@ -23,6 +23,7 @@ export(PileupSequenceDataFrame) |
23 | 23 |
export(ProtectedEndSequenceData) |
24 | 24 |
export(ProtectedEndSequenceDataFrame) |
25 | 25 |
export(RNASequenceTrack) |
26 |
+export(SequenceData) |
|
26 | 27 |
export(SequenceDataList) |
27 | 28 |
export(SequenceDataSet) |
28 | 29 |
export(aggregate) |
... | ... |
@@ -84,9 +85,11 @@ exportMethods("[") |
84 | 85 |
exportMethods("settings<-") |
85 | 86 |
exportMethods(Modifier) |
86 | 87 |
exportMethods(ModifierSet) |
88 |
+exportMethods(SequenceData) |
|
87 | 89 |
exportMethods(aggregate) |
88 | 90 |
exportMethods(aggregateData) |
89 | 91 |
exportMethods(bamfiles) |
92 |
+exportMethods(cbind) |
|
90 | 93 |
exportMethods(compareByCoord) |
91 | 94 |
exportMethods(conditions) |
92 | 95 |
exportMethods(dataType) |
... | ... |
@@ -108,6 +111,7 @@ exportMethods(plotData) |
108 | 111 |
exportMethods(plotDataByCoord) |
109 | 112 |
exportMethods(plotROC) |
110 | 113 |
exportMethods(ranges) |
114 |
+exportMethods(rbind) |
|
111 | 115 |
exportMethods(replicates) |
112 | 116 |
exportMethods(seqinfo) |
113 | 117 |
exportMethods(sequenceData) |
... | ... |
@@ -383,7 +383,7 @@ setMethod(f = "bamfiles", |
383 | 383 |
setMethod(f = "conditions", |
384 | 384 |
signature = signature(object = "Modifier"), |
385 | 385 |
definition = function(object){ |
386 |
- conditions(sequenceData(object)) |
|
386 |
+ object@condition |
|
387 | 387 |
}) |
388 | 388 |
#' @rdname Modifier-functions |
389 | 389 |
#' @export |
... | ... |
@@ -461,7 +461,7 @@ setMethod(f = "ranges", |
461 | 461 |
setMethod(f = "replicates", |
462 | 462 |
signature = signature(x = "Modifier"), |
463 | 463 |
definition = function(x){ |
464 |
- replicates(sequenceData(x)) |
|
464 |
+ x@replicate |
|
465 | 465 |
}) |
466 | 466 |
#' @rdname Modifier-functions |
467 | 467 |
#' @export |
... | ... |
@@ -628,12 +628,12 @@ setReplaceMethod(f = "settings", |
628 | 628 |
data <- .norm_Modifier_input_SequenceData_elements(data, proto) |
629 | 629 |
bamfiles <- bamfiles(data) |
630 | 630 |
condition <- factor(names(bamfiles)) |
631 |
- new2(className, |
|
632 |
- mod = .norm_mod(proto@mod, className), |
|
633 |
- bamfiles = bamfiles, |
|
634 |
- condition = condition, |
|
635 |
- replicate = .get_replicate_number(bamfiles, condition), |
|
636 |
- data = data) |
|
631 |
+ new(className, |
|
632 |
+ mod = .norm_mod(proto@mod, className), |
|
633 |
+ bamfiles = bamfiles, |
|
634 |
+ condition = condition, |
|
635 |
+ replicate = .get_replicate_number(condition), |
|
636 |
+ data = data) |
|
637 | 637 |
} |
638 | 638 |
|
639 | 639 |
.load_SequenceData <- function(classes, bamfiles, annotation, sequences, |
... | ... |
@@ -9,6 +9,8 @@ NULL |
9 | 9 |
#' |
10 | 10 |
#' @title The SequenceData class |
11 | 11 |
#' |
12 |
+#' @md |
|
13 |
+#' |
|
12 | 14 |
#' @description |
13 | 15 |
#' The \code{SequenceData} class is implemented to contain data on each position |
14 | 16 |
#' along transcripts and holds the corresponding annotation data and |
... | ... |
@@ -37,8 +39,20 @@ NULL |
37 | 39 |
#' The \code{SequenceData} class is derived from the |
38 | 40 |
#' \code{\link[IRanges:DataFrameList-class]{CompressedSplitDataFrameList}} class |
39 | 41 |
#' with additional slots for annotation and sequence data. Some functionality is |
40 |
-#' not inherited and not available, e.g. \code{cbind}, \code{rbind} amd |
|
41 |
-#' \code{relist}. |
|
42 |
+#' not inherited and might not be available to the full extent, e.g. \code{relist}. |
|
43 |
+#' |
|
44 |
+#' **SequenceDataFrame** |
|
45 |
+#' |
|
46 |
+#' The \code{SequenceDataFrame} class contains data for positions along a single |
|
47 |
+#' transcript. It is used to describe elements from a \code{SequenceData} |
|
48 |
+#' object. |
|
49 |
+#' |
|
50 |
+#' The \code{SequenceDataFrame} class is derived from the |
|
51 |
+#' \code{\link[S4Vectors:DataFrame-class]{DataFrame}} class. |
|
52 |
+#' |
|
53 |
+#' Subsetting of a \code{SequenceDataFrame} returns a \code{SequenceDataFrame} or |
|
54 |
+#' \code{DataFrame}, if it is subset by a column or row, respectively. The |
|
55 |
+#' \code{drop} argument is ignored for column subsetting. |
|
42 | 56 |
#' |
43 | 57 |
#' @param dataType The prefix for constructing the class name of the |
44 | 58 |
#' \code{SequenceData} subclass to be constructed. |
... | ... |
@@ -68,6 +82,8 @@ NULL |
68 | 82 |
#' \item{\code{max_depth}} {maximum depth for pileup loading (default: |
69 | 83 |
#' \code{max_depth = 10000L}).} |
70 | 84 |
#' } |
85 |
+#' @param deparse.level See \code{\link[base:cbind]{base::cbind}} for a |
|
86 |
+#' description of this argument. |
|
71 | 87 |
#' |
72 | 88 |
#' @slot ranges a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}} |
73 | 89 |
#' object each element describing a transcript including its element. The |
... | ... |
@@ -96,8 +112,6 @@ NULL |
96 | 112 |
setClass("SequenceData", |
97 | 113 |
contains = c("VIRTUAL", "CompressedSplitDataFrameList"), |
98 | 114 |
slots = c(sequencesType = "character", |
99 |
- bamfiles = "BamFileList", |
|
100 |
- seqinfo = "Seqinfo", |
|
101 | 115 |
minQuality = "integer", |
102 | 116 |
unlistData = "SequenceDataFrame", |
103 | 117 |
unlistType = "character", |
... | ... |
@@ -192,12 +206,33 @@ S4Vectors::setValidity2(Class = "SequenceData", .valid.SequenceData) |
192 | 206 |
|
193 | 207 |
# coercion --------------------------------------------------------------------- |
194 | 208 |
|
195 |
-.as_SplitDataFrameList <- function(from){ |
|
196 |
- relist(as(unlist(from, use.names = FALSE),"DataFrame"), |
|
197 |
- IRanges::PartitioningByWidth(from)) |
|
209 |
+coerceToSequenceData <- function(className) { |
|
210 |
+ function(from) { |
|
211 |
+ if(is.list(from)) { |
|
212 |
+ classes <- unlist(lapply(from,class)) |
|
213 |
+ from <- from[classes == paste0(className,"Frame")] |
|
214 |
+ if(length(from) == 0) { |
|
215 |
+ FUN <- match.fun(className) |
|
216 |
+ from <- list(FUN()) |
|
217 |
+ } |
|
218 |
+ } else { |
|
219 |
+ if(is(from,className)){ |
|
220 |
+ return(from) |
|
221 |
+ } else if(is(from,paste0(className,"Frame"))) { |
|
222 |
+ from <- list(from) |
|
223 |
+ } else { |
|
224 |
+ stop("Cannot coerce ",class(from)," to ",className,".") |
|
225 |
+ } |
|
226 |
+ } |
|
227 |
+ IRanges:::coerceToCompressedList(from) |
|
228 |
+ } |
|
198 | 229 |
} |
199 |
-setAs("SequenceData", "SplitDataFrameList", .as_SplitDataFrameList) |
|
200 | 230 |
|
231 |
+setSequenceDataCoercions <- function(type) { |
|
232 |
+ className <- sequenceDataClass(type) |
|
233 |
+ setAs("ANY", className, coerceToSequenceData(className)) |
|
234 |
+ setAs("list", className, coerceToSequenceData(className)) |
|
235 |
+} |
|
201 | 236 |
|
202 | 237 |
# internals -------------------------------------------------------------------- |
203 | 238 |
|
... | ... |
@@ -223,8 +258,7 @@ setMethod("extractROWS", "SequenceData", |
223 | 258 |
ans_partitioning <- new("PartitioningByEnd", end = ans_breakpoints, |
224 | 259 |
NAMES = extractROWS(names(x), i)) |
225 | 260 |
ans_elementMetadata <- extractROWS(x@elementMetadata, i) |
226 |
- initialize(x, bamfiles = x@bamfiles, seqinfo = x@seqinfo, |
|
227 |
- minQuality = x@minQuality, unlistData = ans_unlistData, |
|
261 |
+ initialize(x, minQuality = x@minQuality, unlistData = ans_unlistData, |
|
228 | 262 |
partitioning = ans_partitioning, |
229 | 263 |
elementMetadata = ans_elementMetadata) |
230 | 264 |
} |
... | ... |
@@ -237,35 +271,115 @@ setMethod("rownames", "SequenceData", |
237 | 271 |
} |
238 | 272 |
) |
239 | 273 |
|
240 |
-# methods inherited from List and CompressedList, contain a coercion step |
|
241 |
-# x <- as(x, "List", strict = FALSE) |
|
242 |
-# |
|
243 |
-# This does not keep the SequenceData object intact resulting in coercion |
|
244 |
-# to a CompressedSplitDataFrameList. |
|
245 |
-setMethod("[[", "SequenceData", |
|
246 |
- function(x, i, j, ...) |
|
247 |
- { |
|
248 |
- METHOD <- selectMethod("[[", "List") |
|
249 |
- METHOD(x, i, j, ...) |
|
250 |
- } |
|
251 |
-) |
|
274 |
+# Concatenation ---------------------------------------------------------------- |
|
252 | 275 |
|
276 |
+.check_ranges <- function(args){ |
|
277 |
+ ranges <- lapply(args,ranges) |
|
278 |
+ ranges <- vapply(ranges[seq.int(2L,length(ranges))], |
|
279 |
+ function(r){ |
|
280 |
+ all(all(r == ranges[[1L]])) |
|
281 |
+ }, |
|
282 |
+ logical(1)) |
|
283 |
+ if(!all(ranges)){ |
|
284 |
+ stop("Inputs must have the same ranges.") |
|
285 |
+ } |
|
286 |
+} |
|
253 | 287 |
|
254 |
-# Concatenation ---------------------------------------------------------------- |
|
288 |
+.check_sequences <- function(args){ |
|
289 |
+ sequences <- lapply(args,sequences) |
|
290 |
+ sequences <- vapply(sequences[seq.int(2L,length(sequences))], |
|
291 |
+ function(s){ |
|
292 |
+ all(s == sequences[[1L]]) |
|
293 |
+ }, |
|
294 |
+ logical(1)) |
|
295 |
+ if(!all(sequences)){ |
|
296 |
+ stop("Inputs must have the same sequences.") |
|
297 |
+ } |
|
298 |
+} |
|
255 | 299 |
|
256 |
-setMethod("cbind", "SequenceData", |
|
257 |
- function(...){ |
|
258 |
- arg1 <- list(...)[[1L]] |
|
259 |
- stop("'rbind' is not supported for ",class(arg1),".") |
|
300 |
+.check_bamfiles <- function(args){ |
|
301 |
+ bamfiles <- lapply(args,bamfiles) |
|
302 |
+ bamfiles <- vapply(bamfiles[seq.int(2L,length(bamfiles))], |
|
303 |
+ function(b){ |
|
304 |
+ all(path(b) == path(bamfiles[[1L]])) |
|
305 |
+ }, |
|
306 |
+ logical(1)) |
|
307 |
+ if(!all(bamfiles)){ |
|
308 |
+ stop("Inputs must be derived from the same bamfiles.") |
|
260 | 309 |
} |
310 |
+} |
|
311 |
+ |
|
312 |
+#' @rdname SequenceData-class |
|
313 |
+#' @export |
|
314 |
+setMethod("cbind", "SequenceData", |
|
315 |
+ function(..., deparse.level = 1) |
|
316 |
+ { |
|
317 |
+ args <- list(...) |
|
318 |
+ if(length(args) == 1L){ |
|
319 |
+ return(args[[1L]]) |
|
320 |
+ } |
|
321 |
+ # input checks |
|
322 |
+ classes <- lapply(args,class) |
|
323 |
+ if(length(unique(classes)) != 1L){ |
|
324 |
+ stop("Inputs must be of the same SequenceDataFrame type.") |
|
325 |
+ } |
|
326 |
+ lengths <- vapply(args,function(a){sum(lengths(a))},integer(1)) |
|
327 |
+ if(length(unique(lengths)) != 1L){ |
|
328 |
+ stop("Inputs must have the same lengths.") |
|
329 |
+ } |
|
330 |
+ .check_ranges(args) |
|
331 |
+ .check_sequences(args) |
|
332 |
+ callNextMethod() |
|
333 |
+ } |
|
261 | 334 |
) |
335 |
+ |
|
336 |
+#' @rdname SequenceData-class |
|
337 |
+#' @export |
|
262 | 338 |
setMethod("rbind", "SequenceData", |
263 |
- function(...){ |
|
264 |
- arg1 <- list(...)[[1L]] |
|
265 |
- stop("'rbind' is not supported for ",class(arg1),".") |
|
339 |
+ function(..., deparse.level = 1) |
|
340 |
+ { |
|
341 |
+ args <- list(...) |
|
342 |
+ if(length(args) == 1L){ |
|
343 |
+ return(args[[1L]]) |
|
344 |
+ } |
|
345 |
+ # input checks |
|
346 |
+ classes <- lapply(args,class) |
|
347 |
+ if(length(unique(classes)) != 1L){ |
|
348 |
+ stop("Inputs must be of the same SequenceDataFrame type.") |
|
349 |
+ } |
|
350 |
+ lengths <- vapply(args,function(a){ncol(unlist(a))},integer(1)) |
|
351 |
+ if(length(unique(lengths)) != 1L){ |
|
352 |
+ stop("Inputs must have the same width.") |
|
353 |
+ } |
|
354 |
+ .check_bamfiles(args) |
|
355 |
+ callNextMethod() |
|
266 | 356 |
} |
267 | 357 |
) |
268 | 358 |
|
359 |
+setMethod("bindROWS", "SequenceData", |
|
360 |
+ function (x, objects = list(), use.names = TRUE, ignore.mcols = FALSE, |
|
361 |
+ check = TRUE) |
|
362 |
+ { |
|
363 |
+ objects <- S4Vectors:::prepare_objects_to_bind(x, objects) |
|
364 |
+ all_objects <- c(list(x), objects) |
|
365 |
+ names <- unlist(lapply(all_objects,names)) |
|
366 |
+ if(any(duplicated(names))){ |
|
367 |
+ stop("Input must have unique names.") |
|
368 |
+ } |
|
369 |
+ .check_bamfiles(all_objects) |
|
370 |
+ callNextMethod(x, objects, use.names = use.names, |
|
371 |
+ ignore.mcols = ignore.mcols, check = FALSE) |
|
372 |
+ } |
|
373 |
+) |
|
374 |
+ |
|
375 |
+setMethod("unlist", "SequenceData", |
|
376 |
+ function(x, recursive = TRUE, use.names = FALSE) |
|
377 |
+ { |
|
378 |
+ callNextMethod(x, recursive = recursive, use.names = FALSE) |
|
379 |
+ } |
|
380 |
+) |
|
381 |
+ |
|
382 |
+ |
|
269 | 383 |
# constructor ------------------------------------------------------------------ |
270 | 384 |
|
271 | 385 |
.quality_settings <- data.frame( |
... | ... |
@@ -279,9 +393,9 @@ setMethod("rbind", "SequenceData", |
279 | 393 |
.norm_settings(input, .quality_settings, minQuality)[["minQuality"]] |
280 | 394 |
} |
281 | 395 |
|
282 |
-.get_replicate_number <- function(bamfiles, conditions){ |
|
283 |
- control_rep <- seq_along(bamfiles[conditions == "control"]) |
|
284 |
- treated_rep <- seq_along(bamfiles[conditions == "treated"]) |
|
396 |
+.get_replicate_number <- function(conditions){ |
|
397 |
+ control_rep <- seq_along(conditions[conditions == "control"]) |
|
398 |
+ treated_rep <- seq_along(conditions[conditions == "treated"]) |
|
285 | 399 |
rep <- c(control_rep,treated_rep) |
286 | 400 |
rep <- rep[c(which(conditions == "control"), |
287 | 401 |
which(conditions == "treated"))] |
... | ... |
@@ -365,7 +479,7 @@ setMethod("rbind", "SequenceData", |
365 | 479 |
proto <- new(className) |
366 | 480 |
minQuality <- .norm_min_quality(args, proto@minQuality) |
367 | 481 |
condition <- factor(names(bamfiles)) |
368 |
- replicate <- .get_replicate_number(bamfiles, condition) |
|
482 |
+ replicate <- .get_replicate_number(condition) |
|
369 | 483 |
if(!assertive::is_a_non_empty_string(proto@dataDescription)){ |
370 | 484 |
stop("'dataDescription' must be a single non empty character value.") |
371 | 485 |
} |
... | ... |
@@ -419,16 +533,18 @@ setMethod("rbind", "SequenceData", |
419 | 533 |
############################################################################## |
420 | 534 |
# Create SequenceData object |
421 | 535 |
############################################################################## |
536 |
+ unlist_data <- |
|
537 |
+ .SequenceDataFrame(class = gsub("SequenceData","",className), |
|
538 |
+ df = unlist(data, use.names = FALSE), |
|
539 |
+ ranges = unlist(ranges, use.names = FALSE), |
|
540 |
+ sequence = unlist(sequences, use.names = FALSE), |
|
541 |
+ replicate = replicate, |
|
542 |
+ condition = condition, |
|
543 |
+ bamfiles = bamfiles, |
|
544 |
+ seqinfo = seqinfo) |
|
422 | 545 |
ans <- new(className, |
423 |
- bamfiles = bamfiles, |
|
424 |
- seqinfo = seqinfo, |
|
425 | 546 |
minQuality = minQuality, |
426 |
- unlistData = .SequenceDataFrame(gsub("SequenceData","",className), |
|
427 |
- unlist(data, use.names = FALSE), |
|
428 |
- unlist(ranges, use.names = FALSE), |
|
429 |
- unlist(sequences, use.names = FALSE), |
|
430 |
- replicate, |
|
431 |
- condition), |
|
547 |
+ unlistData = unlist_data, |
|
432 | 548 |
partitioning = IRanges::PartitioningByEnd(data), |
433 | 549 |
...) |
434 | 550 |
message("OK") |
... | ... |
@@ -558,6 +674,8 @@ setMethod("rbind", "SequenceData", |
558 | 674 |
|
559 | 675 |
################################################################################ |
560 | 676 |
|
677 |
+#' @rdname SequenceData-class |
|
678 |
+#' @export |
|
561 | 679 |
setGeneric( |
562 | 680 |
name = "SequenceData", |
563 | 681 |
signature = c("annotation","sequences"), |
... | ... |
@@ -565,72 +683,96 @@ setGeneric( |
565 | 683 |
standardGeneric("SequenceData") |
566 | 684 |
) |
567 | 685 |
|
686 |
+#' @rdname SequenceData-class |
|
687 |
+#' @export |
|
568 | 688 |
setMethod("SequenceData", |
569 | 689 |
signature = c(annotation = "character", sequences = "character"), |
570 | 690 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
571 | 691 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
572 | 692 |
seqinfo, ...) |
573 | 693 |
}) |
694 |
+#' @rdname SequenceData-class |
|
695 |
+#' @export |
|
574 | 696 |
setMethod("SequenceData", |
575 | 697 |
signature = c(annotation = "character", sequences = "BSgenome"), |
576 | 698 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
577 | 699 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
578 | 700 |
seqinfo, ...) |
579 | 701 |
}) |
702 |
+#' @rdname SequenceData-class |
|
703 |
+#' @export |
|
580 | 704 |
setMethod("SequenceData", |
581 | 705 |
signature = c(annotation = "TxDb", sequences = "character"), |
582 | 706 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
583 | 707 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
584 | 708 |
seqinfo, ...) |
585 | 709 |
}) |
710 |
+#' @rdname SequenceData-class |
|
711 |
+#' @export |
|
586 | 712 |
setMethod("SequenceData", |
587 | 713 |
signature = c(annotation = "TxDb", sequences = "BSgenome"), |
588 | 714 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
589 | 715 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
590 | 716 |
seqinfo, ...) |
591 | 717 |
}) |
718 |
+#' @rdname SequenceData-class |
|
719 |
+#' @export |
|
592 | 720 |
setMethod("SequenceData", |
593 | 721 |
signature = c(annotation = "GRangesList", sequences = "character"), |
594 | 722 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
595 | 723 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
596 | 724 |
seqinfo, ...) |
597 | 725 |
}) |
726 |
+#' @rdname SequenceData-class |
|
727 |
+#' @export |
|
598 | 728 |
setMethod("SequenceData", |
599 | 729 |
signature = c(annotation = "GRangesList", sequences = "BSgenome"), |
600 | 730 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
601 | 731 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
602 | 732 |
seqinfo, ...) |
603 | 733 |
}) |
734 |
+#' @rdname SequenceData-class |
|
735 |
+#' @export |
|
604 | 736 |
setMethod("SequenceData", |
605 | 737 |
signature = c(annotation = "GFF3File", sequences = "BSgenome"), |
606 | 738 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
607 | 739 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
608 | 740 |
seqinfo, ...) |
609 | 741 |
}) |
742 |
+#' @rdname SequenceData-class |
|
743 |
+#' @export |
|
610 | 744 |
setMethod("SequenceData", |
611 | 745 |
signature = c(annotation = "GFF3File", sequences = "character"), |
612 | 746 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
613 | 747 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
614 | 748 |
seqinfo, ...) |
615 | 749 |
}) |
750 |
+#' @rdname SequenceData-class |
|
751 |
+#' @export |
|
616 | 752 |
setMethod("SequenceData", |
617 | 753 |
signature = c(annotation = "character", sequences = "FaFile"), |
618 | 754 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
619 | 755 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
620 | 756 |
seqinfo, ...) |
621 | 757 |
}) |
758 |
+#' @rdname SequenceData-class |
|
759 |
+#' @export |
|
622 | 760 |
setMethod("SequenceData", |
623 | 761 |
signature = c(annotation = "GFF3File", sequences = "FaFile"), |
624 | 762 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
625 | 763 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
626 | 764 |
seqinfo, ...) |
627 | 765 |
}) |
766 |
+#' @rdname SequenceData-class |
|
767 |
+#' @export |
|
628 | 768 |
setMethod("SequenceData", |
629 | 769 |
signature = c(annotation = "TxDb", sequences = "FaFile"), |
630 | 770 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
631 | 771 |
.new_SequenceData(dataType, bamfiles, annotation, sequences, |
632 | 772 |
seqinfo, ...) |
633 | 773 |
}) |
774 |
+#' @rdname SequenceData-class |
|
775 |
+#' @export |
|
634 | 776 |
setMethod("SequenceData", |
635 | 777 |
signature = c(annotation = "GRangesList", sequences = "FaFile"), |
636 | 778 |
function(dataType, bamfiles, annotation, sequences, seqinfo, ...){ |
... | ... |
@@ -659,7 +801,7 @@ setMethod("getData", |
659 | 801 |
#' @export |
660 | 802 |
setMethod(f = "bamfiles", |
661 | 803 |
signature = signature(x = "SequenceData"), |
662 |
- definition = function(x){x@bamfiles}) |
|
804 |
+ definition = function(x){bamfiles(unlist(x))}) |
|
663 | 805 |
#' @rdname SequenceData-functions |
664 | 806 |
#' @export |
665 | 807 |
setMethod(f = "conditions", |
... | ... |
@@ -674,12 +816,12 @@ setMethod( |
674 | 816 |
function(x){ |
675 | 817 |
partitioning <- IRanges::PartitioningByEnd(x) |
676 | 818 |
unlisted_ranges <- ranges(unlist(x)) |
677 |
- ends <- cumsum(width(unlisted_ranges)) == cumsum(width(partitioning)) |
|
678 |
- partitioning_relist <- IRanges::PartitioningByEnd(which(ends)) |
|
679 |
- names(partitioning_relist) <- names(x) |
|
819 |
+ ends <- match(cumsum(width(partitioning)),cumsum(width(unlisted_ranges))) |
|
820 |
+ partitioning_relist <- IRanges::PartitioningByEnd(ends) |
|
680 | 821 |
if(length(x) != length(partitioning_relist)){ |
681 | 822 |
stop("ranges could not be relisted.") |
682 | 823 |
} |
824 |
+ names(partitioning_relist) <- names(x) |
|
683 | 825 |
relist(unlisted_ranges, partitioning_relist) |
684 | 826 |
}) |
685 | 827 |
#' @rdname SequenceData-functions |
... | ... |
@@ -691,7 +833,7 @@ setMethod(f = "replicates", |
691 | 833 |
#' @export |
692 | 834 |
setMethod(f = "seqinfo", |
693 | 835 |
signature = signature(x = "SequenceData"), |
694 |
- definition = function(x){x@seqinfo}) |
|
836 |
+ definition = function(x){seqinfo(unlist(x))}) |
|
695 | 837 |
#' @rdname SequenceData-functions |
696 | 838 |
#' @export |
697 | 839 |
setMethod(f = "sequences", |
... | ... |
@@ -25,6 +25,9 @@ NULL |
25 | 25 |
#' of \code{ranges(x)} |
26 | 26 |
#' @param condition For \code{\link{aggregate}}: condition for which the data |
27 | 27 |
#' should be aggregated. |
28 |
+#' @param df,ranges,sequence,replicate inputs for creating a |
|
29 |
+#' \code{SequenceDataFrame}. See |
|
30 |
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}. |
|
28 | 31 |
#' |
29 | 32 |
#' @return a \code{CoverageSequenceData} object |
30 | 33 |
#' |
... | ... |
@@ -47,8 +50,9 @@ setClass(Class = "CoverageSequenceDataFrame", |
47 | 50 |
#' @rdname CoverageSequenceData-class |
48 | 51 |
#' @export |
49 | 52 |
CoverageSequenceDataFrame <- function(df, ranges, sequence, replicate, |
50 |
- condition){ |
|
51 |
- .SequenceDataFrame("Coverage",df, ranges, sequence, replicate, condition) |
|
53 |
+ condition, bamfiles, seqinfo){ |
|
54 |
+ .SequenceDataFrame("Coverage",df, ranges, sequence, replicate, condition, |
|
55 |
+ bamfiles, seqinfo) |
|
52 | 56 |
} |
53 | 57 |
#' @rdname CoverageSequenceData-class |
54 | 58 |
#' @export |
... | ... |
@@ -66,6 +70,8 @@ CoverageSequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){ |
66 | 70 |
sequences = sequences, seqinfo = seqinfo, ...) |
67 | 71 |
} |
68 | 72 |
|
73 |
+setSequenceDataCoercions("Coverage") |
|
74 |
+ |
|
69 | 75 |
# CoverageSequenceData --------------------------------------------------------- |
70 | 76 |
|
71 | 77 |
.process_coverage_data <- function(coverage, grl){ |
... | ... |
@@ -28,6 +28,9 @@ NULL |
28 | 28 |
#' transcript name. Must be a name of \code{ranges(x).} |
29 | 29 |
#' @param condition For \code{\link{aggregate}}: condition for which the data |
30 | 30 |
#' should be aggregated. |
31 |
+#' @param df,ranges,sequence,replicate inputs for creating a |
|
32 |
+#' \code{SequenceDataFrame}. See |
|
33 |
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}. |
|
31 | 34 |
#' |
32 | 35 |
#' @return a \code{End5SequenceData}, a \code{End3SequenceData} or a |
33 | 36 |
#' \code{EndSequenceData} object |
... | ... |
@@ -50,8 +53,9 @@ setClass(Class = "End5SequenceDataFrame", |
50 | 53 |
#' @rdname EndSequenceData-class |
51 | 54 |
#' @export |
52 | 55 |
End5SequenceDataFrame <- function(df, ranges, sequence, replicate, |
53 |
- condition){ |
|
54 |
- .SequenceDataFrame("End5",df, ranges, sequence, replicate, condition) |
|
56 |
+ condition, bamfiles, seqinfo){ |
|
57 |
+ .SequenceDataFrame("End5",df, ranges, sequence, replicate, condition, |
|
58 |
+ bamfiles, seqinfo) |
|
55 | 59 |
} |
56 | 60 |
#' @rdname EndSequenceData-class |
57 | 61 |
#' @export |
... | ... |
@@ -69,8 +73,10 @@ setClass(Class = "End3SequenceDataFrame", |
69 | 73 |
contains = "SequenceDataFrame") |
70 | 74 |
#' @rdname EndSequenceData-class |
71 | 75 |
#' @export |
72 |
-End3SequenceDataFrame <- function(df, ranges, sequence, replicate, condition){ |
|
73 |
- .SequenceDataFrame("End3",df, ranges, sequence, replicate, condition) |
|
76 |
+End3SequenceDataFrame <- function(df, ranges, sequence, replicate, condition, |
|
77 |
+ bamfiles, seqinfo){ |
|
78 |
+ .SequenceDataFrame("End3",df, ranges, sequence, replicate, condition, |
|
79 |
+ bamfiles, seqinfo) |
|
74 | 80 |
} |
75 | 81 |
#' @rdname EndSequenceData-class |
76 | 82 |
#' @export |
... | ... |
@@ -84,8 +90,10 @@ setClass(Class = "End3SequenceData", |
84 | 90 |
|
85 | 91 |
#' @rdname EndSequenceData-class |
86 | 92 |
#' @export |
87 |
-EndSequenceDataFrame <- function(df, ranges, sequence, replicate, condition){ |
|
88 |
- .SequenceDataFrame("End",df, ranges, sequence, replicate, condition) |
|
93 |
+EndSequenceDataFrame <- function(df, ranges, sequence, replicate, condition, |
|
94 |
+ bamfiles, seqinfo){ |
|
95 |
+ .SequenceDataFrame("End",df, ranges, sequence, replicate, condition, |
|
96 |
+ bamfiles, seqinfo) |
|
89 | 97 |
} |
90 | 98 |
#' @rdname EndSequenceData-class |
91 | 99 |
#' @export |
... | ... |
@@ -120,6 +128,10 @@ EndSequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){ |
120 | 128 |
sequences = sequences, seqinfo = seqinfo, ...) |
121 | 129 |
} |
122 | 130 |
|
131 |
+setSequenceDataCoercions("End5") |
|
132 |
+setSequenceDataCoercions("End3") |
|
133 |
+setSequenceDataCoercions("End") |
|
134 |
+ |
|
123 | 135 |
# End5SequenceData ------------------------------------------------------------------ |
124 | 136 |
|
125 | 137 |
.summarize_to_position_data <- function(data, hits, names, strands, type){ |
... | ... |
@@ -259,7 +271,7 @@ setMethod("getData", |
259 | 271 |
.aggregate_list_data_mean_sd <- function(x, condition){ |
260 | 272 |
conditions <- conditions(x) |
261 | 273 |
f <- .subset_to_condition(conditions, condition) |
262 |
- df <- as(unlist(x,use.names=FALSE),"DataFrame") |
|
274 |
+ df <- as(unlist(x,use.names=FALSE),"DataFrame")[,f,drop=FALSE] |
|
263 | 275 |
conditions_u <- unique(conditions[f]) |
264 | 276 |
# set up some base values. replicates is here the same as the number of |
265 | 277 |
# columns, since a list per replicate is assumed |
... | ... |
@@ -31,6 +31,9 @@ NULL |
31 | 31 |
#' transcript name. Must be a name of \code{ranges(x)} |
32 | 32 |
#' @param condition For \code{\link{aggregate}}: condition for which the data |
33 | 33 |
#' should be aggregated. |
34 |
+#' @param df,ranges,sequence,replicate inputs for creating a |
|
35 |
+#' \code{SequenceDataFrame}. See |
|
36 |
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}. |
|
34 | 37 |
#' |
35 | 38 |
#' @return a \code{NormEnd5SequenceData} or \code{NormEnd3SequenceData} object |
36 | 39 |
#' |
... | ... |
@@ -54,8 +57,9 @@ setClass(Class = "NormEnd5SequenceDataFrame", |
54 | 57 |
#' @rdname NormEndSequenceData-class |
55 | 58 |
#' @export |
56 | 59 |
NormEnd5SequenceDataFrame <- function(df, ranges, sequence, replicate, |
57 |
- condition){ |
|
58 |
- .SequenceDataFrame("NormEnd5",df, ranges, sequence, replicate, condition) |
|
60 |
+ condition, bamfiles, seqinfo){ |
|
61 |
+ .SequenceDataFrame("NormEnd5",df, ranges, sequence, replicate, condition, |
|
62 |
+ bamfiles, seqinfo) |
|
59 | 63 |
} |
60 | 64 |
#' @rdname NormEndSequenceData-class |
61 | 65 |
#' @export |
... | ... |
@@ -74,8 +78,9 @@ setClass(Class = "NormEnd3SequenceDataFrame", |
74 | 78 |
#' @rdname NormEndSequenceData-class |
75 | 79 |
#' @export |
76 | 80 |
NormEnd3SequenceDataFrame <- function(df, ranges, sequence, replicate, |
77 |
- condition){ |
|
78 |
- .SequenceDataFrame("NormEnd3",df, ranges, sequence, replicate, condition) |
|
81 |
+ condition, bamfiles, seqinfo){ |
|
82 |
+ .SequenceDataFrame("NormEnd3",df, ranges, sequence, replicate, condition, |
|
83 |
+ bamfiles, seqinfo) |
|
79 | 84 |
} |
80 | 85 |
#' @rdname NormEndSequenceData-class |
81 | 86 |
#' @export |
... | ... |
@@ -100,6 +105,9 @@ NormEnd3SequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){ |
100 | 105 |
sequences = sequences, seqinfo = seqinfo, ...) |
101 | 106 |
} |
102 | 107 |
|
108 |
+setSequenceDataCoercions("NormEnd5") |
|
109 |
+setSequenceDataCoercions("NormEnd3") |
|
110 |
+ |
|
103 | 111 |
# summary ---------------------------------------------------------------------- |
104 | 112 |
|
105 | 113 |
.get_summary_MultiColSequenceData <- function(object){ |
... | ... |
@@ -246,7 +254,7 @@ setMethod("getData", |
246 | 254 |
.aggregate_data_frame_mean_sd <- function(x, condition){ |
247 | 255 |
conditions <- conditions(x) |
248 | 256 |
f <- .subset_to_condition(conditions, condition) |
249 |
- df <- as(unlist(x,use.names=FALSE)[,f],"DataFrame") |
|
257 |
+ df <- as(unlist(x,use.names=FALSE),"DataFrame")[,f,drop=FALSE] |
|
250 | 258 |
conditions_u <- unique(conditions[f]) |
251 | 259 |
replicates <- replicates(x)[f] |
252 | 260 |
# set up some base values |
... | ... |
@@ -28,6 +28,9 @@ NULL |
28 | 28 |
#' transcript name. Must be a name of \code{ranges(x)} |
29 | 29 |
#' @param condition For \code{\link{aggregate}}: condition for which the data |
30 | 30 |
#' should be aggregated. |
31 |
+#' @param df,ranges,sequence,replicate inputs for creating a |
|
32 |
+#' \code{SequenceDataFrame}. See |
|
33 |
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}. |
|
31 | 34 |
#' |
32 | 35 |
#' @return a \code{PileupSequenceData} object |
33 | 36 |
#' |
... | ... |
@@ -48,8 +51,10 @@ setClass(Class = "PileupSequenceDataFrame", |
48 | 51 |
contains = "SequenceDataFrame") |
49 | 52 |
#' @rdname PileupSequenceData-class |
50 | 53 |
#' @export |
51 |
-PileupSequenceDataFrame <- function(df, ranges, sequence, replicate, condition){ |
|
52 |
- .SequenceDataFrame("Pileup", df, ranges, sequence, replicate, condition) |
|
54 |
+PileupSequenceDataFrame <- function(df, ranges, sequence, replicate, condition, |
|
55 |
+ bamfiles, seqinfo){ |
|
56 |
+ .SequenceDataFrame("Pileup", df, ranges, sequence, replicate, condition, |
|
57 |
+ bamfiles, seqinfo) |
|
53 | 58 |
} |
54 | 59 |
#' @rdname PileupSequenceData-class |
55 | 60 |
#' @export |
... | ... |
@@ -68,6 +73,8 @@ PileupSequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){ |
68 | 73 |
sequences = sequences, seqinfo = seqinfo, ...) |
69 | 74 |
} |
70 | 75 |
|
76 |
+setSequenceDataCoercions("Pileup") |
|
77 |
+ |
|
71 | 78 |
# PileupSequenceData ---------------------------------------------------------------- |
72 | 79 |
|
73 | 80 |
.fill_up_pileup_data <- function(pileup,grl,irl){ |
... | ... |
@@ -180,7 +187,7 @@ setMethod("summary", |
180 | 187 |
.aggregate_data_frame_percentage_mean_sd <- function(x,condition){ |
181 | 188 |
conditions <- conditions(x) |
182 | 189 |
f <- .subset_to_condition(conditions, condition) |
183 |
- df <- as(unlist(x,use.names=FALSE)[,f],"DataFrame") |
|
190 |
+ df <- as(unlist(x,use.names=FALSE),"DataFrame")[,f,drop=FALSE] |
|
184 | 191 |
conditions_u <- unique(conditions[f]) |
185 | 192 |
replicates <- replicates(x)[f] |
186 | 193 |
# set up some base values |
... | ... |
@@ -370,12 +377,13 @@ setGeneric(name = "pileupToCoverage", |
370 | 377 |
def = function(x) standardGeneric("pileupToCoverage")) |
371 | 378 |
|
372 | 379 |
.aggregate_pile_up_to_coverage <- function(data){ |
373 |
- unlisted_data <- unlist(data) |
|
374 |
- replicates <- unique(replicates(data)) |
|
380 |
+ unlisted_data <- unlist(data,use.names=FALSE) |
|
381 |
+ replicates <- unique(as.integer(interaction(conditions(data), |
|
382 |
+ replicates(data)))) |
|
375 | 383 |
ans <- IRanges::IntegerList( |
376 | 384 |
lapply(seq_along(replicates), |
377 | 385 |
function(i){ |
378 |
- rowSums(as.data.frame(unlisted_data[,replicates(data) == i])) |
|
386 |
+ rowSums(as.data.frame(unlisted_data[,replicates == i])) |
|
379 | 387 |
})) |
380 | 388 |
names(ans) <- paste0("replicate.",replicates) |
381 | 389 |
ans <- do.call(S4Vectors::DataFrame,ans) |
... | ... |
@@ -38,6 +38,9 @@ RNAMODR_PROT_SEQDATA_PLOT_DATA_COLOURS <- c(means = "#FBB4AE", |
38 | 38 |
#' transcript name. Must be a name of \code{ranges(x)} |
39 | 39 |
#' @param condition For \code{\link{aggregate}}: condition for which the data |
40 | 40 |
#' should be aggregated. |
41 |
+#' @param df,ranges,sequence,replicate inputs for creating a |
|
42 |
+#' \code{SequenceDataFrame}. See |
|
43 |
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}. |
|
41 | 44 |
#' |
42 | 45 |
#' @return a \code{ProtectedEndSequenceData} object |
43 | 46 |
#' |
... | ... |
@@ -61,8 +64,9 @@ setClass(Class = "ProtectedEndSequenceDataFrame", |
61 | 64 |
#' @rdname ProtectedEndSequenceData-class |
62 | 65 |
#' @export |
63 | 66 |
ProtectedEndSequenceDataFrame <- function(df, ranges, sequence, replicate, |
64 |
- condition){ |
|
65 |
- .SequenceDataFrame("ProtectedEnd",df, ranges, sequence, replicate, condition) |
|
67 |
+ condition, bamfiles, seqinfo){ |
|
68 |
+ .SequenceDataFrame("ProtectedEnd",df, ranges, sequence, replicate, condition, |
|
69 |
+ bamfiles, seqinfo) |
|
66 | 70 |
} |
67 | 71 |
#' @rdname ProtectedEndSequenceData-class |
68 | 72 |
#' @export |
... | ... |
@@ -82,6 +86,8 @@ ProtectedEndSequenceData <- function(bamfiles, annotation, sequences, seqinfo, |
82 | 86 |
sequences = sequences, seqinfo = seqinfo, ...) |
83 | 87 |
} |
84 | 88 |
|
89 |
+setSequenceDataCoercions("ProtectedEnd") |
|
90 |
+ |
|
85 | 91 |
# ProtectedEndSequenceData ----------------------------------------------------- |
86 | 92 |
|
87 | 93 |
#' @rdname ProtectedEndSequenceData-class |
... | ... |
@@ -96,10 +96,12 @@ NULL |
96 | 96 |
} |
97 | 97 |
} |
98 | 98 |
} |
99 |
+ levels <- unique(coord$Parent) |
|
100 |
+ # coord <- coord[order(start(coord))] |
|
99 | 101 |
if(merge){ |
100 |
- coord <- split(coord, factor(coord$Parent, levels = unique(coord$Parent))) |
|
102 |
+ coord <- split(coord, factor(coord$Parent, levels = levels)) |
|
101 | 103 |
} else { |
102 |
- coord <- coord[order(factor(coord$Parent,unique(coord$Parent)))] |
|
104 |
+ coord <- coord[order(factor(coord$Parent, levels = levels))] |
|
103 | 105 |
coord <- split(coord, seq_along(coord)) |
104 | 106 |
names(coord) <- mcols(coord, level="within")[,"Parent"] |
105 | 107 |
} |
... | ... |
@@ -216,7 +218,9 @@ NULL |
216 | 218 |
# converts everything to a GRangesList |
217 | 219 |
coord <- .norm_coord(coord, args[["type"]], args[["merge"]]) |
218 | 220 |
if(args[["rawData"]]){ |
219 |
- data <- .norm_sequence_data(as(x,"SplitDataFrameList")) |
|
221 |
+ data <- relist(as(unlist(x, use.names = FALSE),"DataFrame"), |
|
222 |
+ IRanges::PartitioningByWidth(x)) |
|
223 |
+ data <- .norm_sequence_data(data) |
|
220 | 224 |
} else { |
221 | 225 |
data <- .norm_aggregate_data(aggregate(x)) |
222 | 226 |
} |
... | ... |
@@ -350,7 +354,8 @@ setMethod("subsetByCoord", |
350 | 354 |
# converts everything to a GRangesList |
351 | 355 |
coord <- .norm_coord(coord, args[["type"]]) |
352 | 356 |
if(args[["rawData"]]){ |
353 |
- data <- as(x,"SplitDataFrameList") |
|
357 |
+ data <- relist(as(unlist(x, use.names = FALSE),"DataFrame"), |
|
358 |
+ IRanges::PartitioningByWidth(x)) |
|
354 | 359 |
} else { |
355 | 360 |
data <- aggregate(x) |
356 | 361 |
} |
... | ... |
@@ -58,21 +58,11 @@ NULL |
58 | 58 |
#' Subsetting of a \code{SequenceDataFrame} returns a \code{SequenceDataFrame} or |
59 | 59 |
#' \code{DataFrame}, if it is subset by a column or row, respectively. The |
60 | 60 |
#' \code{drop} argument is ignored for column subsetting. |
61 |
-#' |
|
62 |
-#' @param df the data as a \code{DataFrame}. |
|
63 |
-#' @param ranges a \code{GRanges} object containing all annotation elements |
|
64 |
-#' for a transcript. |
|
65 |
-#' @param sequence \code{XString} object describing the nucleotide sequence of |
|
66 |
-#' the transcript. |
|
67 |
-#' @param condition The condition of each column or set of columns. Either |
|
68 |
-#' \code{control} or \code{treated}. |
|
69 |
-#' @param replicate The replicate of each column or set of columns for the |
|
70 |
-#' individual conditions |
|
71 |
-#' @param x,i,j,...,drop arguments used for |
|
72 |
-#' \code{\link[S4Vectors:DataFrame-class]{subsetting}}. |
|
73 |
-#' |
|
74 |
-#' @return a \code{SequenceDataFrame} object |
|
75 |
-#' |
|
61 |
+#' |
|
62 |
+#' @param x,i,j,...,drop,deparse.level arguments used for |
|
63 |
+#' \code{\link[S4Vectors:DataFrame-class]{subsetting}} or |
|
64 |
+#' \code{\link[base:cbind]{base::cbind}}. |
|
65 |
+#' |
|
76 | 66 |
#' @examples |
77 | 67 |
#' data(e5sd,package="RNAmodR") |
78 | 68 |
#' # A SequenceDataFrame is usually constructed by subsetting from |
... | ... |
@@ -89,11 +79,15 @@ setClass(Class = "SequenceDataFrame", |
89 | 79 |
slots = c(ranges = "GRanges", |
90 | 80 |
sequence = "XString", |
91 | 81 |
condition = "factor", |
92 |
- replicate = "factor"), |
|
82 |
+ replicate = "factor", |
|
83 |
+ bamfiles = "BamFileList", |
|
84 |
+ seqinfo = "Seqinfo"), |
|
93 | 85 |
prototype = list(ranges = GRanges(), |
94 | 86 |
sequence = RNAString(), |
95 | 87 |
condition = factor(), |
96 |
- replicate = factor())) |
|
88 |
+ replicate = factor(), |
|
89 |
+ bamfiles = Rsamtools::BamFileList(), |
|
90 |
+ seqinfo = GenomeInfoDb::Seqinfo())) |
|
97 | 91 |
|
98 | 92 |
setMethod("relistToClass", "SequenceDataFrame", |
99 | 93 |
function(x) gsub("DataFrame","Data",class(x)) |
... | ... |
@@ -112,7 +106,7 @@ sequenceDataFrameClass <- function(dataType){ |
112 | 106 |
} |
113 | 107 |
|
114 | 108 |
.SequenceDataFrame <- function(class, df, ranges, sequence, replicate, |
115 |
- condition){ |
|
109 |
+ condition, bamfiles, seqinfo){ |
|
116 | 110 |
 # defaults from function are strangely not set |
117 | 111 |
if(missing(df)){ |
118 | 112 |
df <- DataFrame() |
... | ... |
@@ -129,6 +123,12 @@ sequenceDataFrameClass <- function(dataType){ |
129 | 123 |
if(missing(condition)){ |
130 | 124 |
condition <- factor() |
131 | 125 |
} |
126 |
+ if(missing(bamfiles)){ |
|
127 |
+ bamfiles <- Rsamtools::BamFileList() |
|
128 |
+ } |
|
129 |
+ if(missing(seqinfo)){ |
|
130 |
+ seqinfo <- GenomeInfoDb::Seqinfo() |
|
131 |
+ } |
|
132 | 132 |
# check inputs |
133 | 133 |
if(!is(df,"DataFrame")){ |
134 | 134 |
stop("Invalid data object: ", class(df), " found, DataFrame expected.") |
... | ... |
@@ -149,6 +149,8 @@ sequenceDataFrameClass <- function(dataType){ |
149 | 149 |
sequence = sequence, |
150 | 150 |
condition = condition, |
151 | 151 |
replicate = replicate, |
152 |
+ bamfiles = bamfiles, |
|
153 |
+ seqinfo = seqinfo, |
|
152 | 154 |
rownames = df@rownames, |
153 | 155 |
nrows = df@nrows, |
154 | 156 |
listData = df@listData, |
... | ... |
@@ -208,6 +210,18 @@ setMethod( |
208 | 210 |
f = "conditions", |
209 | 211 |
signature = signature(object = "SequenceDataFrame"), |
210 | 212 |
definition = function(object){object@condition}) |
213 |
+#' @rdname SequenceData-functions |
|
214 |
+#' @export |
|
215 |
+setMethod( |
|
216 |
+ f = "bamfiles", |
|
217 |
+ signature = signature(x = "SequenceDataFrame"), |
|
218 |
+ definition = function(x){x@bamfiles}) |
|
219 |
+#' @rdname SequenceData-functions |
|
220 |
+#' @export |
|
221 |
+setMethod( |
|
222 |
+ f = "seqinfo", |
|
223 |
+ signature = signature(x = "SequenceDataFrame"), |
|
224 |
+ definition = function(x){x@seqinfo}) |
|
211 | 225 |
|
212 | 226 |
# internals -------------------------------------------------------------------- |
213 | 227 |
|
... | ... |
@@ -249,6 +263,61 @@ setMethod( |
249 | 263 |
} |
250 | 264 |
) |
251 | 265 |
|
266 |
+#' @rdname SequenceDataFrame-class |
|
267 |
+#' @export |
|
268 |
+setMethod( |
|
269 |
+ "cbind", "SequenceDataFrame", |
|
270 |
+ function(...){ |
|
271 |
+ args <- list(...) |
|
272 |
+ if(length(args) == 1L){ |
|
273 |
+ return(args[[1L]]) |
|
274 |
+ } |
|
275 |
+ # input checks |
|
276 |
+ classes <- lapply(args,class) |
|
277 |
+ if(length(unique(classes)) != 1L){ |
|
278 |
+ stop("Inputs must be of the same SequenceDataFrame type.") |
|
279 |
+ } |
|
280 |
+ className <- unique(classes) |
|
281 |
+ lengths <- vapply(args,function(a){sum(lengths(a))},integer(1)) |
|
282 |
+ if(length(unique(lengths)) != 1L){ |
|
283 |
+ stop("Inputs must have the same length.") |
|
284 |
+ } |
|
285 |
+ .check_ranges(args) |
|
286 |
+ .check_sequences(args) |
|
287 |
+ # |
|
288 |
+ data <- do.call(cbind, |
|
289 |
+ lapply(args,function(a){ |
|
290 |
+ as(a, "DataFrame") |
|
291 |
+ })) |
|
292 |
+ ranges <- ranges(args[[1L]]) |
|
293 |
+ sequences <- sequences(args[[1L]]) |
|
294 |
+ colnames <- IRanges::CharacterList(strsplit(colnames(data),"\\.")) |
|
295 |
+ colnames_conditions <- colnames %in% c("treated","control") |
|
296 |
+ colnames_replicates <- !is.na(suppressWarnings(IntegerList(colnames))) |
|
297 |
+ colnames_f <- !(colnames_conditions | colnames_replicates) |
|
298 |
+ conditionsFmultiplier <- length(unique(vapply(colnames[colnames_f], |
|
299 |
+ paste,character(1), |
|
300 |
+ collapse="."))) |
|
301 |
+ condition <- unlist(lapply(args,conditions)) |
|
302 |
+ condition_steps <- seq.int(1,length(condition),by=conditionsFmultiplier) |
|
303 |
+ replicate <- .get_replicate_number(condition[condition_steps]) |
|
304 |
+ replicate <- rep(replicate, each = conditionsFmultiplier) |
|
305 |
+ colnames[colnames_conditions] <- IRanges::CharacterList(condition) |
|
306 |
+ colnames[colnames_replicates] <- IRanges::CharacterList(replicate) |
|
307 |
+ colnames(data) <- vapply(colnames,paste,character(1),collapse = ".") |
|
308 |
+ bamfiles <- do.call(c,lapply(args,bamfiles)) |
|
309 |
+ seqinfo <- seqinfo(args[[1L]]) |
|
310 |
+ .SequenceDataFrame(class = gsub("SequenceDataFrame","",className), |
|
311 |
+ df = data, |
|
312 |
+ ranges = ranges, |
|
313 |
+ sequence = sequences, |
|
314 |
+ replicate = replicate, |
|
315 |
+ condition = condition, |
|
316 |
+ bamfiles = bamfiles, |
|
317 |
+ seqinfo = seqinfo) |
|
318 |
+ } |
|
319 |
+) |
|
320 |
+ |
|
252 | 321 |
#' @importFrom stats setNames |
253 | 322 |
#' @rdname SequenceDataFrame-class |
254 | 323 |
#' @export |
... | ... |
@@ -272,15 +341,29 @@ setMethod( |
272 | 341 |
return(x) |
273 | 342 |
j <- i |
274 | 343 |
} |
275 |
- if (!is(j, "IntegerRanges")) { |
|
276 |
- xstub <- stats::setNames(seq_along(x), names(x)) |
|
344 |
+ xstub <- stats::setNames(seq_along(x), names(x)) |
|
345 |
+ ia <- interaction(conditions(x), replicates(x)) |
|
346 |
+ if(is.character(j)){ |
|
277 | 347 |
j <- normalizeSingleBracketSubscript(j, xstub) |
348 |
+ j <- as.integer(ia)[j] |
|
278 | 349 |
} |
279 |
- x <- initialize(x, as(x,"DataFrame")[, j, drop = FALSE], |
|
350 |
+ colnames <- IRanges::CharacterList(strsplit(colnames(x),"\\.")) |
|
351 |
+ colnames_conditions <- colnames %in% c("treated","control") |
|
352 |
+ colnames_replicates <- !is.na(suppressWarnings(IntegerList(colnames))) |
|
353 |
+ colnames_f <- !(colnames_conditions | colnames_replicates) |
|
354 |
+ conditionsFmultiplier <- length(unique(vapply(colnames[colnames_f], |
|
355 |
+ paste,character(1), |
|
356 |
+ collapse="."))) |
|
357 |
+ j <- normalizeSingleBracketSubscript(j, xstub[seq_len(length(xstub)/conditionsFmultiplier)]) |
|
358 |
+ j2 <- which(!is.na(match(as.integer(ia), j))) |
|
359 |
+ x <- initialize(x, |
|
360 |
+ as(x,"DataFrame")[, j2, drop = FALSE], |
|
280 | 361 |
ranges = x@ranges, |
281 | 362 |
sequence = x@sequence, |
282 |
- replicate = x@replicate[j], |
|
283 |
- condition = x@condition[j]) |
|
363 |
+ replicate = factor(x@replicate[j2]), |
|
364 |
+ condition = factor(x@condition[j2]), |
|
365 |
+ bamfiles = x@bamfiles[j], |
|
366 |
+ seqinfo = x@seqinfo) |
|
284 | 367 |
if (anyDuplicated(names(x))){ |
285 | 368 |
names(x) <- make.unique(names(x)) |
286 | 369 |
} |
... | ... |
@@ -11,7 +11,8 @@ |
11 | 11 |
\alias{getDataTrack,CoverageSequenceData-method} |
12 | 12 |
\title{CoverageSequenceData} |
13 | 13 |
\usage{ |
14 |
-CoverageSequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
14 |
+CoverageSequenceDataFrame(df, ranges, sequence, replicate, condition, |
|
15 |
+ bamfiles, seqinfo) |
|
15 | 16 |
|
16 | 17 |
CoverageSequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
17 | 18 |
|
... | ... |
@@ -25,6 +26,10 @@ CoverageSequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
25 | 26 |
\S4method{getDataTrack}{CoverageSequenceData}(x, name, ...) |
26 | 27 |
} |
27 | 28 |
\arguments{ |
29 |
+\item{df, ranges, sequence, replicate}{inputs for creating a |
|
30 |
+\code{SequenceDataFrame}. See |
|
31 |
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.} |
|
32 |
+ |
|
28 | 33 |
\item{condition}{For \code{\link{aggregate}}: condition for which the data |
29 | 34 |
should be aggregated.} |
30 | 35 |
|
... | ... |
@@ -25,11 +25,14 @@ |
25 | 25 |
\alias{getDataTrack,End3SequenceData-method} |
26 | 26 |
\title{End5SequenceData/End3SequenceData/EndSequenceData} |
27 | 27 |
\usage{ |
28 |
-End5SequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
28 |
+End5SequenceDataFrame(df, ranges, sequence, replicate, condition, bamfiles, |
|
29 |
+ seqinfo) |
|
29 | 30 |
|
30 |
-End3SequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
31 |
+End3SequenceDataFrame(df, ranges, sequence, replicate, condition, bamfiles, |
|
32 |
+ seqinfo) |
|
31 | 33 |
|
32 |
-EndSequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
34 |
+EndSequenceDataFrame(df, ranges, sequence, replicate, condition, bamfiles, |
|
35 |
+ seqinfo) |
|
33 | 36 |
|
34 | 37 |
End5SequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
35 | 38 |
|
... | ... |
@@ -65,6 +68,10 @@ EndSequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
65 | 68 |
\S4method{getDataTrack}{End3SequenceData}(x, name, ...) |
66 | 69 |
} |
67 | 70 |
\arguments{ |
71 |
+\item{df, ranges, sequence, replicate}{inputs for creating a |
|
72 |
+\code{SequenceDataFrame}. See |
|
73 |
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.} |
|
74 |
+ |
|
68 | 75 |
\item{condition}{For \code{\link{aggregate}}: condition for which the data |
69 | 76 |
should be aggregated.} |
70 | 77 |
|
... | ... |
@@ -19,9 +19,11 @@ |
19 | 19 |
\alias{getDataTrack,NormEnd3SequenceData-method} |
20 | 20 |
\title{NormEnd5SequenceData/NormEnd3SequenceData} |
21 | 21 |
\usage{ |
22 |
-NormEnd5SequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
22 |
+NormEnd5SequenceDataFrame(df, ranges, sequence, replicate, condition, |
|
23 |
+ bamfiles, seqinfo) |
|
23 | 24 |
|
24 |
-NormEnd3SequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
25 |
+NormEnd3SequenceDataFrame(df, ranges, sequence, replicate, condition, |
|
26 |
+ bamfiles, seqinfo) |
|
25 | 27 |
|
26 | 28 |
NormEnd5SequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
27 | 29 |
|
... | ... |
@@ -46,6 +48,10 @@ NormEnd3SequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
46 | 48 |
\S4method{getDataTrack}{NormEnd3SequenceData}(x, name, ...) |
47 | 49 |
} |
48 | 50 |
\arguments{ |
51 |
+\item{df, ranges, sequence, replicate}{inputs for creating a |
|
52 |
+\code{SequenceDataFrame}. See |
|
53 |
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.} |
|
54 |
+ |
|
49 | 55 |
\item{condition}{For \code{\link{aggregate}}: condition for which the data |
50 | 56 |
should be aggregated.} |
51 | 57 |
|
... | ... |
@@ -13,7 +13,8 @@ |
13 | 13 |
\alias{pileupToCoverage,PileupSequenceData-method} |
14 | 14 |
\title{PileupSequenceData} |
15 | 15 |
\usage{ |
16 |
-PileupSequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
16 |
+PileupSequenceDataFrame(df, ranges, sequence, replicate, condition, |
|
17 |
+ bamfiles, seqinfo) |
|
17 | 18 |
|
18 | 19 |
PileupSequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
19 | 20 |
|
... | ... |
@@ -31,6 +32,10 @@ pileupToCoverage(x) |
31 | 32 |
\S4method{pileupToCoverage}{PileupSequenceData}(x) |
32 | 33 |
} |
33 | 34 |
\arguments{ |
35 |
+\item{df, ranges, sequence, replicate}{inputs for creating a |
|
36 |
+\code{SequenceDataFrame}. See |
|
37 |
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.} |
|
38 |
+ |
|
34 | 39 |
\item{condition}{For \code{\link{aggregate}}: condition for which the data |
35 | 40 |
should be aggregated.} |
36 | 41 |
|
... | ... |
@@ -11,7 +11,8 @@ |
11 | 11 |
\alias{getDataTrack,ProtectedEndSequenceData-method} |
12 | 12 |
\title{ProtectedEndSequenceData} |
13 | 13 |
\usage{ |
14 |
-ProtectedEndSequenceDataFrame(df, ranges, sequence, replicate, condition) |
|
14 |
+ProtectedEndSequenceDataFrame(df, ranges, sequence, replicate, condition, |
|
15 |
+ bamfiles, seqinfo) |
|
15 | 16 |
|
16 | 17 |
ProtectedEndSequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
17 | 18 |
|
... | ... |
@@ -25,6 +26,10 @@ ProtectedEndSequenceData(bamfiles, annotation, sequences, seqinfo, ...) |
25 | 26 |
\S4method{getDataTrack}{ProtectedEndSequenceData}(x, name, ...) |
26 | 27 |
} |
27 | 28 |
\arguments{ |
29 |
+\item{df, ranges, sequence, replicate}{inputs for creating a |
|
30 |
+\code{SequenceDataFrame}. See |
|
31 |
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.} |
|
32 |
+ |
|
28 | 33 |
\item{condition}{For \code{\link{aggregate}}: condition for which the data |
29 | 34 |
should be aggregated.} |
30 | 35 |
|
... | ... |
@@ -1,11 +1,86 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/SequenceData-class.R |
3 |
+\docType{methods} |
|
3 | 4 |
\name{SequenceData-class} |
4 | 5 |
\alias{SequenceData-class} |
5 | 6 |
\alias{SequenceData} |
7 |
+\alias{cbind,SequenceData-method} |
|
8 |
+\alias{rbind,SequenceData-method} |
|
9 |
+\alias{SequenceData,character,character-method} |
|
10 |
+\alias{SequenceData,character,BSgenome-method} |
|
11 |
+\alias{SequenceData,TxDb,character-method} |
|
12 |
+\alias{SequenceData,TxDb,BSgenome-method} |
|
13 |
+\alias{SequenceData,GRangesList,character-method} |
|
14 |
+\alias{SequenceData,GRangesList,BSgenome-method} |
|
15 |
+\alias{SequenceData,GFF3File,BSgenome-method} |
|
16 |
+\alias{SequenceData,GFF3File,character-method} |
|
17 |
+\alias{SequenceData,character,FaFile-method} |
|
18 |
+\alias{SequenceData,GFF3File,FaFile-method} |
|
19 |
+\alias{SequenceData,TxDb,FaFile-method} |
|
20 |
+\alias{SequenceData,GRangesList,FaFile-method} |
|
6 | 21 |
\title{The SequenceData class} |
22 |
+\usage{ |
|
23 |
+\S4method{cbind}{SequenceData}(..., deparse.level = 1) |
|
24 |
+ |
|
25 |
+\S4method{rbind}{SequenceData}(..., deparse.level = 1) |
|
26 |
+ |
|
27 |
+SequenceData(dataType, bamfiles, annotation, sequences, seqinfo, ...) |
|
28 |
+ |
|
29 |
+\S4method{SequenceData}{character,character}(dataType, bamfiles, |
|
30 |
+ annotation, sequences, seqinfo, ...) |
|
31 |
+ |
|
32 |
+\S4method{SequenceData}{character,BSgenome}(dataType, bamfiles, annotation, |
|
33 |
+ sequences, seqinfo, ...) |
|
34 |
+ |
|
35 |
+\S4method{SequenceData}{TxDb,character}(dataType, bamfiles, annotation, |
|
36 |
+ sequences, seqinfo, ...) |
|
37 |
+ |
|
38 |
+\S4method{SequenceData}{TxDb,BSgenome}(dataType, bamfiles, annotation, |
|
39 |
+ sequences, seqinfo, ...) |
|
40 |
+ |
|
41 |
+\S4method{SequenceData}{GRangesList,character}(dataType, bamfiles, |
|
42 |
+ annotation, sequences, seqinfo, ...) |
|
43 |
+ |
|
44 |
+\S4method{SequenceData}{GRangesList,BSgenome}(dataType, bamfiles, |
|
45 |
+ annotation, sequences, seqinfo, ...) |
|
46 |
+ |
|
47 |
+\S4method{SequenceData}{GFF3File,BSgenome}(dataType, bamfiles, annotation, |
|
48 |
+ sequences, seqinfo, ...) |
|
49 |
+ |
|
50 |
+\S4method{SequenceData}{GFF3File,character}(dataType, bamfiles, annotation, |
|
51 |
+ sequences, seqinfo, ...) |
|
52 |
+ |
|
53 |
+\S4method{SequenceData}{character,FaFile}(dataType, bamfiles, annotation, |
|
54 |
+ sequences, seqinfo, ...) |
|
55 |
+ |
|
56 |
+\S4method{SequenceData}{GFF3File,FaFile}(dataType, bamfiles, annotation, |
|
57 |
+ sequences, seqinfo, ...) |
|
58 |
+ |
|
59 |
+\S4method{SequenceData}{TxDb,FaFile}(dataType, bamfiles, annotation, |
|
60 |
+ sequences, seqinfo, ...) |
|
61 |
+ |
|
62 |
+\S4method{SequenceData}{GRangesList,FaFile}(dataType, bamfiles, annotation, |
|
63 |
+ sequences, seqinfo, ...) |
|
64 |
+} |
|
7 | 65 |
\arguments{ |
8 |
-\item{dataType}{The prefix for construction the class name of the |
|
66 |
+\item{...}{Optional arguments overwriting default values. Not all |
|
67 |
+\code{SequenceData} classes use all arguments. The arguments are: |
|
68 |
+\itemize{ |
|
69 |
+\item{\code{minLength}} {single integer value setting a threshold for minimum |
|
70 |
+read length. Shorter reads are discarded (default: \code{minLength = NA}).} |
|
71 |
+\item{\code{maxLength}} {single integer value setting a threshold for maximum |
|
72 |
+read length. Longer reads are discarded (default: \code{maxLength = NA}).} |
|
73 |
+\item{\code{minQuality}} {single integer value setting a threshold for minimum |
|
74 |
+read quality. Reads with a lower quality are discarded (default: |
|
75 |
+\code{minQuality = 5L}, but this is class dependent).} |
|
76 |
+\item{\code{max_depth}} {maximum depth for pileup loading (default: |
|
77 |
+\code{max_depth = 10000L}).} |
|
78 |
+}} |
|
79 |
+ |
|
80 |
+\item{deparse.level}{See \code{\link[base:cbind]{base::cbind}} for a |
|
81 |
+description of this argument.} |
|
82 |
+ |
|
83 |
+\item{dataType}{The prefix for constructing the class name of the |
|
9 | 84 |
\code{SequenceData} subclass to be constructed.} |
10 | 85 |
|
11 | 86 |
\item{bamfiles}{the input which can be of the following types |
... | ... |
@@ -19,25 +94,11 @@ to a named \code{BamFileList} referencing existing bam files. Valid names are |
19 | 94 |
\item{annotation}{annotation data, which must match the information contained |
20 | 95 |
in the BAM files.} |
21 | 96 |
|
22 |
-\item{sequences}{sequences matching the target sequences the reads were |
|
97 |
+\item{sequences}{sequences matching the target sequences the reads were |
|
23 | 98 |
mapped onto. This must match the information contained in the BAM files.} |
24 | 99 |
|
25 |
-\item{seqinfo}{optional \code{\link[GenomeInfoDb:Seqinfo]{Seqinfo}} to |
|
100 |
+\item{seqinfo}{optional \code{\link[GenomeInfoDb:Seqinfo]{Seqinfo}} to |
|
26 | 101 |
subset the transcripts analyzed on a chromosome basis.} |
27 |
- |
|
28 |
-\item{...}{Optional arguments overwriting default values. Not all |
|
29 |
-\code{SequenceData} classes use all arguments. The arguments are: |
|
30 |
-\itemize{ |
|
31 |
-\item{\code{minLength}} {single integer value setting a threshold for minimum |
|
32 |
-read length. Shorther reads are discarded (default: \code{minLength = NA}).} |
|
33 |
-\item{\code{maxLength}} {single integer value setting a threshold for maximum |
|
34 |
-read length. Longer reads are discarded (default: \code{maxLength = NA}).} |
|
35 |
-\item{\code{minQuality}} {single integer value setting a threshold for maximum |
|
36 |
-read quality. Reads with a lower quality are discarded (default: |
|
37 |
-\code{minQuality = 5L}, but this is class dependent).} |
|
38 |
-\item{\code{max_depth}} {maximum depth for pileup loading (default: |
|
39 |
-\code{max_depth = 10000L}).} |
|
40 |
-}} |
|
41 | 102 |
} |
42 | 103 |
\description{ |
43 | 104 |
The \code{SequenceData} class is implemented to contain data on each position |
... | ... |
@@ -48,11 +109,11 @@ nucleotide sequence of these transcripts. To access this data several |
48 | 109 |
be extended. Currently the following classes are implemented: |
49 | 110 |
|
50 | 111 |
\itemize{ |
51 |
-\item{\code{\link[=CoverageSequenceData-class]{CoverageSequenceData}}} |
|
52 |
-\item{\code{\link[=EndSequenceData-class]{End5SequenceData}}, |
|
53 |
-\code{\link[=EndSequenceData-class]{End3SequenceData}}, |
|
112 |
+\item{\code{\link[=CoverageSequenceData-class]{CoverageSequenceData}}} |
|
113 |
+\item{\code{\link[=EndSequenceData-class]{End5SequenceData}}, |
|
114 |
+\code{\link[=EndSequenceData-class]{End3SequenceData}}, |
|
54 | 115 |
\code{\link[=EndSequenceData-class]{EndSequenceData}}} |
55 |
-\item{\code{\link[=NormEndSequenceData-class]{NormEnd5SequenceData}}, |
|
116 |
+\item{\code{\link[=NormEndSequenceData-class]{NormEnd5SequenceData}}, |
|
56 | 117 |
\code{\link[=NormEndSequenceData-class]{NormEnd3SequenceData}}} |
57 | 118 |
\item{\code{\link[=PileupSequenceData-class]{PileupSequenceData}}} |
58 | 119 |
\item{\code{\link[=ProtectedEndSequenceData-class]{ProtectedEndSequenceData}}} |
... | ... |
@@ -67,30 +128,42 @@ transcript. Therefore, it is necessary to treat the minus strand accordingly. |
67 | 128 |
The \code{SequenceData} class is derived from the |
68 | 129 |
\code{\link[IRanges:DataFrameList-class]{CompressedSplitDataFrameList}} class |
69 | 130 |
with additional slots for annotation and sequence data. Some functionality is |
70 |
-not inherited and not available, e.g. \code{cbind}, \code{rbind} amd |
|
71 |
-\code{relist}. |
|
131 |
+not inherited and might not be available to its full extent, e.g. \code{relist}. |
|
132 |
+ |
|
133 |
+\strong{SequenceDataFrame} |
|
134 |
+ |
|
135 |
+The \code{SequenceDataFrame} class contains data for positions along a single |
|
136 |
+transcript. It is used to describe elements from a \code{SequenceData} |
|
137 |
+object. |
|
138 |
+ |
|
139 |
+The \code{SequenceDataFrame} class is derived from the |
|
140 |
+\code{\link[S4Vectors:DataFrame-class]{DataFrame}} class. |
|
141 |
+ |
|
142 |
+Subsetting of a \code{SequenceDataFrame} returns a \code{SequenceDataFrame} or |
|
143 |
+\code{DataFrame}, if it is subset by a column or row, respectively. The |
|
144 |
+\code{drop} argument is ignored for column subsetting. |
|
72 | 145 |
} |
73 | 146 |
\section{Slots}{ |
74 | 147 |
|
75 | 148 |
\describe{ |
76 |
-\item{\code{ranges}}{a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}} |
|
77 |
-object each element describing a transcript including its element. The |
|
78 |
-\code{GRangesList} is constructed from the |
|
149 |
+\item{\code{ranges}}{a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}} |
|
150 |
+object each element describing a transcript including its element. The |
|
151 |
+\code{GRangesList} is constructed from the |
|
79 | 152 |
\code{\link[GenomicFeatures:transcriptsBy]{exonsBy(x, by="tx")}} function. |
80 |
-If during construction a \code{GRangesList} is provided instead of a |
|
153 |
+If during construction a \code{GRangesList} is provided instead of a |
|
81 | 154 |
character value pointing to a gff3 file or a \code{TxDb} object, it must have |
82 | 155 |
a comparable structure.} |
83 | 156 |
|
84 |
-\item{\code{sequences}}{a \code{\link[Biostrings:XStringSet-class]{XStringSet}} of |
|
157 |
+\item{\code{sequences}}{a \code{\link[Biostrings:XStringSet-class]{XStringSet}} of |
|
85 | 158 |
type \code{sequencesType}.} |
86 | 159 |
|
87 |
-\item{\code{sequencesType}}{a \code{character} value for the class name of |
|
160 |
+\item{\code{sequencesType}}{a \code{character} value for the class name of |
|
88 | 161 |
\code{sequences}. Either \code{RNAStringSet} or \code{ModRNAStringSet}.} |
89 | 162 |
|
90 |
-\item{\code{bamfiles}}{the input bam files as |
|
163 |
+\item{\code{bamfiles}}{the input bam files as |
|
91 | 164 |
\code{\link[Rsamtools:BamFile-class]{BamFileList}}} |
92 | 165 |
|
93 |
-\item{\code{condition}}{conditions along the |
|
166 |
+\item{\code{condition}}{conditions along the |
|
94 | 167 |
\code{\link[Rsamtools:BamFile-class]{BamFileList}}: Either \code{control} |
95 | 168 |
or \code{treated}} |
96 | 169 |
|
... | ... |
@@ -11,6 +11,8 @@ |
11 | 11 |
\alias{ranges,SequenceDataFrame-method} |
12 | 12 |
\alias{replicates,SequenceDataFrame-method} |
13 | 13 |
\alias{conditions,SequenceDataFrame-method} |
14 |
+\alias{bamfiles,SequenceDataFrame-method} |
|
15 |
+\alias{seqinfo,SequenceDataFrame-method} |
|
14 | 16 |
\alias{show,SequenceData-method} |
15 | 17 |
\alias{getData,SequenceData,BamFileList,GRangesList,XStringSet,ScanBamParam-method} |
16 | 18 |
\alias{bamfiles,SequenceData-method} |
... | ... |
@@ -50,6 +52,10 @@ replicates(x) |
50 | 52 |
|
51 | 53 |
\S4method{conditions}{SequenceDataFrame}(object) |
52 | 54 |
|
55 |
+\S4method{bamfiles}{SequenceDataFrame}(x) |
|
56 |
+ |
|
57 |
+\S4method{seqinfo}{SequenceDataFrame}(x) |
|
58 |
+ |
|
53 | 59 |
\S4method{show}{SequenceData}(object) |
54 | 60 |
|
55 | 61 |
|
... | ... |
@@ -4,31 +4,18 @@ |
4 | 4 |
\name{SequenceDataFrame-class} |
5 | 5 |
\alias{SequenceDataFrame-class} |
6 | 6 |
\alias{SequenceDataFrame} |
7 |
+\alias{cbind,SequenceDataFrame-method} |
|
7 | 8 |
\alias{[,SequenceDataFrame,ANY,ANY,ANY-method} |
8 | 9 |
\title{The SequenceDataFrame class} |
9 | 10 |
\usage{ |
11 |
+\S4method{cbind}{SequenceDataFrame}(..., deparse.level = 1) |
|
12 |
+ |
|
10 | 13 |
\S4method{[}{SequenceDataFrame,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) |
11 | 14 |
} |
12 | 15 |
\arguments{ |
13 |
-\item{x, i, j, ..., drop}{arguments used for |
|
14 |
-\code{\link[S4Vectors:DataFrame-class]{subsetting}}.} |
|
15 |
- |
|
16 |
-\item{df}{the data as a \code{DataFrame}.} |
|
17 |
- |
|
18 |
-\item{ranges}{a \code{GRanges} object containing all annotation elements |
|
19 |
-for a transcript.} |
|
20 |
- |
|
21 |
-\item{sequence}{\code{XString} object describing the nucleotide sequence of |
|
22 |
-the transcript.} |
|
23 |
- |
|
24 |
-\item{condition}{The condition of each column or set of columns. Either |
|
25 |
-\code{control} or \code{treated}.} |
|
26 |
- |
|
27 |
-\item{replicate}{The replicate of each column or set of columns for the |
|
28 |
-individual conditions} |
|
29 |
-} |
|
30 |
-\value{ |
|
31 |
-a \code{SequenceDataFrame} object |
|
16 |
+\item{x, i, j, ..., drop, deparse.level}{arguments used for |
|
17 |
+\code{\link[S4Vectors:DataFrame-class]{subsetting}} or |
|
18 |
+\code{\link[base:cbind]{base::cbind}}.} |
|
32 | 19 |
} |
33 | 20 |
\description{ |
34 | 21 |
The \code{SequenceDataFrame} class contains data for positions along a single |
... | ... |
@@ -14,7 +14,9 @@ test_that("SequenceDataFrame:",{ |
14 | 14 |
ranges(sdf), |
15 | 15 |
sequences(sdf), |
16 | 16 |
replicates(sdf), |
17 |
- conditions(sdf)) |
|
17 |
+ conditions(sdf), |
|
18 |
+ bamfiles(sdf), |
|
19 |
+ seqinfo(sdf)) |
|
18 | 20 |
expect_equal(sdf,sdf2) |
19 | 21 |
############################################################################## |
20 | 22 |
# errors |
... | ... |
@@ -49,11 +51,11 @@ test_that("SequenceDataFrame:",{ |
49 | 51 |
expect_s4_class(sdf[1,,drop = FALSE],"DataFrame") |
50 | 52 |
expect_equal(ncol(sdf[1,,drop = FALSE]),ncol(sdf)) |
51 | 53 |
expect_s4_class(sdf[,1],"PileupSequenceDataFrame") |
52 |
- expect_equal(ncol(sdf[,1]),1L) |
|
54 |
+ expect_equal(ncol(sdf[,1]),5L) |
|
53 | 55 |
expect_type(sdf["1",],"list") |
54 | 56 |
expect_equal(length(sdf["1",]),ncol(sdf)) |
55 | 57 |
expect_s4_class(sdf["1",,drop = FALSE],"DataFrame") |
56 | 58 |
expect_equal(ncol(sdf["1",,drop = FALSE]),ncol(sdf)) |
57 | 59 |
expect_s4_class(sdf[,"pileup.treated.1.G"],"PileupSequenceDataFrame") |
58 |
- expect_equal(ncol(sdf[,"pileup.treated.1.G"]),1L) |
|
60 |
+ expect_equal(ncol(sdf[,"pileup.treated.1.G"]),5L) |
|
59 | 61 |
}) |
... | ... |
@@ -61,7 +61,7 @@ test_that("Modifier/ModifierSet:",{ |
61 | 61 |
factor("*", levels = c("+","-","*"))) |
62 | 62 |
expect_true(is.factor(conditions(msi[[1]]))) |
63 | 63 |
expect_equal(conditions(msi[[1]]), |
64 |
- factor(rep("treated",ncol(sequenceData(msi[[1]]))[1]))) |
|
64 |
+ factor(rep("treated",ncol(sequenceData(msi[[1]]))[1]/5))) |
|
65 | 65 |
############################################################################## |
66 | 66 |
skip_on_bioc() |
67 | 67 |
# Modifier creation |
... | ... |
@@ -217,3 +217,24 @@ test_that("Subsetting Modifier/ModifierSet:",{ |
217 | 217 |
expect_type(actual@unlistData$labels,"logical") |
218 | 218 |
|
219 | 219 |
}) |
220 |
+ |
|
221 |
+context("Combining SequenceData") |
|
222 |
+test_that("Combining SequenceData:",{ |
|
223 |
+ data(psd,package = "RNAmodR") |
|
224 |
+ expect_error(c(psd[1],psd[1]), |
|
225 |
+ "Input must have unique names.") |
|
226 |
+ expect_error(cbind(psd[1],psd[2]), |
|
227 |
+ "Inputs must have the same lengths.") |
|
228 |
+ expect_error(cbind(psd[1],psd), |
|
229 |
+ "Inputs must have the same lengths.") |
|
230 |
+ expect_error(rbind(psd[1],psd[,1:2]), |
|
231 |
+ "Inputs must have the same width.") |
|
232 |
+ expect_s4_class(c(psd[1],psd[2]), |
|
233 |
+ "PileupSequenceData") |
|
234 |
+ expect_s4_class(cbind(psd[1],psd[1]), |
|
235 |
+ "PileupSequenceData") |
|
236 |
+ expect_s4_class(rbind(psd[1],psd), |
|
237 |
+ "PileupSequenceData") |
|
238 |
+ expect_equal(relist(unlist(psd),psd),psd) |
|
239 |
+ expect_equal(relist(unlist(psd,use.names = FALSE),psd),psd) |
|
240 |
+}) |