Browse code

added support for c, cbind, rbind and relist function for SequenceData objects

Felix Ernst authored on 30/05/2019 22:25:57
Showing 33 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: RNAmodR
2 2
 Type: Package
3 3
 Title: Detection of post-transcriptional modifications
4
-Version: 0.99.16
5
-Date: 2019-05-28
4
+Version: 0.99.17
5
+Date: 2019-05-31
6 6
 Authors@R: c(person("Felix G.M.", 
7 7
                     "Ernst", 
8 8
                     email = "felix.gm.ernst@outlook.com", 
... ...
@@ -23,6 +23,7 @@ export(PileupSequenceDataFrame)
23 23
 export(ProtectedEndSequenceData)
24 24
 export(ProtectedEndSequenceDataFrame)
25 25
 export(RNASequenceTrack)
26
+export(SequenceData)
26 27
 export(SequenceDataList)
27 28
 export(SequenceDataSet)
28 29
 export(aggregate)
... ...
@@ -84,9 +85,11 @@ exportMethods("[")
84 85
 exportMethods("settings<-")
85 86
 exportMethods(Modifier)
86 87
 exportMethods(ModifierSet)
88
+exportMethods(SequenceData)
87 89
 exportMethods(aggregate)
88 90
 exportMethods(aggregateData)
89 91
 exportMethods(bamfiles)
92
+exportMethods(cbind)
90 93
 exportMethods(compareByCoord)
91 94
 exportMethods(conditions)
92 95
 exportMethods(dataType)
... ...
@@ -108,6 +111,7 @@ exportMethods(plotData)
108 111
 exportMethods(plotDataByCoord)
109 112
 exportMethods(plotROC)
110 113
 exportMethods(ranges)
114
+exportMethods(rbind)
111 115
 exportMethods(replicates)
112 116
 exportMethods(seqinfo)
113 117
 exportMethods(sequenceData)
... ...
@@ -383,7 +383,7 @@ setMethod(f = "bamfiles",
383 383
 setMethod(f = "conditions",
384 384
           signature = signature(object = "Modifier"),
385 385
           definition = function(object){
386
-            conditions(sequenceData(object))
386
+            object@condition
387 387
           })
388 388
 #' @rdname Modifier-functions
389 389
 #' @export
... ...
@@ -461,7 +461,7 @@ setMethod(f = "ranges",
461 461
 setMethod(f = "replicates",
462 462
           signature = signature(x = "Modifier"),
463 463
           definition = function(x){
464
-            replicates(sequenceData(x))
464
+            x@replicate
465 465
           })
466 466
 #' @rdname Modifier-functions
467 467
 #' @export
... ...
@@ -628,12 +628,12 @@ setReplaceMethod(f = "settings",
628 628
   data <- .norm_Modifier_input_SequenceData_elements(data, proto)
629 629
   bamfiles <- bamfiles(data)
630 630
   condition <- factor(names(bamfiles))
631
-  new2(className,
632
-       mod = .norm_mod(proto@mod, className),
633
-       bamfiles = bamfiles,
634
-       condition = condition,
635
-       replicate = .get_replicate_number(bamfiles, condition),
636
-       data = data)
631
+  new(className,
632
+      mod = .norm_mod(proto@mod, className),
633
+      bamfiles = bamfiles,
634
+      condition = condition,
635
+      replicate = .get_replicate_number(condition),
636
+      data = data)
637 637
 }
638 638
 
639 639
 .load_SequenceData <- function(classes, bamfiles, annotation, sequences,
... ...
@@ -9,6 +9,8 @@ NULL
9 9
 #' 
10 10
 #' @title The SequenceData class
11 11
 #' 
12
+#' @md
13
+#' 
12 14
 #' @description 
13 15
 #' The \code{SequenceData} class is implemented to contain data on each position
14 16
 #' along transcripts and holds the corresponding annotation data and
... ...
@@ -37,8 +39,20 @@ NULL
37 39
 #' The \code{SequenceData} class is derived from the
38 40
 #' \code{\link[IRanges:DataFrameList-class]{CompressedSplitDataFrameList}} class
39 41
 #' with additional slots for annotation and sequence data. Some functionality is
40
-#' not inherited and not available, e.g. \code{cbind}, \code{rbind} amd
41
-#' \code{relist}.
42
+#' not inherited and might not be fully available, e.g. \code{relist}.
43
+#' 
44
+#' **SequenceDataFrame**
45
+#' 
46
+#' The \code{SequenceDataFrame} class contains data for positions along a single
47
+#' transcript. It is used to describe elements from a \code{SequenceData}
48
+#' object.
49
+#' 
50
+#' The \code{SequenceDataFrame} class is derived from the
51
+#' \code{\link[S4Vectors:DataFrame-class]{DataFrame}} class.
52
+#' 
53
+#' Subsetting of a \code{SequenceDataFrame} returns a \code{SequenceDataFrame} or 
54
+#' \code{DataFrame}, if it is subset by a column or row, respectively. The 
55
+#' \code{drop} argument is ignored for column subsetting.
42 56
 #' 
43 57
 #' @param dataType The prefix for constructing the class name of the 
44 58
 #' \code{SequenceData} subclass to be constructed.
... ...
@@ -68,6 +82,8 @@ NULL
68 82
 #' \item{\code{max_depth}} {maximum depth for pileup loading (default: 
69 83
 #' \code{max_depth = 10000L}).}
70 84
 #' }
85
+#' @param deparse.level See \code{\link[base:cbind]{base::cbind}} for a 
86
+#' description of this argument.
71 87
 #' 
72 88
 #' @slot ranges a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}} 
73 89
 #' object each element describing a transcript including its element. The 
... ...
@@ -96,8 +112,6 @@ NULL
96 112
 setClass("SequenceData",
97 113
          contains = c("VIRTUAL", "CompressedSplitDataFrameList"),
98 114
          slots = c(sequencesType = "character",
99
-                   bamfiles = "BamFileList",
100
-                   seqinfo = "Seqinfo",
101 115
                    minQuality = "integer",
102 116
                    unlistData = "SequenceDataFrame",
103 117
                    unlistType = "character",
... ...
@@ -192,12 +206,33 @@ S4Vectors::setValidity2(Class = "SequenceData", .valid.SequenceData)
192 206
 
193 207
 # coercion ---------------------------------------------------------------------
194 208
 
195
-.as_SplitDataFrameList <- function(from){
196
-  relist(as(unlist(from, use.names = FALSE),"DataFrame"),
197
-         IRanges::PartitioningByWidth(from))
209
+coerceToSequenceData <- function(className) {
210
+  function(from) {
211
+    if(is.list(from)) {
212
+      classes <- unlist(lapply(from,class))
213
+      from <- from[classes == paste0(className,"Frame")]
214
+      if(length(from) == 0) {
215
+        FUN <- match.fun(className)
216
+        from <- list(FUN())
217
+      }
218
+    } else {
219
+      if(is(from,className)){
220
+        return(from)
221
+      } else if(is(from,paste0(className,"Frame"))) {
222
+        from <- list(from)
223
+      } else {
224
+        stop("Cannot coerce ",class(from)," to ",className,".")
225
+      }
226
+    }
227
+    IRanges:::coerceToCompressedList(from)
228
+  }
198 229
 }
199
-setAs("SequenceData", "SplitDataFrameList", .as_SplitDataFrameList)
200 230
 
231
+setSequenceDataCoercions <- function(type) {
232
+  className <- sequenceDataClass(type)
233
+  setAs("ANY", className, coerceToSequenceData(className))
234
+  setAs("list", className, coerceToSequenceData(className))
235
+}
201 236
 
202 237
 # internals --------------------------------------------------------------------
203 238
 
... ...
@@ -223,8 +258,7 @@ setMethod("extractROWS", "SequenceData",
223 258
     ans_partitioning <- new("PartitioningByEnd", end = ans_breakpoints,
224 259
                             NAMES = extractROWS(names(x), i))
225 260
     ans_elementMetadata <- extractROWS(x@elementMetadata, i)
226
-    initialize(x, bamfiles = x@bamfiles, seqinfo = x@seqinfo, 
227
-               minQuality = x@minQuality, unlistData = ans_unlistData,
261
+    initialize(x, minQuality = x@minQuality, unlistData = ans_unlistData,
228 262
                partitioning = ans_partitioning, 
229 263
                elementMetadata = ans_elementMetadata)
230 264
   }
... ...
@@ -237,35 +271,115 @@ setMethod("rownames", "SequenceData",
237 271
           }
238 272
 )
239 273
 
240
-# methods inherited from List and CompressedList, contain a coercion step
241
-# x <- as(x, "List", strict = FALSE)
242
-# 
243
-# This does not keep the SequenceData object intact resulting in coercion
244
-# to a CompressedSplitDataFrameList.
245
-setMethod("[[", "SequenceData",
246
-          function(x, i, j, ...) 
247
-          {
248
-            METHOD <- selectMethod("[[", "List")
249
-            METHOD(x, i, j, ...)
250
-          }
251
-)
274
+# Concatenation ----------------------------------------------------------------
252 275
 
276
+.check_ranges <- function(args){
277
+  ranges <- lapply(args,ranges)
278
+  ranges <- vapply(ranges[seq.int(2L,length(ranges))],
279
+                   function(r){
280
+                     all(all(r == ranges[[1L]]))
281
+                   },
282
+                   logical(1))
283
+  if(!all(ranges)){
284
+    stop("Inputs must have the same ranges.")
285
+  }
286
+}
253 287
 
254
-# Concatenation ----------------------------------------------------------------
288
+.check_sequences <- function(args){
289
+  sequences <- lapply(args,sequences)
290
+  sequences <- vapply(sequences[seq.int(2L,length(sequences))],
291
+                      function(s){
292
+                        all(s == sequences[[1L]])
293
+                      },
294
+                      logical(1))
295
+  if(!all(sequences)){
296
+    stop("Inputs must have the same sequences.")
297
+  }
298
+}
255 299
 
256
-setMethod("cbind", "SequenceData",
257
-          function(...){
258
-            arg1 <- list(...)[[1L]]
259
-            stop("'rbind' is not supported for ",class(arg1),".")
300
+.check_bamfiles <- function(args){
301
+  bamfiles <- lapply(args,bamfiles)
302
+  bamfiles <- vapply(bamfiles[seq.int(2L,length(bamfiles))],
303
+                     function(b){
304
+                       all(path(b) == path(bamfiles[[1L]]))
305
+                     },
306
+                     logical(1))
307
+  if(!all(bamfiles)){
308
+    stop("Inputs must be derived from the same bamfiles.")
260 309
   }
310
+}
311
+
312
+#' @rdname SequenceData-class
313
+#' @export
314
+setMethod("cbind", "SequenceData",
315
+          function(..., deparse.level = 1) 
316
+          {
317
+            args <- list(...)
318
+            if(length(args) == 1L){
319
+              return(args[[1L]])
320
+            }
321
+            # input checks
322
+            classes <- lapply(args,class)
323
+            if(length(unique(classes)) != 1L){
324
+              stop("Inputs must be of the same SequenceDataFrame type.")
325
+            }
326
+            lengths <- vapply(args,function(a){sum(lengths(a))},integer(1))
327
+            if(length(unique(lengths)) != 1L){
328
+              stop("Inputs must have the same lengths.")
329
+            }
330
+            .check_ranges(args)
331
+            .check_sequences(args)
332
+            callNextMethod()
333
+          }
261 334
 )
335
+
336
+#' @rdname SequenceData-class
337
+#' @export
262 338
 setMethod("rbind", "SequenceData",
263
-          function(...){
264
-            arg1 <- list(...)[[1L]]
265
-            stop("'rbind' is not supported for ",class(arg1),".")
339
+          function(..., deparse.level = 1) 
340
+          {
341
+            args <- list(...)
342
+            if(length(args) == 1L){
343
+              return(args[[1L]])
344
+            }
345
+            # input checks
346
+            classes <- lapply(args,class)
347
+            if(length(unique(classes)) != 1L){
348
+              stop("Inputs must be of the same SequenceDataFrame type.")
349
+            }
350
+            lengths <- vapply(args,function(a){ncol(unlist(a))},integer(1))
351
+            if(length(unique(lengths)) != 1L){
352
+              stop("Inputs must have the same width.")
353
+            }
354
+            .check_bamfiles(args)
355
+            callNextMethod()
266 356
           }
267 357
 )
268 358
 
359
+setMethod("bindROWS", "SequenceData",
360
+          function (x, objects = list(), use.names = TRUE, ignore.mcols = FALSE, 
361
+                    check = TRUE) 
362
+          {
363
+            objects <- S4Vectors:::prepare_objects_to_bind(x, objects)
364
+            all_objects <- c(list(x), objects)
365
+            names <- unlist(lapply(all_objects,names))
366
+            if(any(duplicated(names))){
367
+              stop("Input must have unique names.")
368
+            }
369
+            .check_bamfiles(all_objects)
370
+            callNextMethod(x, objects, use.names = use.names, 
371
+                           ignore.mcols = ignore.mcols, check = FALSE)
372
+          }
373
+)
374
+
375
+setMethod("unlist", "SequenceData",
376
+          function(x, recursive = TRUE, use.names = FALSE) 
377
+          {
378
+            callNextMethod(x, recursive = recursive, use.names = FALSE) 
379
+          }
380
+)
381
+
382
+
269 383
 # constructor ------------------------------------------------------------------
270 384
 
271 385
 .quality_settings <- data.frame(
... ...
@@ -279,9 +393,9 @@ setMethod("rbind", "SequenceData",
279 393
   .norm_settings(input, .quality_settings, minQuality)[["minQuality"]]
280 394
 }
281 395
 
282
-.get_replicate_number <- function(bamfiles, conditions){
283
-  control_rep <- seq_along(bamfiles[conditions == "control"])
284
-  treated_rep <- seq_along(bamfiles[conditions == "treated"])
396
+.get_replicate_number <- function(conditions){
397
+  control_rep <- seq_along(conditions[conditions == "control"])
398
+  treated_rep <- seq_along(conditions[conditions == "treated"])
285 399
   rep <- c(control_rep,treated_rep)
286 400
   rep <- rep[c(which(conditions == "control"),
287 401
                which(conditions == "treated"))]
... ...
@@ -365,7 +479,7 @@ setMethod("rbind", "SequenceData",
365 479
   proto <- new(className)
366 480
   minQuality <- .norm_min_quality(args, proto@minQuality)
367 481
   condition <- factor(names(bamfiles))
368
-  replicate <- .get_replicate_number(bamfiles, condition)
482
+  replicate <- .get_replicate_number(condition)
369 483
   if(!assertive::is_a_non_empty_string(proto@dataDescription)){
370 484
     stop("'dataDescription' must be a single non empty character value.")
371 485
   }
... ...
@@ -419,16 +533,18 @@ setMethod("rbind", "SequenceData",
419 533
   ##############################################################################
420 534
   # Create SequenceData object
421 535
   ##############################################################################
536
+  unlist_data <- 
537
+    .SequenceDataFrame(class = gsub("SequenceData","",className),
538
+                       df = unlist(data, use.names = FALSE),
539
+                       ranges = unlist(ranges, use.names = FALSE),
540
+                       sequence = unlist(sequences, use.names = FALSE),
541
+                       replicate = replicate,
542
+                       condition = condition,
543
+                       bamfiles = bamfiles,
544
+                       seqinfo = seqinfo)
422 545
   ans <- new(className, 
423
-             bamfiles = bamfiles,
424
-             seqinfo = seqinfo,
425 546
              minQuality = minQuality,
426
-             unlistData = .SequenceDataFrame(gsub("SequenceData","",className),
427
-                                             unlist(data, use.names = FALSE),
428
-                                             unlist(ranges, use.names = FALSE),
429
-                                             unlist(sequences, use.names = FALSE),
430
-                                             replicate,
431
-                                             condition),
547
+             unlistData = unlist_data,
432 548
              partitioning = IRanges::PartitioningByEnd(data),
433 549
              ...)
434 550
   message("OK")
... ...
@@ -558,6 +674,8 @@ setMethod("rbind", "SequenceData",
558 674
 
559 675
 ################################################################################
560 676
 
677
+#' @rdname SequenceData-class
678
+#' @export
561 679
 setGeneric( 
562 680
   name = "SequenceData",
563 681
   signature = c("annotation","sequences"),
... ...
@@ -565,72 +683,96 @@ setGeneric(
565 683
     standardGeneric("SequenceData")
566 684
 ) 
567 685
 
686
+#' @rdname SequenceData-class
687
+#' @export
568 688
 setMethod("SequenceData",
569 689
           signature = c(annotation = "character", sequences = "character"),
570 690
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
571 691
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
572 692
                               seqinfo, ...)
573 693
           })
694
+#' @rdname SequenceData-class
695
+#' @export
574 696
 setMethod("SequenceData",
575 697
           signature = c(annotation = "character", sequences = "BSgenome"),
576 698
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
577 699
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
578 700
                               seqinfo, ...)
579 701
           })
702
+#' @rdname SequenceData-class
703
+#' @export
580 704
 setMethod("SequenceData",
581 705
           signature = c(annotation = "TxDb", sequences = "character"),
582 706
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
583 707
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
584 708
                               seqinfo, ...)
585 709
           })
710
+#' @rdname SequenceData-class
711
+#' @export
586 712
 setMethod("SequenceData",
587 713
           signature = c(annotation = "TxDb", sequences = "BSgenome"),
588 714
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
589 715
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
590 716
                               seqinfo, ...)
591 717
           })
718
+#' @rdname SequenceData-class
719
+#' @export
592 720
 setMethod("SequenceData",
593 721
           signature = c(annotation = "GRangesList", sequences = "character"),
594 722
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
595 723
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
596 724
                               seqinfo, ...)
597 725
           })
726
+#' @rdname SequenceData-class
727
+#' @export
598 728
 setMethod("SequenceData",
599 729
           signature = c(annotation = "GRangesList", sequences = "BSgenome"),
600 730
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
601 731
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
602 732
                               seqinfo, ...)
603 733
           })
734
+#' @rdname SequenceData-class
735
+#' @export
604 736
 setMethod("SequenceData",
605 737
           signature = c(annotation = "GFF3File", sequences = "BSgenome"),
606 738
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
607 739
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
608 740
                               seqinfo, ...)
609 741
           })
742
+#' @rdname SequenceData-class
743
+#' @export
610 744
 setMethod("SequenceData",
611 745
           signature = c(annotation = "GFF3File", sequences = "character"),
612 746
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
613 747
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
614 748
                               seqinfo, ...)
615 749
           })
750
+#' @rdname SequenceData-class
751
+#' @export
616 752
 setMethod("SequenceData",
617 753
           signature = c(annotation = "character", sequences = "FaFile"),
618 754
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
619 755
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
620 756
                               seqinfo, ...)
621 757
           })
758
+#' @rdname SequenceData-class
759
+#' @export
622 760
 setMethod("SequenceData",
623 761
           signature = c(annotation = "GFF3File", sequences = "FaFile"),
624 762
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
625 763
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
626 764
                               seqinfo, ...)
627 765
           })
766
+#' @rdname SequenceData-class
767
+#' @export
628 768
 setMethod("SequenceData",
629 769
           signature = c(annotation = "TxDb", sequences = "FaFile"),
630 770
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
631 771
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
632 772
                               seqinfo, ...)
633 773
           })
774
+#' @rdname SequenceData-class
775
+#' @export
634 776
 setMethod("SequenceData",
635 777
           signature = c(annotation = "GRangesList", sequences = "FaFile"),
636 778
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
... ...
@@ -659,7 +801,7 @@ setMethod("getData",
659 801
 #' @export
660 802
 setMethod(f = "bamfiles", 
661 803
           signature = signature(x = "SequenceData"),
662
-          definition = function(x){x@bamfiles})
804
+          definition = function(x){bamfiles(unlist(x))})
663 805
 #' @rdname SequenceData-functions
664 806
 #' @export
665 807
 setMethod(f = "conditions", 
... ...
@@ -674,12 +816,12 @@ setMethod(
674 816
     function(x){
675 817
       partitioning <- IRanges::PartitioningByEnd(x)
676 818
       unlisted_ranges <- ranges(unlist(x))
677
-      ends <- cumsum(width(unlisted_ranges)) == cumsum(width(partitioning))
678
-      partitioning_relist <- IRanges::PartitioningByEnd(which(ends))
679
-      names(partitioning_relist) <- names(x)
819
+      ends <- match(cumsum(width(partitioning)),cumsum(width(unlisted_ranges)))
820
+      partitioning_relist <- IRanges::PartitioningByEnd(ends)
680 821
       if(length(x) != length(partitioning_relist)){
681 822
         stop("ranges could not be relisted.")
682 823
       }
824
+      names(partitioning_relist) <- names(x)
683 825
       relist(unlisted_ranges, partitioning_relist)
684 826
     })
685 827
 #' @rdname SequenceData-functions
... ...
@@ -691,7 +833,7 @@ setMethod(f = "replicates",
691 833
 #' @export
692 834
 setMethod(f = "seqinfo", 
693 835
           signature = signature(x = "SequenceData"),
694
-          definition = function(x){x@seqinfo})
836
+          definition = function(x){seqinfo(unlist(x))})
695 837
 #' @rdname SequenceData-functions
696 838
 #' @export
697 839
 setMethod(f = "sequences", 
... ...
@@ -25,6 +25,9 @@ NULL
25 25
 #' of \code{ranges(x)}
26 26
 #' @param condition For \code{\link{aggregate}}: condition for which the data 
27 27
 #' should be aggregated.
28
+#' @param df,ranges,sequence,replicate inputs for creating a 
29
+#' \code{SequenceDataFrame}. See 
30
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.
28 31
 #' 
29 32
 #' @return a \code{CoverageSequenceData} object
30 33
 #' 
... ...
@@ -47,8 +50,9 @@ setClass(Class = "CoverageSequenceDataFrame",
47 50
 #' @rdname CoverageSequenceData-class
48 51
 #' @export
49 52
 CoverageSequenceDataFrame <- function(df, ranges, sequence, replicate,
50
-                                      condition){
51
-  .SequenceDataFrame("Coverage",df, ranges, sequence, replicate, condition)
53
+                                      condition, bamfiles, seqinfo){
54
+  .SequenceDataFrame("Coverage",df, ranges, sequence, replicate, condition,
55
+                     bamfiles, seqinfo)
52 56
 }
53 57
 #' @rdname CoverageSequenceData-class
54 58
 #' @export
... ...
@@ -66,6 +70,8 @@ CoverageSequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){
66 70
                     sequences = sequences, seqinfo = seqinfo, ...)
67 71
 }
68 72
 
73
+setSequenceDataCoercions("Coverage")
74
+
69 75
 # CoverageSequenceData ---------------------------------------------------------
70 76
 
71 77
 .process_coverage_data <- function(coverage, grl){
... ...
@@ -28,6 +28,9 @@ NULL
28 28
 #' transcript name. Must be a name of \code{ranges(x).}
29 29
 #' @param condition For \code{\link{aggregate}}: condition for which the data
30 30
 #' should be aggregated.
31
+#' @param df,ranges,sequence,replicate inputs for creating a 
32
+#' \code{SequenceDataFrame}. See 
33
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.
31 34
 #'
32 35
 #' @return a \code{End5SequenceData}, a \code{End3SequenceData} or a
33 36
 #' \code{EndSequenceData} object
... ...
@@ -50,8 +53,9 @@ setClass(Class = "End5SequenceDataFrame",
50 53
 #' @rdname EndSequenceData-class
51 54
 #' @export
52 55
 End5SequenceDataFrame <- function(df, ranges, sequence, replicate,
53
-                                  condition){
54
-  .SequenceDataFrame("End5",df, ranges, sequence, replicate, condition)
56
+                                  condition, bamfiles, seqinfo){
57
+  .SequenceDataFrame("End5",df, ranges, sequence, replicate, condition,
58
+                     bamfiles, seqinfo)
55 59
 }
56 60
 #' @rdname EndSequenceData-class
57 61
 #' @export
... ...
@@ -69,8 +73,10 @@ setClass(Class = "End3SequenceDataFrame",
69 73
          contains = "SequenceDataFrame")
70 74
 #' @rdname EndSequenceData-class
71 75
 #' @export
72
-End3SequenceDataFrame <- function(df, ranges, sequence, replicate, condition){
73
-  .SequenceDataFrame("End3",df, ranges, sequence, replicate, condition)
76
+End3SequenceDataFrame <- function(df, ranges, sequence, replicate, condition,
77
+                                  bamfiles, seqinfo){
78
+  .SequenceDataFrame("End3",df, ranges, sequence, replicate, condition,
79
+                     bamfiles, seqinfo)
74 80
 }
75 81
 #' @rdname EndSequenceData-class
76 82
 #' @export
... ...
@@ -84,8 +90,10 @@ setClass(Class = "End3SequenceData",
84 90
 
85 91
 #' @rdname EndSequenceData-class
86 92
 #' @export
87
-EndSequenceDataFrame <- function(df, ranges, sequence, replicate, condition){
88
-  .SequenceDataFrame("End",df, ranges, sequence, replicate, condition)
93
+EndSequenceDataFrame <- function(df, ranges, sequence, replicate, condition,
94
+                                 bamfiles, seqinfo){
95
+  .SequenceDataFrame("End",df, ranges, sequence, replicate, condition,
96
+                     bamfiles, seqinfo)
89 97
 }
90 98
 #' @rdname EndSequenceData-class
91 99
 #' @export
... ...
@@ -120,6 +128,10 @@ EndSequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){
120 128
                     sequences = sequences, seqinfo = seqinfo, ...)
121 129
 }
122 130
 
131
+setSequenceDataCoercions("End5")
132
+setSequenceDataCoercions("End3")
133
+setSequenceDataCoercions("End")
134
+
123 135
 # End5SequenceData ------------------------------------------------------------------
124 136
 
125 137
 .summarize_to_position_data <- function(data, hits, names, strands, type){
... ...
@@ -259,7 +271,7 @@ setMethod("getData",
259 271
 .aggregate_list_data_mean_sd <- function(x, condition){
260 272
   conditions <- conditions(x)
261 273
   f <- .subset_to_condition(conditions, condition)
262
-  df <- as(unlist(x,use.names=FALSE),"DataFrame")
274
+  df <- as(unlist(x,use.names=FALSE),"DataFrame")[,f,drop=FALSE]
263 275
   conditions_u <- unique(conditions[f])
264 276
   # set up some base values. replicates is here the same as the number of
265 277
   # columns, since a list per replicate is assumed
... ...
@@ -31,6 +31,9 @@ NULL
31 31
 #' transcript name. Must be a name of \code{ranges(x)}
32 32
 #' @param condition For \code{\link{aggregate}}: condition for which the data 
33 33
 #' should be aggregated.
34
+#' @param df,ranges,sequence,replicate inputs for creating a 
35
+#' \code{SequenceDataFrame}. See 
36
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.
34 37
 #' 
35 38
 #' @return a \code{NormEnd5SequenceData} or \code{NormEnd3SequenceData} object
36 39
 #' 
... ...
@@ -54,8 +57,9 @@ setClass(Class = "NormEnd5SequenceDataFrame",
54 57
 #' @rdname NormEndSequenceData-class
55 58
 #' @export
56 59
 NormEnd5SequenceDataFrame <- function(df, ranges, sequence, replicate,
57
-                                      condition){
58
-  .SequenceDataFrame("NormEnd5",df, ranges, sequence, replicate, condition)
60
+                                      condition, bamfiles, seqinfo){
61
+  .SequenceDataFrame("NormEnd5",df, ranges, sequence, replicate, condition,
62
+                     bamfiles, seqinfo)
59 63
 }
60 64
 #' @rdname NormEndSequenceData-class
61 65
 #' @export
... ...
@@ -74,8 +78,9 @@ setClass(Class = "NormEnd3SequenceDataFrame",
74 78
 #' @rdname NormEndSequenceData-class
75 79
 #' @export
76 80
 NormEnd3SequenceDataFrame <- function(df, ranges, sequence, replicate,
77
-                                      condition){
78
-  .SequenceDataFrame("NormEnd3",df, ranges, sequence, replicate, condition)
81
+                                      condition, bamfiles, seqinfo){
82
+  .SequenceDataFrame("NormEnd3",df, ranges, sequence, replicate, condition,
83
+                     bamfiles, seqinfo)
79 84
 }
80 85
 #' @rdname NormEndSequenceData-class
81 86
 #' @export
... ...
@@ -100,6 +105,9 @@ NormEnd3SequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){
100 105
                     sequences = sequences, seqinfo = seqinfo, ...)
101 106
 }
102 107
 
108
+setSequenceDataCoercions("NormEnd5")
109
+setSequenceDataCoercions("NormEnd3")
110
+
103 111
 # summary ----------------------------------------------------------------------
104 112
 
105 113
 .get_summary_MultiColSequenceData <- function(object){
... ...
@@ -246,7 +254,7 @@ setMethod("getData",
246 254
 .aggregate_data_frame_mean_sd <- function(x, condition){
247 255
   conditions <- conditions(x)
248 256
   f <- .subset_to_condition(conditions, condition)
249
-  df <- as(unlist(x,use.names=FALSE)[,f],"DataFrame")
257
+  df <- as(unlist(x,use.names=FALSE),"DataFrame")[,f,drop=FALSE]
250 258
   conditions_u <- unique(conditions[f])
251 259
   replicates <- replicates(x)[f]
252 260
   # set up some base values
... ...
@@ -28,6 +28,9 @@ NULL
28 28
 #' transcript name. Must be a name of \code{ranges(x)}
29 29
 #' @param condition For \code{\link{aggregate}}: condition for which the data 
30 30
 #' should be aggregated.
31
+#' @param df,ranges,sequence,replicate inputs for creating a 
32
+#' \code{SequenceDataFrame}. See 
33
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.
31 34
 #' 
32 35
 #' @return a \code{PileupSequenceData} object
33 36
 #' 
... ...
@@ -48,8 +51,10 @@ setClass(Class = "PileupSequenceDataFrame",
48 51
          contains = "SequenceDataFrame")
49 52
 #' @rdname PileupSequenceData-class
50 53
 #' @export
51
-PileupSequenceDataFrame <- function(df, ranges, sequence, replicate, condition){
52
-  .SequenceDataFrame("Pileup", df, ranges, sequence, replicate, condition)
54
+PileupSequenceDataFrame <- function(df, ranges, sequence, replicate, condition,
55
+                                    bamfiles, seqinfo){
56
+  .SequenceDataFrame("Pileup", df, ranges, sequence, replicate, condition,
57
+                     bamfiles, seqinfo)
53 58
 }
54 59
 #' @rdname PileupSequenceData-class
55 60
 #' @export
... ...
@@ -68,6 +73,8 @@ PileupSequenceData <- function(bamfiles, annotation, sequences, seqinfo, ...){
68 73
                sequences = sequences, seqinfo = seqinfo, ...)
69 74
 }
70 75
 
76
+setSequenceDataCoercions("Pileup")
77
+
71 78
 # PileupSequenceData ----------------------------------------------------------------
72 79
 
73 80
 .fill_up_pileup_data <- function(pileup,grl,irl){
... ...
@@ -180,7 +187,7 @@ setMethod("summary",
180 187
 .aggregate_data_frame_percentage_mean_sd <- function(x,condition){
181 188
   conditions <- conditions(x)
182 189
   f <- .subset_to_condition(conditions, condition)
183
-  df <- as(unlist(x,use.names=FALSE)[,f],"DataFrame")
190
+  df <- as(unlist(x,use.names=FALSE),"DataFrame")[,f,drop=FALSE]
184 191
   conditions_u <- unique(conditions[f])
185 192
   replicates <- replicates(x)[f]
186 193
   # set up some base values
... ...
@@ -370,12 +377,13 @@ setGeneric(name = "pileupToCoverage",
370 377
            def = function(x) standardGeneric("pileupToCoverage"))
371 378
 
372 379
 .aggregate_pile_up_to_coverage <- function(data){
373
-  unlisted_data <- unlist(data)
374
-  replicates <- unique(replicates(data))
380
+  unlisted_data <- unlist(data,use.names=FALSE)
381
+  replicates <- unique(as.integer(interaction(conditions(data),
382
+                                              replicates(data))))
375 383
   ans  <- IRanges::IntegerList(
376 384
     lapply(seq_along(replicates),
377 385
            function(i){
378
-             rowSums(as.data.frame(unlisted_data[,replicates(data) == i]))
386
+             rowSums(as.data.frame(unlisted_data[,replicates == i]))
379 387
            }))
380 388
   names(ans) <- paste0("replicate.",replicates)
381 389
   ans <- do.call(S4Vectors::DataFrame,ans)
... ...
@@ -38,6 +38,9 @@ RNAMODR_PROT_SEQDATA_PLOT_DATA_COLOURS <- c(means = "#FBB4AE",
38 38
 #' transcript name. Must be a name of \code{ranges(x)}
39 39
 #' @param condition For \code{\link{aggregate}}: condition for which the data 
40 40
 #' should be aggregated.
41
+#' @param df,ranges,sequence,replicate inputs for creating a 
42
+#' \code{SequenceDataFrame}. See 
43
+#' \code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.
41 44
 #' 
42 45
 #' @return a \code{ProtectedEndSequenceData} object
43 46
 #' 
... ...
@@ -61,8 +64,9 @@ setClass(Class = "ProtectedEndSequenceDataFrame",
61 64
 #' @rdname ProtectedEndSequenceData-class
62 65
 #' @export
63 66
 ProtectedEndSequenceDataFrame <- function(df, ranges, sequence, replicate,
64
-                                          condition){
65
-  .SequenceDataFrame("ProtectedEnd",df, ranges, sequence, replicate, condition)
67
+                                          condition, bamfiles, seqinfo){
68
+  .SequenceDataFrame("ProtectedEnd",df, ranges, sequence, replicate, condition,
69
+                     bamfiles, seqinfo)
66 70
 }
67 71
 #' @rdname ProtectedEndSequenceData-class
68 72
 #' @export
... ...
@@ -82,6 +86,8 @@ ProtectedEndSequenceData <- function(bamfiles, annotation, sequences, seqinfo,
82 86
                sequences = sequences, seqinfo = seqinfo, ...)
83 87
 }
84 88
 
89
+setSequenceDataCoercions("ProtectedEnd")
90
+
85 91
 # ProtectedEndSequenceData -----------------------------------------------------
86 92
 
87 93
 #' @rdname ProtectedEndSequenceData-class
... ...
@@ -96,10 +96,12 @@ NULL
96 96
       }
97 97
     }
98 98
   }
99
+  levels <- unique(coord$Parent)
100
+  # coord <- coord[order(start(coord))]
99 101
   if(merge){
100
-    coord <- split(coord, factor(coord$Parent, levels = unique(coord$Parent)))
102
+    coord <- split(coord, factor(coord$Parent, levels = levels))
101 103
   } else {
102
-    coord <- coord[order(factor(coord$Parent,unique(coord$Parent)))]
104
+    coord <- coord[order(factor(coord$Parent, levels = levels))]
103 105
     coord <- split(coord, seq_along(coord))
104 106
     names(coord) <- mcols(coord, level="within")[,"Parent"]
105 107
   }
... ...
@@ -216,7 +218,9 @@ NULL
216 218
   # converts everything to a GRangesList
217 219
   coord <- .norm_coord(coord, args[["type"]], args[["merge"]])
218 220
   if(args[["rawData"]]){
219
-    data <- .norm_sequence_data(as(x,"SplitDataFrameList"))
221
+    data <- relist(as(unlist(x, use.names = FALSE),"DataFrame"),
222
+                   IRanges::PartitioningByWidth(x))
223
+    data <- .norm_sequence_data(data)
220 224
   } else {
221 225
     data <- .norm_aggregate_data(aggregate(x))
222 226
   }
... ...
@@ -350,7 +354,8 @@ setMethod("subsetByCoord",
350 354
   # converts everything to a GRangesList
351 355
   coord <- .norm_coord(coord, args[["type"]])
352 356
   if(args[["rawData"]]){
353
-    data <- as(x,"SplitDataFrameList")
357
+    data <- relist(as(unlist(x, use.names = FALSE),"DataFrame"),
358
+                   IRanges::PartitioningByWidth(x))
354 359
   } else {
355 360
     data <- aggregate(x)
356 361
   }
... ...
@@ -58,21 +58,11 @@ NULL
58 58
 #' Subsetting of a \code{SequenceDataFrame} returns a \code{SequenceDataFrame} or 
59 59
 #' \code{DataFrame}, if it is subset by a column or row, respectively. The 
60 60
 #' \code{drop} argument is ignored for column subsetting.
61
-#' 
62
-#' @param df the data as a \code{DataFrame}.
63
-#' @param ranges a \code{GRanges} object containing all annotation elements
64
-#' for a transcript.
65
-#' @param sequence \code{XString} object describing the nucleotide sequence of 
66
-#' the transcript.
67
-#' @param condition The condition of each column or set of columns. Either 
68
-#' \code{control} or \code{treated}.
69
-#' @param replicate The replicate of each column or set of columns for the 
70
-#' individual conditions
71
-#' @param x,i,j,...,drop arguments used for 
72
-#' \code{\link[S4Vectors:DataFrame-class]{subsetting}}.
73
-#' 
74
-#' @return a \code{SequenceDataFrame} object
75
-#' 
61
+#'
62
+#' @param x,i,j,...,drop,deparse.level arguments used for 
63
+#' \code{\link[S4Vectors:DataFrame-class]{subsetting}} or 
64
+#' \code{\link[base:cbind]{base::cbind}}.
65
+#'
76 66
 #' @examples 
77 67
 #' data(e5sd,package="RNAmodR")
78 68
 #' # A SequenceDataFrame is usually constructed by subsetting from 
... ...
@@ -89,11 +79,15 @@ setClass(Class = "SequenceDataFrame",
89 79
          slots = c(ranges = "GRanges",
90 80
                    sequence = "XString",
91 81
                    condition = "factor",
92
-                   replicate = "factor"),
82
+                   replicate = "factor",
83
+                   bamfiles = "BamFileList",
84
+                   seqinfo = "Seqinfo"),
93 85
          prototype = list(ranges = GRanges(),
94 86
                           sequence = RNAString(),
95 87
                           condition = factor(),
96
-                          replicate = factor()))
88
+                          replicate = factor(),
89
+                          bamfiles = Rsamtools::BamFileList(),
90
+                          seqinfo = GenomeInfoDb::Seqinfo()))
97 91
 
98 92
 setMethod("relistToClass", "SequenceDataFrame",
99 93
           function(x) gsub("DataFrame","Data",class(x))
... ...
@@ -112,7 +106,7 @@ sequenceDataFrameClass <- function(dataType){
112 106
 }
113 107
 
114 108
 .SequenceDataFrame <- function(class, df, ranges, sequence, replicate,
115
-                               condition){
109
+                               condition, bamfiles, seqinfo){
116 110
   # defaults from function are strangely not set
117 111
   if(missing(df)){
118 112
     df <- DataFrame()
... ...
@@ -129,6 +123,12 @@ sequenceDataFrameClass <- function(dataType){
129 123
   if(missing(condition)){
130 124
     condition <- factor()
131 125
   }
126
+  if(missing(bamfiles)){
127
+    bamfiles <- Rsamtools::BamFileList()
128
+  }
129
+  if(missing(seqinfo)){
130
+    seqinfo <- GenomeInfoDb::Seqinfo()
131
+  }
132 132
   # check inputs
133 133
   if(!is(df,"DataFrame")){
134 134
     stop("Invalid data object: ", class(df), " found, DataFrame expected.")
... ...
@@ -149,6 +149,8 @@ sequenceDataFrameClass <- function(dataType){
149 149
       sequence = sequence,
150 150
       condition = condition,
151 151
       replicate = replicate,
152
+      bamfiles = bamfiles,
153
+      seqinfo = seqinfo,
152 154
       rownames = df@rownames,
153 155
       nrows = df@nrows,
154 156
       listData = df@listData,
... ...
@@ -208,6 +210,18 @@ setMethod(
208 210
   f = "conditions", 
209 211
   signature = signature(object = "SequenceDataFrame"),
210 212
   definition = function(object){object@condition})
213
+#' @rdname SequenceData-functions
214
+#' @export
215
+setMethod(
216
+  f = "bamfiles", 
217
+  signature = signature(x = "SequenceDataFrame"),
218
+  definition = function(x){x@bamfiles})
219
+#' @rdname SequenceData-functions
220
+#' @export
221
+setMethod(
222
+  f = "seqinfo", 
223
+  signature = signature(x = "SequenceDataFrame"),
224
+  definition = function(x){x@seqinfo})
211 225
 
212 226
 # internals --------------------------------------------------------------------
213 227
 
... ...
@@ -249,6 +263,61 @@ setMethod(
249 263
   }
250 264
 )
251 265
 
266
+#' @rdname SequenceDataFrame-class
267
+#' @export
268
+setMethod(
269
+  "cbind", "SequenceDataFrame",
270
+  function(...){
271
+    args <- list(...)
272
+    if(length(args) == 1L){
273
+      return(args[[1L]])
274
+    }
275
+    # input checks
276
+    classes <- lapply(args,class)
277
+    if(length(unique(classes)) != 1L){
278
+      stop("Inputs must be of the same SequenceDataFrame type.")
279
+    }
280
+    className <- unique(classes)
281
+    lengths <- vapply(args,function(a){sum(lengths(a))},integer(1))
282
+    if(length(unique(lengths)) != 1L){
283
+      stop("Inputs must have the same length.")
284
+    }
285
+    .check_ranges(args)
286
+    .check_sequences(args)
287
+    #
288
+    data <- do.call(cbind,
289
+                    lapply(args,function(a){
290
+                      as(a, "DataFrame")
291
+                    }))
292
+    ranges <- ranges(args[[1L]])
293
+    sequences <- sequences(args[[1L]])
294
+    colnames <- IRanges::CharacterList(strsplit(colnames(data),"\\."))
295
+    colnames_conditions <- colnames %in% c("treated","control")
296
+    colnames_replicates <- !is.na(suppressWarnings(IntegerList(colnames)))
297
+    colnames_f <- !(colnames_conditions | colnames_replicates)
298
+    conditionsFmultiplier <- length(unique(vapply(colnames[colnames_f],
299
+                                                  paste,character(1),
300
+                                                  collapse=".")))
301
+    condition <- unlist(lapply(args,conditions))
302
+    condition_steps <- seq.int(1,length(condition),by=conditionsFmultiplier)
303
+    replicate <- .get_replicate_number(condition[condition_steps])
304
+    replicate <- rep(replicate, each = conditionsFmultiplier)
305
+    colnames[colnames_conditions] <- IRanges::CharacterList(condition)
306
+    colnames[colnames_replicates] <- IRanges::CharacterList(replicate)
307
+    colnames(data) <- vapply(colnames,paste,character(1),collapse = ".")
308
+    bamfiles <- do.call(c,lapply(args,bamfiles))
309
+    seqinfo <- seqinfo(args[[1L]])
310
+    .SequenceDataFrame(class = gsub("SequenceDataFrame","",className),
311
+                       df = data,
312
+                       ranges = ranges,
313
+                       sequence = sequences,
314
+                       replicate = replicate,
315
+                       condition = condition,
316
+                       bamfiles = bamfiles,
317
+                       seqinfo = seqinfo)
318
+  }
319
+)
320
+
252 321
 #' @importFrom stats setNames
253 322
 #' @rdname SequenceDataFrame-class
254 323
 #' @export
... ...
@@ -272,15 +341,29 @@ setMethod(
272 341
           return(x)
273 342
         j <- i
274 343
       }
275
-      if (!is(j, "IntegerRanges")) {
276
-        xstub <- stats::setNames(seq_along(x), names(x))
344
+      xstub <- stats::setNames(seq_along(x), names(x))
345
+      ia <- interaction(conditions(x), replicates(x))
346
+      if(is.character(j)){
277 347
         j <- normalizeSingleBracketSubscript(j, xstub)
348
+        j <- as.integer(ia)[j]
278 349
       }
279
-      x <- initialize(x, as(x,"DataFrame")[, j, drop = FALSE],
350
+      colnames <- IRanges::CharacterList(strsplit(colnames(x),"\\."))
351
+      colnames_conditions <- colnames %in% c("treated","control")
352
+      colnames_replicates <- !is.na(suppressWarnings(IntegerList(colnames)))
353
+      colnames_f <- !(colnames_conditions | colnames_replicates)
354
+      conditionsFmultiplier <- length(unique(vapply(colnames[colnames_f],
355
+                                                    paste,character(1),
356
+                                                    collapse=".")))
357
+      j <- normalizeSingleBracketSubscript(j, xstub[seq_len(length(xstub)/conditionsFmultiplier)])
358
+      j2 <- which(!is.na(match(as.integer(ia), j)))
359
+      x <- initialize(x,
360
+                      as(x,"DataFrame")[, j2, drop = FALSE],
280 361
                       ranges = x@ranges,
281 362
                       sequence = x@sequence,
282
-                      replicate = x@replicate[j],
283
-                      condition = x@condition[j])
363
+                      replicate = factor(x@replicate[j2]),
364
+                      condition = factor(x@condition[j2]),
365
+                      bamfiles = x@bamfiles[j],
366
+                      seqinfo = x@seqinfo)
284 367
       if (anyDuplicated(names(x))){
285 368
         names(x) <- make.unique(names(x))
286 369
       }
287 370
Binary files a/data/csd.rda and b/data/csd.rda differ
288 371
Binary files a/data/e3sd.rda and b/data/e3sd.rda differ
289 372
Binary files a/data/e5sd.rda and b/data/e5sd.rda differ
290 373
Binary files a/data/esd.rda and b/data/esd.rda differ
291 374
Binary files a/data/msi.rda and b/data/msi.rda differ
292 375
Binary files a/data/ne3sd.rda and b/data/ne3sd.rda differ
293 376
Binary files a/data/ne5sd.rda and b/data/ne5sd.rda differ
294 377
Binary files a/data/pesd.rda and b/data/pesd.rda differ
295 378
Binary files a/data/psd.rda and b/data/psd.rda differ
296 379
Binary files a/data/sdl.rda and b/data/sdl.rda differ
297 380
Binary files a/data/sds.rda and b/data/sds.rda differ
... ...
@@ -11,7 +11,8 @@
11 11
 \alias{getDataTrack,CoverageSequenceData-method}
12 12
 \title{CoverageSequenceData}
13 13
 \usage{
14
-CoverageSequenceDataFrame(df, ranges, sequence, replicate, condition)
14
+CoverageSequenceDataFrame(df, ranges, sequence, replicate, condition,
15
+  bamfiles, seqinfo)
15 16
 
16 17
 CoverageSequenceData(bamfiles, annotation, sequences, seqinfo, ...)
17 18
 
... ...
@@ -25,6 +26,10 @@ CoverageSequenceData(bamfiles, annotation, sequences, seqinfo, ...)
25 26
 \S4method{getDataTrack}{CoverageSequenceData}(x, name, ...)
26 27
 }
27 28
 \arguments{
29
+\item{df, ranges, sequence, replicate}{inputs for creating a 
30
+\code{SequenceDataFrame}. See 
31
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.}
32
+
28 33
 \item{condition}{For \code{\link{aggregate}}: condition for which the data 
29 34
 should be aggregated.}
30 35
 
... ...
@@ -25,11 +25,14 @@
25 25
 \alias{getDataTrack,End3SequenceData-method}
26 26
 \title{End5SequenceData/End3SequenceData/EndSequenceData}
27 27
 \usage{
28
-End5SequenceDataFrame(df, ranges, sequence, replicate, condition)
28
+End5SequenceDataFrame(df, ranges, sequence, replicate, condition, bamfiles,
29
+  seqinfo)
29 30
 
30
-End3SequenceDataFrame(df, ranges, sequence, replicate, condition)
31
+End3SequenceDataFrame(df, ranges, sequence, replicate, condition, bamfiles,
32
+  seqinfo)
31 33
 
32
-EndSequenceDataFrame(df, ranges, sequence, replicate, condition)
34
+EndSequenceDataFrame(df, ranges, sequence, replicate, condition, bamfiles,
35
+  seqinfo)
33 36
 
34 37
 End5SequenceData(bamfiles, annotation, sequences, seqinfo, ...)
35 38
 
... ...
@@ -65,6 +68,10 @@ EndSequenceData(bamfiles, annotation, sequences, seqinfo, ...)
65 68
 \S4method{getDataTrack}{End3SequenceData}(x, name, ...)
66 69
 }
67 70
 \arguments{
71
+\item{df, ranges, sequence, replicate}{inputs for creating a 
72
+\code{SequenceDataFrame}. See 
73
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.}
74
+
68 75
 \item{condition}{For \code{\link{aggregate}}: condition for which the data
69 76
 should be aggregated.}
70 77
 
... ...
@@ -19,9 +19,11 @@
19 19
 \alias{getDataTrack,NormEnd3SequenceData-method}
20 20
 \title{NormEnd5SequenceData/NormEnd3SequenceData}
21 21
 \usage{
22
-NormEnd5SequenceDataFrame(df, ranges, sequence, replicate, condition)
22
+NormEnd5SequenceDataFrame(df, ranges, sequence, replicate, condition,
23
+  bamfiles, seqinfo)
23 24
 
24
-NormEnd3SequenceDataFrame(df, ranges, sequence, replicate, condition)
25
+NormEnd3SequenceDataFrame(df, ranges, sequence, replicate, condition,
26
+  bamfiles, seqinfo)
25 27
 
26 28
 NormEnd5SequenceData(bamfiles, annotation, sequences, seqinfo, ...)
27 29
 
... ...
@@ -46,6 +48,10 @@ NormEnd3SequenceData(bamfiles, annotation, sequences, seqinfo, ...)
46 48
 \S4method{getDataTrack}{NormEnd3SequenceData}(x, name, ...)
47 49
 }
48 50
 \arguments{
51
+\item{df, ranges, sequence, replicate}{inputs for creating a 
52
+\code{SequenceDataFrame}. See 
53
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.}
54
+
49 55
 \item{condition}{For \code{\link{aggregate}}: condition for which the data 
50 56
 should be aggregated.}
51 57
 
... ...
@@ -13,7 +13,8 @@
13 13
 \alias{pileupToCoverage,PileupSequenceData-method}
14 14
 \title{PileupSequenceData}
15 15
 \usage{
16
-PileupSequenceDataFrame(df, ranges, sequence, replicate, condition)
16
+PileupSequenceDataFrame(df, ranges, sequence, replicate, condition,
17
+  bamfiles, seqinfo)
17 18
 
18 19
 PileupSequenceData(bamfiles, annotation, sequences, seqinfo, ...)
19 20
 
... ...
@@ -31,6 +32,10 @@ pileupToCoverage(x)
31 32
 \S4method{pileupToCoverage}{PileupSequenceData}(x)
32 33
 }
33 34
 \arguments{
35
+\item{df, ranges, sequence, replicate}{inputs for creating a 
36
+\code{SequenceDataFrame}. See 
37
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.}
38
+
34 39
 \item{condition}{For \code{\link{aggregate}}: condition for which the data 
35 40
 should be aggregated.}
36 41
 
... ...
@@ -11,7 +11,8 @@
11 11
 \alias{getDataTrack,ProtectedEndSequenceData-method}
12 12
 \title{ProtectedEndSequenceData}
13 13
 \usage{
14
-ProtectedEndSequenceDataFrame(df, ranges, sequence, replicate, condition)
14
+ProtectedEndSequenceDataFrame(df, ranges, sequence, replicate, condition,
15
+  bamfiles, seqinfo)
15 16
 
16 17
 ProtectedEndSequenceData(bamfiles, annotation, sequences, seqinfo, ...)
17 18
 
... ...
@@ -25,6 +26,10 @@ ProtectedEndSequenceData(bamfiles, annotation, sequences, seqinfo, ...)
25 26
 \S4method{getDataTrack}{ProtectedEndSequenceData}(x, name, ...)
26 27
 }
27 28
 \arguments{
29
+\item{df, ranges, sequence, replicate}{inputs for creating a 
30
+\code{SequenceDataFrame}. See 
31
+\code{\link[=SequenceDataFrame-class]{SequenceDataFrame}}.}
32
+
28 33
 \item{condition}{For \code{\link{aggregate}}: condition for which the data 
29 34
 should be aggregated.}
30 35
 
... ...
@@ -1,11 +1,86 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/SequenceData-class.R
3
+\docType{methods}
3 4
 \name{SequenceData-class}
4 5
 \alias{SequenceData-class}
5 6
 \alias{SequenceData}
7
+\alias{cbind,SequenceData-method}
8
+\alias{rbind,SequenceData-method}
9
+\alias{SequenceData,character,character-method}
10
+\alias{SequenceData,character,BSgenome-method}
11
+\alias{SequenceData,TxDb,character-method}
12
+\alias{SequenceData,TxDb,BSgenome-method}
13
+\alias{SequenceData,GRangesList,character-method}
14
+\alias{SequenceData,GRangesList,BSgenome-method}
15
+\alias{SequenceData,GFF3File,BSgenome-method}
16
+\alias{SequenceData,GFF3File,character-method}
17
+\alias{SequenceData,character,FaFile-method}
18
+\alias{SequenceData,GFF3File,FaFile-method}
19
+\alias{SequenceData,TxDb,FaFile-method}
20
+\alias{SequenceData,GRangesList,FaFile-method}
6 21
 \title{The SequenceData class}
22
+\usage{
23
+\S4method{cbind}{SequenceData}(..., deparse.level = 1)
24
+
25
+\S4method{rbind}{SequenceData}(..., deparse.level = 1)
26
+
27
+SequenceData(dataType, bamfiles, annotation, sequences, seqinfo, ...)
28
+
29
+\S4method{SequenceData}{character,character}(dataType, bamfiles,
30
+  annotation, sequences, seqinfo, ...)
31
+
32
+\S4method{SequenceData}{character,BSgenome}(dataType, bamfiles, annotation,
33
+  sequences, seqinfo, ...)
34
+
35
+\S4method{SequenceData}{TxDb,character}(dataType, bamfiles, annotation,
36
+  sequences, seqinfo, ...)
37
+
38
+\S4method{SequenceData}{TxDb,BSgenome}(dataType, bamfiles, annotation,
39
+  sequences, seqinfo, ...)
40
+
41
+\S4method{SequenceData}{GRangesList,character}(dataType, bamfiles,
42
+  annotation, sequences, seqinfo, ...)
43
+
44
+\S4method{SequenceData}{GRangesList,BSgenome}(dataType, bamfiles,
45
+  annotation, sequences, seqinfo, ...)
46
+
47
+\S4method{SequenceData}{GFF3File,BSgenome}(dataType, bamfiles, annotation,
48
+  sequences, seqinfo, ...)
49
+
50
+\S4method{SequenceData}{GFF3File,character}(dataType, bamfiles, annotation,
51
+  sequences, seqinfo, ...)
52
+
53
+\S4method{SequenceData}{character,FaFile}(dataType, bamfiles, annotation,
54
+  sequences, seqinfo, ...)
55
+
56
+\S4method{SequenceData}{GFF3File,FaFile}(dataType, bamfiles, annotation,
57
+  sequences, seqinfo, ...)
58
+
59
+\S4method{SequenceData}{TxDb,FaFile}(dataType, bamfiles, annotation,
60
+  sequences, seqinfo, ...)
61
+
62
+\S4method{SequenceData}{GRangesList,FaFile}(dataType, bamfiles, annotation,
63
+  sequences, seqinfo, ...)
64
+}
7 65
 \arguments{
8
-\item{dataType}{The prefix for construction the class name of the 
66
+\item{...}{Optional arguments overwriting default values. Not all
67
+\code{SequenceData} classes use all arguments. The arguments are:
68
+\itemize{
69
+\item{\code{minLength}} {single integer value setting a threshold for minimum
70
+read length. Shorter reads are discarded (default: \code{minLength = NA}).}
71
+\item{\code{maxLength}} {single integer value setting a threshold for maximum
72
+read length. Longer reads are discarded (default: \code{maxLength = NA}).}
73
+\item{\code{minQuality}} {single integer value setting a threshold for minimum
74
+read quality. Reads with a lower quality are discarded (default:
75
+\code{minQuality = 5L}, but this is class dependent).}
76
+\item{\code{max_depth}} {maximum depth for pileup loading (default:
77
+\code{max_depth = 10000L}).}
78
+}}
79
+
80
+\item{deparse.level}{See \code{\link[base:cbind]{base::cbind}} for a
81
+description of this argument.}
82
+
83
+\item{dataType}{The prefix for constructing the class name of the
9 84
 \code{SequenceData} subclass to be constructed.}
10 85
 
11 86
 \item{bamfiles}{the input which can be of the following types
... ...
@@ -19,25 +94,11 @@ to a named \code{BamFileList} referencing existing bam files. Valid names are
19 94
 \item{annotation}{annotation data, which must match the information contained
20 95
 in the BAM files.}
21 96
 
22
-\item{sequences}{sequences matching the target sequences the reads were 
97
+\item{sequences}{sequences matching the target sequences the reads were
23 98
 mapped onto. This must match the information contained in the BAM files.}
24 99
 
25
-\item{seqinfo}{optional \code{\link[GenomeInfoDb:Seqinfo]{Seqinfo}} to 
100
+\item{seqinfo}{optional \code{\link[GenomeInfoDb:Seqinfo]{Seqinfo}} to
26 101
 subset the transcripts analyzed on a chromosome basis.}
27
-
28
-\item{...}{Optional arguments overwriting default values. Not all 
29
-\code{SequenceData} classes use all arguments. The arguments are:
30
-\itemize{
31
-\item{\code{minLength}} {single integer value setting a threshold for minimum
32
-read length. Shorther reads are discarded (default: \code{minLength = NA}).}
33
-\item{\code{maxLength}} {single integer value setting a threshold for maximum
34
-read length. Longer reads are discarded (default: \code{maxLength = NA}).}
35
-\item{\code{minQuality}} {single integer value setting a threshold for maximum
36
-read quality. Reads with a lower quality are discarded (default: 
37
-\code{minQuality = 5L}, but this is class dependent).}
38
-\item{\code{max_depth}} {maximum depth for pileup loading (default: 
39
-\code{max_depth = 10000L}).}
40
-}}
41 102
 }
42 103
 \description{
43 104
 The \code{SequenceData} class is implemented to contain data on each position
... ...
@@ -48,11 +109,11 @@ nucleotide sequence of these transcripts. To access this data several
48 109
 be extended. Currently the following classes are implemented:
49 110
 
50 111
 \itemize{
51
-\item{\code{\link[=CoverageSequenceData-class]{CoverageSequenceData}}} 
52
-\item{\code{\link[=EndSequenceData-class]{End5SequenceData}}, 
53
-\code{\link[=EndSequenceData-class]{End3SequenceData}}, 
112
+\item{\code{\link[=CoverageSequenceData-class]{CoverageSequenceData}}}
113
+\item{\code{\link[=EndSequenceData-class]{End5SequenceData}},
114
+\code{\link[=EndSequenceData-class]{End3SequenceData}},
54 115
 \code{\link[=EndSequenceData-class]{EndSequenceData}}}
55
-\item{\code{\link[=NormEndSequenceData-class]{NormEnd5SequenceData}}, 
116
+\item{\code{\link[=NormEndSequenceData-class]{NormEnd5SequenceData}},
56 117
 \code{\link[=NormEndSequenceData-class]{NormEnd3SequenceData}}}
57 118
 \item{\code{\link[=PileupSequenceData-class]{PileupSequenceData}}}
58 119
 \item{\code{\link[=ProtectedEndSequenceData-class]{ProtectedEndSequenceData}}}
... ...
@@ -67,30 +128,42 @@ transcript. Therefore, it is necessary to treat the minus strand accordingly.
67 128
 The \code{SequenceData} class is derived from the
68 129
 \code{\link[IRanges:DataFrameList-class]{CompressedSplitDataFrameList}} class
69 130
 with additional slots for annotation and sequence data. Some functionality is
70
-not inherited and not available, e.g. \code{cbind}, \code{rbind} amd
71
-\code{relist}.
131
+not inherited and might not be available to the full extent, e.g. \code{relist}.
132
+
133
+\strong{SequenceDataFrame}
134
+
135
+The \code{SequenceDataFrame} class contains data for positions along a single
136
+transcript. It is used to describe elements from a \code{SequenceData}
137
+object.
138
+
139
+The \code{SequenceDataFrame} class is derived from the
140
+\code{\link[S4Vectors:DataFrame-class]{DataFrame}} class.
141
+
142
+Subsetting of a \code{SequenceDataFrame} returns a \code{SequenceDataFrame} or
143
+\code{DataFrame}, if it is subset by a column or row, respectively. The
144
+\code{drop} argument is ignored for column subsetting.
72 145
 }
73 146
 \section{Slots}{
74 147
 
75 148
 \describe{
76
-\item{\code{ranges}}{a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}} 
77
-object each element describing a transcript including its element. The 
78
-\code{GRangesList} is constructed from the 
149
+\item{\code{ranges}}{a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}}
150
+object each element describing a transcript including its element. The
151
+\code{GRangesList} is constructed from the
79 152
 \code{\link[GenomicFeatures:transcriptsBy]{exonsBy(x, by="tx")}} function.
80
-If during construction a \code{GRangesList} is provided instead of a 
153
+If during construction a \code{GRangesList} is provided instead of a
81 154
 character value pointing to a gff3 file or a \code{TxDb} object, it must have
82 155
 a comparable structure.}
83 156
 
84
-\item{\code{sequences}}{a \code{\link[Biostrings:XStringSet-class]{XStringSet}} of 
157
+\item{\code{sequences}}{a \code{\link[Biostrings:XStringSet-class]{XStringSet}} of
85 158
 type \code{sequencesType}.}
86 159
 
87
-\item{\code{sequencesType}}{a \code{character} value for the class name of 
160
+\item{\code{sequencesType}}{a \code{character} value for the class name of
88 161
 \code{sequences}. Either \code{RNAStringSet} or \code{ModRNAStringSet}.}
89 162
 
90
-\item{\code{bamfiles}}{the input bam files as 
163
+\item{\code{bamfiles}}{the input bam files as
91 164
 \code{\link[Rsamtools:BamFile-class]{BamFileList}}}
92 165
 
93
-\item{\code{condition}}{conditions along the 
166
+\item{\code{condition}}{conditions along the
94 167
 \code{\link[Rsamtools:BamFile-class]{BamFileList}}: Either \code{control}
95 168
 or \code{treated}}
96 169
 
... ...
@@ -11,6 +11,8 @@
11 11
 \alias{ranges,SequenceDataFrame-method}
12 12
 \alias{replicates,SequenceDataFrame-method}
13 13
 \alias{conditions,SequenceDataFrame-method}
14
+\alias{bamfiles,SequenceDataFrame-method}
15
+\alias{seqinfo,SequenceDataFrame-method}
14 16
 \alias{show,SequenceData-method}
15 17
 \alias{getData,SequenceData,BamFileList,GRangesList,XStringSet,ScanBamParam-method}
16 18
 \alias{bamfiles,SequenceData-method}
... ...
@@ -50,6 +52,10 @@ replicates(x)
50 52
 
51 53
 \S4method{conditions}{SequenceDataFrame}(object)
52 54
 
55
+\S4method{bamfiles}{SequenceDataFrame}(x)
56
+
57
+\S4method{seqinfo}{SequenceDataFrame}(x)
58
+
53 59
 \S4method{show}{SequenceData}(object)
54 60
 
55 61
 
... ...
@@ -4,31 +4,18 @@
4 4
 \name{SequenceDataFrame-class}
5 5
 \alias{SequenceDataFrame-class}
6 6
 \alias{SequenceDataFrame}
7
+\alias{cbind,SequenceDataFrame-method}
7 8
 \alias{[,SequenceDataFrame,ANY,ANY,ANY-method}
8 9
 \title{The SequenceDataFrame class}
9 10
 \usage{
11
+\S4method{cbind}{SequenceDataFrame}(..., deparse.level = 1)
12
+
10 13
 \S4method{[}{SequenceDataFrame,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE)
11 14
 }
12 15
 \arguments{
13
-\item{x, i, j, ..., drop}{arguments used for 
14
-\code{\link[S4Vectors:DataFrame-class]{subsetting}}.}
15
-
16
-\item{df}{the data as a \code{DataFrame}.}
17
-
18
-\item{ranges}{a \code{GRanges} object containing all annotation elements
19
-for a transcript.}
20
-
21
-\item{sequence}{\code{XString} object describing the nucleotide sequence of 
22
-the transcript.}
23
-
24
-\item{condition}{The condition of each column or set of columns. Either 
25
-\code{control} or \code{treated}.}
26
-
27
-\item{replicate}{The replicate of each column or set of columns for the 
28
-individual conditions}
29
-}
30
-\value{
31
-a \code{SequenceDataFrame} object
16
+\item{x, i, j, ..., drop, deparse.level}{arguments used for 
17
+\code{\link[S4Vectors:DataFrame-class]{subsetting}} or 
18
+\code{\link[base:cbind]{base::cbind}}.}
32 19
 }
33 20
 \description{
34 21
 The \code{SequenceDataFrame} class contains data for positions along a single
... ...
@@ -14,7 +14,9 @@ test_that("SequenceDataFrame:",{
14 14
                                   ranges(sdf),
15 15
                                   sequences(sdf),
16 16
                                   replicates(sdf),
17
-                                  conditions(sdf))
17
+                                  conditions(sdf),
18
+                                  bamfiles(sdf),
19
+                                  seqinfo(sdf))
18 20
   expect_equal(sdf,sdf2)
19 21
   ##############################################################################
20 22
   # errors
... ...
@@ -49,11 +51,11 @@ test_that("SequenceDataFrame:",{
49 51
   expect_s4_class(sdf[1,,drop = FALSE],"DataFrame")
50 52
   expect_equal(ncol(sdf[1,,drop = FALSE]),ncol(sdf))
51 53
   expect_s4_class(sdf[,1],"PileupSequenceDataFrame")
52
-  expect_equal(ncol(sdf[,1]),1L)
54
+  expect_equal(ncol(sdf[,1]),5L)
53 55
   expect_type(sdf["1",],"list")
54 56
   expect_equal(length(sdf["1",]),ncol(sdf))
55 57
   expect_s4_class(sdf["1",,drop = FALSE],"DataFrame")
56 58
   expect_equal(ncol(sdf["1",,drop = FALSE]),ncol(sdf))
57 59
   expect_s4_class(sdf[,"pileup.treated.1.G"],"PileupSequenceDataFrame")
58
-  expect_equal(ncol(sdf[,"pileup.treated.1.G"]),1L)
60
+  expect_equal(ncol(sdf[,"pileup.treated.1.G"]),5L)
59 61
 })
... ...
@@ -61,7 +61,7 @@ test_that("Modifier/ModifierSet:",{
61 61
                factor("*", levels = c("+","-","*")))
62 62
   expect_true(is.factor(conditions(msi[[1]])))
63 63
   expect_equal(conditions(msi[[1]]),
64
-               factor(rep("treated",ncol(sequenceData(msi[[1]]))[1])))
64
+               factor(rep("treated",ncol(sequenceData(msi[[1]]))[1]/5)))
65 65
   ##############################################################################
66 66
   skip_on_bioc()
67 67
   # Modifier creation
... ...
@@ -217,3 +217,24 @@ test_that("Subsetting Modifier/ModifierSet:",{
217 217
   expect_type(actual@unlistData$labels,"logical")
218 218
   
219 219
 })
220
+
221
+context("Combining SequenceData")
222
+test_that("Combining SequenceData:",{
223
+  data(psd,package = "RNAmodR")
224
+  expect_error(c(psd[1],psd[1]),
225
+               "Input must have unique names.")
226
+  expect_error(cbind(psd[1],psd[2]),
227
+               "Inputs must have the same lengths.")
228
+  expect_error(cbind(psd[1],psd),
229
+               "Inputs must have the same lengths.")
230
+  expect_error(rbind(psd[1],psd[,1:2]),
231
+               "Inputs must have the same width.")
232
+  expect_s4_class(c(psd[1],psd[2]),
233
+                  "PileupSequenceData")
234
+  expect_s4_class(cbind(psd[1],psd[1]),
235
+                  "PileupSequenceData")
236
+  expect_s4_class(rbind(psd[1],psd),
237
+                  "PileupSequenceData")
238
+  expect_equal(relist(unlist(psd),psd),psd)
239
+  expect_equal(relist(unlist(psd,use.names = FALSE),psd),psd)
240
+})