Browse code

Get RNAmodR ready for the switch to virtual DataFrame (in S4Vectors package)

Hervé Pagès authored on 05/11/2021 01:34:40
Showing1 changed files
... ...
@@ -206,7 +206,7 @@ coerceSequenceDataToCompressedSplitDataFrameList <- function(className){
206 206
           for (what in c("elementType", "elementMetadata", 
207 207
                          "metadata", "unlistData", "partitioning"
208 208
           )) slot(value, what) <- slot(from, what)
209
-          value@unlistData <- as(value@unlistData,"DataFrame")
209
+          value@unlistData <- as(value@unlistData,"DFrame")
210 210
           value
211 211
         }
212 212
       } else from
Browse code

reworked dependent on assertive package

FelixErnst authored on 18/07/2020 08:13:55
Showing1 changed files
... ...
@@ -110,7 +110,7 @@ setMethod(
110 110
   f = "initialize",
111 111
   signature = signature(.Object = "SequenceData"),
112 112
   definition = function(.Object, ...){
113
-    if(!assertive::is_a_non_empty_string(.Object@dataDescription)){
113
+    if(!.is_non_empty_string(.Object@dataDescription)){
114 114
       stop("'dataDescription' must be a single non empty character value.")
115 115
     }
116 116
     callNextMethod(.Object, ...)
... ...
@@ -467,7 +467,7 @@ setMethod("unlist", "SequenceData",
467 467
   proto@minQuality <- .norm_min_quality(args, proto@minQuality)
468 468
   condition <- factor(names(bamfiles))
469 469
   replicate <- .get_replicate_number(condition)
470
-  if(!assertive::is_a_non_empty_string(proto@dataDescription)){
470
+  if(!.is_non_empty_string(proto@dataDescription)){
471 471
     stop("'dataDescription' must be a single non empty character value.")
472 472
   }
473 473
   if(is.null(proto@minQuality)){
Browse code

added stats function

FelixErnst authored on 26/04/2020 18:11:08
Showing1 changed files
... ...
@@ -407,6 +407,7 @@ setMethod("unlist", "SequenceData",
407 407
 # or mergeing DataFrames into one
408 408
 .norm_postprocess_read_data <- function(data){
409 409
   if(is(data[[1L]],"IntegerList") || is(data[[1L]],"NumericList")){
410
+    m_data <- lapply(lapply(data, metadata),"[[","stats")
410 411
     data <- lapply(data, unlist)
411 412
     if(length(unique(lengths(data))) != 1L){
412 413
       stop("Data is of unequal length and cannot be coerced to a DataFrame.",
... ...
@@ -414,6 +415,7 @@ setMethod("unlist", "SequenceData",
414 415
     }
415 416
     data <- S4Vectors::DataFrame(data)
416 417
   } else if(is(data[[1L]],"DataFrameList")) {
418
+    m_data <- lapply(lapply(data, metadata),"$","stats")
417 419
     data <- lapply(data, unlist, use.names = FALSE)
418 420
     ncols <- unique(unlist(lapply(data, ncol)))
419 421
     if(length(ncols) != 1L){
... ...
@@ -427,6 +429,7 @@ setMethod("unlist", "SequenceData",
427 429
   } else {
428 430
     stop("")
429 431
   }
432
+  metadata(data) <- list(stats = m_data) 
430 433
   data
431 434
 }
432 435
 
... ...
@@ -461,13 +464,13 @@ setMethod("unlist", "SequenceData",
461 464
     args <- as.list(args)
462 465
   }
463 466
   proto <- new(className)
464
-  minQuality <- .norm_min_quality(args, proto@minQuality)
467
+  proto@minQuality <- .norm_min_quality(args, proto@minQuality)
465 468
   condition <- factor(names(bamfiles))
466 469
   replicate <- .get_replicate_number(condition)
467 470
   if(!assertive::is_a_non_empty_string(proto@dataDescription)){
468 471
     stop("'dataDescription' must be a single non empty character value.")
469 472
   }
470
-  if(is.null(minQuality)){
473
+  if(is.null(proto@minQuality)){
471 474
     stop("Minimum quality is not set for '", className ,"'.",
472 475
          call. = FALSE)
473 476
   }
... ...
@@ -523,9 +526,10 @@ setMethod("unlist", "SequenceData",
523 526
                        bamfiles = bamfiles,
524 527
                        seqinfo = seqinfo)
525 528
   ans <- new(className, 
526
-             minQuality = minQuality,
529
+             minQuality = proto@minQuality,
527 530
              unlistData = unlist_data,
528 531
              partitioning = IRanges::PartitioningByEnd(data),
532
+             metadata = metadata(unlist_data),
529 533
              ...)
530 534
   message("OK")
531 535
   ans
Browse code

Fix bindROWS() methods for SequenceData and SequenceDataFrame objects

This fixes some long-standing issues with how these methods handle
the 'check' argument. These issues were not causing visible problems
so far but with the recent changes to S4Vectors (0.25.14) they started
to.

Hervé Pagès authored on 25/03/2020 06:40:01
Showing1 changed files
... ...
@@ -352,7 +352,7 @@ setMethod("bindROWS", "SequenceData",
352 352
             }
353 353
             .check_bamfiles(all_objects)
354 354
             callNextMethod(x, objects, use.names = use.names, 
355
-                           ignore.mcols = ignore.mcols, check = FALSE)
355
+                           ignore.mcols = ignore.mcols, check = check)
356 356
           }
357 357
 )
358 358
 
Browse code

added virtual class SequenceData to export

To be able to inherit the class needs to be exported

FelixErnst authored on 16/03/2020 11:19:57
Showing1 changed files
... ...
@@ -97,6 +97,8 @@ NULL
97 97
 #' @return A SequenceData object
98 98
 NULL
99 99
 
100
+#' @name RNAmodR-development
101
+#' @export
100 102
 setClass("SequenceData",
101 103
          contains = c("VIRTUAL", "CompressedSplitDataFrameList"),
102 104
          slots = c(minQuality = "integer",
Browse code

Removed fixed RNA focus for Modifier and SequenceData* classes

All classes can now be used to detect RNA and DNA modification. For this the seqtype slot was added to Modifier class via inheritance to RNAModifier and DNAModifier classes. seqtype getter is available for Modifier class, and getter/setter for SequenceData* classes.

Felix Ernst authored on 27/09/2019 16:13:26
Showing1 changed files
... ...
@@ -89,7 +89,8 @@ NULL
89 89
 #' description of this argument.
90 90
 #' 
91 91
 #' @slot sequencesType a \code{character} value for the class name of 
92
-#' \code{sequences}. Either \code{RNAStringSet} or \code{ModRNAStringSet}.
92
+#' \code{sequences}. Either \code{RNAStringSet}, \code{ModRNAStringSet}, 
93
+#' \code{DNAStringSet} or \code{ModDNAStringSet}.
93 94
 #' @slot minQuality a \code{integer} value describing a threshold of the minimum
94 95
 #' quality of reads to be used.
95 96
 #' 
... ...
@@ -98,12 +99,10 @@ NULL
98 99
 
99 100
 setClass("SequenceData",
100 101
          contains = c("VIRTUAL", "CompressedSplitDataFrameList"),
101
-         slots = c(sequencesType = "character",
102
-                   minQuality = "integer",
102
+         slots = c(minQuality = "integer",
103 103
                    unlistData = "SequenceDataFrame",
104 104
                    unlistType = "character",
105
-                   dataDescription = "character"),
106
-         prototype = list(sequencesType = "RNAStringSet"))
105
+                   dataDescription = "character"))
107 106
 
108 107
 setMethod(
109 108
   f = "initialize",
... ...
@@ -112,9 +111,6 @@ setMethod(
112 111
     if(!assertive::is_a_non_empty_string(.Object@dataDescription)){
113 112
       stop("'dataDescription' must be a single non empty character value.")
114 113
     }
115
-    if(!(.Object@sequencesType %in% c("RNAStringSet","ModRNAStringSet"))){
116
-      stop("'sequencesType' must be either 'RNAStringSet' or 'ModRNAStringSet'")
117
-    }
118 114
     callNextMethod(.Object, ...)
119 115
   }
120 116
 )
... ...
@@ -469,9 +465,6 @@ setMethod("unlist", "SequenceData",
469 465
   if(!assertive::is_a_non_empty_string(proto@dataDescription)){
470 466
     stop("'dataDescription' must be a single non empty character value.")
471 467
   }
472
-  if(!(proto@sequencesType %in% c("RNAStringSet","ModRNAStringSet"))){
473
-    stop("'sequencesType' must be either 'RNAStringSet' or 'ModRNAStringSet'")
474
-  }
475 468
   if(is.null(minQuality)){
476 469
     stop("Minimum quality is not set for '", className ,"'.",
477 470
          call. = FALSE)
... ...
@@ -508,7 +501,6 @@ setMethod("unlist", "SequenceData",
508 501
   rownames(data) <- IRanges::CharacterList(positions)
509 502
   data <- .order_read_data_by_strand(data, ranges)
510 503
   # order sequences
511
-  sequences <- as(sequences, proto@sequencesType)
512 504
   sequences <- sequences[match(names(ranges), names(sequences))]
513 505
   # basic checks
514 506
   names(data) <- names(ranges)
... ...
@@ -540,16 +532,20 @@ setMethod("unlist", "SequenceData",
540 532
 .SequenceData_settings <- data.frame(
541 533
   variable = c("max_depth",
542 534
                "minLength",
543
-               "maxLength"),
535
+               "maxLength",
536
+               "seqtype"),
544 537
   testFUN = c(".not_integer_bigger_than_10",
545 538
               ".not_integer_bigger_equal_than_zero_nor_na",
546
-              ".not_integer_bigger_equal_than_one_nor_na"),
539
+              ".not_integer_bigger_equal_than_one_nor_na",
540
+              ".is_valid_nucleotide_seqtype"),
547 541
   errorValue = c(TRUE,
548 542
                  TRUE,
549
-                 TRUE),
543
+                 TRUE,
544
+                 FALSE),
550 545
   errorMessage = c("'max_depth' must be integer with a value higher than 10L.",
551 546
                    "'minLength' must be integer with a value higher than 0L or NA.",
552
-                   "'maxLength' must be integer with a value higher than 1L or NA."),
547
+                   "'maxLength' must be integer with a value higher than 1L or NA.",
548
+                   paste0("'seqtype' must be either '",seqtype(RNAString()) ,"' or '",seqtype(DNAString()) ,"'.")),
553 549
   stringsAsFactors = FALSE)
554 550
 
555 551
 .get_SequenceData_args <- function(input){
... ...
@@ -557,8 +553,9 @@ setMethod("unlist", "SequenceData",
557 553
   max_depth <- 10000L # the default is 250, which is to small
558 554
   minLength <- NA_integer_
559 555
   maxLength <- NA_integer_
556
+  seqtype <- seqtype(RNAString()) 
560 557
   args <- .norm_settings(input, .SequenceData_settings, max_depth, minLength,
561
-                         maxLength)
558
+                         maxLength, seqtype)
562 559
   if(!is.na(args[["minLength"]]) && !is.na(args[["maxLength"]])){
563 560
     if(args[["minLength"]] > args[["maxLength"]]){
564 561
       stop("'minLength' must be smaller or equal to 'maxLength'.",
... ...
@@ -596,7 +593,7 @@ setMethod("unlist", "SequenceData",
596 593
       stop("No overlap between bamfiles, annotation and seqinfo.")
597 594
     }
598 595
   }
599
-  sequences <- .load_transcript_sequences(sequences, grl)
596
+  sequences <- .load_sequences(sequences, grl, args)
600 597
   # create the class
601 598
   .SequenceData(className, bamfiles, grl, sequences, seqinfo, args)
602 599
 }
... ...
@@ -643,11 +640,12 @@ setMethod("unlist", "SequenceData",
643 640
 #' @importFrom Biostrings xscat
644 641
 # load the transcript sequence per transcript aka. one sequence per GRangesList
645 642
 # element
646
-.load_transcript_sequences <- function(sequences, grl){
643
+.load_sequences <- function(sequences, grl, args){
647 644
   seq <- Biostrings::getSeq(sequences, unlist(grl))
648 645
   seq <- relist(unlist(seq),IRanges::PartitioningByWidth(sum(width(grl))))
649 646
   names(seq) <- names(grl)
650
-  as(seq,"RNAStringSet")
647
+  seqtype(seq) <- args[["seqtype"]]
648
+  seq
651 649
 }
652 650
 
653 651
 # remove any elements, which are not in the seqinfo
... ...
@@ -788,11 +786,13 @@ setMethod("getData",
788 786
 setMethod(f = "bamfiles", 
789 787
           signature = signature(x = "SequenceData"),
790 788
           definition = function(x){bamfiles(unlist(x))})
789
+
791 790
 #' @rdname SequenceData-functions
792 791
 #' @export
793 792
 setMethod(f = "conditions", 
794 793
           signature = signature(object = "SequenceData"),
795 794
           definition = function(object){conditions(unlist(object))})
795
+
796 796
 #' @rdname SequenceData-functions
797 797
 #' @export
798 798
 setMethod(
... ...
@@ -810,21 +810,41 @@ setMethod(
810 810
       names(partitioning_relist) <- names(x)
811 811
       relist(unlisted_ranges, partitioning_relist)
812 812
     })
813
+
813 814
 #' @rdname SequenceData-functions
814 815
 #' @export
815 816
 setMethod(f = "replicates", 
816 817
           signature = signature(x = "SequenceData"),
817 818
           definition = function(x){replicates(unlist(x))})
819
+
818 820
 #' @rdname SequenceData-functions
819 821
 #' @export
820 822
 setMethod(f = "seqinfo", 
821 823
           signature = signature(x = "SequenceData"),
822 824
           definition = function(x){seqinfo(unlist(x))})
825
+
823 826
 #' @rdname SequenceData-functions
824 827
 #' @export
825 828
 setMethod(f = "sequences", 
826 829
           signature = signature(x = "SequenceData"),
827 830
           definition = function(x){relist(sequences(unlist(x)),x)})
831
+
832
+#' @rdname SequenceData-functions
833
+#' @export
834
+setMethod(f = "seqtype", 
835
+          signature = signature(x = "SequenceData"),
836
+          definition = function(x){seqtype(unlist(x))})
837
+
838
+#' @rdname SequenceData-functions
839
+#' @export
840
+setReplaceMethod(f = "seqtype", 
841
+                 signature = signature(x = "SequenceData"),
842
+                 definition = function(x, value){
843
+                   unlisted_x <- unlist(x)
844
+                   seqtype(unlisted_x) <- value
845
+                   relist(unlisted_x,x)
846
+                 })
847
+
828 848
 #' @rdname SequenceData-functions
829 849
 #' @export
830 850
 setMethod(f = "dataType",
Browse code

Imports updated and dataType function added for SequenceData/Frame class

Felix Ernst authored on 10/07/2019 12:07:41
Showing1 changed files
... ...
@@ -825,6 +825,11 @@ setMethod(f = "seqinfo",
825 825
 setMethod(f = "sequences", 
826 826
           signature = signature(x = "SequenceData"),
827 827
           definition = function(x){relist(sequences(unlist(x)),x)})
828
+#' @rdname SequenceData-functions
829
+#' @export
830
+setMethod(f = "dataType",
831
+          signature = signature(x = "SequenceData"),
832
+          definition = function(x){dataType(unlist(x))})
828 833
 
829 834
 # dummy functions --------------------------------------------------------------
830 835
 # this needs to be implemented by each subclass
Browse code

fixed coercion of SequenceData to CompressedSplitDataFrameList

the SequenceDataFrame object is now coerced to a DataFrame.

Felix Ernst authored on 22/06/2019 16:21:24
Showing1 changed files
... ...
@@ -129,6 +129,10 @@ sequenceDataClass <- function(dataType){
129 129
   ans
130 130
 }
131 131
 
132
+setMethod("classNameForDisplay", "SequenceData",
133
+          function(x) class(x)
134
+)
135
+
132 136
 #' @rdname SequenceData-functions
133 137
 setMethod("show", "SequenceData",
134 138
   function(object){
... ...
@@ -193,6 +197,24 @@ S4Vectors::setValidity2(Class = "SequenceData", .valid.SequenceData)
193 197
 
194 198
 # coercion ---------------------------------------------------------------------
195 199
 
200
+coerceSequenceDataToCompressedSplitDataFrameList <- function(className){
201
+  setMethod("coerce",
202
+    signature = c(from = className, to = "CompressedSplitDataFrameList"),
203
+    function(from, to, strict = TRUE){
204
+      if (strict) {
205
+        from <- from
206
+        {
207
+          value <- new("CompressedSplitDataFrameList")
208
+          for (what in c("elementType", "elementMetadata", 
209
+                         "metadata", "unlistData", "partitioning"
210
+          )) slot(value, what) <- slot(from, what)
211
+          value@unlistData <- as(value@unlistData,"DataFrame")
212
+          value
213
+        }
214
+      } else from
215
+    })
216
+}
217
+
196 218
 coerceToSequenceData <- function(className) {
197 219
   function(from) {
198 220
     if(is.list(from)) {
... ...
@@ -217,45 +239,22 @@ coerceToSequenceData <- function(className) {
217 239
 
218 240
 setSequenceDataCoercions <- function(type) {
219 241
   className <- sequenceDataClass(type)
242
+  coerceSequenceDataToCompressedSplitDataFrameList(className)
220 243
   setAs("ANY", className, coerceToSequenceData(className))
221 244
   setAs("list", className, coerceToSequenceData(className))
222 245
 }
223 246
 
224 247
 # internals --------------------------------------------------------------------
225 248
 
226
-#' @importClassesFrom IRanges PartitioningByEnd
227
-#' @importFrom IRanges PartitioningByEnd
228
-setMethod("extractROWS", "SequenceData",
229
-  function(x, i){
230
-    i <- normalizeSingleBracketSubscript(i, x, as.NSBS = TRUE)
231
-    ans_eltNROWS <- extractROWS(width(IRanges::PartitioningByEnd(x)), i)
232
-    ans_breakpoints <- suppressWarnings(cumsum(ans_eltNROWS))
233
-    nbreakpoints <- length(ans_breakpoints)
234
-    if (nbreakpoints != 0L && is.na(ans_breakpoints[[nbreakpoints]])){
235
-      stop("Subsetting operation on ", class(x), " object 'x' ",
236
-           "produces a result that is too big to be ",
237
-           "represented as a CompressedList object. ",
238
-           "This is not implemented, yet.")
239
-    }
240
-    idx_on_unlisted_x <- 
241
-      IRanges::IRanges(end = extractROWS(end(IRanges::PartitioningByEnd(x)), i),
242
-                       width = ans_eltNROWS)
243
-    ans_unlistData <- extractROWS(unlist(x,use.names = FALSE),
244
-                                  idx_on_unlisted_x)
245
-    ans_partitioning <- new("PartitioningByEnd", end = ans_breakpoints,
246
-                            NAMES = extractROWS(names(x), i))
247
-    ans_elementMetadata <- extractROWS(x@elementMetadata, i)
248
-    initialize(x, minQuality = x@minQuality, unlistData = ans_unlistData,
249
-               partitioning = ans_partitioning, 
250
-               elementMetadata = ans_elementMetadata)
251
-  }
252
-)
249
+
250
+
251
+# Accessors --------------------------------------------------------------------
253 252
 
254 253
 setMethod("rownames", "SequenceData",
255
-          function (x){
256
-            ans <- rownames(unlist(x,use.names = FALSE), do.NULL = TRUE)
257
-            relist(ans,x)
258
-          }
254
+  function (x){
255
+    ans <- rownames(unlist(x,use.names = FALSE), do.NULL = TRUE)
256
+    relist(ans,x)
257
+  }
259 258
 )
260 259
 
261 260
 # Concatenation ----------------------------------------------------------------
Browse code

added changes to man pages of SequenceData and SequenceDataFrame

Felix Ernst authored on 05/06/2019 13:14:54
Showing1 changed files
... ...
@@ -43,9 +43,12 @@ NULL
43 43
 #' 
44 44
 #' **SequenceDataFrame**
45 45
 #' 
46
-#' #' The \code{SequenceDataFrame} class contains data for positions along a single
47
-#' transcript. It is used to describe elements from a \code{SequenceData}
48
-#' object.
46
+#' The \code{SequenceDataFrame} class is a virtual class and  contains data for
47
+#' positions along a single transcript. In addition to being used for returning
48
+#' elements from a \code{SequenceData} object, the SequenceDataFrame class is
49
+#' used to store the unlisted data within a
50
+#' \code{\link[=SequenceData-class]{SequenceData}} object. Therefore, a matching
51
+#' \code{SequenceData} and \code{SequenceDataFrame} class must be implemented.
49 52
 #' 
50 53
 #' The \code{SequenceDataFrame} class is derived from the
51 54
 #' \code{\link[S4Vectors:DataFrame-class]{DataFrame}} class.
... ...
@@ -85,28 +88,12 @@ NULL
85 88
 #' @param deparse.level See \code{\link[base:cbind]{base::cbind}} for a 
86 89
 #' description of this argument.
87 90
 #' 
88
-#' @slot ranges a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}} 
89
-#' object each element describing a transcript including its element. The 
90
-#' \code{GRangesList} is constructed from the 
91
-#' \code{\link[GenomicFeatures:transcriptsBy]{exonsBy(x, by="tx")}} function.
92
-#' If during construction a \code{GRangesList} is provided instead of a 
93
-#' character value pointing to a gff3 file or a \code{TxDb} object, it must have
94
-#' a comparable structure. 
95
-#' @slot sequences a \code{\link[Biostrings:XStringSet-class]{XStringSet}} of 
96
-#' type \code{sequencesType}.
97 91
 #' @slot sequencesType a \code{character} value for the class name of 
98 92
 #' \code{sequences}. Either \code{RNAStringSet} or \code{ModRNAStringSet}.
99
-#' @slot bamfiles the input bam files as 
100
-#' \code{\link[Rsamtools:BamFile-class]{BamFileList}}
101
-#' @slot condition conditions along the 
102
-#' \code{\link[Rsamtools:BamFile-class]{BamFileList}}: Either \code{control}
103
-#' or \code{treated}
104
-#' @slot replicate replicate number along the \code{BamFileList} for each of the
105
-#' condition types.
106
-#' @slot seqinfo a \code{\link[GenomeInfoDb:Seqinfo-class]{Seqinfo}} describing
107
-#' the avialable chromosomes. This is an intersection of 
108 93
 #' @slot minQuality a \code{integer} value describing a threshold of the minimum
109 94
 #' quality of reads to be used.
95
+#' 
96
+#' @return A SequenceData object
110 97
 NULL
111 98
 
112 99
 setClass("SequenceData",
Browse code

added support for c, cbind, rbind and relist function for SequenceData objects

Felix Ernst authored on 30/05/2019 22:25:57
Showing1 changed files
... ...
@@ -9,6 +9,8 @@ NULL
9 9
 #' 
10 10
 #' @title The SequenceData class
11 11
 #' 
12
+#' @md
13
+#' 
12 14
 #' @description 
13 15
 #' The \code{SequenceData} class is implemented to contain data on each position
14 16
 #' along transcripts and holds the corresponding annotation data and
... ...
@@ -37,8 +39,20 @@ NULL
37 39
 #' The \code{SequenceData} class is derived from the
38 40
 #' \code{\link[IRanges:DataFrameList-class]{CompressedSplitDataFrameList}} class
39 41
 #' with additional slots for annotation and sequence data. Some functionality is
40
-#' not inherited and not available, e.g. \code{cbind}, \code{rbind} amd
41
-#' \code{relist}.
42
+#' not inherited and might not available to full extend, e.g.\code{relist}.
43
+#' 
44
+#' **SequenceDataFrame**
45
+#' 
46
+#' #' The \code{SequenceDataFrame} class contains data for positions along a single
47
+#' transcript. It is used to describe elements from a \code{SequenceData}
48
+#' object.
49
+#' 
50
+#' The \code{SequenceDataFrame} class is derived from the
51
+#' \code{\link[S4Vectors:DataFrame-class]{DataFrame}} class.
52
+#' 
53
+#' Subsetting of a \code{SequenceDataFrame} returns a \code{SequenceDataFrame} or 
54
+#' \code{DataFrame}, if it is subset by a column or row, respectively. The 
55
+#' \code{drop} argument is ignored for column subsetting.
42 56
 #' 
43 57
 #' @param dataType The prefix for construction the class name of the 
44 58
 #' \code{SequenceData} subclass to be constructed.
... ...
@@ -68,6 +82,8 @@ NULL
68 82
 #' \item{\code{max_depth}} {maximum depth for pileup loading (default: 
69 83
 #' \code{max_depth = 10000L}).}
70 84
 #' }
85
+#' @param deparse.level See \code{\link[base:cbind]{base::cbind}} for a 
86
+#' description of this argument.
71 87
 #' 
72 88
 #' @slot ranges a \code{\link[GenomicRanges:GRangesList-class]{GRangesList}} 
73 89
 #' object each element describing a transcript including its element. The 
... ...
@@ -96,8 +112,6 @@ NULL
96 112
 setClass("SequenceData",
97 113
          contains = c("VIRTUAL", "CompressedSplitDataFrameList"),
98 114
          slots = c(sequencesType = "character",
99
-                   bamfiles = "BamFileList",
100
-                   seqinfo = "Seqinfo",
101 115
                    minQuality = "integer",
102 116
                    unlistData = "SequenceDataFrame",
103 117
                    unlistType = "character",
... ...
@@ -192,12 +206,33 @@ S4Vectors::setValidity2(Class = "SequenceData", .valid.SequenceData)
192 206
 
193 207
 # coercion ---------------------------------------------------------------------
194 208
 
195
-.as_SplitDataFrameList <- function(from){
196
-  relist(as(unlist(from, use.names = FALSE),"DataFrame"),
197
-         IRanges::PartitioningByWidth(from))
209
+coerceToSequenceData <- function(className) {
210
+  function(from) {
211
+    if(is.list(from)) {
212
+      classes <- unlist(lapply(from,class))
213
+      from <- from[classes == paste0(className,"Frame")]
214
+      if(length(from) == 0) {
215
+        FUN <- match.fun(className)
216
+        from <- list(FUN())
217
+      }
218
+    } else {
219
+      if(is(from,className)){
220
+        return(from)
221
+      } else if(is(from,paste0(className,"Frame"))) {
222
+        from <- list(from)
223
+      } else {
224
+        stop("Cannot coerce ",class(from)," to ",className,".")
225
+      }
226
+    }
227
+    IRanges:::coerceToCompressedList(from)
228
+  }
198 229
 }
199
-setAs("SequenceData", "SplitDataFrameList", .as_SplitDataFrameList)
200 230
 
231
+setSequenceDataCoercions <- function(type) {
232
+  className <- sequenceDataClass(type)
233
+  setAs("ANY", className, coerceToSequenceData(className))
234
+  setAs("list", className, coerceToSequenceData(className))
235
+}
201 236
 
202 237
 # internals --------------------------------------------------------------------
203 238
 
... ...
@@ -223,8 +258,7 @@ setMethod("extractROWS", "SequenceData",
223 258
     ans_partitioning <- new("PartitioningByEnd", end = ans_breakpoints,
224 259
                             NAMES = extractROWS(names(x), i))
225 260
     ans_elementMetadata <- extractROWS(x@elementMetadata, i)
226
-    initialize(x, bamfiles = x@bamfiles, seqinfo = x@seqinfo, 
227
-               minQuality = x@minQuality, unlistData = ans_unlistData,
261
+    initialize(x, minQuality = x@minQuality, unlistData = ans_unlistData,
228 262
                partitioning = ans_partitioning, 
229 263
                elementMetadata = ans_elementMetadata)
230 264
   }
... ...
@@ -237,35 +271,115 @@ setMethod("rownames", "SequenceData",
237 271
           }
238 272
 )
239 273
 
240
-# methods inherited from List and CompressedList, contain a coercion step
241
-# x <- as(x, "List", strict = FALSE)
242
-# 
243
-# This does not keep the SequenceData object intact resulting in coercion
244
-# to a CompressedSplitDataFrameList.
245
-setMethod("[[", "SequenceData",
246
-          function(x, i, j, ...) 
247
-          {
248
-            METHOD <- selectMethod("[[", "List")
249
-            METHOD(x, i, j, ...)
250
-          }
251
-)
274
+# Concatenation ----------------------------------------------------------------
252 275
 
276
+.check_ranges <- function(args){
277
+  ranges <- lapply(args,ranges)
278
+  ranges <- vapply(ranges[seq.int(2L,length(ranges))],
279
+                   function(r){
280
+                     all(all(r == ranges[[1L]]))
281
+                   },
282
+                   logical(1))
283
+  if(!all(ranges)){
284
+    stop("Inputs must have the same ranges.")
285
+  }
286
+}
253 287
 
254
-# Concatenation ----------------------------------------------------------------
288
+.check_sequences <- function(args){
289
+  sequences <- lapply(args,sequences)
290
+  sequences <- vapply(sequences[seq.int(2L,length(sequences))],
291
+                      function(s){
292
+                        all(s == sequences[[1L]])
293
+                      },
294
+                      logical(1))
295
+  if(!all(sequences)){
296
+    stop("Inputs must have the same sequences.")
297
+  }
298
+}
255 299
 
256
-setMethod("cbind", "SequenceData",
257
-          function(...){
258
-            arg1 <- list(...)[[1L]]
259
-            stop("'rbind' is not supported for ",class(arg1),".")
300
+.check_bamfiles <- function(args){
301
+  bamfiles <- lapply(args,bamfiles)
302
+  bamfiles <- vapply(bamfiles[seq.int(2L,length(bamfiles))],
303
+                     function(b){
304
+                       all(path(b) == path(bamfiles[[1L]]))
305
+                     },
306
+                     logical(1))
307
+  if(!all(bamfiles)){
308
+    stop("Inputs must be derived from the same bamfiles.")
260 309
   }
310
+}
311
+
312
+#' @rdname SequenceData-class
313
+#' @export
314
+setMethod("cbind", "SequenceData",
315
+          function(..., deparse.level = 1) 
316
+          {
317
+            args <- list(...)
318
+            if(length(args) == 1L){
319
+              return(args[[1L]])
320
+            }
321
+            # input checks
322
+            classes <- lapply(args,class)
323
+            if(length(unique(classes)) != 1L){
324
+              stop("Inputs must be of the same SequenceDataFrame type.")
325
+            }
326
+            lengths <- vapply(args,function(a){sum(lengths(a))},integer(1))
327
+            if(length(unique(lengths)) != 1L){
328
+              stop("Inputs must have the same lengths.")
329
+            }
330
+            .check_ranges(args)
331
+            .check_sequences(args)
332
+            callNextMethod()
333
+          }
261 334
 )
335
+
336
+#' @rdname SequenceData-class
337
+#' @export
262 338
 setMethod("rbind", "SequenceData",
263
-          function(...){
264
-            arg1 <- list(...)[[1L]]
265
-            stop("'rbind' is not supported for ",class(arg1),".")
339
+          function(..., deparse.level = 1) 
340
+          {
341
+            args <- list(...)
342
+            if(length(args) == 1L){
343
+              return(args[[1L]])
344
+            }
345
+            # input checks
346
+            classes <- lapply(args,class)
347
+            if(length(unique(classes)) != 1L){
348
+              stop("Inputs must be of the same SequenceDataFrame type.")
349
+            }
350
+            lengths <- vapply(args,function(a){ncol(unlist(a))},integer(1))
351
+            if(length(unique(lengths)) != 1L){
352
+              stop("Inputs must have the same width.")
353
+            }
354
+            .check_bamfiles(args)
355
+            callNextMethod()
266 356
           }
267 357
 )
268 358
 
359
+setMethod("bindROWS", "SequenceData",
360
+          function (x, objects = list(), use.names = TRUE, ignore.mcols = FALSE, 
361
+                    check = TRUE) 
362
+          {
363
+            objects <- S4Vectors:::prepare_objects_to_bind(x, objects)
364
+            all_objects <- c(list(x), objects)
365
+            names <- unlist(lapply(all_objects,names))
366
+            if(any(duplicated(names))){
367
+              stop("Input must have unique names.")
368
+            }
369
+            .check_bamfiles(all_objects)
370
+            callNextMethod(x, objects, use.names = use.names, 
371
+                           ignore.mcols = ignore.mcols, check = FALSE)
372
+          }
373
+)
374
+
375
+setMethod("unlist", "SequenceData",
376
+          function(x, recursive = TRUE, use.names = FALSE) 
377
+          {
378
+            callNextMethod(x, recursive = recursive, use.names = FALSE) 
379
+          }
380
+)
381
+
382
+
269 383
 # constructor ------------------------------------------------------------------
270 384
 
271 385
 .quality_settings <- data.frame(
... ...
@@ -279,9 +393,9 @@ setMethod("rbind", "SequenceData",
279 393
   .norm_settings(input, .quality_settings, minQuality)[["minQuality"]]
280 394
 }
281 395
 
282
-.get_replicate_number <- function(bamfiles, conditions){
283
-  control_rep <- seq_along(bamfiles[conditions == "control"])
284
-  treated_rep <- seq_along(bamfiles[conditions == "treated"])
396
+.get_replicate_number <- function(conditions){
397
+  control_rep <- seq_along(conditions[conditions == "control"])
398
+  treated_rep <- seq_along(conditions[conditions == "treated"])
285 399
   rep <- c(control_rep,treated_rep)
286 400
   rep <- rep[c(which(conditions == "control"),
287 401
                which(conditions == "treated"))]
... ...
@@ -365,7 +479,7 @@ setMethod("rbind", "SequenceData",
365 479
   proto <- new(className)
366 480
   minQuality <- .norm_min_quality(args, proto@minQuality)
367 481
   condition <- factor(names(bamfiles))
368
-  replicate <- .get_replicate_number(bamfiles, condition)
482
+  replicate <- .get_replicate_number(condition)
369 483
   if(!assertive::is_a_non_empty_string(proto@dataDescription)){
370 484
     stop("'dataDescription' must be a single non empty character value.")
371 485
   }
... ...
@@ -419,16 +533,18 @@ setMethod("rbind", "SequenceData",
419 533
   ##############################################################################
420 534
   # Create SequenceData object
421 535
   ##############################################################################
536
+  unlist_data <- 
537
+    .SequenceDataFrame(class = gsub("SequenceData","",className),
538
+                       df = unlist(data, use.names = FALSE),
539
+                       ranges = unlist(ranges, use.names = FALSE),
540
+                       sequence = unlist(sequences, use.names = FALSE),
541
+                       replicate = replicate,
542
+                       condition = condition,
543
+                       bamfiles = bamfiles,
544
+                       seqinfo = seqinfo)
422 545
   ans <- new(className, 
423
-             bamfiles = bamfiles,
424
-             seqinfo = seqinfo,
425 546
              minQuality = minQuality,
426
-             unlistData = .SequenceDataFrame(gsub("SequenceData","",className),
427
-                                             unlist(data, use.names = FALSE),
428
-                                             unlist(ranges, use.names = FALSE),
429
-                                             unlist(sequences, use.names = FALSE),
430
-                                             replicate,
431
-                                             condition),
547
+             unlistData = unlist_data,
432 548
              partitioning = IRanges::PartitioningByEnd(data),
433 549
              ...)
434 550
   message("OK")
... ...
@@ -558,6 +674,8 @@ setMethod("rbind", "SequenceData",
558 674
 
559 675
 ################################################################################
560 676
 
677
+#' @rdname SequenceData-class
678
+#' @export
561 679
 setGeneric( 
562 680
   name = "SequenceData",
563 681
   signature = c("annotation","sequences"),
... ...
@@ -565,72 +683,96 @@ setGeneric(
565 683
     standardGeneric("SequenceData")
566 684
 ) 
567 685
 
686
+#' @rdname SequenceData-class
687
+#' @export
568 688
 setMethod("SequenceData",
569 689
           signature = c(annotation = "character", sequences = "character"),
570 690
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
571 691
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
572 692
                               seqinfo, ...)
573 693
           })
694
+#' @rdname SequenceData-class
695
+#' @export
574 696
 setMethod("SequenceData",
575 697
           signature = c(annotation = "character", sequences = "BSgenome"),
576 698
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
577 699
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
578 700
                               seqinfo, ...)
579 701
           })
702
+#' @rdname SequenceData-class
703
+#' @export
580 704
 setMethod("SequenceData",
581 705
           signature = c(annotation = "TxDb", sequences = "character"),
582 706
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
583 707
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
584 708
                               seqinfo, ...)
585 709
           })
710
+#' @rdname SequenceData-class
711
+#' @export
586 712
 setMethod("SequenceData",
587 713
           signature = c(annotation = "TxDb", sequences = "BSgenome"),
588 714
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
589 715
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
590 716
                               seqinfo, ...)
591 717
           })
718
+#' @rdname SequenceData-class
719
+#' @export
592 720
 setMethod("SequenceData",
593 721
           signature = c(annotation = "GRangesList", sequences = "character"),
594 722
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
595 723
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
596 724
                               seqinfo, ...)
597 725
           })
726
+#' @rdname SequenceData-class
727
+#' @export
598 728
 setMethod("SequenceData",
599 729
           signature = c(annotation = "GRangesList", sequences = "BSgenome"),
600 730
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
601 731
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
602 732
                               seqinfo, ...)
603 733
           })
734
+#' @rdname SequenceData-class
735
+#' @export
604 736
 setMethod("SequenceData",
605 737
           signature = c(annotation = "GFF3File", sequences = "BSgenome"),
606 738
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
607 739
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
608 740
                               seqinfo, ...)
609 741
           })
742
+#' @rdname SequenceData-class
743
+#' @export
610 744
 setMethod("SequenceData",
611 745
           signature = c(annotation = "GFF3File", sequences = "character"),
612 746
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
613 747
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
614 748
                               seqinfo, ...)
615 749
           })
750
+#' @rdname SequenceData-class
751
+#' @export
616 752
 setMethod("SequenceData",
617 753
           signature = c(annotation = "character", sequences = "FaFile"),
618 754
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
619 755
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
620 756
                               seqinfo, ...)
621 757
           })
758
+#' @rdname SequenceData-class
759
+#' @export
622 760
 setMethod("SequenceData",
623 761
           signature = c(annotation = "GFF3File", sequences = "FaFile"),
624 762
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
625 763
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
626 764
                               seqinfo, ...)
627 765
           })
766
+#' @rdname SequenceData-class
767
+#' @export
628 768
 setMethod("SequenceData",
629 769
           signature = c(annotation = "TxDb", sequences = "FaFile"),
630 770
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
631 771
             .new_SequenceData(dataType, bamfiles, annotation, sequences,
632 772
                               seqinfo, ...)
633 773
           })
774
+#' @rdname SequenceData-class
775
+#' @export
634 776
 setMethod("SequenceData",
635 777
           signature = c(annotation = "GRangesList", sequences = "FaFile"),
636 778
           function(dataType, bamfiles, annotation, sequences, seqinfo, ...){
... ...
@@ -659,7 +801,7 @@ setMethod("getData",
659 801
 #' @export
660 802
 setMethod(f = "bamfiles", 
661 803
           signature = signature(x = "SequenceData"),
662
-          definition = function(x){x@bamfiles})
804
+          definition = function(x){bamfiles(unlist(x))})
663 805
 #' @rdname SequenceData-functions
664 806
 #' @export
665 807
 setMethod(f = "conditions", 
... ...
@@ -674,12 +816,12 @@ setMethod(
674 816
     function(x){
675 817
       partitioning <- IRanges::PartitioningByEnd(x)
676 818
       unlisted_ranges <- ranges(unlist(x))
677
-      ends <- cumsum(width(unlisted_ranges)) == cumsum(width(partitioning))
678
-      partitioning_relist <- IRanges::PartitioningByEnd(which(ends))
679
-      names(partitioning_relist) <- names(x)
819
+      ends <- match(cumsum(width(partitioning)),cumsum(width(unlisted_ranges)))
820
+      partitioning_relist <- IRanges::PartitioningByEnd(ends)
680 821
       if(length(x) != length(partitioning_relist)){
681 822
         stop("ranges could not be relisted.")
682 823
       }
824
+      names(partitioning_relist) <- names(x)
683 825
       relist(unlisted_ranges, partitioning_relist)
684 826
     })
685 827
 #' @rdname SequenceData-functions
... ...
@@ -691,7 +833,7 @@ setMethod(f = "replicates",
691 833
 #' @export
692 834
 setMethod(f = "seqinfo", 
693 835
           signature = signature(x = "SequenceData"),
694
-          definition = function(x){x@seqinfo})
836
+          definition = function(x){seqinfo(unlist(x))})
695 837
 #' @rdname SequenceData-functions
696 838
 #' @export
697 839
 setMethod(f = "sequences", 
Browse code

refactored SequenceData and SequenceDataFrame classes

Felix Ernst authored on 30/05/2019 11:33:10
Showing1 changed files
... ...
@@ -95,20 +95,14 @@ NULL
95 95
 
96 96
 setClass("SequenceData",
97 97
          contains = c("VIRTUAL", "CompressedSplitDataFrameList"),
98
-         slots = c(ranges = "GRangesList",
99
-                   sequences = "XStringSet",
100
-                   sequencesType = "character",
98
+         slots = c(sequencesType = "character",
101 99
                    bamfiles = "BamFileList",
102
-                   condition = "factor",
103
-                   replicate = "factor",
104 100
                    seqinfo = "Seqinfo",
105 101
                    minQuality = "integer",
102
+                   unlistData = "SequenceDataFrame",
106 103
                    unlistType = "character",
107 104
                    dataDescription = "character"),
108
-         prototype = list(ranges = GRangesList(),
109
-                          sequencesType = "RNAStringSet",
110
-                          sequences = RNAStringSet(),
111
-                          unlistType = "SequenceDataFrame"))
105
+         prototype = list(sequencesType = "RNAStringSet"))
112 106
 
113 107
 setMethod(
114 108
   f = "initialize",
... ...
@@ -131,22 +125,17 @@ sequenceDataClass <- function(dataType){
131 125
   if(is(tmp,"try-error")){
132 126
     stop("Class '",ans,"' not found: ",tmp)
133 127
   }
134
-  
135 128
   ans
136 129
 }
137 130
 
138
-# for concat
139
-#' @rdname RNAmodR-internals
140
-setMethod("parallelSlotNames", "SequenceData",
141
-          function(x) c("ranges","sequences", callNextMethod())
142
-)
143
-
144 131
 #' @rdname SequenceData-functions
145 132
 setMethod("show", "SequenceData",
146 133
   function(object){
147 134
     k <- length(object)
148
-    data_nc <- ncol(object@unlistData)
149
-    ranges_mcols <- mcols(object@ranges@unlistData, use.names = FALSE)
135
+    unlisted_object <- object@unlistData
136
+    data_nc <- ncol(unlisted_object)
137
+    unlisted_ranges <- unlist(ranges(object),use.names = FALSE)
138
+    ranges_mcols <- mcols(unlisted_ranges, use.names = FALSE)
150 139
     ranges_nmc <- if (is.null(ranges_mcols)) 0L else ncol(ranges_mcols)
151 140
     cat(classNameForDisplay(object), " with ", k, " elements ",
152 141
         "containing ",sep = "")
... ...
@@ -157,9 +146,9 @@ setMethod("show", "SequenceData",
157 146
     out_data <- NULL
158 147
     # data
159 148
     if (data_nc > 0) {
160
-      data_col_names <- colnames(object@unlistData)
149
+      data_col_names <- colnames(unlisted_object)
161 150
       data_col_types <- 
162
-        lapply(object@unlistData, function(x) {
151
+        lapply(unlisted_object, function(x) {
163 152
           paste0("<", classNameForDisplay(x)[1],">")
164 153
         })
165 154
       out_data <- 
... ...
@@ -168,21 +157,22 @@ setMethod("show", "SequenceData",
168 157
     }
169 158
     cat("- Data columns:\n")
170 159
     print(out_data, quote = FALSE, right = TRUE)
171
-    cat("-  ",class(object@seqinfo), " object with ", 
172
-        summary(object@seqinfo), ":\n", sep = "")
160
+    cat("-  ",class(seqinfo(object)), " object with ", 
161
+        summary(seqinfo(object)), ":\n", sep = "")
173 162
   }
174 163
 )
175 164
 # validity ---------------------------------------------------------------------
176 165
 
177 166
 .valid.SequenceData_elements <- function(x){
178
-  nrow <- sum(unlist(width(ranges(x))))
179
-  if(nrow != nrow(x@unlistData)){
167
+  unlisted_x <- unlist(x, use.names=FALSE)
168
+  nrow <- sum(width(ranges(unlisted_x)))
169
+  if(nrow != nrow(unlisted_x)){
180 170
     return("row number of data does not match position covered by annotation.")
181 171
   }
182
-  if(nrow != sum(width(x@sequences))){
172
+  if(nrow != sum(width(sequences(x)))){
183 173
     return("Length of sequences does not match position covered by annotation.")
184 174
   }
185
-  if(is.null(rownames(x@unlistData))){
175
+  if(is.null(rownames(unlisted_x))){
186 176
     return("rownames of data is not set.")
187 177
   } else {
188 178
     seqs <- .seqs_rl_strand(ranges(x))
... ...
@@ -200,100 +190,16 @@ setMethod("show", "SequenceData",
200 190
 }
201 191
 S4Vectors::setValidity2(Class = "SequenceData", .valid.SequenceData)
202 192
 
203
-# replacing --------------------------------------------------------------------
193
+# coercion ---------------------------------------------------------------------
204 194
 
205
-#' @rdname RNAmodR-internals
206
-setReplaceMethod("[", "SequenceData",
207
-  function(x, i, j, ..., value) {
208
-    if (length(list(...)) > 0L){
209
-      stop("invalid replacement")
210
-    }
211
-    if(!missing(j)){
212
-      stop("replacement of columns not supported")
213
-    }
214
-    if(!is(value,class(x))){
215
-      stop("replacement 'value' must be of the same class than 'x'")
216
-    }
217
-    if (missing(i)){
218
-      x <- value
219
-    } else {
220
-      if(length(i) != length(value)){
221
-        warning("number of items to replace is not a multiple of replacement ",
222
-                "length")
223
-        value <- value[seq_along(i)]
224
-      }
225
-      x@ranges[i] <- ranges(value)
226
-      names(x@ranges)[i] <- names(ranges(value)) # must be set explicitly
227
-      x@sequences[i] <- sequences(value)
228
-      names(x@sequences)[i] <- names(sequences(value)) # must be set explicitly
229
-      # rownames needs to be savid since a replace removes them
230
-      rownames <- rownames(x)
231
-      rownames[i] <- rownames(value)
232
-      tmp <- callNextMethod(x = as(x,"SplitDataFrameList"), i = i,
233
-                            value = as(value,"SplitDataFrameList"))
234
-      x@unlistData <- tmp@unlistData
235
-      x@partitioning <- tmp@partitioning
236
-      rownames(x) <- rownames
237
-    }
238
-    validObject(x)
239
-    return(x)
240
-  }
241
-)
242
-
243
-#' @rdname RNAmodR-internals
244
-setMethod("setListElement", "SequenceData",
245
-  function(x, i, value){
246
-    if(!is(value,"SequenceDataFrame")){
247
-      stop("invalid value. must be 'SequenceDataFrame'.")
248
-    }
249
-    i2 <- S4Vectors::normalizeDoubleBracketSubscript(i, x, allow.append = TRUE,
250
-                                                     allow.nomatch = TRUE)
251
-    if(any(colnames(value) != colnames(unlist(x, use.names=FALSE)))){
252
-      stop("'value' does not have matching colnames.")
253
-    }
254
-    x@ranges[[i2]] <- ranges(value)
255
-    x@sequences[[i2]] <- sequences(value)
256
-    # rownames needs to be savid since a replace removes them
257
-    rownames <- rownames(x)
258
-    rownames[[i2]] <- rownames(value)
259
-    tmp <- callNextMethod(x = as(x,"SplitDataFrameList"), i = i,
260
-                          value = as(value,"DataFrame"))
261
-    x@unlistData <- tmp@unlistData
262
-    x@partitioning <- tmp@partitioning
263
-    rownames(x) <- rownames
264
-    validObject(x)
265
-    x
266
-  }
267
-)
268
-
269
-# looping ----------------------------------------------------------------------
270
-
271
-#' @importFrom IRanges PartitioningByEnd
272
-lapply_SequenceData <- function(X, FUN, ...){
273
-  FUN <- match.fun(FUN)
274
-  ans <- vector(mode = "list", length = length(X))
275
-  X_partitioning <- IRanges::PartitioningByEnd(X)
276
-  X_elt_width <- width(X_partitioning)
277
-  empty_idx <- which(X_elt_width == 0L)
278
-  if (length(empty_idx) != 0L){
279
-    ans[empty_idx] <- NULL
280
-  }
281
-  non_empty_idx <- which(X_elt_width != 0L)
282
-  if (length(non_empty_idx) == 0L){
283
-    return(ans)
284
-  }
285
-  ans[non_empty_idx] <- 
286
-    lapply(non_empty_idx, function(i){ FUN(getListElement(X,i), ...) })
287
-  ans
195
+.as_SplitDataFrameList <- function(from){
196
+  relist(as(unlist(from, use.names = FALSE),"DataFrame"),
197
+         IRanges::PartitioningByWidth(from))
288 198
 }
199
+setAs("SequenceData", "SplitDataFrameList", .as_SplitDataFrameList)
289 200
 
290
-setMethod("lapply", "SequenceData",
291
-  function(X, FUN, ...){
292
-    ans <- lapply_SequenceData(X, FUN, ...)
293
-    names(ans) <- names(X)
294
-    ans
295
-  }
296
-)
201
+
202
+# internals --------------------------------------------------------------------
297 203
 
298 204
 #' @importClassesFrom IRanges PartitioningByEnd
299 205
 #' @importFrom IRanges PartitioningByEnd
... ...
@@ -312,44 +218,36 @@ setMethod("extractROWS", "SequenceData",
312 218
     idx_on_unlisted_x <- 
313 219
       IRanges::IRanges(end = extractROWS(end(IRanges::PartitioningByEnd(x)), i),
314 220
                        width = ans_eltNROWS)
315
-    ans_unlistData <- extractROWS(x@unlistData, idx_on_unlisted_x)
316
-    ans_partitioning <- new2("PartitioningByEnd", end = ans_breakpoints,
317
-                             NAMES = extractROWS(names(x), i), check = FALSE)
221
+    ans_unlistData <- extractROWS(unlist(x,use.names = FALSE),
222
+                                  idx_on_unlisted_x)
223
+    ans_partitioning <- new("PartitioningByEnd", end = ans_breakpoints,
224
+                            NAMES = extractROWS(names(x), i))
318 225
     ans_elementMetadata <- extractROWS(x@elementMetadata, i)
319
-    ans_ranges <- extractROWS(x@ranges, i)
320
-    ans_sequences <- extractROWS(x@sequences, i)
321
-    initialize(x, ranges = ans_ranges, sequences = ans_sequences,
322
-               replicate = x@replicate, condition = x@condition,
323
-               bamfiles = x@bamfiles, seqinfo = x@seqinfo, 
226
+    initialize(x, bamfiles = x@bamfiles, seqinfo = x@seqinfo, 
324 227
                minQuality = x@minQuality, unlistData = ans_unlistData,
325 228
                partitioning = ans_partitioning, 
326 229
                elementMetadata = ans_elementMetadata)
327 230
   }
328 231
 )
329 232
 
330
-#' @rdname RNAmodR-internals
331
-#' @importFrom IRanges PartitioningByEnd
332
-setMethod("getListElement", "SequenceData",
333
-  function(x, i, exact=TRUE){
334
-    i2 <- normalizeDoubleBracketSubscript(i, x, exact = exact,
335
-                                          allow.NA = TRUE,
336
-                                          allow.nomatch = TRUE)
337
-    if (is.na(i2)){
338
-      return(NULL)
339
-    }
340
-    unlisted_x <- unlist(x, use.names = FALSE)
341
-    x_partitioning <- IRanges::PartitioningByEnd(x)
342
-    window_start <- start(x_partitioning)[i2]
343
-    window_end <- end(x_partitioning)[i2]
344
-    new(x@unlistType,
345
-        S4Vectors:::Vector_window(unlisted_x,
346
-                                  start = window_start,
347
-                                  end = window_end),
348
-        ranges = x@ranges[[i2]],
349
-        sequence = x@sequences[[i2]],
350
-        condition = x@condition,
351
-        replicate = x@replicate)
352
-  }
233
+setMethod("rownames", "SequenceData",
234
+          function (x){
235
+            ans <- rownames(unlist(x,use.names = FALSE), do.NULL = TRUE)
236
+            relist(ans,x)
237
+          }
238
+)
239
+
240
+# methods inherited from List and CompressedList, contain a coercion step
241
+# x <- as(x, "List", strict = FALSE)
242
+# 
243
+# This does not keep the SequenceData object intact resulting in coercion
244
+# to a CompressedSplitDataFrameList.
245
+setMethod("[[", "SequenceData",
246
+          function(x, i, j, ...) 
247
+          {
248
+            METHOD <- selectMethod("[[", "List")
249
+            METHOD(x, i, j, ...)
250
+          }
353 251
 )
354 252
 
355 253
 
... ...
@@ -359,7 +257,7 @@ setMethod("cbind", "SequenceData",
359 257
           function(...){
360 258
             arg1 <- list(...)[[1L]]
361 259
             stop("'rbind' is not supported for ",class(arg1),".")
362
-          }
260
+  }
363 261
 )
364 262
 setMethod("rbind", "SequenceData",
365 263
           function(...){
... ...
@@ -368,26 +266,6 @@ setMethod("rbind", "SequenceData",
368 266
           }
369 267
 )
370 268
 
371
-# unlisting --------------------------------------------------------------------
372
-
373
-setMethod("unlist", "SequenceData",
374
-          function(x, recursive=TRUE, use.names=TRUE){
375
-            if (!isTRUEorFALSE(use.names)){
376
-              stop("'use.names' must be TRUE or FALSE")
377
-            }
378
-            unlisted_x <- x@unlistData
379
-            if (use.names){
380
-              unlisted_x <- S4Vectors:::set_unlisted_names(unlisted_x, x)
381
-            }
382
-            new(x@unlistType,
383
-                unlisted_x,
384
-                ranges = unlist(ranges(x), use.names = use.names),
385
-                sequence = unlist(sequences(x)),
386
-                condition = x@condition,
387
-                replicate = x@replicate)
388
-          }
389
-)
390
-
391 269
 # constructor ------------------------------------------------------------------
392 270
 
393 271
 .quality_settings <- data.frame(
... ...
@@ -538,24 +416,25 @@ setMethod("unlist", "SequenceData",
538 416
      any(names(ranges) != names(data))){
539 417
     stop("")
540 418
   }
541
-  message("OK")
542 419
   ##############################################################################
543 420
   # Create SequenceData object
544 421
   ##############################################################################
545
-  new2(className, 
546
-       ranges = ranges,
547
-       sequences = sequences,
548
-       bamfiles = bamfiles, 
549
-       condition = condition,
550
-       replicate = replicate,
551
-       seqinfo = seqinfo,
552
-       minQuality = minQuality,
553
-       unlistData = unlist(data, use.names = FALSE),
554
-       partitioning = IRanges::PartitioningByEnd(data),
555
-       ...)
422
+  ans <- new(className, 
423
+             bamfiles = bamfiles,
424
+             seqinfo = seqinfo,
425
+             minQuality = minQuality,
426
+             unlistData = .SequenceDataFrame(gsub("SequenceData","",className),
427
+                                             unlist(data, use.names = FALSE),
428
+                                             unlist(ranges, use.names = FALSE),
429
+                                             unlist(sequences, use.names = FALSE),
430
+                                             replicate,
431
+                                             condition),
432
+             partitioning = IRanges::PartitioningByEnd(data),
433
+             ...)
434
+  message("OK")
435
+  ans
556 436
 }
557 437
 
558
-
559 438
 .SequenceData_settings <- data.frame(
560 439
   variable = c("max_depth",
561 440
                "minLength",
... ...
@@ -603,10 +482,18 @@ setMethod("unlist", "SequenceData",
603 482
   # get annotation and sequence data
604 483
   annotation <- .norm_annotation(annotation, className)
605 484
   sequences <- .norm_sequences(sequences, className)
485
+  seqinfo_missing <- missing(seqinfo)
606 486
   seqinfo <- .norm_seqnames(bamfiles, annotation, sequences, seqinfo, className)
607 487
   # load transcript data and sequence data
608 488
   grl <- .load_annotation(annotation)
609 489
   grl <- .subset_by_seqinfo(grl, seqinfo)
490
+  if(length(grl) == 0L){
491
+    if(seqinfo_missing){
492
+      stop("No overlap between bamfiles and annotation.")
493
+    } else {
494
+      stop("No overlap between bamfiles, annotation and seqinfo.")
495
+    }
496
+  }
610 497
   sequences <- .load_transcript_sequences(sequences, grl)
611 498
   # create the class
612 499
   .SequenceData(className, bamfiles, grl, sequences, seqinfo, args)
... ...
@@ -777,17 +664,29 @@ setMethod(f = "bamfiles",
777 664
 #' @export
778 665
 setMethod(f = "conditions", 
779 666
           signature = signature(object = "SequenceData"),
780
-          definition = function(object){object@condition})
667
+          definition = function(object){conditions(unlist(object))})
781 668
 #' @rdname SequenceData-functions
782 669
 #' @export
783
-setMethod(f = "ranges", 
784
-          signature = signature(x = "SequenceData"),
785
-          definition = function(x){x@ranges})
670
+setMethod(
671
+  f = "ranges", 
672
+  signature = signature(x = "SequenceData"),
673
+  definition = 
674
+    function(x){
675
+      partitioning <- IRanges::PartitioningByEnd(x)
676
+      unlisted_ranges <- ranges(unlist(x))
677
+      ends <- cumsum(width(unlisted_ranges)) == cumsum(width(partitioning))
678
+      partitioning_relist <- IRanges::PartitioningByEnd(which(ends))
679
+      names(partitioning_relist) <- names(x)
680
+      if(length(x) != length(partitioning_relist)){
681
+        stop("ranges could not be relisted.")
682
+      }
683
+      relist(unlisted_ranges, partitioning_relist)
684
+    })
786 685
 #' @rdname SequenceData-functions
787 686
 #' @export
788 687
 setMethod(f = "replicates", 
789 688
           signature = signature(x = "SequenceData"),
790
-          definition = function(x){x@replicate})
689
+          definition = function(x){replicates(unlist(x))})
791 690
 #' @rdname SequenceData-functions
792 691
 #' @export
793 692
 setMethod(f = "seqinfo", 
... ...
@@ -797,7 +696,7 @@ setMethod(f = "seqinfo",
797 696
 #' @export
798 697
 setMethod(f = "sequences", 
799 698
           signature = signature(x = "SequenceData"),
800
-          definition = function(x){x@sequences})
699
+          definition = function(x){relist(sequences(unlist(x)),x)})
801 700
 
802 701
 # dummy functions --------------------------------------------------------------
803 702
 # this needs to be implemented by each subclass
Browse code

restructured coverage data loading

Felix Ernst authored on 29/05/2019 11:12:19
Showing1 changed files
... ...
@@ -388,14 +388,6 @@ setMethod("unlist", "SequenceData",
388 388
           }
389 389
 )
390 390
 
391
-# accessors --------------------------------------------------------------------
392
-
393
-setMethod("rownames", "SequenceData",
394
-          function(x, do.NULL = TRUE, prefix = "row"){
395
-            relist(rownames(unlist(x)),IRanges::PartitioningByEnd(x))
396
-          }
397
-)
398
-
399 391
 # constructor ------------------------------------------------------------------
400 392
 
401 393
 .quality_settings <- data.frame(
Browse code

removed direct calls to class slots were possible

Felix Ernst authored on 28/05/2019 20:20:32
Showing1 changed files
... ...
@@ -300,7 +300,7 @@ setMethod("lapply", "SequenceData",
300 300
 setMethod("extractROWS", "SequenceData",
301 301
   function(x, i){
302 302
     i <- normalizeSingleBracketSubscript(i, x, as.NSBS = TRUE)
303
-    ans_eltNROWS <- extractROWS(width(x@partitioning), i)
303
+    ans_eltNROWS <- extractROWS(width(IRanges::PartitioningByEnd(x)), i)
304 304
     ans_breakpoints <- suppressWarnings(cumsum(ans_eltNROWS))
305 305
     nbreakpoints <- length(ans_breakpoints)
306 306
     if (nbreakpoints != 0L && is.na(ans_breakpoints[[nbreakpoints]])){
... ...
@@ -310,7 +310,7 @@ setMethod("extractROWS", "SequenceData",
310 310
            "This is not implemented, yet.")
311 311
     }
312 312
     idx_on_unlisted_x <- 
313
-      IRanges::IRanges(end = extractROWS(end(x@partitioning), i),
313
+      IRanges::IRanges(end = extractROWS(end(IRanges::PartitioningByEnd(x)), i),
314 314
                        width = ans_eltNROWS)
315 315
     ans_unlistData <- extractROWS(x@unlistData, idx_on_unlisted_x)
316 316
     ans_partitioning <- new2("PartitioningByEnd", end = ans_breakpoints,
... ...
@@ -392,7 +392,7 @@ setMethod("unlist", "SequenceData",
392 392
 
393 393
 setMethod("rownames", "SequenceData",
394 394
           function(x, do.NULL = TRUE, prefix = "row"){
395
-            relist(rownames(x@unlistData),x@partitioning)
395
+            relist(rownames(unlist(x)),IRanges::PartitioningByEnd(x))
396 396
           }
397 397
 )
398 398
 
Browse code

updated settings functions (again) and refactored annotation loading

Felix Ernst authored on 27/05/2019 14:27:12
Showing1 changed files
... ...
@@ -514,7 +514,6 @@ setMethod("rownames", "SequenceData",
514 514
   message("Loading ", proto@dataDescription, " from BAM files ... ",
515 515
           appendLF = FALSE)
516 516
   data <- getData(proto, bamfiles, ranges, sequences, param, args)
517
-  browser()
518 517
   names(data) <- paste0(names(data),".",condition,".",replicate)
519 518
   # check positions
520 519
   .check_positions_in_data(data, positions)
... ...
@@ -574,7 +573,7 @@ setMethod("rownames", "SequenceData",
574 573
               ".not_integer_bigger_equal_than_one_nor_na"),
575 574
   errorValue = c(TRUE,
576 575
                  TRUE,
577
-                 FALSE),
576
+                 TRUE),
578 577
   errorMessage = c("'max_depth' must be integer with a value higher than 10L.",
579 578
                    "'minLength' must be integer with a value higher than 0L or NA.",
580 579
                    "'maxLength' must be integer with a value higher than 1L or NA."),
... ...
@@ -583,10 +582,16 @@ setMethod("rownames", "SequenceData",
583 582
 .get_SequenceData_args <- function(input){
584 583
   minQuality <- .norm_min_quality(input, NULL)
585 584
   max_depth <- 10000L # the default is 250, which is to small
586
-  minLength <- NA
587
-  maxLength <- NA
585
+  minLength <- NA_integer_
586
+  maxLength <- NA_integer_
588 587
   args <- .norm_settings(input, .SequenceData_settings, max_depth, minLength,
589 588
                          maxLength)
589
+  if(!is.na(args[["minLength"]]) && !is.na(args[["maxLength"]])){
590
+    if(args[["minLength"]] > args[["maxLength"]]){
591
+      stop("'minLength' must be smaller or equal to 'maxLength'.",
592
+           call. = FALSE)
593
+    }
594
+  }
590 595
   #
591 596
   args <- c(list(minQuality = minQuality),
592 597
             args)
... ...
@@ -618,24 +623,39 @@ setMethod("rownames", "SequenceData",
618 623
 # constructor utility functions ------------------------------------------------
619 624
 # also used at other places
620 625
 
626
+# check for multiple seqnames per ranges
627
+.norm_unique_seqnames <- function(ranges){
628
+  seqnames_ranges_u <- unique(GenomeInfoDb::seqnames(ranges))
629
+  f <- lengths(seqnames_ranges_u) != 1L
630
+  if(any(f)){
631
+    message("Found transcript annotation with non unique seqnames. Removing ",
632
+            "them ...")
633
+    ranges <- ranges[!f]
634
+    GenomeInfoDb::seqlevels(ranges) <- GenomeInfoDb::seqlevelsInUse(ranges)
635
+  }
636
+  ranges
637
+}
638
+
621 639
 # load annotation as GRangesList. one element per transcript
622 640
 .load_annotation <- function(annotation){
623 641
   if(is(annotation,"TxDb")){
624 642
     ranges <- GenomicFeatures::exonsBy(annotation, by = "tx")
643
+    ranges <- .norm_unique_seqnames(ranges)
625 644
   } else if(is(annotation,"GRangesList")) {
626 645
     ranges <- annotation
646
+    ranges <- .norm_unique_seqnames(ranges)
647
+    # make sure, that the elements are reverse order if on minus strand
648
+    unlisted_ranges <- unlist(ranges)
649
+    strand <- strand(unlisted_ranges) == "+"
650
+    unlisted_ranges <- 
651
+      c(unlisted_ranges[strand][order(unlisted_ranges[strand])],
652
+        unlisted_ranges[!strand][rev(order(unlisted_ranges[!strand]))])
653
+    ranges <- split(unname(unlisted_ranges),
654
+                    factor(names(unlisted_ranges),
655
+                           levels = unique(names(ranges))))
627 656
   } else {
628 657
     stop("Annotation is not a 'TxDb' or a 'GRangesList'.")
629 658
   }
630
-  browser()
631
-  # check for multiple seqnames per ranges
632
-  seqnames_ranges_u <- unique(GenomeInfoDb::seqnames(ranges))
633
-  f <- lengths(seqnames_ranges_u) != 1L
634
-  if(any(f)){
635
-    warning("Found transcript annotation with non unique seqnames. Removing ",
636
-            "them ...")
637
-    ranges <- ranges[!f]
638
-  }
639 659
   ranges
640 660
 }
641 661
 
... ...
@@ -644,7 +664,7 @@ setMethod("rownames", "SequenceData",
644 664
 # element
645 665
 .load_transcript_sequences <- function(sequences, grl){
646 666
   seq <- Biostrings::getSeq(sequences, unlist(grl))
647
-  seq <- relist(unlist(seq),PartitioningByWidth(sum(width(grl))))
667
+  seq <- relist(unlist(seq),IRanges::PartitioningByWidth(sum(width(grl))))
648 668
   names(seq) <- names(grl)
649 669
   as(seq,"RNAStringSet")
650 670
 }
... ...
@@ -652,9 +672,8 @@ setMethod("rownames", "SequenceData",
652 672
 # remove any elements, which are not in the seqinfo
653 673
 .subset_by_seqinfo <- function(grl, seqinfo){
654 674
   grl <- grl[GenomicRanges::seqnames(grl) %in% GenomeInfoDb::seqnames(seqinfo)]
655
-  grl <- grl[width(grl@partitioning) != 0L]
656
-  GenomeInfoDb::seqlevels(grl) <- GenomeInfoDb::seqlevels(seqinfo)
657
-  grl@unlistData <- .rebase_GRanges(grl@unlistData)
675
+  grl <- grl[width(IRanges::PartitioningByWidth(grl)) != 0L]
676
+  GenomeInfoDb::seqlevels(grl) <- GenomeInfoDb::seqlevelsInUse(grl)
658 677