R/Modifier-class.R
1f8fe8c2
 #' @include RNAmodR.R
09f9831a
 #' @include SequenceData-class.R
f0180769
 #' @include SequenceDataSet-class.R
30dc4ddf
 #' @include SequenceDataList-class.R
d1217371
 #' @include Modifier-utils.R
57698a41
 #' @include settings.R
1f8fe8c2
 NULL
 
16803e67
 invalidMessage <- paste0("Settings were changed after data aggregation or ",
                          "modification search. Rerun with modify(x,force = ",
                          "TRUE) to update with current settings.")
 
a4b215eb
 #' @name Modifier-class
 #' @aliases Modifier
b75e0751
 #'
a4b215eb
 #' @title The Modifier class
b75e0751
 #'
14ccc6b7
 #' @description
f2c595d7
 #' The \code{Modifier} class is a virtual class, which provides the central
 #' functionality to search for post-transcriptional RNA modification patterns in
 #' high throughput sequencing data.
b75e0751
 #'
14ccc6b7
 #' Each subclass has to implement the following functions:
b75e0751
 #'
14ccc6b7
 #' \itemize{
8a505469
 #' \item{Slot \code{nucleotide}: } {Either "RNA" or "DNA". For conveniance the
 #' subclasses \code{RNAModifier} and \code{DNAModifier} are already available
 #' and can be inherited from.}
 #' \item{Function \code{\link{aggregateData}}: }{used for specific data 
 #' aggregation}
 #' \item{Function \code{\link{findMod}}: }{used for specific search for 
 #' modifications}
14ccc6b7
 #' }
b75e0751
 #'
a4b215eb
 #' Optionally the function \code{\link[=Modifier-functions]{settings<-}} can be
6daa6377
 #' implemented to store additional arguments, which the base class does not
 #' recognize.
b75e0751
 #'
 #' \code{Modifier} objects are constructed centrally by calling
14ccc6b7
 #' \code{Modifier()} with a \code{className} matching the specific class to be
405606cb
 #' constructed. This will trigger the immediate analysis, if \code{find.mod} is
f2c595d7
 #' not set to \code{FALSE}.
b75e0751
 #'
 #'
 #' @section Creation:
750c3c85
 #' \code{Modifier} objects can be created in two ways, either by providing a
b75e0751
 #' list of bamfiles or
 #' \code{SequenceData}/\code{SequenceDataSet}/\code{SequenceDataList} objects,
750c3c85
 #' which match the structure in \code{dataType()}.
b75e0751
 #'
750c3c85
 #' \code{dataType()} can be a \code{character} vector or a \code{list} of
b75e0751
 #' \code{character} vectors and depending on this the input files have to
750c3c85
 #' follow this structure:
b75e0751
 #'
750c3c85
 #' \itemize{
 #' \item{a single \code{character}:} {a \code{SequenceData} is
 #' constructed/expected.}
 #' \item{a \code{character} vector:} {a \code{SequenceDataSet} is
 #' constructed/expected.}
 #' \item{a \code{list} of \code{character} vectors:} {a \code{SequenceDataList}
 #' is constructed/expected.}
 #' }
b75e0751
 #'
f2c595d7
 #' The cases for a \code{SequenceData} or \code{SequenceDataSet} are straight
 #' forward, since the input remains the same. The last case is special, since it
 #' is a hypothetical option, in which bam files from two or more different
 #' methods have to be combined to reliably detect a single modification (The
 #' elements of a \code{SequenceDataList} don't have to be created from the
 #' bamfiles, whereas from a \code{SequenceDataSet} they have to be).
b75e0751
 #'
750c3c85
 #' For this example a \code{list} of \code{character} vectors is expected.
b75e0751
 #' Each element must be named according to the names of \code{dataType()} and
750c3c85
 #' contain a \code{character} vector for creating a \code{SequenceData} object.
16803e67
 #' 
 #' All additional options must be named and will be passed to the
 #' \code{\link[=settings]{settings}} function and onto the \code{SequenceData}
 #' objects, if \code{x} is not a \code{SequenceData} object or a list of
 #' \code{SequenceData} objects.
b75e0751
 #'
14ccc6b7
 #' @param className The name of the class which should be constructed.
 #' @param x the input which can be of the following types
 #' \itemize{
f2c595d7
 #' \item{\code{SequenceData}:} {a single \code{SequenceData} or a list
 #' containing only \code{SequenceData} objects. The input will just be used to
 #' file the \code{data} slot of the \code{Modifier} and must match the
 #' requirements of specific \code{Modifier} class.}
14ccc6b7
 #' \item{\code{BamFileList}:} {a named \code{BamFileList}}
 #' \item{\code{character}:} {a \code{character} vector, which must be coercible
 #' to a named \code{BamFileList} referencing existing bam files. Valid names are
 #' \code{control} and \code{treated} to define conditions and replicates}
 #' }
 #' @param annotation annotation data, which must match the information contained
b75e0751
 #' in the BAM files. This parameter is only required if \code{x} is not a
f2c595d7
 #' \code{SequenceData} object or a list of \code{SequenceData} objects.
 #' @param sequences sequences matching the target sequences the reads were
 #'   mapped onto. This must match the information contained in the BAM files.
 #'   TThis parameter is only required if \code{x} is not a \code{SequenceData}
 #'   object or a list of \code{SequenceData} objects.
b75e0751
 #' @param seqinfo An optional \code{\link[GenomeInfoDb:Seqinfo-class]{Seqinfo}}
 #' argument or character vector, which can be coerced to one, to subset the
f2c595d7
 #' sequences to be analyzed on a per chromosome basis.
14ccc6b7
 #' @param ... Additional otpional parameters:
 #' \itemize{
b75e0751
 #' \item{\code{find.mod}:} {\code{TRUE} or \code{FALSE}: should the search for
 #' for modifications be triggered upon construction? If not the search can be
14ccc6b7
 #' started by calling the \code{modify()} function.}
16803e67
 #' \item{additional parameters depending on the specific \code{Modifier} class}
14ccc6b7
 #' }
16803e67
 #' All additional options must be named and will be passed to the
 #' \code{\link[=settings]{settings}} function and onto the \code{SequenceData}
 #' objects, if \code{x} is not a \code{SequenceData} object or a list of
 #' \code{SequenceData} objects.
b75e0751
 #'
8a505469
 #' @slot nucleotide a \code{character} value, which needs to contain "RNA" or 
 #' "DNA"
b75e0751
 #' @slot mod a \code{character} value, which needs to contain one or more
 #' elements from the alphabet of a
8a505469
 #' \code{\link[Modstrings:ModRNAString]{ModRNAString}} or 
 #' \code{\link[Modstrings:ModDNAString]{ModDNAString}} class.
14ccc6b7
 #' @slot score the main score identifier used for visualizations
b75e0751
 #' @slot dataType the class name(s) of the \code{SequenceData} class used
14ccc6b7
 #' @slot bamfiles the input bam files as \code{BamFileList}
b75e0751
 #' @slot condition conditions along the \code{BamFileList}: Either
14ccc6b7
 #' \code{control} or \code{treated}
 #' @slot replicate replicate number along the \code{BamFileList} for each of the
 #' condition types.
b75e0751
 #' @slot data The sequence data object: Either a \code{SequenceData},
 #' \code{SequenceDataSet} or a \code{SequenceDataList} object, if more than one
f0180769
 #' \code{dataType} is used.
14ccc6b7
 #' @slot aggregate the aggregated data as a \code{SplitDataFrameList}
 #' @slot modifications the found modifications as a \code{GRanges} object
57698a41
 #' @slot settings arguments used for the analysis as a \code{list}
14ccc6b7
 #' @slot aggregateValidForCurrentArguments \code{TRUE} or \code{FALSE} whether
 #' the aggregate data was constructed with the current arguments
b75e0751
 #' @slot modificationsValidForCurrentArguments \code{TRUE} or \code{FALSE}
14ccc6b7
 #' whether the modifications were found with the current arguments
b75e0751
 #'
750c3c85
 #' @return a \code{Modifier} object of type \code{className}
1f8fe8c2
 NULL
 
a4b215eb
 #' @name Modifier-functions
b75e0751
 #'
a4b215eb
 #' @title Modifier/ModifierSet functions
b75e0751
 #'
a4b215eb
 #' @description
 #' For the \code{Modifier} and  \code{ModifierSet} classes a number of functions
 #' are implemented to access the data stored by the object.
16803e67
 #' 
 #' The \code{validAggregate} and \code{validModification} functions check if
 #' \code{\link[=settings]{settings}} have been modified, after the data was 
 #' loaded. This potentially invalidates them. To update the data, run the
 #' \code{aggregate} or the \code{modify} function.
b75e0751
 #'
a4b215eb
 #' @param x,object a \code{Modifier} or \code{ModifierSet} class
41ddf102
 #' @param modified For \code{sequences}: \code{TRUE} or \code{FALSE}: Should
8a505469
 #' the sequences be returned as a \code{ModRNAString}/\code{ModDNAString} with
 #' the found modifications added on top of the \code{RNAString}/
 #' \code{DNAString}? See 
41ddf102
 #' \code{\link[Modstrings:separate]{combineIntoModstrings}}.
b8137aff
 #' @param perTranscript \code{TRUE} or \code{FALSE}: Should the positions shown
 #' per transcript? (default: \code{perTranscript = FALSE})
b75e0751
 #' @param ... Additional arguments.
 #'
21b04006
 #' @return
 #' \itemize{
f2c595d7
 #' \item{\code{modifierType}:} {a character vector with the appropriate class
 #' Name of a \code{\link[=Modifier-class]{Modifier}}.}
8a505469
 #' \item{\code{modType}:} {a character vector with the modifications detected by
 #' the \code{Modifier} class.}
 #' \item{\code{seqtype}:} {a single character value defining if either
 #' "RNA" or "DNA" modifications are detected by the \code{Modifier} class.}
f2c595d7
 #' \item{\code{mainScore}:} {a character vector.}
 #' \item{\code{sequenceData}:} {a \code{SequenceData} object.}
 #' \item{\code{modifications}:} {a \code{GRanges} or \code{GRangesList} object
21b04006
 #' describing the found modifications.}
f2c595d7
 #' \item{\code{seqinfo}:} {a \code{Seqinfo} object.}
 #' \item{\code{sequences}:} {a \code{RNAStingSet} object.}
b75e0751
 #' \item{\code{ranges}:} {a \code{GRangesList} object with each element per
f2c595d7
 #' transcript.}
 #' \item{\code{bamfiles}:} {a \code{BamFileList} object.}
16803e67
 #' \item{\code{validAggregate}:} {\code{TRUE} or \code{FALSE}. Checks if current
 #' settings are the same for which the data was aggregate}
 #' \item{\code{validModification}:} {\code{TRUE} or \code{FALSE}. Checks if 
 #' current settings are the same for which modification were found}
21b04006
 #' }
16803e67
 #' 
 #' @seealso \code{\link[=settings]{settings}}
b75e0751
 #'
21b04006
 #' @examples
 #' data(msi,package="RNAmodR")
 #' mi <- msi[[1]]
 #' modifierType(mi) # The class name of the Modifier object
8a505469
 #' modifierType(msi)
 #' seqtype(mi)
 #' modType(mi)
21b04006
 #' mainScore(mi)
cd326fd6
 #' sequenceData(mi)
21b04006
 #' modifications(mi)
 #' # general accessors
 #' seqinfo(mi)
 #' sequences(mi)
 #' ranges(mi)
 #' bamfiles(mi)
a4b215eb
 NULL
 
f0180769
 setClassUnion("list_OR_character",
               c("list", "character"))
c70f0bc6
 #' @importClassesFrom Rsamtools BamFileList PileupFiles
 setClassUnion("list_OR_BamFileList",
               c("list", "BamFileList"))
30dc4ddf
 
a4b215eb
 #' @rdname Modifier-class
1f8fe8c2
 #' @export
d1217371
 setClass("Modifier",
1f8fe8c2
          contains = c("VIRTUAL"),
8a505469
          slots = c(seqtype = "character", # this have to be populated by subclass,
                    mod = "character", # this have to be populated by subclass
b51379c0
                    score = "character", # this have to be populated by subclass
f0180769
                    dataType = "list_OR_character", # this have to be populated by subclass
c70f0bc6
                    bamfiles = "list_OR_BamFileList",
6daa6377
                    condition = "factor",
14ccc6b7
                    replicate = "factor",
f0180769
                    data = "SD_or_SDS_or_SDL",
e7831bc9
                    aggregate = "CompressedSplitDataFrameList",
f49b7684
                    modifications = "GRanges",
57698a41
                    settings = "list",
f49b7684
                    aggregateValidForCurrentArguments = "logical",
                    modificationsValidForCurrentArguments = "logical"),
8cd7f034
          prototype = list(aggregate = new("CompressedSplitDFrameList"),
                           aggregateValidForCurrentArguments = FALSE,
f49b7684
                           modificationsValidForCurrentArguments = FALSE))
1f8fe8c2
 
2be8c7e1
 # validity ---------------------------------------------------------------------
 
16803e67
 .check_SequenceData_elements <- function(x, data){
   if(is(data,"SequenceData")){
     data <- list(data)
   } else if(is(data,"list")){
     elementTypeMatch <- !vapply(data,is,logical(1),"SequenceData")
abdd8686
     if(any(elementTypeMatch)){
       stop("Not all elements are 'SequenceData' objects.", call. = FALSE)
2be8c7e1
     }
16803e67
   } else if(!is(data,"SequenceDataSet")){
     stop("")
2be8c7e1
   }
16803e67
   elementTypes <- vapply(data,class,character(1))
750c3c85
   if(length(elementTypes) != length(dataType(x))){
2be8c7e1
     stop("Number of 'SequenceData' elements does not match the requirements of",
750c3c85
          " ",class(x),". '",paste(dataType(x), collapse = "','"),"' are ",
2be8c7e1
          "required", call. = FALSE)
   }
750c3c85
   elementTypes <- elementTypes[match(elementTypes,dataType(x))]
9018aeec
   if(any(is.na(elementTypes)) || any(elementTypes != dataType(x))){
2be8c7e1
     stop("Type of SequenceData elements does not match the requirements of ",
750c3c85
          class(x),". '",paste(dataType(x), collapse = "','"),"' are ",
2be8c7e1
          "required", call. = FALSE)
   }
ef579737
   NULL
2be8c7e1
 }
 
16803e67
 .check_SequenceDataList_data_elements <- function(x, data){
   ans <- lapply(data, .check_SequenceData_elements, x)
f0180769
   if(all(vapply(ans,is.null))) {
     return(NULL)
   }
   ans
 }
 
 .check_Modifier_data_elements <- function(x, data){
   if(is(data,"SequenceData") || is(data,"SequenceDataSet")){
     return(.check_SequenceData_elements(x, data))
b75e0751
   }
50390ea5
   .check_SequenceDataList_data_elements(x, data)
f0180769
 }
 
2be8c7e1
 .valid_SequenceData <- function(x){
f0180769
   tmp <- try(.check_Modifier_data_elements(x,x@data))
2be8c7e1
   if (inherits(tmp, "try-error")){
     return(tmp)
   }
81e6327d
   NULL
 }
2be8c7e1
 
 .valid_Modifier <- function(x){
8a505469
   if(is.null(x@seqtype)){
     return("'seqtype' slot not populated.")
   }
   if(!.is_valid_nucleotide_seqtype(seqtype(x))){
     return(paste0("'seqtype' slot must contain the character value '",
                   seqtype(RNAString()),"' or '",seqtype(DNAString()),"'."))
   }
ef579737
   seqdata <- x@data
8a505469
   if(seqtype(x) != seqtype(seqdata)){
     return("'seqtype' does not match seqtype() of SequenceData contained ",
            "within Modifier object.")
   }
c70f0bc6
   if(is.list(x@bamfiles)){
     test <- !vapply(x@bamfiles,is,logical(1),"BamFileList")
     if(any(test)){
       return("Invalid slot: 'bamfiles'")
     }
   }
ef579737
   if(is(seqdata,"SequenceData")){
     seqdata <- list(seqdata)
   }
c3cad704
   if(!.is_a_bool(x@aggregateValidForCurrentArguments)){
c70f0bc6
     return("Invalid slot: 'aggregateValidForCurrentArguments'")
abdd8686
   }
c3cad704
   if(!.is_a_bool(x@aggregateValidForCurrentArguments)){
c70f0bc6
     return("Invalid slot: 'modificationsValidForCurrentArguments'")
abdd8686
   }
478352fe
   if(hasAggregateData(x)){
     data <- getAggregateData(x)
acf4e07e
     if(is.null(rownames(unlist(data, use.names = FALSE)))){
478352fe
       return("rownames of aggregate data is not set.")
     } else {
       seqs <- .seqs_rl_strand(ranges(x))
       if(any(any(seqs != IRanges::IntegerList(rownames(data))))){
         return(paste0("Out of range rownames of aggregate data. The rownames ",
                       "do not match the genomic coordinate covered by the ",
                       "annotation data."))
       }
     }
   }
abdd8686
   c(.valid_SequenceData(x), unlist(lapply(seqdata, .valid.SequenceData)))
2be8c7e1
 }
abdd8686
 S4Vectors::setValidity2(Class = "Modifier", .valid_Modifier)
81e6327d
 
 # show -------------------------------------------------------------------------
 
f49b7684
 .show_settings <- function(settings){
17651fca
   l <- length(settings)
14ccc6b7
   if(l == 0L){
     return(NULL)
   }
17651fca
   nc <- 6
   nr <- ceiling(l / nc)
f49b7684
   settings <- lapply(seq_len(nr),
                      function(i){
                        f <- seq.int(from = (i * nc) - nc + 1L,
                                     to = i * nc)
                        f <- f[f <= l]
cede7c18
                        settings[,f,drop = FALSE]
f49b7684
                      })
17651fca
   if(is.null(rownames(settings[[1]]))){
16803e67
     setting_names <- rep(" ",nrow(settings[[1]]))
17651fca
   } else {
16803e67
     setting_names <- rownames(settings[[1]])
17651fca
   }
f49b7684
   for(i in seq_along(settings)){
     out <-
       as.matrix(format(as.data.frame(
         lapply(settings[[i]],showAsCell),
         optional = TRUE)))
     classinfo <-
       matrix(unlist(lapply(settings[[i]], function(x) {
         paste0("<", classNameForDisplay(x)[1],
                ">")
       }), use.names = FALSE), nrow = 1,
       dimnames = list("", colnames(out)))
     out <- rbind(classinfo, out)
16803e67
     rownames(out) <- c(" ",setting_names)
f49b7684
     print(out, quote = FALSE, right = TRUE)
   }
 }
 
a4b215eb
 #' @rdname Modifier-functions
1f8fe8c2
 setMethod(
b75e0751
   f = "show",
d1217371
   signature = signature(object = "Modifier"),
1f8fe8c2
   definition = function(object) {
750c3c85
     cat("A", class(object), "object containing",dataType(object),
104daf68
         "with",length(object@data),"elements.\n")
ef579737
     files <- BiocGenerics::path(object@bamfiles)
f49b7684
     cat("| Input files:\n",paste0("  - ",names(files),": ",files,"\n"))
8a505469
     cat("| Nucleotide - Modification type(s): ",
         paste0(seqtype(object), collapse = " / ")," - ",
         paste0(modType(object), collapse = " / "),"\n")
f49b7684
     cat("| Modifications found:",ifelse(length(object@modifications) != 0L,
104daf68
                                       paste0("yes (",
                                              length(object@modifications),
                                              ")"),
f49b7684
                                       "no"),"\n")
     cat("| Settings:\n")
     settings <- settings(object)
     settings <- lapply(settings,
                        function(s){
                          if(length(s) > 1L){
                            ans <- List(s)
                            return(ans)
                          }
                          s
                        })
     settings <- DataFrame(settings)
     .show_settings(settings)
abdd8686
     valid <- c(validAggregate(object), validModification(object))
f49b7684
     if(!all(valid)){
16803e67
       warning(invalidMessage, call. = FALSE)
f49b7684
     }
1f8fe8c2
   }
 )
4730877c
 
 # accessors --------------------------------------------------------------------
 
c70f0bc6
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "bamfiles",
c70f0bc6
           signature = signature(x = "Modifier"),
           definition = function(x){x@bamfiles})
8a505469
 
c70f0bc6
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "conditions",
c70f0bc6
           signature = signature(object = "Modifier"),
           definition = function(object){
45929b71
             object@condition
c70f0bc6
           })
8a505469
 
c70f0bc6
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "mainScore",
c70f0bc6
           signature = signature(x = "Modifier"),
           definition = function(x){x@score})
 
 # converts the genomic coordinates to transcript based coordinates
 .get_modifications_per_transcript <- function(x){
   grl <- ranges(x)
   strand_u <- .get_strand_u_GRangesList(grl)
   seqs <- .seqs_rl_by(grl, 1L)
   seqs[strand_u == "-"] <- .seqs_rl_by(grl[strand_u == "-"], -1L)
   modifications <- modifications(x)
   if(is(modifications,"GRangesList")){
     modifications <- unlist(modifications)
     modifications <- modifications[!duplicated(modifications)]
   }
   start_mod <- start(modifications)
   parent_mod <- as.character(modifications$Parent)
   new_start_mod <- BiocGenerics::which(seqs[parent_mod] == start_mod)
   # reset strand since it is now transcipt centric
   strand(modifications) <- "*"
   ranges(modifications) <- IRanges::IRanges(start = unlist(new_start_mod),
                                             end = unlist(new_start_mod))
   modifications
 }
 
 #' @rdname modify
 #' @export
b75e0751
 setMethod(f = "modifications",
c70f0bc6
           signature = signature(x = "Modifier"),
b75e0751
           definition =
c70f0bc6
             function(x, perTranscript = FALSE){
c3cad704
               if(!.is_a_bool(perTranscript)){
                 stop("'perTranscript' has to be a single logical value.",
                      call. = FALSE)
c70f0bc6
               }
16803e67
               valid <- c(validAggregate(x), validModification(x))
               if(!all(valid)){
                 warning(invalidMessage, call. = FALSE)
               }
c70f0bc6
               if(perTranscript){
                 return(.get_modifications_per_transcript(x))
               }
               x@modifications
             }
 )
8a505469
 
a4b215eb
 #' @rdname Modifier-functions
6fac637f
 #' @export
b75e0751
 setMethod(f = "modifierType",
6fac637f
           signature = signature(x = "Modifier"),
8a505469
           definition = function(x){class(x)[[1L]]})
 
a4b215eb
 #' @rdname Modifier-functions
b51379c0
 #' @export
b75e0751
 setMethod(f = "modType",
b51379c0
           signature = signature(x = "Modifier"),
           definition = function(x){x@mod})
8a505469
 
a4b215eb
 #' @rdname Modifier-functions
b51379c0
 #' @export
b75e0751
 setMethod(f = "dataType",
750c3c85
           signature = signature(x = "Modifier"),
           definition = function(x){x@dataType})
8a505469
 
750c3c85
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "names",
b51379c0
           signature = signature(x = "Modifier"),
c70f0bc6
           definition = function(x){names(sequenceData(x))})
8a505469
 
c70f0bc6
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "ranges",
c70f0bc6
           signature = signature(x = "Modifier"),
           definition = function(x){ranges(sequenceData(x))})
8a505469
 
c70f0bc6
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "replicates",
c70f0bc6
           signature = signature(x = "Modifier"),
           definition = function(x){
45929b71
             x@replicate
c70f0bc6
           })
8a505469
 
 #' @rdname Modifier-functions
 #' @export
 setMethod(f = "seqinfo",
           signature = signature(x = "Modifier"),
           definition = function(x){seqinfo(sequenceData(x))}
 )
 
 #' @rdname Modifier-functions
 #' @export
 setMethod(f = "seqtype",
           signature = signature(x = "Modifier"),
           definition = function(x){x@seqtype}
 )
 
c70f0bc6
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "sequenceData",
c70f0bc6
           signature = signature(x = "Modifier"),
           definition = function(x){x@data})
8a505469
 
 .get_modified_sequences <- function(x, modified){
   if(is(x,"Modifier")){
     seqData <- sequenceData(x)
   } else if(is(x,"ModifierSet")) {
     seqData <- sequenceData(x[[1L]])
   } else {
     stop("")
   }
   if(!modified){
     return(sequences(seqData))
   }
   mod <- .get_modifications_per_transcript(x)
   mod <- .rebase_seqnames(mod, mod$Parent)
   mod <- split(mod,factor(mod$Parent, levels = mod$Parent))
   if(seqtype(x) == seqtype(RNAString())){
     ans <- ModRNAStringSet(sequences(seqData))
   } else if(seqtype(x) == seqtype(DNAString())){
     ans <- ModDNAStringSet(sequences(seqData))
   } else {
     stop("")
   }
   modSeqList <- ans[names(ans) %in% names(mod)]
   mod <- mod[match(names(mod),names(modSeqList))]
   ans[names(ans) %in% names(mod)] <-
     Modstrings::combineIntoModstrings(modSeqList, mod)
   ans
 }
 
c70f0bc6
 #' @rdname Modifier-functions
 #' @export
b75e0751
 setMethod(f = "sequences",
c70f0bc6
           signature = signature(x = "Modifier"),
b75e0751
           definition =
c70f0bc6
             function(x, modified = FALSE){
c3cad704
               if(!.is_a_bool(modified)){
c70f0bc6
                 stop("'modified' has to be a single logical value.",
                      call. = FALSE)
               }
8a505469
               .get_modified_sequences(x, modified)
c70f0bc6
             }
 )
8a505469
 
a4b215eb
 #' @rdname Modifier-functions
f49b7684
 #' @export
16803e67
 setMethod(f = "validAggregate",
           signature = signature(x = "Modifier"),
           definition = function(x) x@aggregateValidForCurrentArguments
 )
8a505469
 
16803e67
 #' @rdname Modifier-functions
 #' @export
 setMethod(f = "validModification",
           signature = signature(x = "Modifier"),
           definition = function(x) x@modificationsValidForCurrentArguments
 )
 
 # settings ---------------------------------------------------------------------
 
 #' @name settings
 #' 
 #' @title Settings for \code{Modifier} objects
 #' 
 #' @description 
 #' Depending on data prepation, quality and desired stringency of a modification
 #' strategy, settings for cut off parameters or other variables may need to be 
 #' adjusted. This should be rarely the case, but a function for changing these
 #' settings, is implemented as the... \code{settings} function.
 #' 
 #' For changing values the input can be either a \code{list} or something 
 #' coercible to a \code{list}. Upon changing a setting, the validity of the
 #' value in terms of type(!) and dimensions will be checked. 
 #' 
 #' If settings have been modified after the data was loaded, the data is 
 #' potentially invalid. To update the data, run the \code{aggregate} or the 
 #' \code{modify} function.
 #' 
 #' @param x a \code{Modifier} or \code{ModifierSet} class
 #' @param name name of the setting to be returned or set
 #' @param value value of the setting to be set
 #' 
 #' @return 
 #' If \code{name} is omitted, \code{settings} returns a list of all settings.
 #' If \code{name} is set, \code{settings} returns a single settings or 
 #' \code{NULL}, if a value for \code{name} is not available. 
 #' 
 #' @examples 
 #' data(msi,package="RNAmodR")
 #' mi <- msi[[1]]
 #' # returns a list of all settings
 #' settings(mi)
 #' # accesses a specific setting
 #' settings(mi,"minCoverage")
 #' # modification of setting
 #' settings(mi) <- list(minCoverage = 11L)
 NULL
 
57698a41
 .Modifier_settings <- data.frame(
   variable = c("minCoverage",
                "minReplicate",
                "find.mod"),
   testFUN = c(".not_integer_bigger_than_zero",
               ".not_integer_bigger_than_zero",
               ".is_a_bool"),
   errorValue = c(TRUE,
                  TRUE,
                  FALSE),
   errorMessage = c("'minCoverage' must be a single positive integer value.",
                    "'minReplicate' must be a single positive integer value.",
                    "'find.mod' must be a single logical value."),
   stringsAsFactors = FALSE)
 
 .norm_Modifier_settings <- function(input){
   minCoverage <- 10L
   minReplicate <- 1L
   find.mod <- TRUE
   args <- .norm_settings(input, .Modifier_settings, minCoverage, minReplicate,
                          find.mod)
   args
 }
 
16803e67
 #' @rdname settings
 #' @export
f30cb3b2
 setMethod(f = "settings", signature = signature(x = "Modifier"),
     definition = function(x, name){
         if(missing(name) || is.null(name)){
             return(x@settings)
         }
         if(!.is_a_string(name)){
             stop("'name' must be a single character value.",
                  call. = FALSE)
         }
         x@settings[[name]]
     }
f49b7684
 )
 
f30cb3b2
 .add_settings_value <- function(x, value, names){
     if(is.null(names)){
         names <- names(value)
     } else {
         names <- names[names %in% names(value)]
     }
     if(length(names) != 0L){
         value <- value[names]
         x@settings[names(value)] <- unname(value)
     }
     x
 }
 
16803e67
 #' @rdname settings
abdd8686
 #' @export
16803e67
 setReplaceMethod(f = "settings",
f30cb3b2
     signature = signature(x = "Modifier"),
     definition = function(x, value){
         if(is.null(names(value)) && length(value) > 0L){
             stop("'value' has to be a named.", call. = FALSE)
         }
         if(!is.list(value)){
             value <- as.list(value)
         }
         names <- names(value)
         value <- .norm_Modifier_settings(value)
         x <- .add_settings_value(x, value, names)
         x@aggregateValidForCurrentArguments <- FALSE
         x@modificationsValidForCurrentArguments <- FALSE
         x
     }
 )
73a01f9d
 
 # constructors -----------------------------------------------------------------
 
f0180769
 .norm_Modifier_input_SequenceData_elements <- function(list, ans){
abdd8686
   if(is(ans,"character") && extends(ans,"Modifier")){
     ans <- getClass(ans)@prototype
   } else if(!is(ans,"Modifier")) {
16803e67
     stop("")
abdd8686
   }
f0180769
   .check_Modifier_data_elements(ans, list)
abdd8686
   if(length(list) == 1L){
     return(list[[1]])
   }
f0180769
   list
abdd8686
 }
 
 .Modifier <- function(className, data){
   proto <- new(className)  # create prototype object for mod normalization only
f0180769
   data <- .norm_Modifier_input_SequenceData_elements(data, proto)
abdd8686
   bamfiles <- bamfiles(data)
96ad785c
   # setup conditions and replicates
   conditions <- .norm_conditions(data)
   replicates <- .norm_replicates(data)
   ia <- as.integer(interaction(conditions,replicates))
   m <- match(unique(ia),ia)
   condition <- conditions[m]
   replicate <- replicates[m]
   # create Modifier object
45929b71
   new(className,
8a505469
       mod = .norm_mod(proto),
45929b71
       bamfiles = bamfiles,
       condition = condition,
96ad785c
       replicate = replicate,
45929b71
       data = data)
67abf29d
 }
 
f30cb3b2
 #' @importFrom BiocParallel bpparam bpisup bpstart bpstop bpmapply bplapply
f0180769
 .load_SequenceData <- function(classes, bamfiles, annotation, sequences,
                                seqinfo, args){
   if(is.list(classes)){
f30cb3b2
     BPPARAM <- bpparam()
     if (!(bpisup(BPPARAM) || is(BPPARAM, "MulticoreParam"))) {
       bpstart(BPPARAM)
       on.exit(bpstop(BPPARAM), add = TRUE)
     }
c70f0bc6
     if(is.list(bamfiles)){
       if(length(classes) != length(bamfiles)){
         stop("'x' has invalid length. '",paste(classes, collapse = "' and '"),
              "' ",ifelse(length(classes) > 1L,"are","is")," required.",
              call. = FALSE)
       }
       if(is.null(names(bamfiles))){
         stop("If 'x' is a list, it must be named. The names must match the ",
              "required SequenceData class names.",
              call. = FALSE)
       }
       if(all(classes %in% names(bamfiles))){
         stop("If 'x' is named, names must match the required SequenceData ",
              "class names.",
              call. = FALSE)
       }
       bamfiles <- bamfiles[match(class,names(bamfiles))]
f30cb3b2
       data <- bpmapply(.load_SequenceData, classes, bamfiles,
                        MoreArgs = list(annotation, sequences,
                                        seqinfo, args),
                        SIMPLIFY = FALSE,
                        BPPARAM = BPPARAM)
c70f0bc6
     } else {
f30cb3b2
       data <- bplapply(classes, .load_SequenceData, bamfiles,
                        annotation, sequences, seqinfo, args,
                        BPPARAM = BPPARAM)
c70f0bc6
     }
f0180769
     data <- as(data,"SequenceDataList")
   } else if(is.character(classes)){
     data <- lapply(classes,
                    function(class){
                      do.call(class, c(list(bamfiles = bamfiles,
                                            annotation = annotation,
                                            sequences = sequences,
                                            seqinfo = seqinfo),
                                       args))
                    })
     data <- as(data,"SequenceDataSet")
   } else {
16803e67
     stop("")
f0180769
   }
   data
 }
 
473ca656
 .new_ModFromCharacter <- function(className, x, annotation, sequences, seqinfo,
                                   ...){
abdd8686
   # Check that external classes are implemented correctly
   className <- .norm_modifiertype(className)
   # create prototype object for mod normalization and settings only
b75e0751
   proto <- new(className)
abdd8686
   # short cut for creating an empty object
c70f0bc6
   if(is.null(x)){
8a505469
     return(new2(className, mod = .norm_mod(proto)))
abdd8686
   }
   bamfiles <- .norm_bamfiles(x, className) # check bam files
2be8c7e1
   # settings
f30cb3b2
   settings(proto) <- list()
abdd8686
   settings(proto) <- list(...)
2be8c7e1
   #
473ca656
   annotation <- .norm_annotation(annotation, className)
   sequences <- .norm_sequences(sequences, className)
abdd8686
   seqinfo <- .norm_seqnames(bamfiles, annotation, sequences, seqinfo, className)
e7831bc9
   annotation <- .load_annotation(annotation)
   annotation <- .subset_by_seqinfo(annotation, seqinfo)
2be8c7e1
   # get SequenceData
750c3c85
   data <- .load_SequenceData(dataType(proto), bamfiles = bamfiles,
f0180769
                              annotation = annotation, sequences = sequences,
8a505469
                              seqinfo = seqinfo,
                              args = list(settings(proto),seqtype(proto)))
abdd8686
   .new_ModFromSequenceData(className, data, ...)
2be8c7e1
 }
 
473ca656
 .new_ModFromSequenceData <- function(className, x, ...){
abdd8686
   # create Modifier object
   ans <- .Modifier(className, x)
2be8c7e1
   # settings
f30cb3b2
   settings(ans) <- list()
473ca656
   settings(ans) <- list(...)
abdd8686
   # aggregate data
   message("Aggregating data and calculating scores ... ", appendLF = FALSE)
   ans <- aggregate(ans)
3191cb7e
   # search for modifications
405606cb
   if(settings(ans,"find.mod")){
8a505469
     if(seqtype(ans) == seqtype(RNAString())){
51a2e660
       modName <- .get_modification_full_name(ans,seqtype(ans))
8a505469
     } else if(seqtype(ans) == seqtype(DNAString())){
51a2e660
       modName <- .get_modification_full_name(ans,seqtype(ans))
8a505469
     } else {
       stop("")
     }
b75e0751
     message("Starting to search for '", paste(tools::toTitleCase(modName),
3191cb7e
                                               collapse = "', '"),
             "' ... ", appendLF = FALSE)
     ans <- modify(ans)
73a01f9d
   }
abdd8686
   message("done.")
2be8c7e1
   validObject(ans)
73a01f9d
   ans
 }
 
f2c595d7
 #' @rdname Modifier-class
 #' @export
b75e0751
 setGeneric(
f2c595d7
   name = "Modifier",
16803e67
   signature = "x",
f2c595d7
   def = function(className, x, annotation, sequences, seqinfo, ...)
     standardGeneric("Modifier")
b75e0751
 )
f2c595d7
 
a4b215eb
 #' @rdname Modifier-class
14ccc6b7
 #' @export
473ca656
 setMethod("Modifier",
           signature = c(x = "SequenceData"),
b75e0751
           function(className, x, annotation = NULL, sequences = NULL,
473ca656
                    seqinfo = NULL, ...){
             .new_ModFromSequenceData(className, x, ...)
           })
a4b215eb
 #' @rdname Modifier-class
14ccc6b7
 #' @export
abdd8686
 setMethod("Modifier",
f0180769
           signature = c(x = "SequenceDataSet"),
b75e0751
           function(className, x, annotation = NULL, sequences = NULL,
abdd8686
                    seqinfo = NULL, ...){
             .new_ModFromSequenceData(className, x, ...)
           })
 #' @rdname Modifier-class
 #' @export
c70f0bc6
 setMethod("Modifier",
           signature = c(x = "SequenceDataList"),
b75e0751
           function(className, x, annotation = NULL, sequences = NULL,
c70f0bc6
                    seqinfo = NULL, ...){
             .new_ModFromSequenceData(className, x, ...)
           })
 #' @rdname Modifier-class
 #' @export
473ca656
 setMethod("Modifier",
           signature = c(x = "character"),
b75e0751
           function(className, x, annotation = NULL, sequences = NULL,
473ca656
                    seqinfo = NULL, ...){
             .new_ModFromCharacter(className, x, annotation, sequences, seqinfo,
                                   ...)
           })
a4b215eb
 #' @rdname Modifier-class
c70f0bc6
 #' @export
 setMethod("Modifier",
           signature = c(x = "list"),
b75e0751
           function(className, x, annotation = NULL, sequences = NULL,
c70f0bc6
                    seqinfo = NULL, ...){
             .new_ModFromCharacter(className, x, annotation, sequences, seqinfo,
                                   ...)
           })
 #' @rdname Modifier-class
14ccc6b7
 #' @export
473ca656
 setMethod("Modifier",
           signature = c(x = "BamFileList"),
b75e0751
           function(className, x, annotation = NULL, sequences = NULL,
473ca656
                    seqinfo = NULL, ...){
             .new_ModFromCharacter(className, x, annotation, sequences, seqinfo,
                                   ...)
           })
 
14ccc6b7
 # aggregate --------------------------------------------------------------------
 
073bdf6b
 #' @name aggregate
478352fe
 #' @aliases hasAggregateData aggregateData getAggregateData
b75e0751
 #'
f2c595d7
 #' @title Aggregate data per positions
b75e0751
 #'
 #' @description
f2c595d7
 #' The \code{aggregate} function is defined for each
 #' \code{\link[=SequenceData-class]{SequenceData}} object and can be used
 #' directly on a \code{\link[=SequenceData-class]{SequenceData}} object or
 #' indirectly via a \code{\link[=Modifier-class]{Modifier}} object.
b75e0751
 #'
f2c595d7
 #' For the letter the call is redirect to the
 #' \code{\link[=SequenceData-class]{SequenceData}} object, the result summarized
 #' as defined for the individual \code{Modifier} class and stored in the
 #' \code{aggregate} slot of the \code{Modifier} object. The data is then used
 #' for subsequent tasks, such as search for modifications and visualization of
 #' the results.
b75e0751
 #'
f2c595d7
 #' The summarization is implemented in the \code{aggregateData} for each type of
 #' \code{Modifier} class. The stored data from the \code{aggregate} slot can be
 #' retrieved using the \code{getAggregateData} function.
b75e0751
 #'
f2c595d7
 #' Whether the aggrgeated data is already present in the \code{aggregate} slot
 #' can be checked using the \code{hasAggregateData} function.
b75e0751
 #'
 #' For \code{SequenceDataSet}, \code{SequenceDataList} and \code{ModfierSet}
f2c595d7
 #' classes wrapper of the \code{aggregate} function exist as well.
b75e0751
 #'
 #' @param x a \code{\link[=SequenceData-class]{SequenceData}},
 #' \code{SequenceDataSet}, \code{SequenceDataList},
 #' \code{\link[=Modifier-class]{Modifier}} or
f2c595d7
 #' \code{\link[=Modifier-class]{ModfierSet}}  object.
14ccc6b7
 #' @param force whether to recreate the aggregated data, if it is already stored
 #' inside the \code{Modifier} object.
a4b215eb
 #' @param condition character value, which selects, for which condition the data
b75e0751
 #' should be aggregated. One of the following values: \code{Both},
 #' \code{Control}, \code{Treated}
a4b215eb
 #' @param ... additional arguments
b75e0751
 #'
 #' @return
14ccc6b7
 #' \itemize{
f2c595d7
 #' \item{\code{aggregate}: }{for \code{SequenceData} object the aggregated data
 #' is returned as a \code{SplitDataFrameList} with an element per transcript,
 #' whereas for a \code{Modifier} the modified input object is returned,
 #' containing the aggregated data, which can be accessed using
478352fe
 #' \code{getAggregateData}.}
b75e0751
 #' \item{\code{getAggregateData}: }{only for \code{Modifier}: a
 #' \code{SplitDataFrameList} with an element per transcript is returned. If the
 #' aggregated data is not stored in the object, it is generated on the fly, but
f2c595d7
 #' does not persist.}
b75e0751
 #' \item{\code{hasAggregateData}: }{TRUE or FALSE. Does the \code{Modifier}
14ccc6b7
 #' object already contain aggregated data?}
 #' }
b75e0751
 #'
 #' @return
21b04006
 #' If 'x' is a
 #' \itemize{
b75e0751
 #' \item{\code{\link[=SequenceData-class]{SequenceData}}} {a
21b04006
 #' \code{SplitDataFrameList} with elments per transcript.}
f2c595d7
 #' \item{\code{\link[=SequenceDataSet-class]{SequenceDataSet}} or
 #' \code{\link[=SequenceDataList-class]{SequenceDataList}}} {a \code{SimpleList}
 #' with \code{SplitDataFrameList} as elements.}
 #' \item{\code{\link[=Modifier-class]{Modifier}} or
 #' \code{\link[=ModifierSet-class]{ModifierSet}}} {an updated \code{Modifier}
21b04006
 #' object. The data can be accessed by using the \code{aggregateData} function.}
 #' }
b75e0751
 #'
 #' @examples
21b04006
 #' data(e5sd,package="RNAmodR")
 #' data(msi,package="RNAmodR")
405606cb
 #' # modify() triggers the search for modifications in the data contained in
 #' # the Modifier or ModifierSet object
21b04006
 #' sdfl <- aggregate(e5sd)
 #' mi <- aggregate(msi[[1]])
a4b215eb
 NULL
 
478352fe
 .check_aggregate_modifier <- function(data, x){
   score <- x@score
14ccc6b7
   if(is(data,"CompressedSplitDataFrameList")){
acf4e07e
     columns <- colnames(unlist(data, use.names = FALSE))
14ccc6b7
   } else {
478352fe
     stop("aggregate data has to be a 'CompressedSplitDataFrameList' object. ",
          "Contact the maintainer of the class used.",
          call. = FALSE)
14ccc6b7
   }
   if(!(score %in% columns)){
     stop("The default score is not present in the aggregate data. Contact the ",
          "maintainer of the class used.",
          call. = FALSE)
   }
478352fe
   seqs <- .seqs_rl_strand(ranges(x))
   if(any(any(seqs != IRanges::IntegerList(rownames(data))))){
     stop("rownames() of aggregate data is not named or not named along the ",
          "genomic coordinates. Contact the maintainer of the class used.",
          call. = FALSE)
   }
14ccc6b7
   data
 }
478352fe
 
 #' @rdname aggregate
 #' @export
b75e0751
 setMethod(f = "aggregate",
478352fe
           signature = signature(x = "Modifier"),
b75e0751
           definition =
478352fe
             function(x, force = FALSE){
c3cad704
               if(!.is_a_bool(force)){
                 stop("'force' has to be TRUE or FALSE.",
                      call. = FALSE)
               }
478352fe
               if(!hasAggregateData(x) || force){
                 x@aggregate <- .check_aggregate_modifier(aggregateData(x), x)
                 x@aggregateValidForCurrentArguments <- TRUE
               }
               x
             }
 )
 
14ccc6b7
 #' @rdname aggregate
 #' @export
b75e0751
 setMethod(f = "aggregateData",
478352fe
           signature = signature(x = "Modifier"),
b75e0751
           definition =
478352fe
             function(x){
b75e0751
               stop("The 'aggregateData' functions needs to be implemented by
814caf5c
                    '",class(x),"'.",call. = FALSE)
478352fe
             }
 )
 #' @rdname aggregate
 #' @export
b75e0751
 setMethod(f = "getAggregateData",
14ccc6b7
           signature = signature(x = "Modifier"),
           definition = function(x){
             x <- aggregate(x)
478352fe
             x@aggregate
14ccc6b7
           })
19221815
 
073bdf6b
 #' @rdname aggregate
19221815
 #' @export
b75e0751
 setMethod(f = "hasAggregateData",
19221815
           signature = signature(x = "Modifier"),
b75e0751
           definition =
19221815
             function(x){
9ac0f032
               if(is.null(x@aggregate) || nrow(x@aggregate@unlistData) == 0L){
19221815
                 return(FALSE)
               }
               return(TRUE)
             }
 )
36172a18
 
 # modify -----------------------------------------------------------------------
 
 #' @name modify
 #' @aliases modifications
b75e0751
 #'
36172a18
 #' @title Searching for modifications in \code{SequenceData}
b75e0751
 #'
 #' @description
 #' The \code{modify} function executes the search for modifications for a
 #' \code{\link[=Modifier-class]{Modifier}} class. Usually this is done
f2c595d7
 #' automatically during construction of a \code{Modifier} object.
b75e0751
 #'
 #' When the \code{modify} functions is called, the aggregated data is checked
 #' for validity for the current settings and the search for modifications is
 #' performed using the \code{findMod}. The results are stored in the
 #' \code{modification} slot of the \code{Modifier} object, which is returned by
 #' \code{modify}. The results can be accessed via the \code{modifications()}
 #' function.
 #'
 #' \code{findMod} returns the found modifications as a \code{GRanges}
 #' object and has to be implemented for each individual \code{Modifier} class.
 #'
36172a18
 #' @param x a \code{Modifier} object.
 #' @param force force to run \code{aggregate} again, if data is already stored
 #' in \code{x}.
 #' @param perTranscript For \code{modifications>} \code{TRUE} or \code{FALSE}:
 #'   Should the coordinates be returned as local per transcript coordinates?
 #' @param ... additional arguments
b75e0751
 #'
 #' @return
36172a18
 #' \itemize{
 #' \item{\code{modify}: }{the updated \code{Modifier} object.}
 #' \item{\code{modifications}: }{the modifications found as a \code{GRanges}
 #' object.}
 #' }
b75e0751
 #'
 #' @examples
36172a18
 #' data(msi,package="RNAmodR")
 #' # modify() triggers the search for modifications in the data contained in
 #' # the Modifier or ModifierSet object
 #' mi <- modify(msi[[1]])
 NULL
 
 #' @rdname modify
 #' @export
b75e0751
 setMethod(f = "modify",
36172a18
           signature = signature(x = "Modifier"),
b75e0751
           definition =
478352fe
             function(x, force = FALSE){
c3cad704
               if(!.is_a_bool(force)){
                 stop("'force' has to be TRUE or FALSE.",
                      call. = FALSE)
               }
478352fe
               if(!validAggregate(x) | force){
                 x <- aggregate(x, force = TRUE)
               }
               x@modifications <- findMod(x)
36172a18
               x@modificationsValidForCurrentArguments <- TRUE
               x
             }
 )
478352fe
 
 #' @rdname modify
 #' @export
b75e0751
 setMethod(f = "findMod",
478352fe
           signature = signature(x = "Modifier"),
b75e0751
           definition =
478352fe
             function(x){
b75e0751
               stop("The 'findMod' functions needs to be implemented by
814caf5c
                    '",class(x),"'.",call. = FALSE)
478352fe
             }
 )
8a505469
 
 # RNAModifier and DNAModifier --------------------------------------------------
 
 #' @rdname Modifier-class
 #' @export
 setClass("RNAModifier",
          contains = c("VIRTUAL","Modifier"),
          prototype = list(seqtype = seqtype(RNAString())))
 
 #' @rdname Modifier-class
 #' @export
 setClass("DNAModifier",
          contains = c("VIRTUAL","Modifier"),
          prototype = list(seqtype = seqtype(DNAString())))