Browse code

Restructured the code, split according to the class, first version of roxygen2 documentation. Bumped the version.

Robert Ivánek authored on 21/01/2022 15:52:54
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,851 @@
1
+#' @include AnnotationTrack-class.R
2
+NULL
3
+
4
+## GeneRegionTrack Class -----------------------------------------------------
5
+
6
+
7
+#' GeneRegionTrack class and methods
8
+#'
9
+#'
10
+#' A class to hold gene model data for a genomic region.
11
+#'
12
+#'
13
+#' A track containing all gene models in a particular region. The data are
14
+#' usually fetched dynamially from an online data store, but it is also
15
+#' possible to manully construct objects from local data. Connections to
16
+#' particular online data sources should be implemented as sub-classes, and
17
+#' \code{GeneRegionTrack} is just the commone denominator that is being used
18
+#' for plotting later on. There are several levels of data associated to a
19
+#' \code{GeneRegionTrack}:
20
+#'
21
+#' \describe{
22
+#'
23
+#' \item{exon level:}{identifiers are stored in the exon column of the
24
+#' \code{\linkS4class{GRanges}} object in the \code{range} slot. Data may be
25
+#' extracted using the \code{exon} method.}
26
+#'
27
+#' \item{transcript level:}{identifiers are stored in the transcript column of
28
+#' the \code{\linkS4class{GRanges}} object. Data may be extracted using the
29
+#' \code{transcript} method.}
30
+#'
31
+#' \item{gene level:}{identifiers are stored in the gene column of the
32
+#' \code{\linkS4class{GRanges}} object, more human-readable versions in the
33
+#' symbol column. Data may be extracted using the \code{gene} or the
34
+#' \code{symbol} methods.}
35
+#'
36
+#' \item{transcript-type level:}{information is stored in the feature column of
37
+#' the \code{\linkS4class{GRanges}} object. If a display parameter of the same
38
+#' name is specified, the software will use its value for the coloring.}
39
+#'
40
+#' }
41
+#'
42
+#' \code{GeneRegionTrack} objects also know about coding regions and non-coding
43
+#' regions (e.g., UTRs) in a transcript, and will indicate those by using
44
+#' different shapes (wide boxes for all coding regions, thinner boxes for
45
+#' non-coding regions). This is archived by setting the \code{feature} values
46
+#' of the object for non-coding elements to one of the options that are
47
+#' provided in the \code{thinBoxFeature} display parameters. All other elements
48
+#' are considered to be coding elements.
49
+#'
50
+#' @name GeneRegionTrack-class
51
+#'
52
+#' @param range
53
+#'
54
+#' An optional meta argument to handle the different input types. If the
55
+#' \code{range} argument is missing, all the relevant information to create the
56
+#' object has to be provided as individual function arguments (see below).
57
+#'
58
+#' The different input options for \code{range} are:
59
+#'
60
+#' \describe{
61
+#'
62
+#' \item{}{A \code{TxDb} object: all the necessary gene model information
63
+#' including exon locations, transcript groupings and associated gene ids are
64
+#' contained in \code{TxDb} objects, and the coercion between the two is almost
65
+#' completely automated. If desired, the data to be fetched from the
66
+#' \code{TxDb} object can be restricted using the constructor's
67
+#' \code{chromosome}, \code{start} and \code{end} arguments. See below for
68
+#' details. A direct coercion method \code{as(obj, "GeneRegionTrack")} is also
69
+#' available. A nice added benefit of this input option is that the UTR and
70
+#' coding region information that is part of the original \code{TxDb} object is
71
+#' retained in the \code{GeneRegionTrack}.}
72
+#'
73
+#' \item{}{A \code{GRanges} object: the genomic ranges for the
74
+#' \code{GeneRegion} track as well as the optional additional metadata columns
75
+#' \code{feature}, \code{transcript}, \code{gene}, \code{exon} and
76
+#' \code{symbol} (see description of the individual function parameters below
77
+#' for details). Calling the constructor on a \code{GRanges} object without
78
+#' further arguments, e.g.  \code{GeneRegionTrack(range=obj)} is equivalent to
79
+#' calling the coerce method \code{as(obj, "GeneRegionTrack")}.}
80
+#'
81
+#' \item{}{A \code{GRangesList} object: this is very similar to the previous
82
+#' case, except that the grouping information that is part of the list
83
+#' structure is preserved in the \code{GeneRegionTrack}. I.e., all the elements
84
+#' within one list item receive the same group id. For consistancy, there is
85
+#' also a coercion method from \code{GRangesLists} \code{as(obj,
86
+#' "GeneRegionTrack")}. Please note that unless the necessary information about
87
+#' gene ids, symbols, etc. is present in the individual \code{GRanges} meta
88
+#' data slots, the object will not be particularly useful, because all the
89
+#' identifiers will be set to a common default value.}
90
+#'
91
+#' \item{}{An \code{\linkS4class{IRanges}} object: almost identical to the
92
+#' \code{GRanges} case, except that the chromosome and strand information as
93
+#' well as all additional data has to be provided in the separate
94
+#' \code{chromosome}, \code{strand}, \code{feature}, \code{transcript},
95
+#' \code{symbol}, \code{exon} or \code{gene} arguments, because it can not be
96
+#' directly encoded in an \code{IRanges} object. Note that only the former two
97
+#' are mandatory (if not provided explicitely the more or less reasonable
98
+#' default values \code{chromosome=NA} and \code{strand=*} are used, but not
99
+#' providing information about the gene-to-transcript relationship or the
100
+#' human-readble symbols renders a lot of the class' functionality useles.}
101
+#'
102
+#' \item{}{A \code{data.frame} object: the \code{data.frame} needs to contain
103
+#' at least the two mandatory columns \code{start} and \code{end} with the
104
+#' range coordinates. It may also contain a \code{chromosome} and a
105
+#' \code{strand} column with the chromosome and strand information for each
106
+#' range. If missing, this information will be drawn from the constructor's
107
+#' \code{chromosome} or \code{strand} arguments. In addition, the
108
+#' \code{feature}, \code{exon}, \code{transcript}, \code{gene} and
109
+#' \code{symbol} data can be provided as columns in the \code{data.frame}. The
110
+#' above comments about potential default values also apply here.}
111
+#'
112
+#' \item{}{A \code{character} scalar: in this case the value of the
113
+#' \code{range} argument is considered to be a file path to an annotation file
114
+#' on disk. A range of file types are supported by the \code{Gviz} package as
115
+#' identified by the file extension. See the \code{importFunction}
116
+#' documentation below for further details.}
117
+#'
118
+#' }
119
+#' @template GeneRegionTrack-class_param
120
+#' @return
121
+#'
122
+#' The return value of the constructor function is a new object of class
123
+#' \code{GeneRegionTrack}.
124
+#' @section Objects from the class:
125
+#'
126
+#' Objects can be created using the constructor function
127
+#' \code{GeneRegionTrack}.
128
+#'
129
+#' @author Florian Hahne, Steve Lianoglou
130
+#'
131
+#' @inherit GdObject-class seealso
132
+#'
133
+#' @examples
134
+#'
135
+#'
136
+#' ## The empty object
137
+#' GeneRegionTrack()
138
+#'
139
+#' ## Load some sample data
140
+#' data(cyp2b10)
141
+#'
142
+#' ## Construct the object
143
+#' grTrack <- GeneRegionTrack(
144
+#'     start = 26682683, end = 26711643,
145
+#'     rstart = cyp2b10$start, rends = cyp2b10$end, chromosome = 7, genome = "mm9",
146
+#'     transcript = cyp2b10$transcript, gene = cyp2b10$gene, symbol = cyp2b10$symbol,
147
+#'     feature = cyp2b10$feature, exon = cyp2b10$exon,
148
+#'     name = "Cyp2b10", strand = cyp2b10$strand
149
+#' )
150
+#'
151
+#' ## Directly from the data.frame
152
+#' grTrack <- GeneRegionTrack(cyp2b10)
153
+#'
154
+#' ## From a TxDb object
155
+#' if (require(GenomicFeatures)) {
156
+#'     samplefile <- system.file("extdata",
157
+#'                               "hg19_knownGene_sample.sqlite",
158
+#'                               package = "GenomicFeatures")
159
+#'     txdb <- loadDb(samplefile)
160
+#'     GeneRegionTrack(txdb)
161
+#'     GeneRegionTrack(txdb, chromosome = "chr6", start = 35000000, end = 40000000)
162
+#' }
163
+#' \dontshow{
164
+#' ## For some annoying reason the postscript device does not know about
165
+#' ## the sans font
166
+#' if (!interactive()) {
167
+#'     font <- ps.options()$family
168
+#'     displayPars(grTrack) <- list(fontfamily = font, fontfamily.title = font)
169
+#' }
170
+#' }
171
+#'
172
+#' ## Plotting
173
+#' plotTracks(grTrack)
174
+#'
175
+#' ## Track names
176
+#' names(grTrack)
177
+#' names(grTrack) <- "foo"
178
+#' plotTracks(grTrack)
179
+#'
180
+#' ## Subsetting and splitting
181
+#' subTrack <- subset(grTrack, from = 26700000, to = 26705000)
182
+#' length(subTrack)
183
+#' subTrack <- grTrack[transcript(grTrack) == "ENSMUST00000144140"]
184
+#' split(grTrack, transcript(grTrack))
185
+#'
186
+#' ## Accessors
187
+#' start(grTrack)
188
+#' end(grTrack)
189
+#' width(grTrack)
190
+#' position(grTrack)
191
+#' width(subTrack) <- width(subTrack) + 100
192
+#'
193
+#' strand(grTrack)
194
+#' strand(subTrack) <- "-"
195
+#'
196
+#' chromosome(grTrack)
197
+#' chromosome(subTrack) <- "chrX"
198
+#'
199
+#' genome(grTrack)
200
+#' genome(subTrack) <- "hg19"
201
+#'
202
+#' range(grTrack)
203
+#' ranges(grTrack)
204
+#'
205
+#' ## Annotation
206
+#' identifier(grTrack)
207
+#' identifier(grTrack, "lowest")
208
+#' identifier(subTrack) <- "bar"
209
+#'
210
+#' feature(grTrack)
211
+#' feature(subTrack) <- "foo"
212
+#'
213
+#' exon(grTrack)
214
+#' exon(subTrack) <- letters[1:2]
215
+#'
216
+#' gene(grTrack)
217
+#' gene(subTrack) <- "bar"
218
+#'
219
+#' symbol(grTrack)
220
+#' symbol(subTrack) <- "foo"
221
+#'
222
+#' transcript(grTrack)
223
+#' transcript(subTrack) <- c("foo", "bar")
224
+#' chromosome(subTrack) <- "chr7"
225
+#' plotTracks(subTrack)
226
+#'
227
+#' values(grTrack)
228
+#'
229
+#' ## Grouping
230
+#' group(grTrack)
231
+#' group(subTrack) <- "Group 1"
232
+#' transcript(subTrack)
233
+#' plotTracks(subTrack)
234
+#'
235
+#' ## Collapsing transcripts
236
+#' plotTracks(grTrack,
237
+#'     collapseTranscripts = TRUE, showId = TRUE,
238
+#'     extend.left = 10000, shape = "arrow"
239
+#' )
240
+#'
241
+#' ## Stacking
242
+#' stacking(grTrack)
243
+#' stacking(grTrack) <- "dense"
244
+#' plotTracks(grTrack)
245
+#'
246
+#' ## coercion
247
+#' as(grTrack, "data.frame")
248
+#' as(grTrack, "UCSCData")
249
+#'
250
+#' ## HTML image map
251
+#' coords(grTrack)
252
+#' tags(grTrack)
253
+#' grTrack <- plotTracks(grTrack)$foo
254
+#' coords(grTrack)
255
+#' tags(grTrack)
256
+#' @exportClass GeneRegionTrack
257
+setClass("GeneRegionTrack",
258
+    contains = "AnnotationTrack",
259
+    representation = representation(start = "NumericOrNULL", end = "NumericOrNULL"),
260
+    prototype = prototype(
261
+        columns = c("feature", "transcript", "symbol", "gene", "exon"),
262
+        stacking = "squish",
263
+        stacks = 0,
264
+        start = 0,
265
+        end = 0,
266
+        name = "GeneRegionTrack",
267
+        dp = DisplayPars(
268
+            arrowHeadWidth = 10,
269
+            arrowHeadMaxWidth = 20,
270
+            col = NULL,
271
+            collapseTranscripts = FALSE,
272
+            exonAnnotation = NULL,
273
+            fill = "orange",
274
+            min.distance = 0,
275
+            shape = c("smallArrow", "box"),
276
+            showExonId = NULL,
277
+            thinBoxFeature = .THIN_BOX_FEATURES,
278
+            transcriptAnnotation = NULL
279
+        )
280
+    )
281
+)
282
+
283
+## Initialize ----------------------------------------------------------------
284
+
285
+#' @describeIn GeneRegionTrack-class Initialize.
286
+#' @export
287
+setMethod("initialize", "GeneRegionTrack", function(.Object, start, end, ...) {
288
+    if (is.null(list(...)$range) && is.null(list(...)$genome) && is.null(list(...)$chromosome)) {
289
+        return(.Object)
290
+    }
291
+    ## the diplay parameter defaults
292
+    .makeParMapping()
293
+    .Object <- .updatePars(.Object, "GeneRegionTrack")
294
+    .Object@start <- ifelse(is.null(start), 0, start)
295
+    .Object@end <- ifelse(is.null(end), 0, end)
296
+    .Object <- callNextMethod(.Object, ...)
297
+    return(.Object)
298
+})
299
+
300
+## ReferenceGeneRegionTrack Class --------------------------------------------
301
+
302
+## The file-based version of the GeneRegionTrack class. This will mainly provide a means to dispatch to
303
+## a special 'subset' method which should stream the necessary data from disk.
304
+
305
+#' @describeIn GeneRegionTrack-class The file-based version of the `GeneRegionTrack-class`.
306
+#' @exportClass ReferenceGeneRegionTrack
307
+setClass("ReferenceGeneRegionTrack", contains = c("GeneRegionTrack", "ReferenceTrack"))
308
+
309
+## Initialize ----------------------------------------------------------------
310
+
311
+
312
+## This just needs to set the appropriate slots that are being inherited from ReferenceTrack because the
313
+## multiple inheritence has some strange features with regards to method selection
314
+
315
+#' @describeIn GeneRegionTrack-class Initialize.
316
+#' @export
317
+setMethod("initialize", "ReferenceGeneRegionTrack", function(.Object, stream, reference, mapping = list(),
318
+                                                             args = list(), defaults = list(), ...) {
319
+    .Object <- selectMethod("initialize", "ReferenceTrack")(.Object = .Object, reference = reference, stream = stream,
320
+        mapping = mapping, args = args, defaults = defaults)
321
+    .Object <- callNextMethod(.Object, ...)
322
+    return(.Object)
323
+})
324
+
325
+## Constructor ---------------------------------------------------------------
326
+
327
+## Constructor. The following arguments are supported:
328
+##    o range: one in a whole number of different potential inputs upon which the .buildRanges method will dispatch
329
+##    o start, end: numeric vectors of the track start and end coordinates.
330
+##    o genome, chromosome: the reference genome and active chromosome for the track.
331
+##    o rstarts, rends, rwidths: integer vectors of exon start and end locations or widths, or a character vector of
332
+##      comma-delimited exon locations, one vector element for each transcript
333
+##    o strand, feature, exon, transcript, gene, symbol, chromosome: vectors of equal length containing
334
+##       the exon strand, biotype, exon id, transcript id, gene id, human-readable gene symbol and chromosome information
335
+##    o stacking: character controlling the stacking of overlapping items. One in 'hide',
336
+##       'dense', 'squish', 'pack' or 'full'.
337
+##    o name: the name of the track. This will be used for the title panel.
338
+##    o exonList: boolean, causing the values in starts, rends or rwidths to be interpreted as delim-separated
339
+##       lists that have to be exploded. All other annotation arguments will be repeated accordingly.
340
+##    o delim: the delimiter if coordinates are in a list
341
+## All additional items in ... are being treated as DisplayParameters
342
+
343
+#' @describeIn GeneRegionTrack-class Constructor function for `GeneRegionTrack-class`.
344
+#' @export
345
+GeneRegionTrack <- function(range = NULL, rstarts = NULL, rends = NULL, rwidths = NULL, strand, feature, exon,
346
+                            transcript, gene, symbol, chromosome, genome, stacking = "squish",
347
+                            name = "GeneRegionTrack", start = NULL, end = NULL, importFunction, stream = FALSE, ...) {
348
+    ## Some defaults
349
+    covars <- if (is.data.frame(range)) range else if (is(range, "GRanges")) as.data.frame(mcols(range)) else data.frame()
350
+    isStream <- FALSE
351
+    if (!is.character(range)) {
352
+        n <- if (is.null(range)) max(c(length(start), length(end), length(width))) else if (is(range, "data.frame")) nrow(range) else length(range)
353
+        if (is.null(covars[["feature"]]) && missing(feature)) {
354
+            feature <- paste("exon", seq_len(n), sep = "_")
355
+        }
356
+        if (is.null(covars[["exon"]]) && missing(exon)) {
357
+            exon <- make.unique(rep(if (!missing(feature) && !is.null(feature)) as.character(feature) else covars[["feature"]], n)[seq_len(n)])
358
+        }
359
+        if (is.null(covars[["transcript"]]) && missing(transcript)) {
360
+            transcript <- paste("transcript", seq_len(n), sep = "_")
361
+        }
362
+        if (is.null(covars[["gene"]]) && missing(gene)) {
363
+            gene <- paste("gene", seq_len(n), sep = "_")
364
+        }
365
+    }
366
+    ## Build a GRanges object from the inputs
367
+    .missingToNull(c("feature", "exon", "transcript", "gene", "symbol", "strand", "chromosome", "importFunction", "genome"))
368
+    args <- list(
369
+        feature = feature, id = exon, exon = exon, transcript = transcript, gene = gene, symbol = symbol, strand = strand,
370
+        chromosome = chromosome, genome = genome
371
+    )
372
+    defs <- list(
373
+        feature = "unknown", id = "unknown", exon = "unknown", transcript = "unknown", genome = NA,
374
+        gene = "unknown", symbol = "unknown", strand = "*", density = 1, chromosome = "chrNA"
375
+    )
376
+    range <- .buildRange(
377
+        range = range, groupId = "transcript", start = rstarts, end = rends, width = rwidths, args = args, defaults = defs,
378
+        chromosome = chromosome, tstart = start, tend = end, trackType = "GeneRegionTrack", importFun = importFunction,
379
+        genome = genome
380
+    )
381
+    if (is.list(range)) {
382
+        isStream <- TRUE
383
+        slist <- range
384
+        range <- GRanges()
385
+    }
386
+    if (is.null(start)) {
387
+        start <- if (!length(range)) NULL else min(start(range))
388
+    }
389
+    if (is.null(end)) {
390
+        end <- if (!length(range)) NULL else max(end(range))
391
+    }
392
+    if (missing(chromosome) || is.null(chromosome)) {
393
+        chromosome <- if (length(range) > 0) .chrName(as.character(seqnames(range)[1])) else "chrNA"
394
+    }
395
+    genome <- .getGenomeFromGRange(range, ifelse(is.null(genome), character(), genome[1]))
396
+    if (!isStream) {
397
+        return(new("GeneRegionTrack",
398
+            start = start, end = end, chromosome = chromosome[1], range = range,
399
+            name = name, genome = genome, stacking = stacking, ...
400
+        ))
401
+    } else {
402
+        return(new("ReferenceGeneRegionTrack",
403
+            start = start, end = end, chromosome = chromosome[1], range = range,
404
+            name = name, genome = genome, stacking = stacking, stream = slist[["stream"]],
405
+            reference = slist[["reference"]], mapping = slist[["mapping"]], args = args, defaults = defs, ...
406
+        ))
407
+    }
408
+}
409
+
410
+## .buildRange ---------------------------------------------------------------
411
+##
412
+## Helper methods to build a GRanges object from the input arguments.
413
+##
414
+## Coordinates for grouped elements may be passed in as comma-separated values
415
+## (e.g. "1,5,9"), in which case we need to split and convert to numeric.
416
+## This also implies that the additional arguments (like feature, group, etc.)
417
+## have to be replicated accordingly. We handle this by passing along the repeat
418
+## vector 'by' to the numeric method below.
419
+
420
+
421
+## For TxDb objects we extract the grouping information and use the GRanges method
422
+
423
+#' @importClassesFrom GenomicFeatures TxDb
424
+#' @importMethodsFrom GenomicFeatures isActiveSeq "isActiveSeq<-" cdsBy exonsBy fiveUTRsByTranscript threeUTRsByTranscript transcriptsBy transcripts
425
+setMethod(
426
+    ".buildRange", signature("TxDb"),
427
+    function(range, groupId = "transcript", tstart, tend, chromosome, args, ...) {
428
+        ## If chromosome (and optional start and end) information is present we only extract parts of the annotation data
429
+        noSubset <- is.null(tstart) && is.null(tend)
430
+        if (!is.null(chromosome)) {
431
+            chromosome <- .chrName(chromosome)
432
+            ## Seems like TxDb objects use pass by reference for the active chromosomes, so we have to
433
+            ## restore the old values after we are done
434
+            oldAct <- seqlevels(range)
435
+            oldRange <- range
436
+            on.exit({
437
+                restoreSeqlevels(oldRange)
438
+                seqlevels(oldRange, pruning.mode = "coarse") <- oldAct
439
+            })
440
+            restoreSeqlevels(range)
441
+            seqlevels(range, pruning.mode = "coarse") <- chromosome
442
+            sl <- seqlengths(range)
443
+            if (is.null(tstart)) {
444
+                tstart <- rep(1, length(chromosome))
445
+            }
446
+            if (is.null(tend)) {
447
+                tend <- sl[chromosome] + 1
448
+                tend[is.na(tend)] <- tstart[is.na(tend)] + 1
449
+            }
450
+            sRange <- GRanges(seqnames = chromosome, ranges = IRanges(start = tstart, end = tend))
451
+        }
452
+        ## First the mapping of internal transcript ID to transcript name
453
+        txs <- as.data.frame(values(transcripts(range, columns = c("tx_id", "tx_name"))))
454
+        rownames(txs) <- txs[, "tx_id"]
455
+        ## Now the CDS ranges
456
+        t2c <- cdsBy(range, "tx")
457
+        names(t2c) <- txs[names(t2c), 2]
458
+        tids <- rep(names(t2c), elementNROWS(t2c))
459
+        t2c <- unlist(t2c)
460
+        if (length(t2c)) {
461
+            t2c$tx_id <- tids
462
+            t2c$feature_type <- "CDS"
463
+        }
464
+        ## And the 5'UTRS
465
+        t2f <- fiveUTRsByTranscript(range)
466
+        names(t2f) <- txs[names(t2f), 2]
467
+        tids <- rep(names(t2f), elementNROWS(t2f))
468
+        t2f <- unlist(t2f)
469
+        if (length(t2f)) {
470
+            t2f$tx_id <- tids
471
+            t2f$feature_type <- "utr5"
472
+        }
473
+        ## And the 3'UTRS
474
+        t2t <- threeUTRsByTranscript(range)
475
+        names(t2t) <- txs[names(t2t), 2]
476
+        tids <- rep(names(t2t), elementNROWS(t2t))
477
+        t2t <- unlist(t2t)
478
+        if (length(t2t)) {
479
+            t2t$tx_id <- tids
480
+            t2t$feature_type <- "utr3"
481
+        }
482
+        ## And finally all the non-coding transcripts
483
+        nt2e <- exonsBy(range, "tx")
484
+        names(nt2e) <- txs[names(nt2e), 2]
485
+        nt2e <- nt2e[!names(nt2e) %in% c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id)]
486
+        tids <- rep(names(nt2e), elementNROWS(nt2e))
487
+        nt2e <- unlist(nt2e)
488
+        if (length(nt2e)) {
489
+            nt2e$tx_id <- tids
490
+            nt2e$feature_type <- "ncRNA"
491
+        }
492
+        ## Now we can merge the three back together (we need to change the column names of t2c to make them all the same)
493
+        colnames(values(t2c))[c(1, 2)] <- c("exon_id", "exon_name")
494
+        ## t2e <- c(t2c, t2f, t2t, nt2e) ## This is super-slow, much more efficient if we build the GRanges object from the individual bits and pieces
495
+        vals <- DataFrame(
496
+            exon_id = c(values(t2c)$exon_id, values(t2f)$exon_id, values(t2t)$exon_id, values(nt2e)$exon_id),
497
+            exon_name = c(values(t2c)$exon_name, values(t2f)$exon_name, values(t2t)$exon_name, values(nt2e)$exon_name),
498
+            exon_rank = c(values(t2c)$exon_rank, values(t2f)$exon_rank, values(t2t)$exon_rank, values(nt2e)$exon_rank),
499
+            tx_id = c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id, values(nt2e)$tx_id),
500
+            feature_type = c(values(t2c)$feature_type, values(t2f)$feature_type, values(t2t)$feature_type, values(nt2e)$feature_type)
501
+        )
502
+        t2e <- GRanges(
503
+            seqnames = c(seqnames(t2c), seqnames(t2f), seqnames(t2t), seqnames(nt2e)),
504
+            ranges = IRanges(
505
+                start = c(start(t2c), start(t2f), start(t2t), start(nt2e)),
506
+                end = c(end(t2c), end(t2f), end(t2t), end(nt2e))
507
+            ),
508
+            strand = c(strand(t2c), strand(t2f), strand(t2t), strand(nt2e))
509
+        )
510
+        if (all(is.na(vals$exon_name))) {
511
+            vals$exon_name <- paste(vals$tx_id, vals$exon_rank, sep = "_")
512
+        }
513
+        values(t2e) <- vals
514
+        if (length(t2e) == 0) {
515
+            return(GRanges())
516
+        }
517
+        ## Add the gene level annotation
518
+        g2t <- transcriptsBy(range, "gene")
519
+        gids <- rep(names(g2t), elementNROWS(g2t))
520
+        g2t <- unlist(g2t)
521
+        values(g2t)[["gene_id"]] <- gids
522
+        values(t2e)$gene_id <- gids[match(values(t2e)$tx_id, as.character(txs[as.character(values(g2t)$tx_id), 2]))]
523
+        vals <- values(t2e)[c("tx_id", "exon_name", "exon_rank", "feature_type", "tx_id", "gene_id")]
524
+        colnames(vals) <- c("transcript", "exon", "rank", "feature", "symbol", "gene")
525
+        ## Add the genome information
526
+        genome(t2e) <- unique(genome(range))
527
+        ## Finally we re-assign, subset if necessary, and sort
528
+        range <- t2e
529
+        values(range) <- vals
530
+        if (!noSubset && !is.null(chromosome)) {
531
+            ## We have to keep all exons for all the overlapping transcripts
532
+            txSel <- unique(subsetByOverlaps(g2t, sRange)$tx_name)
533
+            range <- range[range$transcript %in% txSel]
534
+        }
535
+        args <- list(genome = genome(range)[1])
536
+        return(.buildRange(range = sort(range), chromosome = chromosome, args = args, ...))
537
+    }
538
+)
539
+
540
+## For ensDb objects we extract the grouping information and use the GRanges method
541
+
542
+#' @importClassesFrom ensembldb EnsDb
543
+#' @importMethodsFrom ensembldb cdsBy exonsBy fiveUTRsByTranscript threeUTRsByTranscript transcriptsBy transcripts
544
+setMethod(
545
+    ".buildRange", signature("EnsDb"),
546
+    function(range, groupId = "transcript", tstart, tend, chromosome, args, ...) {
547
+        ## If chromosome (and optional start and end) information is present we only extract parts of the annotation data
548
+        noSubset <- is.null(tstart) && is.null(tend)
549
+        if (!is.null(chromosome)) {
550
+            chromosome <- .chrName(chromosome)
551
+            sl <- seqlengths(range)
552
+            if (is.null(tstart)) {
553
+                tstart <- rep(1, length(chromosome))
554
+            }
555
+            if (is.null(tend)) {
556
+                tend <- sl[chromosome] + 1
557
+                tend[is.na(tend)] <- tstart[is.na(tend)] + 1
558
+            }
559
+            sRange <- GRanges(seqnames = chromosome, ranges = IRanges(start = tstart, end = tend))
560
+            ## sRange <- GRangesFilter(sRange, type = "any") can we filter directly?
561
+        }
562
+        ## First the mapping of internal transcript ID to transcript name
563
+        txs <- as.data.frame(values(transcripts(range, columns = c("tx_id", "tx_name"))))
564
+        rownames(txs) <- txs[, "tx_id"]
565
+        ## Now the CDS ranges
566
+        t2c <- cdsBy(range, "tx")
567
+        names(t2c) <- txs[names(t2c), 2]
568
+        tids <- rep(names(t2c), elementNROWS(t2c))
569
+        t2c <- unlist(t2c)
570
+        if (length(t2c)) {
571
+            t2c$tx_id <- tids
572
+            t2c$feature_type <- "CDS"
573
+        }
574
+        ## And the 5'UTRS
575
+        t2f <- fiveUTRsByTranscript(range)
576
+        names(t2f) <- txs[names(t2f), 2]
577
+        tids <- rep(names(t2f), elementNROWS(t2f))
578
+        t2f <- unlist(t2f)
579
+        if (length(t2f)) {
580
+            t2f$tx_id <- tids
581
+            t2f$feature_type <- "utr5"
582
+        }
583
+        ## And the 3'UTRS
584
+        t2t <- threeUTRsByTranscript(range)
585
+        names(t2t) <- txs[names(t2t), 2]
586
+        tids <- rep(names(t2t), elementNROWS(t2t))
587
+        t2t <- unlist(t2t)
588
+        if (length(t2t)) {
589
+            t2t$tx_id <- tids
590
+            t2t$feature_type <- "utr3"
591
+        }
592
+        ## And finally all the non-coding transcripts
593
+        nt2e <- exonsBy(range, "tx")
594
+        names(nt2e) <- txs[names(nt2e), 2]
595
+        nt2e <- nt2e[!names(nt2e) %in% c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id)]
596
+        tids <- rep(names(nt2e), elementNROWS(nt2e))
597
+        nt2e <- unlist(nt2e)
598
+        if (length(nt2e)) {
599
+            nt2e$tx_id <- tids
600
+            nt2e$feature_type <- "ncRNA"
601
+        }
602
+        ## Now we can merge the three back together (we need to change the column names of t2c to make them all the same)
603
+        ## t2e <- c(t2c, t2f, t2t, nt2e) ## This is super-slow, much more efficient if we build the GRanges object from the individual bits and pieces
604
+        vals <- DataFrame(
605
+            exon_id = c(values(t2c)$exon_id, values(t2f)$exon_id, values(t2t)$exon_id, values(nt2e)$exon_id),
606
+            exon_name = c(values(t2c)$exon_id, values(t2f)$exon_id, values(t2t)$exon_id, values(nt2e)$exon_id),
607
+            exon_rank = c(values(t2c)$exon_rank, values(t2f)$exon_rank, values(t2t)$exon_rank, values(nt2e)$exon_rank),
608
+            tx_id = c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id, values(nt2e)$tx_id),
609
+            feature_type = c(values(t2c)$feature_type, values(t2f)$feature_type, values(t2t)$feature_type, values(nt2e)$feature_type)
610
+        )
611
+        t2e <- GRanges(
612
+            seqnames = c(seqnames(t2c), seqnames(t2f), seqnames(t2t), seqnames(nt2e)),
613
+            ranges = IRanges(
614
+                start = c(start(t2c), start(t2f), start(t2t), start(nt2e)),
615
+                end = c(end(t2c), end(t2f), end(t2t), end(nt2e))
616
+            ),
617
+            strand = c(strand(t2c), strand(t2f), strand(t2t), strand(nt2e))
618
+        )
619
+        if (all(is.na(vals$exon_name))) {
620
+            vals$exon_name <- paste(vals$tx_id, vals$exon_rank, sep = "_")
621
+        }
622
+        values(t2e) <- vals
623
+        if (length(t2e) == 0) {
624
+            return(GRanges())
625
+        }
626
+        ## Add the gene level annotation
627
+        g2t <- transcriptsBy(range, "gene")
628
+        gids <- rep(names(g2t), elementNROWS(g2t))
629
+        g2t <- unlist(g2t)
630
+        values(g2t)[["gene_id"]] <- gids
631
+        values(t2e)$gene_id <- gids[match(values(t2e)$tx_id, as.character(txs[as.character(values(g2t)$tx_id), 2]))]
632
+        vals <- values(t2e)[c("tx_id", "exon_name", "exon_rank", "feature_type", "tx_id", "gene_id")]
633
+        colnames(vals) <- c("transcript", "exon", "rank", "feature", "symbol", "gene")
634
+        ## Add the genome information
635
+        genome(t2e) <- unique(genome(range))
636
+        ## Finally we re-assign, subset if necessary, and sort
637
+        range <- t2e
638
+        values(range) <- vals
639
+        if (!noSubset && !is.null(chromosome)) {
640
+            ## We have to keep all exons for all the overlapping transcripts
641
+            txSel <- unique(subsetByOverlaps(g2t, sRange)$tx_name)
642
+            range <- range[range$transcript %in% txSel]
643
+        }
644
+        args <- list(genome = genome(range)[1])
645
+        return(.buildRange(range = sort(range), chromosome = chromosome, args = args, ...))
646
+    }
647
+)
648
+
649
+## General accessors ---------------------------------------------------------
650
+## Annotation Accessors ------------------------------------------------------
651
+
652
+#' @describeIn GeneRegionTrack-class Extract the gene identifiers for all
653
+#' gene models.
654
+#' @export
655
+setMethod("gene", signature(GdObject = "GeneRegionTrack"), function(GdObject) .getAnn(GdObject, "gene"))
656
+
657
+#' @describeIn GeneRegionTrack-class Replace the gene identifiers for all
658
+#' gene models.
659
+#' The replacement value must be a character of appropriate length or another
660
+#' vector that can be coerced into such.
661
+#' @export
662
+setReplaceMethod("gene", signature("GeneRegionTrack", "character"), function(GdObject, value) .setAnn(GdObject, value, "gene"))
663
+
664
+#' @describeIn GeneRegionTrack-class Extract the human-readble gene symbol
665
+#' for all gene models.
666
+#' @export
667
+setMethod("symbol", signature(GdObject = "GeneRegionTrack"), function(GdObject) .getAnn(GdObject, "symbol"))
668
+
669
+#' @describeIn GeneRegionTrack-class Replace the human-readable gene symbol
670
+#' for all gene models.
671
+#' The replacement value must be a character of appropriate length or another
672
+#' vector that can be coerced into such.
673
+#' @export
674
+setReplaceMethod("symbol", signature("GeneRegionTrack", "character"), function(GdObject, value) .setAnn(GdObject, value, "symbol"))
675
+
676
+#' @describeIn GeneRegionTrack-class Extract the transcript identifiers for all
677
+#' transcripts in the gene models.
678
+#' @export
679
+setMethod("transcript", signature(GdObject = "GeneRegionTrack"), function(GdObject) .getAnn(GdObject, "transcript"))
680
+
681
+#' @describeIn GeneRegionTrack-class Replace the transcript identifiers for all
682
+#' transcripts in the gene model. The replacement value must be a character of
683
+#' appropriate length or another vector that can be coerced into such.
684
+#' @export
685
+setReplaceMethod(
686
+    "transcript", signature("GeneRegionTrack", "character"),
687
+    function(GdObject, value) .setAnn(GdObject, value, "transcript")
688
+)
689
+
690
+#' @describeIn GeneRegionTrack-class Extract the exon identifiers for all exons
691
+#' in the gene models.
692
+#' @export
693
+setMethod("exon", signature(GdObject = "GeneRegionTrack"), function(GdObject) .getAnn(GdObject, "exon"))
694
+
695
+#' @describeIn GeneRegionTrack-class replace the exon identifiers for all exons
696
+#' in the gene model. The replacement value must be a character of appropriate
697
+#' length or another vector that can be coerced into such.
698
+#' @export
699
+setReplaceMethod("exon", signature("GeneRegionTrack", "character"), function(GdObject, value) .setAnn(GdObject, value, "exon"))
700
+
701
+#' @describeIn GeneRegionTrack-class extract the group membership for all track items.
702
+#' @export
703
+setMethod("group", "GeneRegionTrack", function(GdObject) transcript(GdObject))
704
+
705
+#' @describeIn GeneRegionTrack-class replace the grouping information for track items.
706
+#' The replacement value must be a factor of appropriate length or another
707
+#' vector that can be coerced into such.
708
+#' @export
709
+setReplaceMethod(
710
+    "group", signature("GeneRegionTrack", "character"),
711
+    function(GdObject, value) .setAnn(GdObject, value, "transcript")
712
+)
713
+
714
+#' @describeIn GeneRegionTrack-class return track item identifiers.
715
+#' Depending on the setting of the optional argument lowest, these are either
716
+#' the group identifiers or the individual item identifiers.
717
+#' export
718
+setMethod("identifier", "GeneRegionTrack", function(GdObject, type = .dpOrDefault(GdObject, "transcriptAnnotation", "symbol")) {
719
+    if (is.logical(type)) {
720
+        type <- ifelse("symbol", "gene", type[1])
721
+    }
722
+    if (is.null(type)) {
723
+        type <- "symbol"
724
+    }
725
+    id <- switch(as.character(type),
726
+        "symbol" = symbol(GdObject),
727
+        "gene" = gene(GdObject),
728
+        "transcript" = transcript(GdObject),
729
+        "feature" = feature(GdObject),
730
+        "exon" = exon(GdObject),
731
+        "lowest" = exon(GdObject),
732
+        symbol(GdObject)
733
+    )
734
+    id[is.na(id)] <- "NA"
735
+    return(id)
736
+})
737
+
738
+#' @describeIn GeneRegionTrack-class Set the track item identifiers.
739
+#' The replacement value has to be a character vector of appropriate length.
740
+#' This always replaces the group-level identifiers, so essentially it is
741
+#' similar to `groups<-`.
742
+#' @export
743
+setReplaceMethod("identifier", c("GeneRegionTrack", "character"), function(GdObject, value) {
744
+    type <- .dpOrDefault(GdObject, "transcriptAnnotation", "symbol")
745
+    switch(as.character(type),
746
+        "symbol" = symbol(GdObject) <- value,
747
+        "gene" = gene(GdObject) <- value,
748
+        "transcript" = transcript(GdObject) <- value,
749
+        "feature" = feature(GdObject) <- value,
750
+        "exon" = exon(GdObject) <- value,
751
+        "lowest" = exon(GdObject) <- value,
752
+        symbol(GdObject) <- value
753
+    )
754
+    return(GdObject)
755
+})
756
+
757
+## Stacking ------------------------------------------------------------------
758
+## Consolidate ---------------------------------------------------------------
759
+## Collapse  -----------------------------------------------------------------
760
+## Subset --------------------------------------------------------------------
761
+
762
+## FIXME: Still needs to be implemented
763
+
764
+#' @describeIn GeneRegionTrack-class Subset a GeneRegionTrack by coordinates
765
+#' and sort if necessary.
766
+#' @export
767
+setMethod("subset", signature(x = "ReferenceGeneRegionTrack"), function(x, ...) {
768
+    warning("ReferenceGeneRegionTrack objects are not supported yet.")
769
+    return(callNextMethod())
770
+})
771
+
772
+## Position ------------------------------------------------------------------
773
+## DrawGrid ------------------------------------------------------------------
774
+## DrawAxis ------------------------------------------------------------------
775
+## DrawGD --------------------------------------------------------------------
776
+
777
+#' @describeIn GeneRegionTrack-class plot the object to a graphics device.
778
+#' The return value of this method is the input object, potentially updated
779
+#' during the plotting operation. Internally, there are two modes in which the
780
+#' method can be called. Either in 'prepare' mode, in which case no plotting is
781
+#' done but the object is preprocessed based on the available space, or in
782
+#' 'plotting' mode, in which case the actual graphical output is created.
783
+#' Since subsetting of the object can be potentially costly, this can be
784
+#' switched off in case subsetting has already been performed before or
785
+#' is not necessary.
786
+#'
787
+#' @export
788
+setMethod("drawGD", signature("GeneRegionTrack"), function(GdObject, ...) {
789
+    displayPars(GdObject) <- list(showFeatureId = as.vector(.dpOrDefault(GdObject, "showExonId")))
790
+    GdObject <- callNextMethod()
791
+    return(invisible(GdObject))
792
+})
793
+
794
+## SetAs ---------------------------------------------------------------------
795
+
796
+#' @importFrom rtracklayer GenomicData
797
+setAs(
798
+    "GeneRegionTrack", "UCSCData",
799
+    function(from, to) {
800
+        ranges <- cbind(as(as(ranges(from), "DataFrame"), "data.frame"),
801
+            start = start(from),
802
+            end = end(from), color = .getBiotypeColor(from), strand = strand(from)
803
+        )
804
+        ranges <- ranges[order(start(from)), ]
805
+        ranges <- split(ranges, ranges[, "X.transcript"])
806
+        start <- vapply(ranges, function(x) min(x$start), FUN.VALUE = numeric(1L))
807
+        end <- vapply(ranges, function(x) max(x$end), FUN.VALUE = numeric(1L))
808
+        name <- vapply(ranges, function(x) as.character(unique(x$X.symbol)), FUN.VALUE = character(1L))
809
+        color <- vapply(ranges, function(x) as.character(unique(x$color)), FUN.VALUE = character(1L))
810
+        strand <- vapply(ranges, function(x) as.character(unique(x$strand)), FUN.VALUE = character(1L))
811
+        strand[strand == "*"] <- "+"
812
+        id <- names(ranges)
813
+        blocks <- vapply(ranges, nrow, FUN.VALUE = numeric(1L))
814
+        bsizes <- vapply(ranges, function(x) paste(x$end - x$start + 1, collapse = ","), FUN.VALUE = character(1L))
815
+        bstarts <- vapply(ranges, function(x) paste(x$start - min(x$start), collapse = ","), FUN.VALUE = character(1L))
816
+        dcolor <- as.integer(col2rgb(.dpOrDefault(from, "col")))
817
+        line <- new("BasicTrackLine",
818
+            name = names(from),
819
+            description = names(from),
820
+            visibility = stacking(from), color = dcolor, itemRgb = TRUE
821
+        )
822
+        new("UCSCData", rtracklayer::GenomicData(IRanges(start, end),
823
+            chrom = chromosome(from),
824
+            id = id, name = name, itemRgb = color, blockCount = blocks,
825
+            blockSizes = bsizes, blockStarts = bstarts,
826
+            strand = strand
827
+        ),
828
+        trackLine = line
829
+        )
830
+    }
831
+)
832
+
833
+setAs("GRanges", "GeneRegionTrack", function(from, to) GeneRegionTrack(range = from))
834
+
835
+setAs("GRangesList", "GeneRegionTrack", function(from, to) GeneRegionTrack(range = from))
836
+
837
+setAs("TxDb", "GeneRegionTrack", function(from, to) GeneRegionTrack(range = from))
838
+
839
+## Show ----------------------------------------------------------------------
840
+
841
+#' @describeIn GeneRegionTrack-class Show method.
842
+#' @export
843
+setMethod("show", signature(object = "GeneRegionTrack"), function(object) {
844
+    cat(sprintf("GeneRegionTrack '%s'\n%s\n", names(object), .annotationTrackInfo(object)))
845
+})
846
+
847
+#' @describeIn GeneRegionTrack-class Show method.
848
+#' @export
849
+setMethod("show", signature(object = "ReferenceGeneRegionTrack"), function(object) {
850
+    .referenceTrackInfo(object, "ReferenceGeneRegionTrack")
851
+})