...
|
...
|
@@ -2841,18 +2841,38 @@ setMethod(".buildRange", signature("TranscriptDb"),
|
2841
|
2841
|
t2f$tx_id <- tids
|
2842
|
2842
|
t2f$feature_type <- "utr5"
|
2843
|
2843
|
}
|
2844
|
|
- ## And finally the 3'UTRS
|
|
2844
|
+ ## And the 3'UTRS
|
2845
|
2845
|
t2t <- threeUTRsByTranscript(range)
|
2846
|
2846
|
names(t2t) <- txs[names(t2t), 2]
|
2847
|
|
- tids <- rep(names(t2t), elementLengths(t2t))
|
2848
|
|
- t2t <- unlist(t2t)
|
|
2847
|
+ tids <- rep(names(t2t), elementLengths(t2t))
|
|
2848
|
+ t2t <- unlist(t2t)
|
2849
|
2849
|
if(length(t2t)){
|
2850
|
2850
|
t2t$tx_id <- tids
|
2851
|
2851
|
t2t$feature_type <- "utr3"
|
2852
|
2852
|
}
|
|
2853
|
+ ## And finally all the non-coding transcripts
|
|
2854
|
+ nt2e <- exonsBy(range, "tx")
|
|
2855
|
+ names(nt2e) <- txs[names(nt2e), 2]
|
|
2856
|
+ nt2e <- nt2e[!names(nt2e) %in% c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id)]
|
|
2857
|
+ tids <- rep(names(nt2e), elementLengths(nt2e))
|
|
2858
|
+ nt2e <- unlist(nt2e)
|
|
2859
|
+ if(length(nt2e)){
|
|
2860
|
+ nt2e$tx_id <- tids
|
|
2861
|
+ nt2e$feature_type <- "ncRNA"
|
|
2862
|
+ }
|
2853
|
2863
|
## Now we can merge all the feature sets back together (we need to change the column names of t2c to make them all the same)
|
2854
|
2864
|
colnames(values(t2c))[1:2] <- c("exon_id", "exon_name")
|
2855
|
|
- t2e <- c(t2c, t2f, t2t)
|
|
2865
|
+ ## t2e <- c(t2c, t2f, t2t, nt2e) ## This is super-slow, much more efficient if we build the GRanges object from the individual bits and pieces
|
|
2866
|
+ vals <- DataFrame(exon_id=c(values(t2c)$exon_id, values(t2f)$exon_id, values(t2t)$exon_id, values(nt2e)$exon_id),
|
|
2867
|
+ exon_name=c(values(t2c)$exon_name, values(t2f)$exon_name, values(t2t)$exon_name, values(nt2e)$exon_name),
|
|
2868
|
+ exon_rank=c(values(t2c)$exon_rank, values(t2f)$exon_rank, values(t2t)$exon_rank, values(nt2e)$exon_rank),
|
|
2869
|
+ tx_id=c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id, values(nt2e)$tx_id),
|
|
2870
|
+ feature_type=c(values(t2c)$feature_type, values(t2f)$feature_type, values(t2t)$feature_type, values(nt2e)$feature_type))
|
|
2871
|
+ t2e <- GRanges(seqnames=c(seqnames(t2c), seqnames(t2f), seqnames(t2t), seqnames(nt2e)),
|
|
2872
|
+ ranges=IRanges(start=c(start(t2c), start(t2f), start(t2t), start(nt2e)),
|
|
2873
|
+ end=c(end(t2c), end(t2f), end(t2t), end(nt2e))),
|
|
2874
|
+ strand=c(strand(t2c), strand(t2f), strand(t2t), strand(nt2e)))
|
|
2875
|
+ values(t2e) <- vals
|
2856
|
2876
|
if(length(t2e)==0)
|
2857
|
2877
|
return(GRanges())
|
2858
|
2878
|
## Add the gene level annotation
|
...
|
...
|
@@ -2863,6 +2883,8 @@ setMethod(".buildRange", signature("TranscriptDb"),
|
2863
|
2883
|
values(t2e)$gene_id <- gids[match(values(t2e)$tx_id, as.character(txs[as.character(values(g2t)$tx_id),2]))]
|
2864
|
2884
|
vals <- values(t2e)[c("tx_id", "exon_id", "exon_rank", "feature_type", "tx_id", "gene_id")]
|
2865
|
2885
|
colnames(vals) <- c("transcript", "exon", "rank", "feature", "symbol", "gene")
|
|
2886
|
+ ## Add the genome information
|
|
2887
|
+ genome(t2e) <- unique(genome(range))
|
2866
|
2888
|
## Finally we re-assign, subset if necessary, and sort
|
2867
|
2889
|
range <- t2e
|
2868
|
2890
|
values(range) <- vals
|