Browse code

Previously, the coercion method from TranscriptDb to GeneRegionTrack was missing the non-coding exons. This is now fixed.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/Gviz@69317 bc3139a8-67e5-0310-9ffc-ced21a209358

Florian Hahne authored on 11/09/2012 12:22:36
Showing 2 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: Gviz
2
-Version: 1.1.15
2
+Version: 1.1.16
3 3
 Title: Plotting data and annotation information along genomic coordinates
4 4
 Author: Florian Hahne, Steffen Durinck, Robert Ivanek, Arne Mueller, Steve Lianoglou>
5 5
 Maintainer: Florian Hahne <florian.hahne@novartis.com>
... ...
@@ -2841,18 +2841,38 @@ setMethod(".buildRange", signature("TranscriptDb"),
2841 2841
                   t2f$tx_id <- tids
2842 2842
                   t2f$feature_type <- "utr5"
2843 2843
               }
2844
-              ## And finally the 3'UTRS
2844
+              ## And the 3'UTRS
2845 2845
               t2t <- threeUTRsByTranscript(range)
2846 2846
               names(t2t) <- txs[names(t2t), 2]
2847
-                  tids <- rep(names(t2t), elementLengths(t2t))
2848
-                  t2t <- unlist(t2t)
2847
+              tids <- rep(names(t2t), elementLengths(t2t))
2848
+              t2t <- unlist(t2t)
2849 2849
               if(length(t2t)){
2850 2850
                   t2t$tx_id <- tids
2851 2851
                   t2t$feature_type <- "utr3"
2852 2852
               }
2853
+              ## And finally all the non-coding transcripts
2854
+              nt2e <- exonsBy(range, "tx")
2855
+              names(nt2e) <- txs[names(nt2e), 2]
2856
+              nt2e <- nt2e[!names(nt2e) %in% c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id)]
2857
+              tids <- rep(names(nt2e), elementLengths(nt2e))
2858
+              nt2e <- unlist(nt2e)
2859
+              if(length(nt2e)){
2860
+                  nt2e$tx_id <- tids
2861
+                  nt2e$feature_type <- "ncRNA"
2862
+              }
2853 2863
               ## Now we can merge the three back together (we need to change the column names of t2c to make them all the same)
2854 2864
               colnames(values(t2c))[1:2] <- c("exon_id", "exon_name")
2855
-              t2e <- c(t2c, t2f, t2t)
2865
+              ## t2e <- c(t2c, t2f, t2t, nt2e) ## This is super-slow, much more efficient if we build the GRanges object from the individual bits and pieces
2866
+              vals <- DataFrame(exon_id=c(values(t2c)$exon_id, values(t2f)$exon_id, values(t2t)$exon_id, values(nt2e)$exon_id),
2867
+                                exon_name=c(values(t2c)$exon_name, values(t2f)$exon_name, values(t2t)$exon_name, values(nt2e)$exon_name),
2868
+                                exon_rank=c(values(t2c)$exon_rank, values(t2f)$exon_rank, values(t2t)$exon_rank, values(nt2e)$exon_rank),
2869
+                                tx_id=c(values(t2c)$tx_id, values(t2f)$tx_id, values(t2t)$tx_id, values(nt2e)$tx_id),
2870
+                                feature_type=c(values(t2c)$feature_type, values(t2f)$feature_type, values(t2t)$feature_type, values(nt2e)$feature_type))
2871
+              t2e <- GRanges(seqnames=c(seqnames(t2c), seqnames(t2f), seqnames(t2t), seqnames(nt2e)),
2872
+                             ranges=IRanges(start=c(start(t2c), start(t2f), start(t2t), start(nt2e)),
2873
+                                            end=c(end(t2c), end(t2f), end(t2t), end(nt2e))),
2874
+                             strand=c(strand(t2c), strand(t2f), strand(t2t), strand(nt2e)))
2875
+              values(t2e) <- vals
2856 2876
               if(length(t2e)==0)
2857 2877
                   return(GRanges())
2858 2878
               ## Add the gene level annotation
... ...
@@ -2863,6 +2883,8 @@ setMethod(".buildRange", signature("TranscriptDb"),
2863 2883
               values(t2e)$gene_id <- gids[match(values(t2e)$tx_id, as.character(txs[as.character(values(g2t)$tx_id),2]))]
2864 2884
               vals <- values(t2e)[c("tx_id", "exon_id", "exon_rank", "feature_type", "tx_id", "gene_id")]
2865 2885
               colnames(vals) <- c("transcript", "exon", "rank", "feature", "symbol", "gene")
2886
+              ## Add the genome information
2887
+              genome(t2e) <- unique(genome(range))
2866 2888
               ## Finally we re-assign, subset if necessary, and sort
2867 2889
               range <- t2e
2868 2890
               values(range) <- vals