Browse code

Correction of diverse bugs

Thomas authored on 30/08/2021 11:10:34
Showing1 changed files
... ...
@@ -42,7 +42,9 @@ findGenes <- function(region, m) {
42 42
 
43 43
     map[which(map$strand == -1), "strand"] <- "-"
44 44
     map[which(map$strand == 1), "strand"] <- "+"
45
-    map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
45
+    if(dim(map)[1] > 0){
46
+      map$chromosome_name <- as.character(GenomicRanges::seqnames(region))  
47
+    }
46 48
     gr <- GenomicRanges::makeGRangesFromDataFrame(map,
47 49
                                         keep.extra.columns = TRUE)
48 50
     return(gr)
... ...
@@ -302,7 +304,12 @@ plotGenomicTrack <- function(gr, UTR3, UTR5, region, cex) {
302 304
 
303 305
         }
304 306
     } else {
305
-        stop("There is no genes in the given region :", region)
307
+        message("There is no genes in the given region :", region)
308
+        graphics::plot(x = 1, ylim = c(0, 2),
309
+                     xlim = c(GenomicRanges::start(region),
310
+                              GenomicRanges::end(region)),
311
+                     xlab = GenomicRanges::seqnames(region), yaxt = "n", ylab = "",
312
+                     xaxt = "n")
306 313
     }
307 314
 
308 315
 }
Browse code

added an option to control the size of the text in legends and labels

Thomas authored on 25/04/2019 11:32:21
Showing1 changed files
... ...
@@ -50,7 +50,7 @@ findGenes <- function(region, m) {
50 50
 
51 51
 # Helper function for the two find UTR functions
52 52
 
53
-f <- function(UTR) {
53
+f <- function(UTR,region) {
54 54
   return_object <- GenomicRanges::GRanges()
55 55
   for (gene in unique(UTR$external_gene_name)) {
56 56
     temp <- UTR[which(UTR$external_gene_name == gene), ]
... ...
@@ -81,7 +81,7 @@ findUTR5 <- function(region, m) {
81 81
     UTR5 <- map[!is.na(map$`5_utr_start`), ]
82 82
     if (dim(UTR5)[1] > 0) {
83 83
         UTR5$chromosome_name <- as.character(GenomicRanges::seqnames(region))
84
-        UTR5 <- f(UTR5)
84
+        UTR5 <- f(UTR5,region)
85 85
 
86 86
     } else {
87 87
         UTR5 <- GenomicRanges::GRanges()
... ...
@@ -103,7 +103,7 @@ findUTR3 <- function(region, m) {
103 103
     UTR3 <- map[!is.na(map$`3_utr_start`), ]
104 104
     if (dim(UTR3)[1] > 0) {
105 105
         UTR3$chromosome_name <- as.character(GenomicRanges::seqnames(region))
106
-        UTR3 <- f(UTR3)
106
+        UTR3 <- f(UTR3,region)
107 107
     } else {
108 108
         UTR3 <- GenomicRanges::GRanges()
109 109
     }
... ...
@@ -264,7 +264,7 @@ arrowNumbers <- function(value) {
264 264
 # @param gr GRanges object containing the region to plot
265 265
 # @param region GRanges object containing the region to plot
266 266
 
267
-plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
267
+plotGenomicTrack <- function(gr, UTR3, UTR5, region, cex) {
268 268
 
269 269
     if (dim(as.data.frame(gr))[1] > 0) {
270 270
         index <- 0
... ...
@@ -274,7 +274,9 @@ plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
274 274
                 GenomicRanges::end(region)),
275 275
                 xlab = GenomicRanges::seqnames(region), yaxt = "n", ylab = "",
276 276
                 xaxt = "n")
277
-            graphics::axis(2, at = 1, unique(gr$external_gene_name), srt = 45)
277
+            graphics::axis(2, at = 1, labels = FALSE)
278
+            graphics::text(x = par()$usr[1] - 0.03 * (par()$usr[2] - par()$usr[1]), 
279
+                           y = 1, labels = unique(gr$external_gene_name),srt = 90, xpd = NA, font=2, cex= cex)
278 280
             plotGRanges(gr, region, 1, UTR3, UTR5)
279 281
         }
280 282
         if (length(unique(gr$external_gene_name)) > 1) {
... ...
@@ -287,7 +289,7 @@ plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
287 289
             lab <- IRanges::unique(gr$external_gene_name)
288 290
             graphics::axis(2, at = tlab, labels = FALSE)
289 291
             graphics::text(x = par()$usr[1] - 0.01 * (par()$usr[2] - par()$usr[1]), y = tlab,
290
-                labels = lab, srt = 45, xpd = NA, adj = 1)
292
+                labels = lab, srt = 45, xpd = NA, adj = 1, font=2, cex = cex)
291 293
             for (i in unique(gr$external_gene_name)) {
292 294
                 index <- index + 1
293 295
                 gr2 <- gr[(GenomicRanges::elementMetadata(gr)[, "external_gene_name"] %in% i)]
Browse code

Changed the code of Arrownumber so the the N is not constently overwritten and version bump to 0.99.16

Thomas authored on 15/04/2019 12:18:05
Showing1 changed files
... ...
@@ -245,13 +245,13 @@ arrowNumbers <- function(value) {
245 245
     if (value < 0.25) {
246 246
         N = 1
247 247
     }
248
-    if (value > 0.25) {
248
+    if ((value > 0.25) & (value <= 0.75)) {
249 249
         N = 2
250 250
     }
251
-    if (value > 0.75) {
251
+    if ((value > 0.75) & (value <= 1)) {
252 252
         N = 3
253 253
     }
254
-    if (value > 1) {
254
+    if ((value > 1) & (value <= 2)) {
255 255
         N = 6
256 256
     }
257 257
     if (value > 2) {
Browse code

replaced error() with stop() and simplified the character output used in those functions

Thomas authored on 15/04/2019 11:35:55
Showing1 changed files
... ...
@@ -241,6 +241,7 @@ plotGRanges <- function(gr, region, y, UTR3, UTR5) {
241 241
 
242 242
 arrowNumbers <- function(value) {
243 243
     N <- 0
244
+
244 245
     if (value < 0.25) {
245 246
         N = 1
246 247
     }
... ...
@@ -299,7 +300,7 @@ plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
299 300
 
300 301
         }
301 302
     } else {
302
-        error(paste0("There is no genes in the given region :", region))
303
+        stop("There is no genes in the given region :", region)
303 304
     }
304 305
 
305 306
 }
Browse code

replaced the getBiomaRt function code with a switch for better error handling

Thomas authored on 15/04/2019 10:08:29
Showing1 changed files
... ...
@@ -4,20 +4,6 @@
4 4
 ## Medical Bioinformatics Centre
5 5
 ###############################################################################
6 6
 
7
-# helper function to split the genes found
8
-# @param query GRanges query object
9
-# @param subject GRanges subject
10
-# @param column character for the name of columns you whish to retrive
11
-#               (default ENTREZ_ID)
12
-# @return returns a GRangesList object
13
-
14
-#splitColumnByOverlap <- function(query, subject, column = "ENTREZID") {
15
-#    olaps <- GenomicRanges::findOverlaps(query, subject)
16
-#    f1 <- factor(S4Vectors::subjectHits(olaps), levels = seq_len(S4Vectors::subjectLength(olaps)))
17
-#    IRanges::splitAsList(GenomicRanges::mcols(query)[[column]][S4Vectors::queryHits(olaps)], f1)
18
-#}
19
-
20
-
21 7
 # transform the dataframe in GRangesList
22 8
 # @param df list of dataframes containing the peaks
23 9
 # @return returns a GRangesList
... ...
@@ -62,24 +48,27 @@ findGenes <- function(region, m) {
62 48
     return(gr)
63 49
 }
64 50
 
51
+# Helper function for the two find UTR functions
52
+
53
+f <- function(UTR) {
54
+  return_object <- GenomicRanges::GRanges()
55
+  for (gene in unique(UTR$external_gene_name)) {
56
+    temp <- UTR[which(UTR$external_gene_name == gene), ]
57
+    temp <- GenomicRanges::makeGRangesFromDataFrame(temp, keep.extra.columns = TRUE)
58
+    temp <- temp[queryHits(GenomicRanges::findOverlaps(temp, region, ignore.strand = TRUE))]
59
+    if (length(temp) > 0) {
60
+      temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
61
+    }
62
+    return_object <- c(return_object, temp)
63
+  }
64
+  return(return_object)
65
+}
66
+
65 67
 # find UTR overlaping with the region for the given database
66 68
 # @param region GRanges object containing the region to plot
67 69
 # @param m biomaRt object @return returns GRanges with the UTR hits found in the region
68 70
 
69 71
 findUTR5 <- function(region, m) {
70
-    f <- function(UTR) {
71
-        return_object <- GenomicRanges::GRanges()
72
-        for (gene in unique(UTR$external_gene_name)) {
73
-            temp <- UTR[which(UTR$external_gene_name == gene), ]
74
-            temp <- GenomicRanges::makeGRangesFromDataFrame(temp, keep.extra.columns = TRUE)
75
-            temp <- temp[queryHits(GenomicRanges::findOverlaps(temp, region, ignore.strand = TRUE))]
76
-            if (length(temp) > 0) {
77
-                temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
78
-            }
79
-            return_object <- c(return_object, temp)
80
-        }
81
-        return(return_object)
82
-    }
83 72
     filters <- c("chromosome_name", "start", "end")
84 73
     chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
85 74
     values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
... ...
@@ -102,19 +91,6 @@ findUTR5 <- function(region, m) {
102 91
 
103 92
 
104 93
 findUTR3 <- function(region, m) {
105
-    f <- function(UTR) {
106
-        return_object <- GenomicRanges::GRanges()
107
-        for (gene in unique(UTR$external_gene_name)) {
108
-            temp <- UTR[which(UTR$external_gene_name == gene), ]
109
-            temp <- GenomicRanges::makeGRangesFromDataFrame(temp, keep.extra.columns = TRUE)
110
-            temp <- temp[queryHits(GenomicRanges::findOverlaps(temp, region, ignore.strand = TRUE))]
111
-            if (length(temp) > 0) {
112
-                temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
113
-            }
114
-            return_object <- c(return_object, temp)
115
-        }
116
-        return(return_object)
117
-    }
118 94
     filters <- c("chromosome_name", "start", "end")
119 95
     chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
120 96
     values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
... ...
@@ -141,20 +117,26 @@ findUTR3 <- function(region, m) {
141 117
 
142 118
 getBiomaRt <- function(region, genome = c("hg19", "GRCh38", "mm10")) {
143 119
 
144
-    if (genome == "hg19") {
145
-        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
146
-                                host = "grch37.ensembl.org", path = "/biomart/martservice",
147
-                                dataset = "hsapiens_gene_ensembl")
148
-    }
149
-
150
-    if (genome == "GRCh38") {
151
-        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
152
-                                host = "grch38.ensembl.org", path = "/biomart/martservice",
153
-                                dataset = "hsapiens_gene_ensembl")
154
-    }
155
-    if (genome == "mm10") {
156
-        m <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
157
-    }
120
+  switch(genome, 
121
+         hg19={
122
+             m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
123
+                                 host = "grch37.ensembl.org", path = "/biomart/martservice",
124
+                                 dataset = "hsapiens_gene_ensembl")
125
+         },
126
+         GRCh38={
127
+             m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
128
+                                 host = "grch38.ensembl.org", path = "/biomart/martservice",
129
+                                 dataset = "hsapiens_gene_ensembl")  
130
+         },
131
+         mm10={
132
+             m <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")  
133
+         },
134
+         {
135
+             m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
136
+                                 host = "grch37.ensembl.org", path = "/biomart/martservice",
137
+                                 dataset = "hsapiens_gene_ensembl")
138
+         }
139
+  )
158 140
 
159 141
     return(m)
160 142
 }
Browse code

the function splitColumnByOverlap was not used at all in the code. I removed it

Thomas authored on 15/04/2019 09:57:12
Showing1 changed files
... ...
@@ -11,11 +11,11 @@
11 11
 #               (default ENTREZ_ID)
12 12
 # @return returns a GRangesList object
13 13
 
14
-splitColumnByOverlap <- function(query, subject, column = "ENTREZID") {
15
-    olaps <- GenomicRanges::findOverlaps(query, subject)
16
-    f1 <- factor(S4Vectors::subjectHits(olaps), levels = seq_len(S4Vectors::subjectLength(olaps)))
17
-    IRanges::splitAsList(GenomicRanges::mcols(query)[[column]][S4Vectors::queryHits(olaps)], f1)
18
-}
14
+#splitColumnByOverlap <- function(query, subject, column = "ENTREZID") {
15
+#    olaps <- GenomicRanges::findOverlaps(query, subject)
16
+#    f1 <- factor(S4Vectors::subjectHits(olaps), levels = seq_len(S4Vectors::subjectLength(olaps)))
17
+#    IRanges::splitAsList(GenomicRanges::mcols(query)[[column]][S4Vectors::queryHits(olaps)], f1)
18
+#}
19 19
 
20 20
 
21 21
 # transform the dataframe in GRangesList
Browse code

the function geneRanges was not used at all in the code. I removed it

Thomas authored on 15/04/2019 09:51:57
Showing1 changed files
... ...
@@ -4,20 +4,6 @@
4 4
 ## Medical Bioinformatics Centre
5 5
 ###############################################################################
6 6
 
7
-# get the genes as GRanges
8
-# @param db object of the type BSgenome.Hsapiens.UCSC.hg19
9
-# @param column character, name of the column you wish to retrive (default ENTREZ_ID)
10
-# @return returns a GRanges of genes
11
-
12
-geneRanges <- function(db, column = "ENTREZID") {
13
-    g <- genes(db, columns = column)
14
-    col <- mcols(g)[[column]]
15
-    genes <- granges(g)[rep(seq_along(g), IRanges::elementNROWS(col))]
16
-    mcols(genes)[[column]] <- as.character(unlist(col))
17
-    genes
18
-}
19
-
20
-
21 7
 # helper function to split the genes found
22 8
 # @param query GRanges query object
23 9
 # @param subject GRanges subject
Browse code

0.99.14 comments from code review

Thomas authored on 10/04/2019 08:11:21
Showing1 changed files
... ...
@@ -331,7 +331,7 @@ plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
331 331
 
332 332
         }
333 333
     } else {
334
-        print(paste0("There is no genes in the given region :", region))
334
+        error(paste0("There is no genes in the given region :", region))
335 335
     }
336 336
 
337 337
 }
Browse code

0.99.9 indentation change and version bump

Thomas authored on 27/03/2019 16:11:23
Showing1 changed files
... ...
@@ -157,14 +157,14 @@ getBiomaRt <- function(region, genome = c("hg19", "GRCh38", "mm10")) {
157 157
 
158 158
     if (genome == "hg19") {
159 159
         m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
160
-                               host = "grch37.ensembl.org", path = "/biomart/martservice",
161
-                               dataset = "hsapiens_gene_ensembl")
160
+                                host = "grch37.ensembl.org", path = "/biomart/martservice",
161
+                                dataset = "hsapiens_gene_ensembl")
162 162
     }
163 163
 
164 164
     if (genome == "GRCh38") {
165 165
         m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
166
-                               host = "grch38.ensembl.org", path = "/biomart/martservice",
167
-                               dataset = "hsapiens_gene_ensembl")
166
+                                host = "grch38.ensembl.org", path = "/biomart/martservice",
167
+                                dataset = "hsapiens_gene_ensembl")
168 168
     }
169 169
     if (genome == "mm10") {
170 170
         m <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
... ...
@@ -229,11 +229,11 @@ plotGRanges <- function(gr, region, y, UTR3, UTR5) {
229 229
             N <- arrowNumbers(value)
230 230
             if (as.character(strand(gr_inv[j])) == "-") {
231 231
                 arrowLine(GenomicRanges::end(gr_inv[j]), y, GenomicRanges::start(gr_inv[j]), y,
232
-                  n_arr = N, length = 0.1)
232
+                n_arr = N, length = 0.1)
233 233
             }
234 234
             if (as.character(strand(gr_inv[j])) == "+") {
235 235
                 arrowLine(GenomicRanges::start(gr_inv[j]), y, GenomicRanges::end(gr_inv[j]), y,
236
-                  n_arr = N, length = 0.1)
236
+                n_arr = N, length = 0.1)
237 237
             }
238 238
         }
239 239
     }
... ...
@@ -241,14 +241,14 @@ plotGRanges <- function(gr, region, y, UTR3, UTR5) {
241 241
         plotRectangles(region, UTR5, y, size = 0.25)
242 242
         if (as.character(unique(strand(gr))) == "-") {
243 243
             GenomicRanges::end(
244
-               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
245
-               ) <- GenomicRanges::start(
244
+                gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
245
+                ) <- GenomicRanges::start(
246 246
                     UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
247 247
         }
248 248
         if (as.character(unique(strand(gr))) == "+") {
249 249
             GenomicRanges::start(
250
-               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
251
-               ) <- GenomicRanges::end(
250
+                gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
251
+                ) <- GenomicRanges::end(
252 252
                     UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
253 253
         }
254 254
     }
... ...
@@ -256,14 +256,14 @@ plotGRanges <- function(gr, region, y, UTR3, UTR5) {
256 256
         plotRectangles(region, UTR3, y, size = 0.25)
257 257
         if (as.character(unique(strand(gr))) == "+") {
258 258
             GenomicRanges::end(
259
-               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
260
-               ) <- GenomicRanges::start(
259
+                gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
260
+                ) <- GenomicRanges::start(
261 261
                     UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
262 262
         }
263 263
         if (as.character(unique(strand(gr))) == "-") {
264 264
             GenomicRanges::start(
265
-               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
266
-               ) <- GenomicRanges::end(
265
+                gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
266
+                ) <- GenomicRanges::end(
267 267
                     UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
268 268
         }
269 269
     }
... ...
@@ -323,9 +323,9 @@ plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
323 323
                 index <- index + 1
324 324
                 gr2 <- gr[(GenomicRanges::elementMetadata(gr)[, "external_gene_name"] %in% i)]
325 325
                 UTR3_ind <- UTR3[(GenomicRanges::elementMetadata(UTR3)[, "external_gene_name"] %in%
326
-                  i)]
326
+                    i)]
327 327
                 UTR5_ind <- UTR5[(GenomicRanges::elementMetadata(UTR5)[, "external_gene_name"] %in%
328
-                  i)]
328
+                    i)]
329 329
                 plotGRanges(gr2, region, index - 0.5, UTR3 = UTR3_ind, UTR5 = UTR5_ind)
330 330
             }
331 331
 
Browse code

0.99.8 further indent

Thomas authored on 22/03/2019 21:14:31
Showing1 changed files
... ...
@@ -53,27 +53,27 @@ toGRList <- function(df) {
53 53
 # @param m biomaRt object @return returns GRanges with the Genes hits found in the region
54 54
 
55 55
 findGenes <- function(region, m) {
56
-       filters <- c("chromosome_name", "start", "end")
57
-       chr <- substr(as.character(seqnames(region)), 4,
58
-                   nchar(as.character(seqnames(region))))
59
-       values <- list(chr, GenomicRanges::start(region),
60
-                   GenomicRanges::end(region))
61
-       map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
62
-                                                      "external_gene_name",
63
-                                                      "ensembl_exon_id",
64
-                                                      "chromosome_name",
65
-                                                      "exon_chrom_start",
66
-                                                      "exon_chrom_end",
67
-                                                      "strand"),
68
-                                                      filters = filters,
69
-                                                      values = values)
70
-
71
-       map[which(map$strand == -1), "strand"] <- "-"
72
-       map[which(map$strand == 1), "strand"] <- "+"
73
-       map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
74
-       gr <- GenomicRanges::makeGRangesFromDataFrame(map,
75
-                                       keep.extra.columns = TRUE)
76
-       return(gr)
56
+    filters <- c("chromosome_name", "start", "end")
57
+    chr <- substr(as.character(seqnames(region)), 4,
58
+        nchar(as.character(seqnames(region))))
59
+    values <- list(chr, GenomicRanges::start(region),
60
+        GenomicRanges::end(region))
61
+    map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
62
+                                                    "external_gene_name",
63
+                                                    "ensembl_exon_id",
64
+                                                    "chromosome_name",
65
+                                                    "exon_chrom_start",
66
+                                                    "exon_chrom_end",
67
+                                                    "strand"),
68
+                                                    filters = filters,
69
+                                                    values = values)
70
+
71
+    map[which(map$strand == -1), "strand"] <- "-"
72
+    map[which(map$strand == 1), "strand"] <- "+"
73
+    map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
74
+    gr <- GenomicRanges::makeGRangesFromDataFrame(map,
75
+                                        keep.extra.columns = TRUE)
76
+    return(gr)
77 77
 }
78 78
 
79 79
 # find UTR overlaping with the region for the given database
Browse code

0.99.7 further indent

Thomas authored on 22/03/2019 21:00:49
Showing1 changed files
... ...
@@ -53,23 +53,27 @@ toGRList <- function(df) {
53 53
 # @param m biomaRt object @return returns GRanges with the Genes hits found in the region
54 54
 
55 55
 findGenes <- function(region, m) {
56
-      filters <- c("chromosome_name", "start", "end")
57
-      chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
58
-      values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
59
-      map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
56
+       filters <- c("chromosome_name", "start", "end")
57
+       chr <- substr(as.character(seqnames(region)), 4,
58
+                   nchar(as.character(seqnames(region))))
59
+       values <- list(chr, GenomicRanges::start(region),
60
+                   GenomicRanges::end(region))
61
+       map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
60 62
                                                       "external_gene_name",
61 63
                                                       "ensembl_exon_id",
62 64
                                                       "chromosome_name",
63 65
                                                       "exon_chrom_start",
64 66
                                                       "exon_chrom_end",
65
-                                                      "strand"), filters = filters,
67
+                                                      "strand"),
68
+                                                      filters = filters,
66 69
                                                       values = values)
67 70
 
68
-      map[which(map$strand == -1), "strand"] <- "-"
69
-      map[which(map$strand == 1), "strand"] <- "+"
70
-      map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
71
-      gr <- GenomicRanges::makeGRangesFromDataFrame(map, keep.extra.columns = TRUE)
72
-      return(gr)
71
+       map[which(map$strand == -1), "strand"] <- "-"
72
+       map[which(map$strand == 1), "strand"] <- "+"
73
+       map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
74
+       gr <- GenomicRanges::makeGRangesFromDataFrame(map,
75
+                                       keep.extra.columns = TRUE)
76
+       return(gr)
73 77
 }
74 78
 
75 79
 # find UTR overlaping with the region for the given database
Browse code

0.99.6 indent

Thomas authored on 22/03/2019 20:48:31
Showing1 changed files
... ...
@@ -53,23 +53,23 @@ toGRList <- function(df) {
53 53
 # @param m biomaRt object @return returns GRanges with the Genes hits found in the region
54 54
 
55 55
 findGenes <- function(region, m) {
56
-    filters <- c("chromosome_name", "start", "end")
57
-    chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
58
-    values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
59
-    map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
60
-                                                     "external_gene_name",
61
-                                                     "ensembl_exon_id",
62
-                                                     "chromosome_name",
63
-                                                     "exon_chrom_start",
64
-                                                     "exon_chrom_end",
65
-                                                     "strand"), filters = filters,
66
-                                                     values = values)
67
-
68
-    map[which(map$strand == -1), "strand"] <- "-"
69
-    map[which(map$strand == 1), "strand"] <- "+"
70
-    map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
71
-    gr <- GenomicRanges::makeGRangesFromDataFrame(map, keep.extra.columns = TRUE)
72
-    return(gr)
56
+      filters <- c("chromosome_name", "start", "end")
57
+      chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
58
+      values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
59
+      map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
60
+                                                      "external_gene_name",
61
+                                                      "ensembl_exon_id",
62
+                                                      "chromosome_name",
63
+                                                      "exon_chrom_start",
64
+                                                      "exon_chrom_end",
65
+                                                      "strand"), filters = filters,
66
+                                                      values = values)
67
+
68
+      map[which(map$strand == -1), "strand"] <- "-"
69
+      map[which(map$strand == 1), "strand"] <- "+"
70
+      map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
71
+      gr <- GenomicRanges::makeGRangesFromDataFrame(map, keep.extra.columns = TRUE)
72
+      return(gr)
73 73
 }
74 74
 
75 75
 # find UTR overlaping with the region for the given database
Browse code

0.99.5 further formatting

Thomas authored on 22/03/2019 20:34:10
Showing1 changed files
... ...
@@ -56,9 +56,14 @@ findGenes <- function(region, m) {
56 56
     filters <- c("chromosome_name", "start", "end")
57 57
     chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
58 58
     values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
59
-    map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id", "external_gene_name", "ensembl_exon_id",
60
-        "chromosome_name", "exon_chrom_start", "exon_chrom_end", "strand"), filters = filters,
61
-        values = values)
59
+    map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
60
+                                                     "external_gene_name",
61
+                                                     "ensembl_exon_id",
62
+                                                     "chromosome_name",
63
+                                                     "exon_chrom_start",
64
+                                                     "exon_chrom_end",
65
+                                                     "strand"), filters = filters,
66
+                                                     values = values)
62 67
 
63 68
     map[which(map$strand == -1), "strand"] <- "-"
64 69
     map[which(map$strand == 1), "strand"] <- "+"
... ...
@@ -147,12 +152,15 @@ findUTR3 <- function(region, m) {
147 152
 getBiomaRt <- function(region, genome = c("hg19", "GRCh38", "mm10")) {
148 153
 
149 154
     if (genome == "hg19") {
150
-        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "grch37.ensembl.org", path = "/biomart/martservice",
151
-            dataset = "hsapiens_gene_ensembl")
155
+        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
156
+                               host = "grch37.ensembl.org", path = "/biomart/martservice",
157
+                               dataset = "hsapiens_gene_ensembl")
152 158
     }
159
+
153 160
     if (genome == "GRCh38") {
154
-        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "grch38.ensembl.org", path = "/biomart/martservice",
155
-            dataset = "hsapiens_gene_ensembl")
161
+        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
162
+                               host = "grch38.ensembl.org", path = "/biomart/martservice",
163
+                               dataset = "hsapiens_gene_ensembl")
156 164
     }
157 165
     if (genome == "mm10") {
158 166
         m <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
... ...
@@ -228,23 +236,31 @@ plotGRanges <- function(gr, region, y, UTR3, UTR5) {
228 236
     if (length(UTR5) > 0) {
229 237
         plotRectangles(region, UTR5, y, size = 0.25)
230 238
         if (as.character(unique(strand(gr))) == "-") {
231
-            GenomicRanges::end(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]) <- GenomicRanges::start(UTR5[subjectHits(GenomicRanges::findOverlaps(gr,
232
-                UTR5))])
239
+            GenomicRanges::end(
240
+               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
241
+               ) <- GenomicRanges::start(
242
+                    UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
233 243
         }
234 244
         if (as.character(unique(strand(gr))) == "+") {
235
-            GenomicRanges::start(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]) <- GenomicRanges::end(UTR5[subjectHits(GenomicRanges::findOverlaps(gr,
236
-                UTR5))])
245
+            GenomicRanges::start(
246
+               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
247
+               ) <- GenomicRanges::end(
248
+                    UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
237 249
         }
238 250
     }
239 251
     if (length(UTR3) > 0) {
240 252
         plotRectangles(region, UTR3, y, size = 0.25)
241 253
         if (as.character(unique(strand(gr))) == "+") {
242
-            GenomicRanges::end(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]) <- GenomicRanges::start(UTR3[subjectHits(GenomicRanges::findOverlaps(gr,
243
-                UTR3))])
254
+            GenomicRanges::end(
255
+               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
256
+               ) <- GenomicRanges::start(
257
+                    UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
244 258
         }
245 259
         if (as.character(unique(strand(gr))) == "-") {
246
-            GenomicRanges::start(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]) <- GenomicRanges::end(UTR3[subjectHits(GenomicRanges::findOverlaps(gr,
247
-                UTR3))])
260
+            GenomicRanges::start(
261
+               gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
262
+               ) <- GenomicRanges::end(
263
+                    UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
248 264
         }
249 265
     }
250 266
     plotRectangles(region, gr, y, size = 0.5)
... ...
@@ -280,14 +296,18 @@ plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
280 296
     if (dim(as.data.frame(gr))[1] > 0) {
281 297
         index <- 0
282 298
         if (length(unique(gr$external_gene_name)) == 1) {
283
-            graphics::plot(x = 1, ylim = c(0, 2), xlim = c(GenomicRanges::start(region), GenomicRanges::end(region)),
284
-                xlab = GenomicRanges::seqnames(region), yaxt = "n", ylab = "", xaxt = "n")
299
+            graphics::plot(x = 1, ylim = c(0, 2),
300
+                xlim = c(GenomicRanges::start(region),
301
+                GenomicRanges::end(region)),
302
+                xlab = GenomicRanges::seqnames(region), yaxt = "n", ylab = "",
303
+                xaxt = "n")
285 304
             graphics::axis(2, at = 1, unique(gr$external_gene_name), srt = 45)
286 305
             plotGRanges(gr, region, 1, UTR3, UTR5)
287 306
         }
288 307
         if (length(unique(gr$external_gene_name)) > 1) {
289
-            graphics::plot(x = 1, ylim = c(0, length(unique(gr$external_gene_name))), xlim = c(start(region),
290
-                end(region)), xlab = seqnames(region), yaxt = "n", ylab = "")
308
+            graphics::plot(x = 1, ylim = c(0, length(unique(gr$external_gene_name))),
309
+                xlim = c(start(region), end(region)), xlab = seqnames(region),
310
+                yaxt = "n", ylab = "")
291 311
             tmin <- 1
292 312
             tmax <- length(IRanges::unique(gr$external_gene_name))
293 313
             tlab <- seq(tmin, tmax) - 0.5
Browse code

0.99.3 Formated the code with formatR

Thomas authored on 22/03/2019 19:47:09
Showing1 changed files
... ...
@@ -1,442 +1,314 @@
1 1
 ###############################################################################
2
-##
3 2
 ## plotRegiont.R -- plot a snapshot of a genomic region
4
-## 30 november 2018
5
-## Thomas Faux
3
+## 30 november 2018 Thomas Faux
6 4
 ## Medical Bioinformatics Centre
7 5
 ###############################################################################
8 6
 
9
-## geneRanges -- get the genes as GRanges
10
-
11
-## splitColumnByOverlap -- helper function
12
-
13
-## toGRList -- transform the dataframe in GRangesList
14
-
15
-## findGenes -- find genes overlaping with the region for the given database
16
-
17
-## plotGenomicTrack -- plot the genomic track
18
-
19 7
 # get the genes as GRanges
20
-#
21 8
 # @param db object of the type BSgenome.Hsapiens.UCSC.hg19
22 9
 # @param column character, name of the column you wish to retrive (default ENTREZ_ID)
23 10
 # @return returns a GRanges of genes
24
-###############################################################################
25
-##
26
-## geneRanges -- get the genes as GRanges
27
-##
28
-## db     -- db object : object of the type BSgenome.Hsapiens.UCSC.hg19
29
-##
30
-## column -- character : name of the column you wish to retrive (default ENTREZ_ID)
31
-##
32
-## returns a GRanges of genes
33
-###############################################################################
34
-geneRanges <- function(db, column="ENTREZID")
35
-  {
36
-    g <- genes(db, columns=column)
11
+
12
+geneRanges <- function(db, column = "ENTREZID") {
13
+    g <- genes(db, columns = column)
37 14
     col <- mcols(g)[[column]]
38 15
     genes <- granges(g)[rep(seq_along(g), IRanges::elementNROWS(col))]
39 16
     mcols(genes)[[column]] <- as.character(unlist(col))
40 17
     genes
41
-  }
18
+}
42 19
 
43 20
 
44 21
 # helper function to split the genes found
45
-#
46 22
 # @param query GRanges query object
47 23
 # @param subject GRanges subject
48
-# @param column character for the name of columns you whish to retrive(default ENTREZ_ID)
24
+# @param column character for the name of columns you whish to retrive
25
+#               (default ENTREZ_ID)
49 26
 # @return returns a GRangesList object
50
-###############################################################################
51
-##
52
-## splitColumnByOverlap -- helper function to split the genes found
53
-##
54
-## query   -- GRanges : query
55
-##
56
-## subject -- GRanges : subject
57
-##
58
-## column -- character : name of the column you wish to retrive (default ENTREZ_ID)
59
-##
60
-## returns a GRangesList object
61
-###############################################################################
62
-splitColumnByOverlap <- function(query, subject, column="ENTREZID")
63
-  {
27
+
28
+splitColumnByOverlap <- function(query, subject, column = "ENTREZID") {
64 29
     olaps <- GenomicRanges::findOverlaps(query, subject)
65
-    f1 <- factor(S4Vectors::subjectHits(olaps),
66
-                 levels=seq_len(S4Vectors::subjectLength(olaps)))
30
+    f1 <- factor(S4Vectors::subjectHits(olaps), levels = seq_len(S4Vectors::subjectLength(olaps)))
67 31
     IRanges::splitAsList(GenomicRanges::mcols(query)[[column]][S4Vectors::queryHits(olaps)], f1)
68 32
 }
69 33
 
70 34
 
71 35
 # transform the dataframe in GRangesList
72
-#
73 36
 # @param df list of dataframes containing the peaks
74 37
 # @return returns a GRangesList
75
-###############################################################################
76
-## not used ?
77
-## toGRList -- transform the dataframe in GRangesList
78
-##
79
-## df    -- dataframe    : list of dataframes containing the peaks
80
-##
81
-## returns a GRangesList
82
-###############################################################################
83
-toGRList <- function(df){
84
-  grlist <- GenomicRanges::GRangesList()
85
-  for(i in unique(df$external_gene_name)){
86
-    temp <- GenomicRanges::makeGRangesFromDataFrame(df[which(df$external_gene_name == i),])
87
-    grlist[[i]] <- temp
88
-  }
89
-  names(grlist) <- unique(df$external_gene_name)
90
-  return(grlist)
38
+
39
+toGRList <- function(df) {
40
+    grlist <- GenomicRanges::GRangesList()
41
+    for (i in unique(df$external_gene_name)) {
42
+        temp <- GenomicRanges::makeGRangesFromDataFrame(df[which(df$external_gene_name == i),
43
+            ])
44
+        grlist[[i]] <- temp
45
+    }
46
+    names(grlist) <- unique(df$external_gene_name)
47
+    return(grlist)
91 48
 }
92 49
 
93 50
 # find genes overlaping with the region for the given database
94
-#
95 51
 # @param region GRanges object containing the region to plot
96
-# @param genome Genome used by the db object "hg19","GRCh38" or "mm10"
97
-# @param m biomaRt object
98
-# @return returns GRanges with the Genes hits found in the region
99
-###############################################################################
100
-##
101
-## findGenes -- find genes overlaping with the region for the given database
102
-##
103
-## region -- GRanges : GRanges object containing the region to plot
104
-##
105
-## m -- biomaRt object
106
-##
107
-## returns genomic range of the genes present in the genomic region
108
-###############################################################################
109
-findGenes <- function(region,m){
110
-  filters <-c("chromosome_name","start","end")
111
-  chr <- substr(as.character(seqnames(region)),4,nchar(as.character(seqnames(region))))
112
-  values <-  list(chr,GenomicRanges::start(region),GenomicRanges::end(region))
113
-  map <- biomaRt::getBM(mart = m,
114
-        attributes = c("ensembl_gene_id", "external_gene_name","ensembl_exon_id","chromosome_name","exon_chrom_start","exon_chrom_end","strand"),
115
-        filters = filters,
116
-        values = values
117
-  )
118
-
119
-  map[which(map$strand == -1),"strand"] <- "-"
120
-  map[which(map$strand == 1),"strand"] <- "+"
121
-  map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
122
-  gr <- GenomicRanges::makeGRangesFromDataFrame(map,keep.extra.columns = TRUE)
123
-  return(gr)
52
+# @param genome Genome used by the db object 'hg19','GRCh38' or 'mm10'
53
+# @param m biomaRt object @return returns GRanges with the Genes hits found in the region
54
+
55
+findGenes <- function(region, m) {
56
+    filters <- c("chromosome_name", "start", "end")
57
+    chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
58
+    values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
59
+    map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id", "external_gene_name", "ensembl_exon_id",
60
+        "chromosome_name", "exon_chrom_start", "exon_chrom_end", "strand"), filters = filters,
61
+        values = values)
62
+
63
+    map[which(map$strand == -1), "strand"] <- "-"
64
+    map[which(map$strand == 1), "strand"] <- "+"
65
+    map$chromosome_name <- as.character(GenomicRanges::seqnames(region))
66
+    gr <- GenomicRanges::makeGRangesFromDataFrame(map, keep.extra.columns = TRUE)
67
+    return(gr)
124 68
 }
125 69
 
126 70
 # find UTR overlaping with the region for the given database
127
-#
128 71
 # @param region GRanges object containing the region to plot
129
-# @param m biomaRt object
130
-# @return returns GRanges with the UTR hits found in the region
131
-###############################################################################
132
-##
133
-## findUTR -- find genes overlaping with the region for the given database
134
-##
135
-## region -- GRanges : GRanges object containing the region to plot
136
-##
137
-## genome -- character : Genome used by the db object "hg19","GRCh38" or "mm10"
138
-##
139
-## returns genomic rangelist of the UTR regions present in the genomic region per gene
140
-###############################################################################
141
-findUTR5 <- function(region,m){
142
-  f <- function(UTR){
143
-    return_object <- GenomicRanges::GRanges()
144
-    for(gene in unique(UTR$external_gene_name)){
145
-      temp <- UTR[which(UTR$external_gene_name == gene),]
146
-      temp <- GenomicRanges::makeGRangesFromDataFrame(temp,keep.extra.columns = TRUE)
147
-      temp <- temp[queryHits(GenomicRanges::findOverlaps(temp,region,ignore.strand=TRUE))]
148
-      if(length(temp) > 0){
149
-        temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
150
-      }
151
-      return_object <- c(return_object,temp)
72
+# @param m biomaRt object @return returns GRanges with the UTR hits found in the region
73
+
74
+findUTR5 <- function(region, m) {
75
+    f <- function(UTR) {
76
+        return_object <- GenomicRanges::GRanges()
77
+        for (gene in unique(UTR$external_gene_name)) {
78
+            temp <- UTR[which(UTR$external_gene_name == gene), ]
79
+            temp <- GenomicRanges::makeGRangesFromDataFrame(temp, keep.extra.columns = TRUE)
80
+            temp <- temp[queryHits(GenomicRanges::findOverlaps(temp, region, ignore.strand = TRUE))]
81
+            if (length(temp) > 0) {
82
+                temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
83
+            }
84
+            return_object <- c(return_object, temp)
85
+        }
86
+        return(return_object)
152 87
     }
153
-    return(return_object)
154
-  }
155
-  filters <-c("chromosome_name","start","end")
156
-  chr <- substr(as.character(seqnames(region)),4,nchar(as.character(seqnames(region))))
157
-  values <-  list(chr,GenomicRanges::start(region),GenomicRanges::end(region))
158
-  map <- biomaRt::getBM(mart = m,
159
-                        attributes = c("external_gene_name","chromosome_name","strand","5_utr_start","5_utr_end","transcript_length"),
160
-                        filters = filters,
161
-                        values = values
162
-  )
163
-
164
-  map[which(map$strand == -1),"strand"] <- "-"
165
-  map[which(map$strand == 1),"strand"] <- "+"
166
-
167
-  UTR5 <- map[!is.na(map$`5_utr_start`),]
168
-  if(dim(UTR5)[1] > 0){
169
-    UTR5$chromosome_name <- as.character(GenomicRanges::seqnames(region))
170
-    UTR5 <- f(UTR5)
171
-
172
-  }else{
173
-    UTR5 <- GenomicRanges::GRanges()
174
-  }
175
-  return(UTR5)
88
+    filters <- c("chromosome_name", "start", "end")
89
+    chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
90
+    values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
91
+    map <- biomaRt::getBM(mart = m, attributes = c("external_gene_name", "chromosome_name", "strand",
92
+        "5_utr_start", "5_utr_end", "transcript_length"), filters = filters, values = values)
93
+
94
+    map[which(map$strand == -1), "strand"] <- "-"
95
+    map[which(map$strand == 1), "strand"] <- "+"
96
+
97
+    UTR5 <- map[!is.na(map$`5_utr_start`), ]
98
+    if (dim(UTR5)[1] > 0) {
99
+        UTR5$chromosome_name <- as.character(GenomicRanges::seqnames(region))
100
+        UTR5 <- f(UTR5)
101
+
102
+    } else {
103
+        UTR5 <- GenomicRanges::GRanges()
104
+    }
105
+    return(UTR5)
176 106
 }
177 107
 
178 108
 
179
-findUTR3 <- function(region,m){
180
-  f <- function(UTR){
181
-    return_object <- GenomicRanges::GRanges()
182
-    for(gene in unique(UTR$external_gene_name)){
183
-      temp <- UTR[which(UTR$external_gene_name == gene),]
184
-      temp <- GenomicRanges::makeGRangesFromDataFrame(temp,keep.extra.columns = TRUE)
185
-      temp <- temp[queryHits(GenomicRanges::findOverlaps(temp,region,ignore.strand=TRUE))]
186
-      if(length(temp) > 0){
187
-        temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
188
-      }
189
-      return_object <- c(return_object,temp)
109
+findUTR3 <- function(region, m) {
110
+    f <- function(UTR) {
111
+        return_object <- GenomicRanges::GRanges()
112
+        for (gene in unique(UTR$external_gene_name)) {
113
+            temp <- UTR[which(UTR$external_gene_name == gene), ]
114
+            temp <- GenomicRanges::makeGRangesFromDataFrame(temp, keep.extra.columns = TRUE)
115
+            temp <- temp[queryHits(GenomicRanges::findOverlaps(temp, region, ignore.strand = TRUE))]
116
+            if (length(temp) > 0) {
117
+                temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
118
+            }
119
+            return_object <- c(return_object, temp)
120
+        }
121
+        return(return_object)
122
+    }
123
+    filters <- c("chromosome_name", "start", "end")
124
+    chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
125
+    values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
126
+    map <- biomaRt::getBM(mart = m, attributes = c("external_gene_name", "chromosome_name", "strand",
127
+        "3_utr_start", "3_utr_end", "transcript_length"), filters = filters, values = values)
128
+
129
+    map[which(map$strand == -1), "strand"] <- "-"
130
+    map[which(map$strand == 1), "strand"] <- "+"
131
+
132
+    UTR3 <- map[!is.na(map$`3_utr_start`), ]
133
+    if (dim(UTR3)[1] > 0) {
134
+        UTR3$chromosome_name <- as.character(GenomicRanges::seqnames(region))
135
+        UTR3 <- f(UTR3)
136
+    } else {
137
+        UTR3 <- GenomicRanges::GRanges()
190 138
     }
191
-    return(return_object)
192
-  }
193
-  filters <-c("chromosome_name","start","end")
194
-  chr <- substr(as.character(seqnames(region)),4,nchar(as.character(seqnames(region))))
195
-  values <-  list(chr,GenomicRanges::start(region),GenomicRanges::end(region))
196
-  map <- biomaRt::getBM(mart = m,
197
-                        attributes = c("external_gene_name","chromosome_name","strand","3_utr_start","3_utr_end","transcript_length"),
198
-                        filters = filters,
199
-                        values = values
200
-  )
201
-
202
-  map[which(map$strand == -1),"strand"] <- "-"
203
-  map[which(map$strand == 1),"strand"] <- "+"
204
-
205
-  UTR3 <- map[!is.na(map$`3_utr_start`),]
206
-  if(dim(UTR3)[1] > 0){
207
-    UTR3$chromosome_name <- as.character(GenomicRanges::seqnames(region))
208
-    UTR3 <- f(UTR3)
209
-  }else{
210
-    UTR3 <- GenomicRanges::GRanges()
211
-  }
212
-
213
-  return(UTR3)
139
+
140
+    return(UTR3)
214 141
 }
215 142
 # get The biomaRt object
216
-#
217 143
 # @param region GRanges object containing the region to plot
218
-# @param genome Genome used by the db object "hg19","GRCh38" or "mm10"
144
+# @param genome Genome used by the db object 'hg19','GRCh38' or 'mm10'
219 145
 # @return returns GRanges with the Genes hits found in the region
220
-###############################################################################
221
-##
222
-## getBiomaRt -- find genes overlaping with the region for the given database
223
-##
224
-## region -- GRanges : GRanges object containing the region to plot
225
-##
226
-## genome -- character : Genome used by the db object "hg19","GRCh38" or "mm10"
227
-##
228
-## returns a biomaRt object
229
-###############################################################################
230
-getBiomaRt <- function(region,genome=c("hg19","GRCh38","mm10")){
231
-
232
-  if(genome == "hg19"){
233
-    m <- biomaRt::useMart(biomart="ENSEMBL_MART_ENSEMBL", host="grch37.ensembl.org", path="/biomart/martservice", dataset="hsapiens_gene_ensembl")
234
-  }
235
-  if(genome == "GRCh38"){
236
-    m <-  biomaRt::useMart(biomart="ENSEMBL_MART_ENSEMBL", host="grch38.ensembl.org", path="/biomart/martservice", dataset="hsapiens_gene_ensembl")
237
-  }
238
-  if(genome == "mm10"){
239
-    m <-  biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
240
-  }
241
-
242
-  return(m)
146
+
147
+getBiomaRt <- function(region, genome = c("hg19", "GRCh38", "mm10")) {
148
+
149
+    if (genome == "hg19") {
150
+        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "grch37.ensembl.org", path = "/biomart/martservice",
151
+            dataset = "hsapiens_gene_ensembl")
152
+    }
153
+    if (genome == "GRCh38") {
154
+        m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "grch38.ensembl.org", path = "/biomart/martservice",
155
+            dataset = "hsapiens_gene_ensembl")
156
+    }
157
+    if (genome == "mm10") {
158
+        m <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
159
+    }
160
+
161
+    return(m)
243 162
 }
244 163
 
245 164
 
246
-#  plot the rectangles of the genomic track
247
-#
165
+# plot the rectangles of the genomic track
248 166
 # @param rg GRanges list of exons in the gene
249 167
 # @param gr GRanges object containing the region to plot
250 168
 # @param y numeric index for color purpose
251 169
 # @param size value to substract and add around the y value to create a rectangle
252
-###############################################################################
253
-##
254
-## plotRectangles -- plot the rectangles of the genomic track
255
-##
256
-## gr    -- GRanges    : list of exons in the gene
257
-##
258
-## region -- GRanges : GRanges object containing the region to plot
259
-##
260
-## y  -- numeric : index for color purpose
261
-##
262
-## size  -- value to substract and add around the y value to create a rectangle
263
-###############################################################################
264
-plotRectangles <- function(rg,gr,y,size){
265
-  graphics::rect(GenomicRanges::end(gr),rep(y,length(gr))-size,GenomicRanges::start(gr),rep(y,length(gr))+size,border=NA, col = "grey")#y+5)
170
+
171
+plotRectangles <- function(rg, gr, y, size) {
172
+    graphics::rect(GenomicRanges::end(gr), rep(y, length(gr)) - size, GenomicRanges::start(gr),
173
+        rep(y, length(gr)) + size, border = NA, col = "grey")  #y+5)
266 174
 
267 175
 }
268 176
 
269
-#  plot the arrows between the rectangles in the genomic track
270
-#
177
+# plot the arrows between the rectangles in the genomic track
271 178
 # @param x0 list of x start points
272 179
 # @param y0 list of y start points
273
-# @param x1 list of x end points
274
-# @param y1 list of y end points
180
+# @param x1 list of x end points @param y1 list of y end points
275 181
 # @param n_arr number of arrows needed
276 182
 # @param ... graphics::arrows basic parameters
277
-###############################################################################
278
-##
279
-## arrowLines -- plot the arrows between the rectangles in the genomic track
280
-##
281
-## x  -- vector : list of x points
282
-##
283
-## y  -- vector : list of y points
284
-##
285
-## N  -- vector : number of break points needed
286
-##
287
-###############################################################################
288
-arrowLine <- function(x0,y0, x1,y1,n_arr,...) {
289
-  x<- seq(x0,x1, length=n_arr+1)
290
-  y<-seq(y0,y1, length=n_arr+1)
291
-  graphics::arrows(x[-length(x)],y[-length(y)], x[-1],y[-1],...)
183
+
184
+arrowLine <- function(x0, y0, x1, y1, n_arr, ...) {
185
+    x <- seq(x0, x1, length = n_arr + 1)
186
+    y <- seq(y0, y1, length = n_arr + 1)
187
+    graphics::arrows(x[-length(x)], y[-length(y)], x[-1], y[-1], ...)
292 188
 }
293 189
 
294
-#  return the regions contained between the ranges of a GRanges
295
-#
190
+# return the regions contained between the ranges of a GRanges
296 191
 # @param gr GRanges object
297
-# @return GRange object
298
-###############################################################################
299
-##
300
-## inverseGRanges -- return the regions contained between the ranges of a GRanges
301
-##
302
-## gr    -- GRanges    : list of exons in the gene
303
-##
304
-## returns GRange
305
-###############################################################################
306
-inverseGRanges <- function(gr){
307
-  temp <- as.data.frame(gr)
308
-  temp <- cbind(as.character(temp[seq_len(dim(temp)[1]-1),1]),
309
-                temp[seq_len(dim(temp)[1]-1),3],
310
-                temp[2:dim(temp)[1],2],
311
-                as.character(temp[seq_len(dim(temp)[1]-1),5]))
312
-  temp <- as.data.frame(temp)
313
-  colnames(temp) <- c("seqnames","start","end","strand")
314
-  temp <- GRanges(temp)
315
-  return(temp)
192
+# @return GRanges object
193
+
194
+inverseGRanges <- function(gr) {
195
+    temp <- as.data.frame(gr)
196
+    temp <- cbind(as.character(temp[seq_len(dim(temp)[1] - 1), 1]), temp[seq_len(dim(temp)[1] -
197
+        1), 3], temp[2:dim(temp)[1], 2], as.character(temp[seq_len(dim(temp)[1] - 1), 5]))
198
+    temp <- as.data.frame(temp)
199
+    colnames(temp) <- c("seqnames", "start", "end", "strand")
200
+    temp <- GRanges(temp)
201
+    return(temp)
316 202
 }
317 203
 
318
-#  wrapper function to plot the rectangles and arrows of the genomic track
319
-#
204
+# wrapper function to plot the rectangles and arrows of the genomic track
320 205
 # @param gr GRanges object containing the region to plot
321 206
 # @param region GRanges object containing the region to plot
322 207
 # @param y Numeric value used for the y position of the boxes
323
-###############################################################################
324
-##
325
-## plotGRanges-- wrapper function to plot the rectangles and arrows of the genomic track
326
-##
327
-## gr    -- GRanges    : list of exons in the gene
328
-##
329
-## region -- GRanges : GRanges object containing the region to plot
330
-##
331
-## returns a plot
332
-###############################################################################
333
-plotGRanges <- function(gr,region,y,UTR3,UTR5){
334
-  gr <- GenomicRanges::reduce(gr)
335
-
336
-  if(length(gr) > 1){
337
-    gr_inv <- inverseGRanges(gr)
338
-    for(j in seq_len(length(gr_inv))){
339
-      value <- (GenomicRanges::end(gr_inv[j])-GenomicRanges::start(gr_inv[j]))/(GenomicRanges::end(region)-GenomicRanges::start(region))
340
-      N <- arrowNumbers(value)
341
-      if(as.character(strand(gr_inv[j])) == "-"){
342
-        arrowLine(GenomicRanges::end(gr_inv[j]),y,GenomicRanges::start(gr_inv[j]),y,n_arr=N,length=0.1)
343
-      }
344
-      if(as.character(strand(gr_inv[j])) == "+"){
345
-        arrowLine(GenomicRanges::start(gr_inv[j]),y,GenomicRanges::end(gr_inv[j]),y,n_arr=N,length=0.1)
346
-      }
347
-    }
348
-  }
349
-  if(length(UTR5) > 0){
350
-    plotRectangles(region,UTR5,y,size=0.25)
351
-    if(as.character(unique(strand(gr))) == "-"){
352
-      GenomicRanges::end(gr[queryHits(GenomicRanges::findOverlaps(gr,UTR5))]) <- GenomicRanges::start(UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
353
-    }
354
-    if(as.character(unique(strand(gr))) == "+"){
355
-      GenomicRanges::start(gr[queryHits(GenomicRanges::findOverlaps(gr,UTR5))]) <- GenomicRanges::end(UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
208
+
209
+plotGRanges <- function(gr, region, y, UTR3, UTR5) {
210
+    gr <- GenomicRanges::reduce(gr)
211
+
212
+    if (length(gr) > 1) {
213
+        gr_inv <- inverseGRanges(gr)
214
+        for (j in seq_len(length(gr_inv))) {
215
+            value <- (GenomicRanges::end(gr_inv[j]) - GenomicRanges::start(gr_inv[j]))/(GenomicRanges::end(region) -
216
+                GenomicRanges::start(region))
217
+            N <- arrowNumbers(value)
218
+            if (as.character(strand(gr_inv[j])) == "-") {
219
+                arrowLine(GenomicRanges::end(gr_inv[j]), y, GenomicRanges::start(gr_inv[j]), y,
220
+                  n_arr = N, length = 0.1)
221
+            }
222
+            if (as.character(strand(gr_inv[j])) == "+") {
223
+                arrowLine(GenomicRanges::start(gr_inv[j]), y, GenomicRanges::end(gr_inv[j]), y,
224
+                  n_arr = N, length = 0.1)
225
+            }
226
+        }
356 227
     }
357
-  }
358
-  if(length(UTR3) > 0){
359
-    plotRectangles(region,UTR3,y,size=0.25)
360
-    if(as.character(unique(strand(gr))) == "+"){
361
-      GenomicRanges::end(gr[queryHits(GenomicRanges::findOverlaps(gr,UTR3))]) <- GenomicRanges::start(UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
228
+    if (length(UTR5) > 0) {
229
+        plotRectangles(region, UTR5, y, size = 0.25)
230
+        if (as.character(unique(strand(gr))) == "-") {
231
+            GenomicRanges::end(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]) <- GenomicRanges::start(UTR5[subjectHits(GenomicRanges::findOverlaps(gr,
232
+                UTR5))])
233
+        }
234
+        if (as.character(unique(strand(gr))) == "+") {
235
+            GenomicRanges::start(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]) <- GenomicRanges::end(UTR5[subjectHits(GenomicRanges::findOverlaps(gr,
236
+                UTR5))])
237
+        }
362 238
     }
363
-    if(as.character(unique(strand(gr))) == "-"){
364
-      GenomicRanges::start(gr[queryHits(GenomicRanges::findOverlaps(gr,UTR3))]) <- GenomicRanges::end(UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
239
+    if (length(UTR3) > 0) {
240
+        plotRectangles(region, UTR3, y, size = 0.25)
241
+        if (as.character(unique(strand(gr))) == "+") {
242
+            GenomicRanges::end(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]) <- GenomicRanges::start(UTR3[subjectHits(GenomicRanges::findOverlaps(gr,
243
+                UTR3))])
244
+        }
245
+        if (as.character(unique(strand(gr))) == "-") {
246
+            GenomicRanges::start(gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]) <- GenomicRanges::end(UTR3[subjectHits(GenomicRanges::findOverlaps(gr,
247
+                UTR3))])
248
+        }
365 249
     }
366
-  }
367
-  plotRectangles(region,gr,y,size=0.5)
250
+    plotRectangles(region, gr, y, size = 0.5)
368 251
 
369 252
 }
370 253
 
371
-arrowNumbers <- function(value){
372
-  N<-0
373
-  if(value < 0.25){
374
-    N=1
375
-  }
376
-  if(value > 0.25){
377
-    N=2
378
-  }
379
-  if(value > 0.75){
380
-    N=3
381
-  }
382
-  if(value > 1){
383
-    N=6
384
-  }
385
-  if(value > 2){
386
-    N=12
387
-  }
388
-  return(N)
254
+arrowNumbers <- function(value) {
255
+    N <- 0
256
+    if (value < 0.25) {
257
+        N = 1
258
+    }
259
+    if (value > 0.25) {
260
+        N = 2
261
+    }
262
+    if (value > 0.75) {
263
+        N = 3
264
+    }
265
+    if (value > 1) {
266
+        N = 6
267
+    }
268
+    if (value > 2) {
269
+        N = 12
270
+    }
271
+    return(N)
389 272
 }
390 273
 
391 274
 # plot the genomic track
392
-#
393 275
 # @param gr GRanges object containing the region to plot
394 276
 # @param region GRanges object containing the region to plot
395
-###############################################################################
396
-##
397
-## plotGenomicTrack -- plot the genomic track
398
-##
399
-## gr    -- GRanges    : list of exons in the gene
400
-##
401
-## region -- GRanges : GRanges object containing the region to plot
402
-##
403
-## returns a plot
404
-###############################################################################
405
-plotGenomicTrack <- function(gr,UTR3,UTR5,region){
406
-
407
-  if(dim(as.data.frame(gr))[1]>0){
408
-    index <- 0
409
-    if(length(unique(gr$external_gene_name))==1){
410
-      graphics::plot(x=1, ylim=c(0,2),xlim=c(GenomicRanges::start(region),GenomicRanges::end(region)),xlab=GenomicRanges::seqnames(region),yaxt="n",ylab="",xaxt="n")
411
-      graphics::axis(2, at=1, unique(gr$external_gene_name),srt=45)
412
-      plotGRanges(gr,region,1,UTR3,UTR5)
413
-    }
414
-    if(length(unique(gr$external_gene_name))>1){
415
-      graphics::plot(x=1, ylim=c(0,length(unique(gr$external_gene_name))),xlim=c(start(region),end(region)),xlab=seqnames(region),yaxt="n",ylab="")
416
-      tmin <- 1
417
-      tmax <- length(IRanges::unique(gr$external_gene_name))
418
-      tlab <- seq(tmin, tmax)-0.5
419
-      lab <- IRanges::unique(gr$external_gene_name)
420
-      graphics::axis(2, at=tlab, labels = FALSE)
421
-      graphics::text(x=par()$usr[1]-0.01*(par()$usr[2]-par()$usr[1]),
422
-                     y=tlab,
423
-                     labels=lab,
424
-                     srt=45,
425
-                     xpd=NA,
426
-                     adj=1)
427
-      for(i in unique(gr$external_gene_name)){
428
-        index <- index+1
429
-        gr2 <- gr[(GenomicRanges::elementMetadata(gr)[,"external_gene_name"] %in% i)]
430
-        UTR3_ind <- UTR3[(GenomicRanges::elementMetadata(UTR3)[,"external_gene_name"] %in% i)]
431
-        UTR5_ind <- UTR5[(GenomicRanges::elementMetadata(UTR5)[,"external_gene_name"] %in% i)]
432
-        plotGRanges(gr2,region,index-0.5,UTR3=UTR3_ind,UTR5=UTR5_ind)
433
-      }
434 277
 
278
+plotGenomicTrack <- function(gr, UTR3, UTR5, region) {
279
+
280
+    if (dim(as.data.frame(gr))[1] > 0) {
281
+        index <- 0
282
+        if (length(unique(gr$external_gene_name)) == 1) {
283
+            graphics::plot(x = 1, ylim = c(0, 2), xlim = c(GenomicRanges::start(region), GenomicRanges::end(region)),
284
+                xlab = GenomicRanges::seqnames(region), yaxt = "n", ylab = "", xaxt = "n")
285
+            graphics::axis(2, at = 1, unique(gr$external_gene_name), srt = 45)
286
+            plotGRanges(gr, region, 1, UTR3, UTR5)
287
+        }
288
+        if (length(unique(gr$external_gene_name)) > 1) {
289
+            graphics::plot(x = 1, ylim = c(0, length(unique(gr$external_gene_name))), xlim = c(start(region),
290
+                end(region)), xlab = seqnames(region), yaxt = "n", ylab = "")
291
+            tmin <- 1
292
+            tmax <- length(IRanges::unique(gr$external_gene_name))
293
+            tlab <- seq(tmin, tmax) - 0.5
294
+            lab <- IRanges::unique(gr$external_gene_name)
295
+            graphics::axis(2, at = tlab, labels = FALSE)
296
+            graphics::text(x = par()$usr[1] - 0.01 * (par()$usr[2] - par()$usr[1]), y = tlab,
297
+                labels = lab, srt = 45, xpd = NA, adj = 1)
298
+            for (i in unique(gr$external_gene_name)) {
299
+                index <- index + 1
300
+                gr2 <- gr[(GenomicRanges::elementMetadata(gr)[, "external_gene_name"] %in% i)]
301
+                UTR3_ind <- UTR3[(GenomicRanges::elementMetadata(UTR3)[, "external_gene_name"] %in%
302
+                  i)]
303
+                UTR5_ind <- UTR5[(GenomicRanges::elementMetadata(UTR5)[, "external_gene_name"] %in%
304
+                  i)]
305
+                plotGRanges(gr2, region, index - 0.5, UTR3 = UTR3_ind, UTR5 = UTR5_ind)
306
+            }
307
+
308
+        }
309
+    } else {
310
+        print(paste0("There is no genes in the given region :", region))
435 311
     }
436
-  }
437
-  else{
438
-    print(paste0("There is no genes in the given region :",region))
439
-  }
440 312
 
441 313
 }
442 314
 
Browse code

0.99.0 Implementation of the Bioconductor building comments

Thomas authored on 22/03/2019 17:13:03
Showing1 changed files
... ...
@@ -1,10 +1,10 @@
1
-############################################################################################################################################
1
+###############################################################################
2 2
 ##
3 3
 ## plotRegiont.R -- plot a snapshot of a genomic region
4 4
 ## 30 november 2018
5 5
 ## Thomas Faux
6 6
 ## Medical Bioinformatics Centre
7
-############################################################################################################################################
7
+###############################################################################
8 8
 
9 9
 ## geneRanges -- get the genes as GRanges
10 10
 
... ...
@@ -21,7 +21,7 @@
21 21
 # @param db object of the type BSgenome.Hsapiens.UCSC.hg19
22 22
 # @param column character, name of the column you wish to retrive (default ENTREZ_ID)
23 23
 # @return returns a GRanges of genes
24
-############################################################################################################################################
24
+###############################################################################
25 25
 ##
26 26
 ## geneRanges -- get the genes as GRanges
27 27
 ##
... ...
@@ -30,7 +30,7 @@
30 30
 ## column -- character : name of the column you wish to retrive (default ENTREZ_ID)
31 31
 ##
32 32
 ## returns a GRanges of genes
33
-############################################################################################################################################
33
+###############################################################################
34 34
 geneRanges <- function(db, column="ENTREZID")
35 35
   {
36 36
     g <- genes(db, columns=column)
... ...
@@ -47,7 +47,7 @@ geneRanges <- function(db, column="ENTREZID")
47 47
 # @param subject GRanges subject
48 48
 # @param column character for the name of columns you whish to retrive(default ENTREZ_ID)
49 49
 # @return returns a GRangesList object
50
-############################################################################################################################################
50
+###############################################################################
51 51
 ##
52 52
 ## splitColumnByOverlap -- helper function to split the genes found
53 53
 ##
... ...
@@ -58,7 +58,7 @@ geneRanges <- function(db, column="ENTREZID")
58 58
 ## column -- character : name of the column you wish to retrive (default ENTREZ_ID)
59 59
 ##
60 60
 ## returns a GRangesList object
61
-############################################################################################################################################
61
+###############################################################################
62 62
 splitColumnByOverlap <- function(query, subject, column="ENTREZID")
63 63
   {
64 64
     olaps <- GenomicRanges::findOverlaps(query, subject)
... ...
@@ -72,14 +72,14 @@ splitColumnByOverlap <- function(query, subject, column="ENTREZID")
72 72
 #
73 73
 # @param df list of dataframes containing the peaks
74 74
 # @return returns a GRangesList
75
-############################################################################################################################################
75
+###############################################################################
76 76
 ## not used ?
77 77
 ## toGRList -- transform the dataframe in GRangesList
78 78
 ##
79 79
 ## df    -- dataframe    : list of dataframes containing the peaks
80 80
 ##
81 81
 ## returns a GRangesList
82
-############################################################################################################################################
82
+###############################################################################
83 83
 toGRList <- function(df){
84 84
   grlist <- GenomicRanges::GRangesList()
85 85
   for(i in unique(df$external_gene_name)){
... ...
@@ -96,7 +96,7 @@ toGRList <- function(df){
96 96
 # @param genome Genome used by the db object "hg19","GRCh38" or "mm10"
97 97
 # @param m biomaRt object
98 98
 # @return returns GRanges with the Genes hits found in the region
99
-############################################################################################################################################
99
+###############################################################################
100 100
 ##
101 101
 ## findGenes -- find genes overlaping with the region for the given database
102 102
 ##
... ...
@@ -105,7 +105,7 @@ toGRList <- function(df){
105 105
 ## m -- biomaRt object
106 106
 ##
107 107
 ## returns genomic range of the genes present in the genomic region
108
-############################################################################################################################################
108
+###############################################################################
109 109
 findGenes <- function(region,m){
110 110
   filters <-c("chromosome_name","start","end")
111 111
   chr <- substr(as.character(seqnames(region)),4,nchar(as.character(seqnames(region))))
... ...
@@ -128,7 +128,7 @@ findGenes <- function(region,m){
128 128
 # @param region GRanges object containing the region to plot
129 129
 # @param m biomaRt object
130 130
 # @return returns GRanges with the UTR hits found in the region
131
-############################################################################################################################################
131
+###############################################################################
132 132
 ##
133 133
 ## findUTR -- find genes overlaping with the region for the given database
134 134
 ##
... ...
@@ -137,7 +137,7 @@ findGenes <- function(region,m){
137 137
 ## genome -- character : Genome used by the db object "hg19","GRCh38" or "mm10"
138 138
 ##
139 139
 ## returns genomic rangelist of the UTR regions present in the genomic region per gene
140
-############################################################################################################################################
140
+###############################################################################
141 141
 findUTR5 <- function(region,m){
142