... | ... |
@@ -1,20 +1,20 @@ |
1 | 1 |
Package: NoRCE |
2 | 2 |
Type: Package |
3 | 3 |
Title: NoRCE: Noncoding RNA Sets Cis Annotation and Enrichment |
4 |
-Version: 1.9.0 |
|
4 |
+Version: 1.9.1 |
|
5 | 5 |
Authors@R: c(person("Gulden", "Olgun", |
6 | 6 |
email = "gulden@cs.bilkent.edu.tr", |
7 | 7 |
role = c("aut", "cre"))) |
8 | 8 |
Description: While some non-coding RNAs (ncRNAs) are assigned critical regulatory roles, most remain functionally uncharacterized. This presents a challenge whenever an interesting set of ncRNAs needs to be analyzed in a functional context. Transcripts located close-by on the genome are often regulated together. This genomic proximity on the sequence can hint to a functional association. We present a tool, NoRCE, that performs cis enrichment analysis for a given set of ncRNAs. Enrichment is carried out using the functional annotations of the coding genes located proximal to the input ncRNAs. Other biologically relevant information such as topologically associating domain (TAD) boundaries, co-expression patterns, and miRNA target prediction information can be incorporated to conduct a richer enrichment analysis. To this end, NoRCE includes several relevant datasets as part of its data repository, including cell-line specific TAD boundaries, functional gene sets, and expression data for coding & ncRNAs specific to cancer. Additionally, the users can utilize custom data files in their investigation. Enrichment results can be retrieved in a tabular format or visualized in several different ways. NoRCE is currently available for the following species: human, mouse, rat, zebrafish, fruit fly, worm, and yeast. |
9 | 9 |
License: MIT + file LICENSE |
10 |
-Depends: R (>= 4.0) |
|
10 |
+Depends: R (>= 4.2.0) |
|
11 | 11 |
Imports: |
12 |
- KEGGREST,png,dplyr,graphics,RSQLite,DBI,tidyr,grDevices, |
|
12 |
+ KEGGREST,png,dplyr,graphics,RSQLite,DBI,tidyr,grDevices,stringr, |
|
13 | 13 |
S4Vectors,SummarizedExperiment,reactome.db,rWikiPathways,RCurl, |
14 | 14 |
dbplyr,utils,ggplot2,igraph,stats,reshape2,readr, GO.db,zlibbioc, |
15 | 15 |
biomaRt,rtracklayer,IRanges,GenomicRanges,GenomicFeatures,AnnotationDbi |
16 | 16 |
Encoding: UTF-8 |
17 |
-RoxygenNote: 7.1.1 |
|
17 |
+RoxygenNote: 7.2.1 |
|
18 | 18 |
Suggests: |
19 | 19 |
knitr, TxDb.Hsapiens.UCSC.hg38.knownGene,TxDb.Drerio.UCSC.danRer10.refGene, |
20 | 20 |
TxDb.Mmusculus.UCSC.mm10.knownGene,TxDb.Dmelanogaster.UCSC.dm6.ensGene, |
... | ... |
@@ -89,18 +89,22 @@ goEnrichment <- |
89 | 89 |
enrichTest = c("hyper", "binom", "fisher", "chi")) { |
90 | 90 |
if (missing(org_assembly)) { |
91 | 91 |
message("Genome assembly version is missing.") |
92 |
- assembly(org_assembly) |
|
93 | 92 |
} |
94 | 93 |
if (missing(genes)) { |
95 | 94 |
message("Genes are missing. Expected input: FOXP2 SOX4 HOXC6") |
96 | 95 |
} |
96 |
+ assembly(org_assembly) |
|
97 | 97 |
# annot <- unique(annGO(genes, GOtype, org_assembly)) |
98 | 98 |
goData <- annGO(genes, GOtype, org_assembly) |
99 | 99 |
annot <- goData[[2]] |
100 | 100 |
uniqueGO <- annot$GOID[!duplicated(annot$GOID)] |
101 | 101 |
|
102 |
- |
|
103 |
- gofreq <- as.data.frame(table(annot$GOID)) |
|
102 |
+ if(is.null(dim(backG)[1])){ |
|
103 |
+ gofreq <- as.data.frame(table(annot$GOID)) |
|
104 |
+ }else{ |
|
105 |
+ |
|
106 |
+ } |
|
107 |
+ |
|
104 | 108 |
notGene <- |
105 | 109 |
getBackGenes( |
106 | 110 |
backgroundGene = backG, |
... | ... |
@@ -118,45 +122,25 @@ goEnrichment <- |
118 | 122 |
freq <- merge(gofreq, notGene, by = "Var1") |
119 | 123 |
found <- freq$Freq.x |
120 | 124 |
|
121 |
- ifelse(backG == '', |
|
125 |
+ ifelse(is.null(dim(backG)[1]), |
|
122 | 126 |
geneSize <- length(unique(goData[[1]]$Gene)), |
123 |
- geneSize <- length(unique(backG))) |
|
124 |
- |
|
127 |
+ geneSize <- dim(unique(backG))[1]) |
|
125 | 128 |
|
126 | 129 |
M <- freq$Freq.y |
127 | 130 |
n <- rep(length(unique(annot$Gene)), length(M)) |
128 | 131 |
|
129 |
- |
|
130 |
- # if (enrichTest == "binom") { |
|
131 |
- # pvalues <- 2 * (1 - pbinom(found, n, pCut)) |
|
132 |
- # } |
|
133 |
- # if (enrichTest == "fisher") { |
|
134 |
- # pvalues <- |
|
135 |
- # fisher.test(matrix(c( |
|
136 |
- # found, (M - found), (n - found), (geneSize - M - n + found) |
|
137 |
- # ), 2, 2), alternative = 'greater')$p.value |
|
138 |
- # } |
|
139 |
- # if (enrichTest == "chi") { |
|
140 |
- # pvalues <- |
|
141 |
- # chisq.test(matrix(c( |
|
142 |
- # found, (M - found), (n - found), (geneSize - M - n + found) |
|
143 |
- # )))$p.value |
|
144 |
- # } |
|
145 |
- # else { |
|
146 |
- # pvalues <- phyper(found - 1, M, geneSize - M, n, lower.tail = FALSE) |
|
147 |
- # } |
|
148 | 132 |
ifelse( |
149 |
- enrichTest == "binom", |
|
133 |
+ pkg.env$enrichTest == "binom", |
|
150 | 134 |
pvalues <- |
151 | 135 |
2 * (1 - pbinom(found, n, pCut)), |
152 | 136 |
ifelse( |
153 |
- enrichTest == "fisher", |
|
137 |
+ pkg.env$enrichTest == "fisher", |
|
154 | 138 |
pvalues <- |
155 | 139 |
fisher.test(matrix(c( |
156 | 140 |
found, (M - found), (n - found), (geneSize - M - n + found) |
157 | 141 |
), 2, 2), alternative = 'greater')$p.value, |
158 | 142 |
ifelse( |
159 |
- enrichTest == "chi", |
|
143 |
+ pkg.env$enrichTest == "chi", |
|
160 | 144 |
pvalues <- |
161 | 145 |
chisq.test(matrix(c( |
162 | 146 |
found, (M - found), (n - found), (geneSize - M - n + found) |
... | ... |
@@ -229,12 +213,12 @@ getBackGenes <- |
229 | 213 |
"dm6", |
230 | 214 |
"ce11", |
231 | 215 |
"sc3"), |
232 |
- type = 'pc_gene') { |
|
233 |
- if (backgroundGene == '') { |
|
216 |
+ type = c('pc_gene', 'mirna')) { |
|
217 |
+ backgroundGene = as.data.frame(backgroundGene) |
|
218 |
+ if (length(backgroundGene[,1]) == 1) { |
|
234 | 219 |
bckfreq <- as.data.frame(table(all$GOID)) |
235 | 220 |
} |
236 | 221 |
else{ |
237 |
- backgroundGene = as.data.frame(backgroundGene) |
|
238 | 222 |
colnames(backgroundGene) = 'bg' |
239 | 223 |
|
240 | 224 |
if (type == 'mirna') { |
... | ... |
@@ -252,13 +236,12 @@ getBackGenes <- |
252 | 236 |
getUCSC(geneTargetLoc, 10000, 10000, org_assembly) |
253 | 237 |
colnames(backgroundGene) = 'bg' |
254 | 238 |
} |
255 |
- annot <- |
|
256 |
- unique(annGO(backgroundGene$bg, GOtype, org_assembly)) |
|
257 |
- uniqueGO <- annot$GOID[!duplicated(annot$GOID)] |
|
239 |
+ goData <-annGO(backgroundGene$bg, GOtype, org_assembly) |
|
240 |
+ annot <- goData[[2]] |
|
258 | 241 |
|
259 | 242 |
bckfreq <- as.data.frame(table(annot$GOID)) |
260 | 243 |
} |
261 | 244 |
bb <- bckfreq[bckfreq$Var1 %in% gofreq$Var1,] |
262 | 245 |
|
263 | 246 |
return(bb) |
264 |
- } |
|
247 |
+ } |
|
265 | 248 |
\ No newline at end of file |
... | ... |
@@ -14,6 +14,7 @@ |
14 | 14 |
#' @return Data frame of the miRNA-mRNA correlation result |
15 | 15 |
#' |
16 | 16 |
#' @import dbplyr |
17 |
+#' @import stringr |
|
17 | 18 |
#' |
18 | 19 |
#' @export |
19 | 20 |
corrbased <- function(mirnagene, |
... | ... |
@@ -22,29 +23,20 @@ corrbased <- function(mirnagene, |
22 | 23 |
databaseFile) { |
23 | 24 |
colnames(mirnagene) <- c('g') |
24 | 25 |
|
25 |
- conn <- DBI::dbConnect(RSQLite::SQLite(), databaseFile) |
|
26 |
- |
|
27 | 26 |
a <- |
28 | 27 |
as.data.frame(gsub(paste(c("-3p", "-5p"), collapse = "|"), "", |
29 | 28 |
mirnagene$g)) |
30 | 29 |
|
31 |
- colnames(a) <- 'genes' |
|
32 |
- a <- unique(rbind(a, mirnagene$g)) |
|
30 |
+ a <- data.frame(str_replace_all(a[,1], 'miR', 'mir')) |
|
31 |
+ colnames(a) <- 'g' |
|
32 |
+ a <- unique(rbind(a, mirnagene)) |
|
33 | 33 |
|
34 |
- # dat <- |
|
35 |
- # cRegulome::get_mir( |
|
36 |
- # conn = conn, |
|
37 |
- # mir = as.character(a$genes), |
|
38 |
- # study = cancer, |
|
39 |
- # min_abs_cor = minAbsCor |
|
40 |
- # ) |
|
41 |
- # colnames(dat) <- c("mirna_base", "feature", "cor", "cancer") |
|
42 |
- |
|
34 |
+ conn <- DBI::dbConnect(RSQLite::SQLite(), databaseFile) |
|
43 | 35 |
|
44 | 36 |
dat <- conn %>% |
45 | 37 |
dplyr::tbl('cor_mir') %>% |
46 | 38 |
dplyr::select(mirna_base, feature, cancer) %>% |
47 |
- dplyr::filter(mirna_base %in% local(mirnagene$g)) %>% |
|
39 |
+ dplyr::filter(mirna_base %in% local(a$g)) %>% |
|
48 | 40 |
dplyr::collect() %>% |
49 | 41 |
tidyr::gather(cancer, cor, -mirna_base, -feature) %>% |
50 | 42 |
dplyr::mutate(cor = cor / 100) %>% dplyr::filter(abs(cor) > minAbsCor) %>% |
... | ... |
@@ -93,24 +85,34 @@ corrbasedMrna <- |
93 | 85 |
#' Get TCGA miRNAseq expression of miRNA genes for the given cancer |
94 | 86 |
#' |
95 | 87 |
#' @param mirnagene Data frame of the mature format |
96 |
-#' @param cancer Name of the TCGA project code such as 'BRCA' that is |
|
97 |
-#' analyzed for miRNA-mRNA correlation |
|
88 |
+#' @param cancer Name of the TCGA project code such as 'BRCA' |
|
98 | 89 |
#' @param databaseFile Path of miRcancer.db file |
99 | 90 |
#' |
100 | 91 |
#' @return Data frame of the raw read count of the given miRNA genes |
101 | 92 |
#' for different patients |
93 |
+#' |
|
94 |
+#' @import dbplyr |
|
95 |
+#' @import stringr |
|
102 | 96 |
#' |
103 | 97 |
#' @export |
104 | 98 |
getmiRNACount <- function(mirnagene, cancer, databaseFile) { |
105 | 99 |
colnames(mirnagene) <- c('g') |
106 | 100 |
|
101 |
+ a <- |
|
102 |
+ as.data.frame(gsub(paste(c("-3p", "-5p"), collapse = "|"), "", |
|
103 |
+ mirnagene$g)) |
|
104 |
+ |
|
105 |
+ a <- data.frame(str_replace_all(a[,1], 'miR', 'mir')) |
|
106 |
+ colnames(a) <- 'g' |
|
107 |
+ a <- unique(rbind(a, mirnagene)) |
|
108 |
+ |
|
107 | 109 |
conn <- DBI::dbConnect(RSQLite::SQLite(), databaseFile) |
108 | 110 |
|
109 | 111 |
dat <- |
110 | 112 |
conn %>% |
111 | 113 |
dplyr::tbl('profiles') %>% |
112 | 114 |
dplyr::select(study, mirna_base, count) %>% |
113 |
- dplyr::filter(mirna_base %in% !!mirnagene$g) %>% |
|
115 |
+ dplyr::filter(mirna_base %in% !!a$g) %>% |
|
114 | 116 |
dplyr::filter(study %in% cancer) %>% |
115 | 117 |
dplyr::collect() %>% na.omit() |
116 | 118 |
|
... | ... |
@@ -98,9 +98,8 @@ geneGOEnricher <- |
98 | 98 |
if (missing(genetype)) |
99 | 99 |
message("Input gene type is missing.") |
100 | 100 |
|
101 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
102 |
- assembly(org_assembly) |
|
103 |
- } |
|
101 |
+ assembly(org_assembly) |
|
102 |
+ |
|
104 | 103 |
gene <- as.data.frame(gene) |
105 | 104 |
colnames(gene) <- c("genes") |
106 | 105 |
geneLoc <- |
... | ... |
@@ -350,9 +349,8 @@ genePathwayEnricher <- |
350 | 349 |
if (missing(org_assembly)) { |
351 | 350 |
message("Assembly version is missing?") |
352 | 351 |
} |
353 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
354 |
- assembly(org_assembly) |
|
355 |
- } |
|
352 |
+ assembly(org_assembly) |
|
353 |
+ |
|
356 | 354 |
|
357 | 355 |
if (missing(genetype)) |
358 | 356 |
message("Input gene type is missing.") |
... | ... |
@@ -619,9 +617,7 @@ geneRegionGOEnricher <- |
619 | 617 |
if (missing(org_assembly)) { |
620 | 618 |
message("Assembly version is missing?") |
621 | 619 |
} |
622 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
623 |
- assembly(org_assembly) |
|
624 |
- } |
|
620 |
+ assembly(org_assembly) |
|
625 | 621 |
if (near) { |
626 | 622 |
ifelse( |
627 | 623 |
pkg.env$searchRegion == 'all', |
... | ... |
@@ -738,7 +734,16 @@ geneRegionGOEnricher <- |
738 | 734 |
pAdjust = pkg.env$pAdjust, |
739 | 735 |
min = pkg.env$min |
740 | 736 |
) |
741 |
- |
|
737 |
+ if (length(enrichedGene@Term) > 0) |
|
738 |
+ { |
|
739 |
+ enrichedGene@ncGeneList <- commonGeneRegion( |
|
740 |
+ mrnaobject = enrichedGene, |
|
741 |
+ org_assembly = org_assembly, |
|
742 |
+ downstream = pkg.env$downstream, |
|
743 |
+ upstream = pkg.env$upstream, |
|
744 |
+ inRegion = region |
|
745 |
+ ) |
|
746 |
+ } |
|
742 | 747 |
|
743 | 748 |
return(enrichedGene) |
744 | 749 |
} |
... | ... |
@@ -832,9 +837,9 @@ geneRegionPathwayEnricher <- |
832 | 837 |
if (missing(org_assembly)) { |
833 | 838 |
message("Assembly version is missing?") |
834 | 839 |
} |
835 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
836 |
- assembly(org_assembly) |
|
837 |
- } |
|
840 |
+ |
|
841 |
+ assembly(org_assembly) |
|
842 |
+ |
|
838 | 843 |
|
839 | 844 |
if (near) { |
840 | 845 |
ifelse( |
... | ... |
@@ -277,9 +277,7 @@ getUCSC <- |
277 | 277 |
"dm6", |
278 | 278 |
"ce11", |
279 | 279 |
"sc3")) { |
280 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
281 |
- assembly(org_assembly) |
|
282 |
- } |
|
280 |
+ |
|
283 | 281 |
if (missing(bedfile)) { |
284 | 282 |
message("Bed file is missing?") |
285 | 283 |
} |
... | ... |
@@ -293,6 +291,8 @@ getUCSC <- |
293 | 291 |
message("genomee assembly version is missing.") |
294 | 292 |
} |
295 | 293 |
|
294 |
+ assembly(org_assembly) |
|
295 |
+ |
|
296 | 296 |
big_islands <- |
297 | 297 |
resize(bedfile, width = downstream + width(bedfile), fix = "end") |
298 | 298 |
rt1 <- |
... | ... |
@@ -345,18 +345,19 @@ getNearToExon <- |
345 | 345 |
"dm6", |
346 | 346 |
"ce11", |
347 | 347 |
"sc3")) { |
348 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
349 |
- assembly(org_assembly) |
|
350 |
- } |
|
351 | 348 |
if (missing(bedfile)) { |
352 | 349 |
message("Bed file is missing?") |
353 | 350 |
} |
351 |
+ if (missing(org_assembly)) { |
|
352 |
+ message("Assembly is missing?") |
|
353 |
+ } |
|
354 | 354 |
if (missing(upstream)) { |
355 | 355 |
message("Upstream information is missing?") |
356 | 356 |
} |
357 | 357 |
if (missing(downstream)) { |
358 | 358 |
message("Downstream information is missing?") |
359 | 359 |
} |
360 |
+ assembly(org_assembly) |
|
360 | 361 |
|
361 | 362 |
big_islands <- |
362 | 363 |
resize(bedfile, width = downstream + width(bedfile), fix = "end") |
... | ... |
@@ -412,9 +413,8 @@ getNearToIntron <- |
412 | 413 |
if (missing(org_assembly)) { |
413 | 414 |
message("Assembly is missing?") |
414 | 415 |
} |
415 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
416 |
- assembly(org_assembly) |
|
417 |
- } |
|
416 |
+ assembly(org_assembly) |
|
417 |
+ |
|
418 | 418 |
if (missing(bedfile)) { |
419 | 419 |
message("Bed file is missing?") |
420 | 420 |
} |
... | ... |
@@ -496,9 +496,9 @@ getTADOverlap <- |
496 | 496 |
if (missing(org_assembly)) { |
497 | 497 |
message("Assembly is missing?") |
498 | 498 |
} |
499 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
500 |
- assembly(org_assembly) |
|
501 |
- } |
|
499 |
+ |
|
500 |
+ assembly(org_assembly) |
|
501 |
+ |
|
502 | 502 |
if (missing(bedfile)) { |
503 | 503 |
message("Bed file is missing?") |
504 | 504 |
} |
... | ... |
@@ -576,7 +576,7 @@ convertGeneID <- |
576 | 576 |
"ce11", |
577 | 577 |
"sc3")) { |
578 | 578 |
if (missing(org_assembly)) { |
579 |
- message("genomee assembly version is missing.") |
|
579 |
+ message("Genome assembly version is missing.") |
|
580 | 580 |
} |
581 | 581 |
if (missing(genetype)) { |
582 | 582 |
message("Format of the gene is missing.") |
... | ... |
@@ -584,9 +584,9 @@ convertGeneID <- |
584 | 584 |
if (missing(genelist)) { |
585 | 585 |
message("List of gene is missing.") |
586 | 586 |
} |
587 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
588 |
- assembly(org_assembly) |
|
589 |
- } |
|
587 |
+ |
|
588 |
+ assembly(org_assembly) |
|
589 |
+ |
|
590 | 590 |
attributes <- |
591 | 591 |
c("chromosome_name", |
592 | 592 |
"start_position", |
... | ... |
@@ -630,31 +630,30 @@ convertGeneID <- |
630 | 630 |
mart = pkg.env$mart |
631 | 631 |
), |
632 | 632 |
ifelse( |
633 |
- genetype == "NCBI", |
|
633 |
+ (genetype == "NCBI" & (org_assembly == 'hg19' | org_assembly == 'hg38')), |
|
634 | 634 |
output <- |
635 | 635 |
getBM( |
636 | 636 |
attributes = c("hgnc_symbol", attributes), |
637 | 637 |
filters = "hgnc_symbol", |
638 | 638 |
values = genelist, |
639 |
- mart = pkg.env$mart, |
|
640 |
- ifelse( |
|
641 |
- genetype == "mgi_symbol", |
|
642 |
- output <- |
|
643 |
- getBM( |
|
644 |
- attributes = c("mgi_symbol", attributes), |
|
645 |
- filters = "mgi_symbol", |
|
646 |
- values = genelist, |
|
647 |
- mart = pkg.env$mart |
|
648 |
- ), |
|
649 |
- output <- |
|
650 |
- getBM( |
|
651 |
- attributes = c("external_gene_name", attributes), |
|
652 |
- filters = "external_gene_name", |
|
653 |
- values = genelist, |
|
654 |
- mart = pkg.env$mart |
|
655 |
- ) |
|
639 |
+ mart = pkg.env$mart), |
|
640 |
+ ifelse( |
|
641 |
+ (genetype == "NCBI" & org_assembly == 'mm10'), |
|
642 |
+ output <- |
|
643 |
+ getBM( |
|
644 |
+ attributes = c("mgi_symbol", attributes), |
|
645 |
+ filters = "mgi_symbol", |
|
646 |
+ values = genelist, |
|
647 |
+ mart = pkg.env$mart |
|
648 |
+ ), |
|
649 |
+ output <- |
|
650 |
+ getBM( |
|
651 |
+ attributes = c("external_gene_name", attributes), |
|
652 |
+ filters = "external_gene_name", |
|
653 |
+ values = genelist, |
|
654 |
+ mart = pkg.env$mart |
|
656 | 655 |
) |
657 |
- ) |
|
656 |
+ ) |
|
658 | 657 |
) |
659 | 658 |
) |
660 | 659 |
) |
... | ... |
@@ -96,9 +96,9 @@ mirnaGOEnricher <- |
96 | 96 |
!is.character(gene) & !is.factor(gene)) |
97 | 97 |
message("Type of the gene should be data.frame or character") |
98 | 98 |
|
99 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
100 |
- assembly(org_assembly) |
|
101 |
- } |
|
99 |
+ |
|
100 |
+ assembly(org_assembly) |
|
101 |
+ |
|
102 | 102 |
|
103 | 103 |
gene <- as.data.frame(gene) |
104 | 104 |
colnames(gene) <- c("genes") |
... | ... |
@@ -380,9 +380,9 @@ mirnaPathwayEnricher <- |
380 | 380 |
if (missing(org_assembly)) { |
381 | 381 |
message("Assembly version is missing.") |
382 | 382 |
} |
383 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
384 |
- assembly(org_assembly) |
|
385 |
- } |
|
383 |
+ |
|
384 |
+ assembly(org_assembly) |
|
385 |
+ |
|
386 | 386 |
|
387 | 387 |
if (!is.data.frame(gene) & |
388 | 388 |
!is.character(gene) & !is.factor(gene)) |
... | ... |
@@ -692,9 +692,9 @@ mirnaRegionGOEnricher <- |
692 | 692 |
if (missing(org_assembly)) { |
693 | 693 |
message("Assembly version is missing.") |
694 | 694 |
} |
695 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
696 |
- assembly(org_assembly) |
|
697 |
- } |
|
695 |
+ |
|
696 |
+ assembly(org_assembly) |
|
697 |
+ |
|
698 | 698 |
|
699 | 699 |
if (target) { |
700 | 700 |
genes <- |
... | ... |
@@ -863,6 +863,16 @@ mirnaRegionGOEnricher <- |
863 | 863 |
min = pkg.env$min, |
864 | 864 |
enrichTest = pkg.env$enrichTest |
865 | 865 |
) |
866 |
+ if (length(miEnrich@Term) > 0) |
|
867 |
+ { |
|
868 |
+ miEnrich@ncGeneList <- commonGeneRegion( |
|
869 |
+ mrnaobject = miEnrich, |
|
870 |
+ org_assembly = org_assembly, |
|
871 |
+ downstream = pkg.env$downstream, |
|
872 |
+ upstream = pkg.env$upstream, |
|
873 |
+ inRegion = region |
|
874 |
+ ) |
|
875 |
+ } |
|
866 | 876 |
|
867 | 877 |
return(miEnrich) |
868 | 878 |
} |
... | ... |
@@ -961,9 +971,8 @@ mirnaRegionPathwayEnricher <- |
961 | 971 |
message("Assembly version is missing.") |
962 | 972 |
} |
963 | 973 |
|
964 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
965 |
- assembly(org_assembly) |
|
966 |
- } |
|
974 |
+ assembly(org_assembly) |
|
975 |
+ |
|
967 | 976 |
|
968 | 977 |
if (target) { |
969 | 978 |
genes <- |
... | ... |
@@ -1210,14 +1219,14 @@ predictmiTargets <- function(gene, type, org_assembly) |
1210 | 1219 |
ifelse( |
1211 | 1220 |
org_assembly == 'mm10', |
1212 | 1221 |
targets <- read.table(paste0("https://raw.githubusercontent.com/", |
1213 |
- "guldenolgun/NoRCE-data/master/target/target_mouse.txt")), |
|
1222 |
+ "guldenolgun/NoRCE-data/master/target/target_mouse.txt")), |
|
1214 | 1223 |
ifelse( |
1215 | 1224 |
org_assembly == 'dre10', |
1216 | 1225 |
markk <- 2, |
1217 | 1226 |
ifelse ( |
1218 | 1227 |
org_assembly == 'ce11', |
1219 | 1228 |
targets <- read.table(paste0("https://raw.githubusercontent.com/", |
1220 |
- "guldenolgun/NoRCE-data/master/target/target_worm.txt")), |
|
1229 |
+ "guldenolgun/NoRCE-data/master/target/target_worm.txt")), |
|
1221 | 1230 |
ifelse ( |
1222 | 1231 |
org_assembly == 'rn6', |
1223 | 1232 |
markk <- 1, |
... | ... |
@@ -1225,10 +1234,10 @@ predictmiTargets <- function(gene, type, org_assembly) |
1225 | 1234 |
org_assembly == 'dm6', |
1226 | 1235 |
targets <- read.table(paste0( |
1227 | 1236 |
"https://raw.githubusercontent.com/", |
1228 |
- "guldenolgun/NoRCE-data/master/target/target_fly.txt"), |
|
1229 |
- skip = 1), |
|
1230 |
- targets <- read.table(paste0("https://raw.githubusercontent.com/", |
|
1231 |
- "guldenolgun/NoRCE-data/master/target/target_human.txt")) |
|
1237 |
+ "guldenolgun/NoRCE-data/master/target/target_fly.txt"), |
|
1238 |
+ skip = 1), |
|
1239 |
+ targets <- read.table(paste0("https://raw.githubusercontent.com/", |
|
1240 |
+ "guldenolgun/NoRCE-data/master/target/target_human.txt")) |
|
1232 | 1241 |
) |
1233 | 1242 |
) |
1234 | 1243 |
) |
... | ... |
@@ -1239,13 +1248,13 @@ predictmiTargets <- function(gene, type, org_assembly) |
1239 | 1248 |
|
1240 | 1249 |
if(markk == 1){ |
1241 | 1250 |
tmp1 <- read.table(paste0("https://raw.githubusercontent.com/", |
1242 |
- "guldenolgun/NoRCE-data/master/target/target_rat.txt")) |
|
1251 |
+ "guldenolgun/NoRCE-data/master/target/target_rat.txt")) |
|
1243 | 1252 |
tmp2 <- read.table(paste0("https://raw.githubusercontent.com/", |
1244 |
- "guldenolgun/NoRCE-data/master/target/target_rat1.txt")) |
|
1253 |
+ "guldenolgun/NoRCE-data/master/target/target_rat1.txt")) |
|
1245 | 1254 |
tmp3 <- read.table(paste0("https://raw.githubusercontent.com/", |
1246 |
- "guldenolgun/NoRCE-data/master/target/target_rat2.txt")) |
|
1255 |
+ "guldenolgun/NoRCE-data/master/target/target_rat2.txt")) |
|
1247 | 1256 |
tmp4 <- read.table(paste0("https://raw.githubusercontent.com/", |
1248 |
- "guldenolgun/NoRCE-data/master/target/target_rat3.txt")) |
|
1257 |
+ "guldenolgun/NoRCE-data/master/target/target_rat3.txt")) |
|
1249 | 1258 |
target <- rbind(tmp1,tmp2,tmp3) |
1250 | 1259 |
targets <- merge(target,tmp4, by = 'V1') |
1251 | 1260 |
colnames(targets) <- c("ens","mir","sym","trans") |
... | ... |
@@ -1253,9 +1262,9 @@ predictmiTargets <- function(gene, type, org_assembly) |
1253 | 1262 |
|
1254 | 1263 |
if(markk == 2){ |
1255 | 1264 |
tmp1 <- read.table(paste0("https://raw.githubusercontent.com/", |
1256 |
- "guldenolgun/NoRCE-data/master/target/target_zebra.txt")) |
|
1265 |
+ "guldenolgun/NoRCE-data/master/target/target_zebra.txt")) |
|
1257 | 1266 |
tmp2 <- read.table(paste0("https://raw.githubusercontent.com/", |
1258 |
- "guldenolgun/NoRCE-data/master/target/target_zebra1.txt")) |
|
1267 |
+ "guldenolgun/NoRCE-data/master/target/target_zebra1.txt")) |
|
1259 | 1268 |
targets <- cbind(rbind(tmp1,tmp2),"") |
1260 | 1269 |
colnames(target) <- c("ens","sym","mir","trans") |
1261 | 1270 |
} |
... | ... |
@@ -1265,16 +1274,16 @@ predictmiTargets <- function(gene, type, org_assembly) |
1265 | 1274 |
|
1266 | 1275 |
ifelse(type == "NCBI", |
1267 | 1276 |
where <- |
1268 |
- targets[which(tolower(targets$sym) %in% gene$genes),], |
|
1277 |
+ targets[which(tolower(targets$sym) %in% tolower(gene$genes)),], |
|
1269 | 1278 |
ifelse ( |
1270 | 1279 |
type == "mirna", |
1271 | 1280 |
where <- |
1272 | 1281 |
targets[which(tolower(targets$mir) %in% tolower(gene$genes)),], |
1273 | 1282 |
ifelse (type == "Ensembl_gene" , |
1274 | 1283 |
where <- |
1275 |
- targets[which(tolower(targets$ens) %in% gene$genes),], |
|
1284 |
+ targets[which(tolower(targets$ens) %in% tolower(gene$genes)),], |
|
1276 | 1285 |
where <- |
1277 |
- targets[which(tolower(targets$trans) %in% gene$genes),]) |
|
1286 |
+ targets[which(tolower(targets$trans) %in% tolower(gene$genes)),]) |
|
1278 | 1287 |
)) |
1279 | 1288 |
if (nrow(where) == 0) { |
1280 | 1289 |
return(NULL) |
... | ... |
@@ -59,9 +59,8 @@ KeggEnrichment <- |
59 | 59 |
if (missing(org_assembly)) { |
60 | 60 |
message("Assembly version is missing.") |
61 | 61 |
} |
62 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
63 | 62 |
assembly(org_assembly) |
64 |
- } |
|
63 |
+ |
|
65 | 64 |
pathTable <- unique(keggPathwayDB(org_assembly)) |
66 | 65 |
genes <- as.data.frame(genes) |
67 | 66 |
colnames(genes) <- 'g' |
... | ... |
@@ -197,9 +196,9 @@ reactomeEnrichment <- |
197 | 196 |
if (missing(org_assembly)) { |
198 | 197 |
message("Assembly version is missing.") |
199 | 198 |
} |
200 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
199 |
+ |
|
201 | 200 |
assembly(org_assembly) |
202 |
- } |
|
201 |
+ |
|
203 | 202 |
pathTable <- unique(reactomePathwayDB(org_assembly)) |
204 | 203 |
genes <- as.data.frame(genes) |
205 | 204 |
colnames(genes) <- 'g' |
... | ... |
@@ -659,10 +658,9 @@ pathwayEnrichment <- function(genes, |
659 | 658 |
|
660 | 659 |
if (missing(isSymbol)) |
661 | 660 |
message("GMT gene format is missing.") |
662 |
- |
|
663 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
661 |
+ |
|
664 | 662 |
assembly(org_assembly) |
665 |
- } |
|
663 |
+ |
|
666 | 664 |
genes <- as.data.frame(genes) |
667 | 665 |
colnames(genes) <- 'g' |
668 | 666 |
|
... | ... |
@@ -24,7 +24,6 @@ drawDotPlot <- function(mrnaObject, type = "pAdjust", n) { |
24 | 24 |
tmp <- topEnrichment(mrnaObject, type, n) |
25 | 25 |
if (type == "pvalue") { |
26 | 26 |
p <- |
27 |
- p <- |
|
28 | 27 |
ggplot(tmp, aes(x = Pvalue, y = reorder(Term, Pvalue))) + |
29 | 28 |
geom_point(aes(size = EnrichGeneNumber, color = |
30 | 29 |
Pvalue), position = "dodge") + theme_bw() + theme( |
... | ... |
@@ -39,7 +38,7 @@ drawDotPlot <- function(mrnaObject, type = "pAdjust", n) { |
39 | 38 |
) + labs( |
40 | 39 |
x = |
41 | 40 |
"p-value", |
42 |
- y = "GO Terms", |
|
41 |
+ y = "Terms", |
|
43 | 42 |
color = "p-value", |
44 | 43 |
size = "Gene Count" |
45 | 44 |
) |
... | ... |
@@ -60,7 +59,7 @@ drawDotPlot <- function(mrnaObject, type = "pAdjust", n) { |
60 | 59 |
) + labs( |
61 | 60 |
x = |
62 | 61 |
"pAdjust-value", |
63 |
- y = "GO Terms", |
|
62 |
+ y = "Terms", |
|
64 | 63 |
color = "p-Adj", |
65 | 64 |
size = "Gene Count" |
66 | 65 |
) |
... | ... |
@@ -124,6 +123,12 @@ writeEnrichment <- |
124 | 123 |
#' @return Give top n enrichment results |
125 | 124 |
#' |
126 | 125 |
#' @importFrom dplyr %>% |
126 |
+#' |
|
127 |
+#' @examples |
|
128 |
+#' ncGO<-geneGOEnricher(gene = brain_disorder_ncRNA, org_assembly='hg19', |
|
129 |
+#' near=TRUE, genetype = 'Ensembl_gene') |
|
130 |
+#' |
|
131 |
+#' result = topEnrichment(mrnaObject = ncGO, type = "pvalue", n = 10) |
|
127 | 132 |
#' |
128 | 133 |
#' @export |
129 | 134 |
topEnrichment <- function(mrnaObject, type, n) { |
... | ... |
@@ -145,11 +150,13 @@ topEnrichment <- function(mrnaObject, type, n) { |
145 | 150 |
), |
146 | 151 |
go = unlist(mrnaObject@geneList)) |
147 | 152 |
|
153 |
+ tmp <- mrnaObject@ncGeneList |
|
154 |
+ is.na(tmp) <- lengths(tmp) == 0 |
|
148 | 155 |
table1 <- data.frame(gene = rep( |
149 | 156 |
names(mrnaObject@geneList), |
150 |
- lapply(mrnaObject@ncGeneList, length) |
|
157 |
+ lapply(tmp, length) |
|
151 | 158 |
), |
152 |
- go = unlist(mrnaObject@ncGeneList)) |
|
159 |
+ go = unlist(tmp)) |
|
153 | 160 |
|
154 | 161 |
table <- table[!duplicated(table), ] |
155 | 162 |
|
... | ... |
@@ -357,7 +364,7 @@ createNetwork <- |
357 | 364 |
layout = l * 1.0 |
358 | 365 |
) |
359 | 366 |
return(p) |
360 |
- } |
|
367 |
+ } |
|
361 | 368 |
|
362 | 369 |
#' Plot and save the GO term DAG of the top n enrichments in terms of |
363 | 370 |
#' p-values or adjusted p-values with an user provided format |
... | ... |
@@ -414,7 +421,7 @@ getGoDag <- |
414 | 421 |
|
415 | 422 |
node_color <- |
416 | 423 |
grDevices::colorRampPalette(c("lightgoldenrodyellow", "orangered"), |
417 |
- bias = 0.5)(length(p_range)) |
|
424 |
+ bias = 0.5)(length(p_range)) |
|
418 | 425 |
|
419 | 426 |
color <- seq_along(dt$Pvalue)#seq_len(2) |
420 | 427 |
if (type == 'pvalue') { |
... | ... |
@@ -499,9 +506,7 @@ getKeggDiagram <- |
499 | 506 |
if (missing(pathway)) |
500 | 507 |
message("Expected pathway ID is missing. Please specify pathway ID") |
501 | 508 |
|
502 |
- if (class(pkg.env$mart)[1] != "Mart") { |
|
503 |
- assembly(org_assembly) |
|
504 |
- } |
|
509 |
+ assembly(org_assembly) |
|
505 | 510 |
|
506 | 511 |
path_index <- which(names(mrnaObject@geneList) == pathway) |
507 | 512 |
ns <- unique( |
... | ... |
@@ -514,15 +519,15 @@ getKeggDiagram <- |
514 | 519 |
) |
515 | 520 |
n <- paste(ns$entrezgene_id, collapse = '/') |
516 | 521 |
if(identical(interactive(), TRUE)){ |
517 |
- browseURL( |
|
518 |
- paste0( |
|
519 |
- "http://www.kegg.jp/kegg-bin/show_pathway?", |
|
520 |
- pathway, |
|
521 |
- '/', |
|
522 |
- n, |
|
523 |
- collapse = '' |
|
522 |
+ browseURL( |
|
523 |
+ paste0( |
|
524 |
+ "http://www.kegg.jp/kegg-bin/show_pathway?", |
|
525 |
+ pathway, |
|
526 |
+ '/', |
|
527 |
+ n, |
|
528 |
+ collapse = '' |
|
529 |
+ ) |
|
524 | 530 |
) |
525 |
- ) |
|
526 | 531 |
} |
527 | 532 |
} |
528 | 533 |
|
... | ... |
@@ -567,5 +572,5 @@ getReactomeDiagram <- function(mrnaObject, pathway, imageFormat) { |
567 | 572 |
n |
568 | 573 |
) |
569 | 574 |
if(identical(interactive(), TRUE)) |
570 |
- browseURL(a) |
|
575 |
+ browseURL(a) |
|
571 | 576 |
} |
... | ... |
@@ -27,7 +27,74 @@ commonGene <- function(mrnaobject, |
27 | 27 |
tmp <- big_islands[S4Vectors::queryHits(hits), ] |
28 | 28 |
tmp1 <- aa[S4Vectors::subjectHits(hits), ] |
29 | 29 |
|
30 |
- pairss <- rbind(pairss, data.frame(tmp$gene, tmp1$gene)) |
|
30 |
+ pairss <- unique(rbind(pairss, data.frame(tmp$gene, tmp1$gene))) |
|
31 |
+ |
|
32 |
+ getNoncode <- function(x) { |
|
33 |
+ a <- |
|
34 |
+ pairss[which(pairss[, 2] %in% unlist(mrnaobject@geneList[[x]])), 1] |
|
35 |
+ a[!duplicated(a)] |
|
36 |
+ } |
|
37 |
+ |
|
38 |
+ ab <- lapply(seq_along(mrnaobject@Term), getNoncode) |
|
39 |
+ |
|
40 |
+ ab[IRanges::isEmpty(ab)] <- 'NA' |
|
41 |
+ if (length(mrnaobject@geneList) == 1) |
|
42 |
+ ab <- list(ab) |
|
43 |
+ return(ab) |
|
44 |
+} |
|
45 |
+commonGeneRegion <- function(mrnaobject, |
|
46 |
+ org_assembly, |
|
47 |
+ downstream, |
|
48 |
+ upstream, |
|
49 |
+ inRegion) { |
|
50 |
+ a <- unique(unlist(mrnaobject@geneList)) |
|
51 |
+ aa <- |
|
52 |
+ convertGeneID(genelist = a, |
|
53 |
+ genetype = 'NCBI', |
|
54 |
+ org_assembly = org_assembly) |
|
55 |
+ |
|
56 |
+ regions <- paste(sub('chr','',seqlevels(inRegion)), start(inRegion), end(inRegion), sep=":") |
|
57 |
+ if(org_assembly == 'hg19' | org_assembly == 'hg38'){ |
|
58 |
+ results <- getBM(attributes = c( "chromosome_name", "start_position","end_position", 'strand', "hgnc_symbol"), |
|
59 |
+ filters = c("chromosomal_region"), |
|
60 |
+ values=regions, |
|
61 |
+ mart=pkg.env$mart) |
|
62 |
+ }else{ |
|
63 |
+ results <- getBM(attributes = c( "chromosome_name", "start_position","end_position", 'strand', "external_gene_name"), |
|
64 |
+ filters = c("chromosomal_region"), |
|
65 |
+ values=regions, |
|
66 |
+ mart=pkg.env$mart) |
|
67 |
+ } |
|
68 |
+ |
|
69 |
+ colnames(results)[5] = 'hgnc_symbol' |
|
70 |
+ |
|
71 |
+ for(i in 1 : dim(results)){ |
|
72 |
+ if(results$hgnc_symbol[i] == ''){ |
|
73 |
+ results$hgnc_symbol[i] = paste0('chr',results$chromosome_name, ':', results$start_position,'-', results$end_position) |
|
74 |
+ }} |
|
75 |
+ |
|
76 |
+ file1 <- |
|
77 |
+ with(results, GRanges( |
|
78 |
+ paste0("chr", chromosome_name), |
|
79 |
+ IRanges::IRanges(start_position, end_position), |
|
80 |
+ strand, |
|
81 |
+ hgnc_symbol |
|
82 |
+ )) |
|
83 |
+ |
|
84 |
+ big_islands <- |
|
85 |
+ resize(file1, width = upstream + width(inRegion), fix = "start") |
|
86 |
+ hits <- findOverlaps(big_islands, aa, ignore.strand = TRUE) |
|
87 |
+ tmp <- big_islands[S4Vectors::queryHits(hits), ] |
|
88 |
+ tmp1 <- aa[S4Vectors::subjectHits(hits), ] |
|
89 |
+ pairss <- data.frame(tmp$hgnc_symbol, tmp1$gene) |
|
90 |
+ |
|
91 |
+ big_islands <- |
|
92 |
+ resize(file1, width = downstream + width(inRegion), fix = "end") |
|
93 |
+ hits <- findOverlaps(big_islands, aa, ignore.strand = TRUE) |
|
94 |
+ tmp <- big_islands[S4Vectors::queryHits(hits), ] |
|
95 |
+ tmp1 <- aa[S4Vectors::subjectHits(hits), ] |
|
96 |
+ |
|
97 |
+ pairss <- unique(rbind(pairss, data.frame(tmp$hgnc_symbol, tmp1$gene))) |
|
31 | 98 |
|
32 | 99 |
getNoncode <- function(x) { |
33 | 100 |
a <- |
... | ... |
@@ -9,8 +9,7 @@ getmiRNACount(mirnagene, cancer, databaseFile) |
9 | 9 |
\arguments{ |
10 | 10 |
\item{mirnagene}{Data frame of the mature format} |
11 | 11 |
|
12 |
-\item{cancer}{Name of the TCGA project code such as 'BRCA' that is |
|
13 |
-analyzed for miRNA-mRNA correlation} |
|
12 |
+\item{cancer}{Name of the TCGA project code such as 'BRCA'} |
|
14 | 13 |
|
15 | 14 |
\item{databaseFile}{Path of miRcancer.db file} |
16 | 15 |
} |
... | ... |
@@ -23,3 +23,10 @@ Give top n enrichment results |
23 | 23 |
Number of top enrichment results of the pathway or GO terms for the given |
24 | 24 |
object and the order type - p-value or adjusted p-value. |
25 | 25 |
} |
26 |
+\examples{ |
|
27 |
+ncGO<-geneGOEnricher(gene = brain_disorder_ncRNA, org_assembly='hg19', |
|
28 |
+ near=TRUE, genetype = 'Ensembl_gene') |
|
29 |
+ |
|
30 |
+result = topEnrichment(mrnaObject = ncGO, type = "pvalue", n = 10) |
|
31 |
+ |
|
32 |
+} |