... | ... |
@@ -247,7 +247,7 @@ setMethod('show', 'MotifList', |
247 | 247 |
if (length (object) == 0) |
248 | 248 |
return () |
249 | 249 |
|
250 |
- cat ('| Created from downloaded public sources: 2013-Aug-30', '\n', sep='') |
|
250 |
+ cat ('| Created from downloaded public sources, last update: 2022-Mar-04', '\n', sep='') |
|
251 | 251 |
|
252 | 252 |
tbl.dataSource = as.data.frame (table (mcols (object)$dataSource)) |
253 | 253 |
tbl.org = as.data.frame (table (mcols (object)$organism)) |
... | ... |
@@ -159,7 +159,7 @@ matrixToMemeText = function (matrices) |
159 | 159 |
s [index] = sprintf ('MOTIF %s', fixed.name) |
160 | 160 |
index = index + 1 |
161 | 161 |
new.line = |
162 |
- sprintf ('letter-probability matrix: alength= 4 w= %d nsites= %d E=8.1e-020', |
|
162 |
+ sprintf ('letter-probability matrix: alength= 4 w= %d nsites= %d E=%f', |
|
163 | 163 |
nrow (tfMat), 45, 8.1e-020) |
164 | 164 |
s [index] = new.line |
165 | 165 |
index = index + 1 |
... | ... |
@@ -4,8 +4,8 @@ setGeneric('geneToMotif', signature='object', function(object, geneSymbols, sour |
4 | 4 |
setGeneric('associateTranscriptionFactors', signature='object', |
5 | 5 |
function(object, tbl.withMotifs, source, expand.rows, motifColumnName="motifName") |
6 | 6 |
standardGeneric('associateTranscriptionFactors')) |
7 |
-setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff, |
|
8 |
- fimoDataFrameStyle=FALSE) standardGeneric('matchMotif')) |
|
7 |
+#setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff, |
|
8 |
+# fimoDataFrameStyle=FALSE) standardGeneric('matchMotif')) |
|
9 | 9 |
#------------------------------------------------------------------------------------------------------------------------ |
10 | 10 |
setClass ('MotifList', |
11 | 11 |
contains='SimpleList', |
... | ... |
@@ -64,29 +64,29 @@ MotifList = function (matrices=list(), tbl.metadata=data.frame ()) |
64 | 64 |
|
65 | 65 |
} # ctor |
66 | 66 |
#------------------------------------------------------------------------------- |
67 |
-setMethod('matchMotif', signature='MotifList', |
|
68 |
- |
|
69 |
- function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){ |
|
70 |
- motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix")) |
|
71 |
- motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix) |
|
72 |
- gr.list <- matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff) |
|
73 |
- result <- gr.list |
|
74 |
- if(fimoDataFrameStyle){ |
|
75 |
- gr <- unlist(gr.list) |
|
76 |
- motif.names <- names(gr) |
|
77 |
- names(gr) <- NULL |
|
78 |
- tbl <- as.data.frame(gr) |
|
79 |
- tbl$motif_id <- motif.names |
|
80 |
- colnames(tbl)[1] <- "chrom" |
|
81 |
- tbl$chrom <- as.character(tbl$chrom) |
|
82 |
- colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score" |
|
83 |
- new.order <- order(tbl$start, decreasing=FALSE) |
|
84 |
- tbl <- tbl[new.order,] |
|
85 |
- result <- tbl |
|
86 |
- } |
|
87 |
- return(result) |
|
88 |
- }) |
|
89 |
- |
|
67 |
+# setMethod('matchMotif', signature='MotifList', |
|
68 |
+# |
|
69 |
+# function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){ |
|
70 |
+# motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix")) |
|
71 |
+# motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix) |
|
72 |
+# gr.list <- motifmatchr::matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff) |
|
73 |
+# result <- gr.list |
|
74 |
+# if(fimoDataFrameStyle){ |
|
75 |
+# gr <- unlist(gr.list) |
|
76 |
+# motif.names <- names(gr) |
|
77 |
+# names(gr) <- NULL |
|
78 |
+# tbl <- as.data.frame(gr) |
|
79 |
+# tbl$motif_id <- motif.names |
|
80 |
+# colnames(tbl)[1] <- "chrom" |
|
81 |
+# tbl$chrom <- as.character(tbl$chrom) |
|
82 |
+# colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score" |
|
83 |
+# new.order <- order(tbl$start, decreasing=FALSE) |
|
84 |
+# tbl <- tbl[new.order,] |
|
85 |
+# result <- tbl |
|
86 |
+# } |
|
87 |
+# return(result) |
|
88 |
+# }) |
|
89 |
+# |
|
90 | 90 |
#------------------------------------------------------------------------------- |
91 | 91 |
setMethod ('subset', signature = 'MotifList', |
92 | 92 |
|
... | ... |
@@ -4,6 +4,8 @@ setGeneric('geneToMotif', signature='object', function(object, geneSymbols, sour |
4 | 4 |
setGeneric('associateTranscriptionFactors', signature='object', |
5 | 5 |
function(object, tbl.withMotifs, source, expand.rows, motifColumnName="motifName") |
6 | 6 |
standardGeneric('associateTranscriptionFactors')) |
7 |
+setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff, |
|
8 |
+ fimoDataFrameStyle=FALSE) standardGeneric('matchMotif')) |
|
7 | 9 |
#------------------------------------------------------------------------------------------------------------------------ |
8 | 10 |
setClass ('MotifList', |
9 | 11 |
contains='SimpleList', |
... | ... |
@@ -61,6 +63,30 @@ MotifList = function (matrices=list(), tbl.metadata=data.frame ()) |
61 | 63 |
object |
62 | 64 |
|
63 | 65 |
} # ctor |
66 |
+#------------------------------------------------------------------------------- |
|
67 |
+setMethod('matchMotif', signature='MotifList', |
|
68 |
+ |
|
69 |
+ function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){ |
|
70 |
+ motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix")) |
|
71 |
+ motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix) |
|
72 |
+ gr.list <- matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff) |
|
73 |
+ result <- gr.list |
|
74 |
+ if(fimoDataFrameStyle){ |
|
75 |
+ gr <- unlist(gr.list) |
|
76 |
+ motif.names <- names(gr) |
|
77 |
+ names(gr) <- NULL |
|
78 |
+ tbl <- as.data.frame(gr) |
|
79 |
+ tbl$motif_id <- motif.names |
|
80 |
+ colnames(tbl)[1] <- "chrom" |
|
81 |
+ tbl$chrom <- as.character(tbl$chrom) |
|
82 |
+ colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score" |
|
83 |
+ new.order <- order(tbl$start, decreasing=FALSE) |
|
84 |
+ tbl <- tbl[new.order,] |
|
85 |
+ result <- tbl |
|
86 |
+ } |
|
87 |
+ return(result) |
|
88 |
+ }) |
|
89 |
+ |
|
64 | 90 |
#------------------------------------------------------------------------------- |
65 | 91 |
setMethod ('subset', signature = 'MotifList', |
66 | 92 |
|
... | ... |
@@ -307,8 +307,10 @@ setMethod ('query', 'MotifList', |
307 | 307 |
|
308 | 308 |
# get the cumulative intersection of all the "and" terms |
309 | 309 |
# this steadily dimishes the set of indices |
310 |
- for(indices in and.indices) |
|
311 |
- final.indices <- intersect(final.indices, indices) |
|
310 |
+ for(indices in and.indices){ |
|
311 |
+ final.indices <- intersect(final.indices, indices) |
|
312 |
+ #message(sprintf(" final.indices length is now %d", length(final.indices))) |
|
313 |
+ } |
|
312 | 314 |
|
313 | 315 |
# lump all of the "or" terms together: they all get included |
314 | 316 |
final.indices <- intersect(unlist(or.indices), final.indices) |
... | ... |
@@ -319,6 +321,7 @@ setMethod ('query', 'MotifList', |
319 | 321 |
|
320 | 322 |
object [final.indices] |
321 | 323 |
}) |
324 |
+ |
|
322 | 325 |
#------------------------------------------------------------------------------- |
323 | 326 |
# Addition on 2017/06/15 from Matt Richards |
324 | 327 |
|
... | ... |
@@ -2,7 +2,8 @@ setGeneric('query', signature='object', function(object, andStrings, orStrings=c |
2 | 2 |
setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
3 | 3 |
setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif')) |
4 | 4 |
setGeneric('associateTranscriptionFactors', signature='object', |
5 |
- function(object, tbl.withMotifs, source, expand.rows) standardGeneric('associateTranscriptionFactors')) |
|
5 |
+ function(object, tbl.withMotifs, source, expand.rows, motifColumnName="motifName") |
|
6 |
+ standardGeneric('associateTranscriptionFactors')) |
|
6 | 7 |
#------------------------------------------------------------------------------------------------------------------------ |
7 | 8 |
setClass ('MotifList', |
8 | 9 |
contains='SimpleList', |
... | ... |
@@ -529,10 +530,10 @@ setMethod ('geneToMotif', 'MotifList', |
529 | 530 |
#------------------------------------------------------------------------------- |
530 | 531 |
setMethod('associateTranscriptionFactors', 'MotifList', |
531 | 532 |
|
532 |
- function(object, tbl.withMotifs, source, expand.rows){ |
|
533 |
- stopifnot("motifName" %in% colnames(tbl.withMotifs)) |
|
534 |
- tbl.tf <- motifToGene(object, tbl.withMotifs$motifName, source) |
|
535 |
- merge(tbl.withMotifs, tbl.tf, by.x="motifName", by.y="motif", all.x=TRUE) |
|
533 |
+ function(object, tbl.withMotifs, source, expand.rows, motifColumnName="motifName"){ |
|
534 |
+ stopifnot(motifColumnName %in% colnames(tbl.withMotifs)) |
|
535 |
+ tbl.tf <- motifToGene(object, unique(tbl.withMotifs[, motifColumnName]), source) |
|
536 |
+ merge(tbl.withMotifs, tbl.tf, by.x=motifColumnName, by.y="motif", all.x=TRUE) |
|
536 | 537 |
}) |
537 | 538 |
|
538 | 539 |
#------------------------------------------------------------------------------- |
... | ... |
@@ -491,7 +491,7 @@ setMethod ('geneToMotif', 'MotifList', |
491 | 491 |
|
492 | 492 |
function (object, geneSymbols, source, ignore.case=FALSE) { |
493 | 493 |
source <- tolower(source) |
494 |
- stopifnot(source %in% c("motifdb", "tfclass")) |
|
494 |
+ stopifnot(all(source %in% c("motifdb", "tfclass"))) |
|
495 | 495 |
extract.mdb <- function(gene){ |
496 | 496 |
geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable" |
497 | 497 |
if(ignore.case) |
... | ... |
@@ -503,13 +503,13 @@ setMethod ('geneToMotif', 'MotifList', |
503 | 503 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
504 | 504 |
tbl |
505 | 505 |
} |
506 |
- if(source %in% c("motifdb")){ |
|
506 |
+ if("motifdb" %in% source){ |
|
507 | 507 |
tbls <- lapply(geneSymbols, extract.mdb) |
508 | 508 |
result <- do.call(rbind, tbls) |
509 | 509 |
if(nrow(result) > 0) |
510 | 510 |
result$source <- "MotifDb" |
511 | 511 |
} |
512 |
- if(source %in% c("tfclass")){ |
|
512 |
+ if("tfclass" %in% source){ |
|
513 | 513 |
if(ignore.case) |
514 | 514 |
tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tolower(tf.gene) %in% tolower(geneSymbols)) |
515 | 515 |
else |
... | ... |
@@ -431,6 +431,9 @@ setMethod ('motifToGene', 'MotifList', |
431 | 431 |
# the mcols (the metadata, the annotation) which accompanies |
432 | 432 |
# each pfm matrix |
433 | 433 |
|
434 |
+ tbl.mdb <- data.frame() |
|
435 |
+ tbl.tfc <- data.frame() |
|
436 |
+ |
|
434 | 437 |
name.map <- as.list(motifs) |
435 | 438 |
names(name.map) <- motifs |
436 | 439 |
for(i in seq_len(length(motifs))){ |
... | ... |
@@ -441,46 +444,44 @@ setMethod ('motifToGene', 'MotifList', |
441 | 444 |
motifs[i] <-newValue |
442 | 445 |
} |
443 | 446 |
} # for i |
444 |
- #browser() |
|
447 |
+ |
|
445 | 448 |
source <- tolower(source) |
446 | 449 |
stopifnot(all(source %in% c("motifdb", "tfclass"))) |
447 |
- tbl.mdb <- data.frame() |
|
448 | 450 |
if("motifdb" %in% source){ |
449 | 451 |
providerId <- NULL # avoid R CMD check note |
450 | 452 |
tbl.mdb <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
451 |
- if(nrow(tbl.mdb) == 0) |
|
452 |
- return(data.frame()) |
|
453 |
- tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
|
454 |
- colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
|
455 |
- tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
|
456 | 453 |
if(nrow(tbl.mdb) > 0){ |
454 |
+ tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
|
455 |
+ colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
|
456 |
+ tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
|
457 | 457 |
tbl.mdb$source <- "MotifDb" |
458 | 458 |
tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "pubmedID", "organism", "source")] |
459 | 459 |
rownames(tbl.mdb) <- NULL |
460 |
- } |
|
460 |
+ } # nrow of tbl.mdb > 0 |
|
461 | 461 |
} # motifDb |
462 |
- tbl.tfc <- data.frame() |
|
462 |
+ |
|
463 | 463 |
if("tfclass" %in% source){ |
464 | 464 |
motif <- NULL |
465 | 465 |
tbl.tfc <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
466 |
- if(nrow(tbl.tfc) == 0) |
|
467 |
- return(data.frame()) |
|
468 |
- tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")]) |
|
469 |
- tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),] |
|
470 |
- rownames(tbl.tfc) <- NULL |
|
471 |
- colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID") |
|
472 | 466 |
if(nrow(tbl.tfc) > 0){ |
467 |
+ tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")]) |
|
468 |
+ tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),] |
|
469 |
+ rownames(tbl.tfc) <- NULL |
|
470 |
+ colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID") |
|
473 | 471 |
tbl.tfc$source <- "TFClass" |
474 | 472 |
tbl.tfc$organism <- "Hsapiens" |
475 |
- } |
|
476 |
- } |
|
473 |
+ } # nrow(tbl.tfc) > 0 |
|
474 |
+ } # tfclass |
|
475 |
+ |
|
476 |
+ if(nrow(tbl.mdb) == 0 && nrow(tbl.tfc) == 0) |
|
477 |
+ return(data.frame()) |
|
478 |
+ |
|
477 | 479 |
tbl.out <- rbind(tbl.mdb, tbl.tfc) |
478 | 480 |
dups <- which(duplicated(tbl.out[, c("motif", "geneSymbol", "organism", "source")])) |
479 | 481 |
if(length(dups) > 0) |
480 | 482 |
tbl.out <- tbl.out[-dups,] |
481 | 483 |
if(length(name.map) > 0) |
482 | 484 |
tbl.out$motif <- as.character(name.map[tbl.out$motif]) |
483 |
- #browser() |
|
484 | 485 |
tbl.out |
485 | 486 |
}) |
486 | 487 |
|
... | ... |
@@ -475,6 +475,9 @@ setMethod ('motifToGene', 'MotifList', |
475 | 475 |
} |
476 | 476 |
} |
477 | 477 |
tbl.out <- rbind(tbl.mdb, tbl.tfc) |
478 |
+ dups <- which(duplicated(tbl.out[, c("motif", "geneSymbol", "organism", "source")])) |
|
479 |
+ if(length(dups) > 0) |
|
480 |
+ tbl.out <- tbl.out[-dups,] |
|
478 | 481 |
if(length(name.map) > 0) |
479 | 482 |
tbl.out$motif <- as.character(name.map[tbl.out$motif]) |
480 | 483 |
#browser() |
... | ... |
@@ -525,53 +528,62 @@ setMethod ('geneToMotif', 'MotifList', |
525 | 528 |
#------------------------------------------------------------------------------- |
526 | 529 |
setMethod('associateTranscriptionFactors', 'MotifList', |
527 | 530 |
|
528 |
- function(object, tbl.withMotifs, source, expand.rows){ |
|
529 |
- source <- tolower(source) |
|
530 |
- stopifnot(source %in% c("motifdb", "tfclass")) |
|
531 |
- tbl.out <- data.frame() |
|
532 |
- if(source %in% c("motifdb")){ |
|
533 |
- # lookup up in the object metadata, expect one TF geneSymbol per matrix name |
|
534 |
- pfm.ids <- tbl.withMotifs[, "motifName"] |
|
535 |
- matched.rows <- match(pfm.ids, names(as.list(object))) |
|
536 |
- #if(length(matched.rows) == nrow(tbl.withMotifs)) { |
|
537 |
- tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")] |
|
538 |
- tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
539 |
- tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
540 |
- tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
541 |
- } # direct |
|
542 |
- if(source %in% c("tfclass")){ |
|
543 |
- if(! "shortMotif" %in% colnames(tbl.withMotifs)){ |
|
544 |
- stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source") |
|
545 |
- } |
|
546 |
- tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE) |
|
547 |
- motif.ids <- tbl.withMotifs[, "shortMotif"] |
|
548 |
- geneSymbols <- lapply(motif.ids, function(id) |
|
549 |
- paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";")) |
|
550 |
- geneSymbols <- unlist(geneSymbols) |
|
551 |
- pubmedIds <- lapply(motif.ids, function(id) |
|
552 |
- unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)])) |
|
553 |
- pubmedIds <- as.character(pubmedIds) |
|
554 |
- pubmedIds <- gsub("integer(0)", "", pubmedIds, fixed=TRUE) |
|
555 |
- tbl.new <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE) |
|
556 |
- tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
557 |
- tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
558 |
- tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
559 |
- |
|
560 |
- if(expand.rows){ |
|
561 |
- rows.with.na <- which(is.na(tbl.out$geneSymbol)) |
|
562 |
- rows.with.geneSymbol <- setdiff(1:nrow(tbl.out), rows.with.na) |
|
563 |
- tbl.asIs <- tbl.out[rows.with.na,] |
|
564 |
- tbl.toExpand <- tbl.out[rows.with.geneSymbol,] |
|
565 |
- geneSymbols.split <- strsplit(tbl.toExpand$geneSymbol, ";") |
|
566 |
- counts <- unlist(lapply(geneSymbols.split, length)) |
|
567 |
- geneSymbols.split.vec <- unlist(geneSymbols.split) |
|
568 |
- tbl.expanded <- splitstackshape::expandRows(tbl.toExpand, counts, count.is.col=FALSE, drop=FALSE) |
|
569 |
- stopifnot(length(geneSymbols.split.vec) == nrow(tbl.expanded)) |
|
570 |
- tbl.expanded$geneSymbol <- geneSymbols.split.vec |
|
571 |
- tbl.out <- rbind(tbl.expanded, tbl.asIs) |
|
572 |
- } |
|
573 |
- } # indirect |
|
574 |
- tbl.out |
|
575 |
- }) |
|
531 |
+ function(object, tbl.withMotifs, source, expand.rows){ |
|
532 |
+ stopifnot("motifName" %in% colnames(tbl.withMotifs)) |
|
533 |
+ tbl.tf <- motifToGene(object, tbl.withMotifs$motifName, source) |
|
534 |
+ merge(tbl.withMotifs, tbl.tf, by.x="motifName", by.y="motif", all.x=TRUE) |
|
535 |
+ }) |
|
576 | 536 |
|
577 | 537 |
#------------------------------------------------------------------------------- |
538 |
+# setMethod('associateTranscriptionFactors', 'MotifList', |
|
539 |
+# |
|
540 |
+# function(object, tbl.withMotifs, source, expand.rows){ |
|
541 |
+# source <- tolower(source) |
|
542 |
+# stopifnot(source %in% c("motifdb", "tfclass")) |
|
543 |
+# tbl.out <- data.frame() |
|
544 |
+# if(source %in% c("motifdb")){ |
|
545 |
+# # lookup up in the object metadata, expect one TF geneSymbol per matrix name |
|
546 |
+# pfm.ids <- tbl.withMotifs[, "motifName"] |
|
547 |
+# matched.rows <- match(pfm.ids, names(as.list(object))) |
|
548 |
+# #if(length(matched.rows) == nrow(tbl.withMotifs)) { |
|
549 |
+# tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")] |
|
550 |
+# tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
551 |
+# tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
552 |
+# tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
553 |
+# } # direct |
|
554 |
+# if(source %in% c("tfclass")){ |
|
555 |
+# if(! "shortMotif" %in% colnames(tbl.withMotifs)){ |
|
556 |
+# stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source") |
|
557 |
+# } |
|
558 |
+# tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE) |
|
559 |
+# motif.ids <- tbl.withMotifs[, "shortMotif"] |
|
560 |
+# geneSymbols <- lapply(motif.ids, function(id) |
|
561 |
+# paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";")) |
|
562 |
+# geneSymbols <- unlist(geneSymbols) |
|
563 |
+# pubmedIds <- lapply(motif.ids, function(id) |
|
564 |
+# unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)])) |
|
565 |
+# pubmedIds <- as.character(pubmedIds) |
|
566 |
+# pubmedIds <- gsub("integer(0)", "", pubmedIds, fixed=TRUE) |
|
567 |
+# tbl.new <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE) |
|
568 |
+# tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
569 |
+# tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
570 |
+# tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
571 |
+# |
|
572 |
+# if(expand.rows){ |
|
573 |
+# rows.with.na <- which(is.na(tbl.out$geneSymbol)) |
|
574 |
+# rows.with.geneSymbol <- setdiff(1:nrow(tbl.out), rows.with.na) |
|
575 |
+# tbl.asIs <- tbl.out[rows.with.na,] |
|
576 |
+# tbl.toExpand <- tbl.out[rows.with.geneSymbol,] |
|
577 |
+# geneSymbols.split <- strsplit(tbl.toExpand$geneSymbol, ";") |
|
578 |
+# counts <- unlist(lapply(geneSymbols.split, length)) |
|
579 |
+# geneSymbols.split.vec <- unlist(geneSymbols.split) |
|
580 |
+# tbl.expanded <- splitstackshape::expandRows(tbl.toExpand, counts, count.is.col=FALSE, drop=FALSE) |
|
581 |
+# stopifnot(length(geneSymbols.split.vec) == nrow(tbl.expanded)) |
|
582 |
+# tbl.expanded$geneSymbol <- geneSymbols.split.vec |
|
583 |
+# tbl.out <- rbind(tbl.expanded, tbl.asIs) |
|
584 |
+# } |
|
585 |
+# } # indirect |
|
586 |
+# tbl.out |
|
587 |
+# }) |
|
588 |
+# |
|
589 |
+#------------------------------------------------------------------------------- |
... | ... |
@@ -1,5 +1,4 @@ |
1 |
-setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) standardGeneric ('query')) |
|
2 |
-setGeneric('query2', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query2')) |
|
1 |
+setGeneric('query', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query')) |
|
3 | 2 |
setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
4 | 3 |
setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif')) |
5 | 4 |
setGeneric('associateTranscriptionFactors', signature='object', |
... | ... |
@@ -271,17 +270,17 @@ setMethod('show', 'MotifList', |
271 | 270 |
} |
272 | 271 |
}) |
273 | 272 |
#------------------------------------------------------------------------------- |
274 |
-setMethod ('query', 'MotifList', |
|
275 |
- |
|
276 |
- function (object, queryString, ignore.case=TRUE) { |
|
277 |
- indices = unique (as.integer (unlist (sapply (colnames (mcols (object)), |
|
278 |
- function (colname) |
|
279 |
- grep (queryString, mcols (object)[, colname], |
|
280 |
- ignore.case=ignore.case))))) |
|
281 |
- object [indices] |
|
282 |
- }) |
|
273 |
+#setMethod ('query', 'MotifList', |
|
274 |
+# |
|
275 |
+# function (object, queryString, ignore.case=TRUE) { |
|
276 |
+# indices = unique (as.integer (unlist (sapply (colnames (mcols (object)), |
|
277 |
+# function (colname) |
|
278 |
+# grep (queryString, mcols (object)[, colname], |
|
279 |
+# ignore.case=ignore.case))))) |
|
280 |
+# object [indices] |
|
281 |
+# }) |
|
283 | 282 |
#------------------------------------------------------------------------------- |
284 |
-setMethod ('query2', 'MotifList', |
|
283 |
+setMethod ('query', 'MotifList', |
|
285 | 284 |
|
286 | 285 |
function (object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) { |
287 | 286 |
find.indices <- function(queryString) |
... | ... |
@@ -384,37 +383,103 @@ matrixToJasparText <- function (matrices) |
384 | 383 |
} # matrixToJasparText |
385 | 384 |
#------------------------------------------------------------------------------- |
386 | 385 |
# returns a data.frame with motif, geneSymbol, source, pubmedID columns |
386 |
+# setMethod ('oldMotifToGene', 'MotifList', |
|
387 |
+# |
|
388 |
+# function (object, motifs, source) { |
|
389 |
+# source <- tolower(source) |
|
390 |
+# stopifnot(source %in% c("motifdb", "tfclass")) |
|
391 |
+# tbl <- data.frame() |
|
392 |
+# if(source %in% c("motifdb")){ |
|
393 |
+# providerId <- NULL # avoid R CMD check note |
|
394 |
+# tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
|
395 |
+# if(nrow(tbl) == 0) |
|
396 |
+# return(data.frame()) |
|
397 |
+# tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
|
398 |
+# colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
|
399 |
+# tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
|
400 |
+# if(nrow(tbl) > 0) |
|
401 |
+# tbl$source <- "MotifDb" |
|
402 |
+# } |
|
403 |
+# if(source %in% c("tfclass")){ |
|
404 |
+# motif <- NULL |
|
405 |
+# tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
|
406 |
+# if(nrow(tbl) == 0) |
|
407 |
+# return(data.frame()) |
|
408 |
+# tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
|
409 |
+# tbl <- tbl[order(tbl$motif),] |
|
410 |
+# rownames(tbl) <- NULL |
|
411 |
+# colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
|
412 |
+# if(nrow(tbl) > 0) |
|
413 |
+# tbl$source <- "TFClass" |
|
414 |
+# } |
|
415 |
+# tbl |
|
416 |
+# }) # oldMotifToGene |
|
417 |
+# |
|
418 |
+#------------------------------------------------------------------------------- |
|
419 |
+# returns a data.frame with motif, geneSymbol, source, pubmedID columns |
|
387 | 420 |
setMethod ('motifToGene', 'MotifList', |
388 | 421 |
|
389 | 422 |
function (object, motifs, source) { |
423 |
+ # for MotifDb, motif names come in a variety of forms, and our first step |
|
424 |
+ # is to convert them all, if needed, into that which is found in |
|
425 |
+ # the "providerId" column of the MotifDb metadata table. |
|
426 |
+ # |
|
427 |
+ # first check to see if the supplied motif name is actually a MotifDb |
|
428 |
+ # matrix list name, e.g., Hsapiens-HOCOMOCOv10-IKZF1_HUMAN.H10MO.C |
|
429 |
+ # when those cases are discovered, they are translated to the matrices |
|
430 |
+ # providerId - which is our standard currency for lookup, using |
|
431 |
+ # the mcols (the metadata, the annotation) which accompanies |
|
432 |
+ # each pfm matrix |
|
433 |
+ |
|
434 |
+ name.map <- as.list(motifs) |
|
435 |
+ names(name.map) <- motifs |
|
436 |
+ for(i in seq_len(length(motifs))){ |
|
437 |
+ x <-match(motifs[i], names(MotifDb)); |
|
438 |
+ if(!is.na(x)){ |
|
439 |
+ newValue <- mcols(MotifDb[motifs[i]])$providerId |
|
440 |
+ names(name.map)[i] <- newValue |
|
441 |
+ motifs[i] <-newValue |
|
442 |
+ } |
|
443 |
+ } # for i |
|
444 |
+ #browser() |
|
390 | 445 |
source <- tolower(source) |
391 |
- stopifnot(source %in% c("motifdb", "tfclass")) |
|
392 |
- tbl <- data.frame() |
|
393 |
- if(source %in% c("motifdb")){ |
|
446 |
+ stopifnot(all(source %in% c("motifdb", "tfclass"))) |
|
447 |
+ tbl.mdb <- data.frame() |
|
448 |
+ if("motifdb" %in% source){ |
|
394 | 449 |
providerId <- NULL # avoid R CMD check note |
395 |
- tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
|
396 |
- if(nrow(tbl) == 0) |
|
450 |
+ tbl.mdb <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
|
451 |
+ if(nrow(tbl.mdb) == 0) |
|
397 | 452 |
return(data.frame()) |
398 |
- tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
|
399 |
- colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
|
400 |
- tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
|
401 |
- if(nrow(tbl) > 0) |
|
402 |
- tbl$source <- "MotifDb" |
|
403 |
- } |
|
404 |
- if(source %in% c("tfclass")){ |
|
453 |
+ tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
|
454 |
+ colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
|
455 |
+ tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
|
456 |
+ if(nrow(tbl.mdb) > 0){ |
|
457 |
+ tbl.mdb$source <- "MotifDb" |
|
458 |
+ tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "pubmedID", "organism", "source")] |
|
459 |
+ rownames(tbl.mdb) <- NULL |
|
460 |
+ } |
|
461 |
+ } # motifDb |
|
462 |
+ tbl.tfc <- data.frame() |
|
463 |
+ if("tfclass" %in% source){ |
|
405 | 464 |
motif <- NULL |
406 |
- tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
|
407 |
- if(nrow(tbl) == 0) |
|
465 |
+ tbl.tfc <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
|
466 |
+ if(nrow(tbl.tfc) == 0) |
|
408 | 467 |
return(data.frame()) |
409 |
- tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
|
410 |
- tbl <- tbl[order(tbl$motif),] |
|
411 |
- rownames(tbl) <- NULL |
|
412 |
- colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
|
413 |
- if(nrow(tbl) > 0) |
|
414 |
- tbl$source <- "TFClass" |
|
468 |
+ tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")]) |
|
469 |
+ tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),] |
|
470 |
+ rownames(tbl.tfc) <- NULL |
|
471 |
+ colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID") |
|
472 |
+ if(nrow(tbl.tfc) > 0){ |
|
473 |
+ tbl.tfc$source <- "TFClass" |
|
474 |
+ tbl.tfc$organism <- "Hsapiens" |
|
475 |
+ } |
|
415 | 476 |
} |
416 |
- tbl |
|
417 |
- }) |
|
477 |
+ tbl.out <- rbind(tbl.mdb, tbl.tfc) |
|
478 |
+ if(length(name.map) > 0) |
|
479 |
+ tbl.out$motif <- as.character(name.map[tbl.out$motif]) |
|
480 |
+ #browser() |
|
481 |
+ tbl.out |
|
482 |
+ }) |
|
418 | 483 |
|
419 | 484 |
#------------------------------------------------------------------------------- |
420 | 485 |
# returns a data.frame with motif, geneSymbol, source, pubmedID columns |
... | ... |
@@ -1,5 +1,5 @@ |
1 |
-setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) |
|
2 |
- standardGeneric ('query')) |
|
1 |
+setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) standardGeneric ('query')) |
|
2 |
+setGeneric('query2', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query2')) |
|
3 | 3 |
setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
4 | 4 |
setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif')) |
5 | 5 |
setGeneric('associateTranscriptionFactors', signature='object', |
... | ... |
@@ -281,6 +281,45 @@ setMethod ('query', 'MotifList', |
281 | 281 |
object [indices] |
282 | 282 |
}) |
283 | 283 |
#------------------------------------------------------------------------------- |
284 |
+setMethod ('query2', 'MotifList', |
|
285 |
+ |
|
286 |
+ function (object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) { |
|
287 |
+ find.indices <- function(queryString) |
|
288 |
+ {unique(as.integer(unlist(sapply(colnames(mcols(object)), |
|
289 |
+ function(colname) grep(queryString, mcols(object)[,colname],ignore.case=ignore.case))))) |
|
290 |
+ } |
|
291 |
+ # setup defaults |
|
292 |
+ and.indices <- list(seq_len(length(object))) |
|
293 |
+ or.indices <- list(seq_len(length(object))) |
|
294 |
+ not.indices <- list(c()) |
|
295 |
+ |
|
296 |
+ if(length(andStrings) > 0) |
|
297 |
+ and.indices <- lapply(andStrings, find.indices) |
|
298 |
+ |
|
299 |
+ if(length(orStrings) > 0) |
|
300 |
+ or.indices <- lapply(orStrings, find.indices) |
|
301 |
+ |
|
302 |
+ if(length(notStrings) > 0) |
|
303 |
+ not.indices <- lapply(notStrings, find.indices) |
|
304 |
+ |
|
305 |
+ # start with the indices of all elements |
|
306 |
+ final.indices <- seq_len(length(object)) |
|
307 |
+ |
|
308 |
+ # get the cumulative intersection of all the "and" terms |
|
309 |
+ # this steadily dimishes the set of indices |
|
310 |
+ for(indices in and.indices) |
|
311 |
+ final.indices <- intersect(final.indices, indices) |
|
312 |
+ |
|
313 |
+ # lump all of the "or" terms together: they all get included |
|
314 |
+ final.indices <- intersect(unlist(or.indices), final.indices) |
|
315 |
+ |
|
316 |
+ # finally reduce the set to exclude all indices of all "not" terms |
|
317 |
+ for(indices in not.indices) |
|
318 |
+ final.indices <- setdiff(final.indices, indices) |
|
319 |
+ |
|
320 |
+ object [final.indices] |
|
321 |
+ }) |
|
322 |
+#------------------------------------------------------------------------------- |
|
284 | 323 |
# Addition on 2017/06/15 from Matt Richards |
285 | 324 |
|
286 | 325 |
# This will not exactly match JASPAR because units are PFM and JASPAR uses PCM |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) |
2 | 2 |
standardGeneric ('query')) |
3 | 3 |
setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
4 |
-setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source) standardGeneric('geneToMotif')) |
|
4 |
+setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif')) |
|
5 | 5 |
setGeneric('associateTranscriptionFactors', signature='object', |
6 | 6 |
function(object, tbl.withMotifs, source, expand.rows) standardGeneric('associateTranscriptionFactors')) |
7 | 7 |
#------------------------------------------------------------------------------------------------------------------------ |
... | ... |
@@ -359,7 +359,8 @@ setMethod ('motifToGene', 'MotifList', |
359 | 359 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
360 | 360 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
361 | 361 |
tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
362 |
- tbl$source <- "MotifDb" |
|
362 |
+ if(nrow(tbl) > 0) |
|
363 |
+ tbl$source <- "MotifDb" |
|
363 | 364 |
} |
364 | 365 |
if(source %in% c("tfclass")){ |
365 | 366 |
motif <- NULL |
... | ... |
@@ -370,7 +371,8 @@ setMethod ('motifToGene', 'MotifList', |
370 | 371 |
tbl <- tbl[order(tbl$motif),] |
371 | 372 |
rownames(tbl) <- NULL |
372 | 373 |
colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
373 |
- tbl$source <- "TFClass" |
|
374 |
+ if(nrow(tbl) > 0) |
|
375 |
+ tbl$source <- "TFClass" |
|
374 | 376 |
} |
375 | 377 |
tbl |
376 | 378 |
}) |
... | ... |
@@ -379,12 +381,16 @@ setMethod ('motifToGene', 'MotifList', |
379 | 381 |
# returns a data.frame with motif, geneSymbol, source, pubmedID columns |
380 | 382 |
setMethod ('geneToMotif', 'MotifList', |
381 | 383 |
|
382 |
- function (object, geneSymbols, source) { |
|
384 |
+ function (object, geneSymbols, source, ignore.case=FALSE) { |
|
383 | 385 |
source <- tolower(source) |
384 | 386 |
stopifnot(source %in% c("motifdb", "tfclass")) |
385 | 387 |
extract.mdb <- function(gene){ |
386 | 388 |
geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable" |
387 |
- tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene)) |
|
389 |
+ if(ignore.case) |
|
390 |
+ tbl <- as.data.frame(subset(mcols(object), tolower(geneSymbol) == tolower(gene))) |
|
391 |
+ else |
|
392 |
+ tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene)) |
|
393 |
+ |
|
388 | 394 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
389 | 395 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
390 | 396 |
tbl |
... | ... |
@@ -392,17 +398,22 @@ setMethod ('geneToMotif', 'MotifList', |
392 | 398 |
if(source %in% c("motifdb")){ |
393 | 399 |
tbls <- lapply(geneSymbols, extract.mdb) |
394 | 400 |
result <- do.call(rbind, tbls) |
395 |
- result$source <- "MotifDb" |
|
401 |
+ if(nrow(result) > 0) |
|
402 |
+ result$source <- "MotifDb" |
|
396 | 403 |
} |
397 | 404 |
if(source %in% c("tfclass")){ |
398 |
- tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols) |
|
405 |
+ if(ignore.case) |
|
406 |
+ tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tolower(tf.gene) %in% tolower(geneSymbols)) |
|
407 |
+ else |
|
408 |
+ tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols) |
|
399 | 409 |
tf.gene <- NULL; motif <- NULL # workaround R CMD CHECK "no visible binding ..." bogus error |
400 | 410 |
tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
401 | 411 |
tbl <- tbl[order(tbl$tf.gene),] |
402 | 412 |
rownames(tbl) <- NULL |
403 | 413 |
colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
404 | 414 |
result <- tbl[, c("geneSymbol", "motif", "pubmedID")] |
405 |
- result$source <- "TFClass" |
|
415 |
+ if(nrow(result) > 0) |
|
416 |
+ result$source <- "TFClass" |
|
406 | 417 |
} |
407 | 418 |
result |
408 | 419 |
}) |
... | ... |
@@ -352,23 +352,25 @@ setMethod ('motifToGene', 'MotifList', |
352 | 352 |
stopifnot(source %in% c("motifdb", "tfclass")) |
353 | 353 |
tbl <- data.frame() |
354 | 354 |
if(source %in% c("motifdb")){ |
355 |
+ providerId <- NULL # avoid R CMD check note |
|
355 | 356 |
tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
356 | 357 |
if(nrow(tbl) == 0) |
357 | 358 |
return(data.frame()) |
358 | 359 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
359 | 360 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
360 | 361 |
tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
361 |
- tbl$from <- "MotifDb" |
|
362 |
+ tbl$source <- "MotifDb" |
|
362 | 363 |
} |
363 | 364 |
if(source %in% c("tfclass")){ |
365 |
+ motif <- NULL |
|
364 | 366 |
tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
365 | 367 |
if(nrow(tbl) == 0) |
366 | 368 |
return(data.frame()) |
367 | 369 |
tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
368 | 370 |
tbl <- tbl[order(tbl$motif),] |
369 | 371 |
rownames(tbl) <- NULL |
370 |
- colnames(tbl) <- c("motif", "gene", "pubmedID") |
|
371 |
- tbl$from <- "TFClass" |
|
372 |
+ colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
|
373 |
+ tbl$source <- "TFClass" |
|
372 | 374 |
} |
373 | 375 |
tbl |
374 | 376 |
}) |
... | ... |
@@ -380,8 +382,8 @@ setMethod ('geneToMotif', 'MotifList', |
380 | 382 |
function (object, geneSymbols, source) { |
381 | 383 |
source <- tolower(source) |
382 | 384 |
stopifnot(source %in% c("motifdb", "tfclass")) |
383 |
- #browser() |
|
384 | 385 |
extract.mdb <- function(gene){ |
386 |
+ geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable" |
|
385 | 387 |
tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene)) |
386 | 388 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
387 | 389 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
... | ... |
@@ -390,16 +392,17 @@ setMethod ('geneToMotif', 'MotifList', |
390 | 392 |
if(source %in% c("motifdb")){ |
391 | 393 |
tbls <- lapply(geneSymbols, extract.mdb) |
392 | 394 |
result <- do.call(rbind, tbls) |
393 |
- result$from <- "MotifDb" |
|
395 |
+ result$source <- "MotifDb" |
|
394 | 396 |
} |
395 | 397 |
if(source %in% c("tfclass")){ |
396 | 398 |
tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols) |
399 |
+ tf.gene <- NULL; motif <- NULL # workaround R CMD CHECK "no visible binding ..." bogus error |
|
397 | 400 |
tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
398 | 401 |
tbl <- tbl[order(tbl$tf.gene),] |
399 | 402 |
rownames(tbl) <- NULL |
400 |
- colnames(tbl) <- c("motif", "gene", "pubmedID") |
|
401 |
- result <- tbl[, c("gene", "motif", "pubmedID")] |
|
402 |
- result$from <- "TFClass" |
|
403 |
+ colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
|
404 |
+ result <- tbl[, c("geneSymbol", "motif", "pubmedID")] |
|
405 |
+ result$source <- "TFClass" |
|
403 | 406 |
} |
404 | 407 |
result |
405 | 408 |
}) |
... | ... |
@@ -353,14 +353,17 @@ setMethod ('motifToGene', 'MotifList', |
353 | 353 |
tbl <- data.frame() |
354 | 354 |
if(source %in% c("motifdb")){ |
355 | 355 |
tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
356 |
+ if(nrow(tbl) == 0) |
|
357 |
+ return(data.frame()) |
|
356 | 358 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
357 | 359 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
358 |
- tbl |
|
359 | 360 |
tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
360 | 361 |
tbl$from <- "MotifDb" |
361 | 362 |
} |
362 | 363 |
if(source %in% c("tfclass")){ |
363 | 364 |
tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
365 |
+ if(nrow(tbl) == 0) |
|
366 |
+ return(data.frame()) |
|
364 | 367 |
tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
365 | 368 |
tbl <- tbl[order(tbl$motif),] |
366 | 369 |
rownames(tbl) <- NULL |
... | ... |
@@ -424,9 +427,11 @@ setMethod('associateTranscriptionFactors', 'MotifList', |
424 | 427 |
} |
425 | 428 |
tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE) |
426 | 429 |
motif.ids <- tbl.withMotifs[, "shortMotif"] |
427 |
- geneSymbols <- lapply(motif.ids, function(id) paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif)], collapse=";")) |
|
430 |
+ geneSymbols <- lapply(motif.ids, function(id) |
|
431 |
+ paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";")) |
|
428 | 432 |
geneSymbols <- unlist(geneSymbols) |
429 |
- pubmedIds <- lapply(motif.ids, function(id) unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif)])) |
|
433 |
+ pubmedIds <- lapply(motif.ids, function(id) |
|
434 |
+ unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)])) |
|
430 | 435 |
pubmedIds <- as.character(pubmedIds) |
431 | 436 |
pubmedIds <- gsub("integer(0)", "", pubmedIds, fixed=TRUE) |
432 | 437 |
tbl.new <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE) |
... | ... |
@@ -348,9 +348,10 @@ matrixToJasparText <- function (matrices) |
348 | 348 |
setMethod ('motifToGene', 'MotifList', |
349 | 349 |
|
350 | 350 |
function (object, motifs, source) { |
351 |
- stopifnot(source %in% c("MotifDb", "TFClass")) |
|
351 |
+ source <- tolower(source) |
|
352 |
+ stopifnot(source %in% c("motifdb", "tfclass")) |
|
352 | 353 |
tbl <- data.frame() |
353 |
- if(source %in% c("MotifDb")){ |
|
354 |
+ if(source %in% c("motifdb")){ |
|
354 | 355 |
tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
355 | 356 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
356 | 357 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
... | ... |
@@ -358,7 +359,7 @@ setMethod ('motifToGene', 'MotifList', |
358 | 359 |
tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
359 | 360 |
tbl$from <- "MotifDb" |
360 | 361 |
} |
361 |
- if(source %in% c("TFClass")){ |
|
362 |
+ if(source %in% c("tfclass")){ |
|
362 | 363 |
tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
363 | 364 |
tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
364 | 365 |
tbl <- tbl[order(tbl$motif),] |
... | ... |
@@ -374,7 +375,8 @@ setMethod ('motifToGene', 'MotifList', |
374 | 375 |
setMethod ('geneToMotif', 'MotifList', |
375 | 376 |
|
376 | 377 |
function (object, geneSymbols, source) { |
377 |
- stopifnot(source %in% c("MotifDb", "TFClass")) |
|
378 |
+ source <- tolower(source) |
|
379 |
+ stopifnot(source %in% c("motifdb", "tfclass")) |
|
378 | 380 |
#browser() |
379 | 381 |
extract.mdb <- function(gene){ |
380 | 382 |
tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene)) |
... | ... |
@@ -382,12 +384,12 @@ setMethod ('geneToMotif', 'MotifList', |
382 | 384 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
383 | 385 |
tbl |
384 | 386 |
} |
385 |
- if(source %in% c("MotifDb")){ |
|
387 |
+ if(source %in% c("motifdb")){ |
|
386 | 388 |
tbls <- lapply(geneSymbols, extract.mdb) |
387 | 389 |
result <- do.call(rbind, tbls) |
388 | 390 |
result$from <- "MotifDb" |
389 | 391 |
} |
390 |
- if(source %in% c("TFClass")){ |
|
392 |
+ if(source %in% c("tfclass")){ |
|
391 | 393 |
tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols) |
392 | 394 |
tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
393 | 395 |
tbl <- tbl[order(tbl$tf.gene),] |
... | ... |
@@ -403,9 +405,10 @@ setMethod ('geneToMotif', 'MotifList', |
403 | 405 |
setMethod('associateTranscriptionFactors', 'MotifList', |
404 | 406 |
|
405 | 407 |
function(object, tbl.withMotifs, source, expand.rows){ |
406 |
- stopifnot(source %in% c("MotifDb", "TFClass")) |
|
408 |
+ source <- tolower(source) |
|
409 |
+ stopifnot(source %in% c("motifdb", "tfclass")) |
|
407 | 410 |
tbl.out <- data.frame() |
408 |
- if(source %in% c("MotifDb")){ |
|
411 |
+ if(source %in% c("motifdb")){ |
|
409 | 412 |
# lookup up in the object metadata, expect one TF geneSymbol per matrix name |
410 | 413 |
pfm.ids <- tbl.withMotifs[, "motifName"] |
411 | 414 |
matched.rows <- match(pfm.ids, names(as.list(object))) |
... | ... |
@@ -415,7 +418,7 @@ setMethod('associateTranscriptionFactors', 'MotifList', |
415 | 418 |
tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
416 | 419 |
tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
417 | 420 |
} # direct |
418 |
- if(source %in% c("TFClass")){ |
|
421 |
+ if(source %in% c("tfclass")){ |
|
419 | 422 |
if(! "shortMotif" %in% colnames(tbl.withMotifs)){ |
420 | 423 |
stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source") |
421 | 424 |
} |
... | ... |
@@ -3,8 +3,7 @@ setGeneric('query', signature='object', function(object, queryString, ignore.cas |
3 | 3 |
setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
4 | 4 |
setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source) standardGeneric('geneToMotif')) |
5 | 5 |
setGeneric('associateTranscriptionFactors', signature='object', |
6 |
- function(object, tbl.withMotifs, motif.column.name, source, expand.rows) |
|
7 |
- standardGeneric('associateTranscriptionFactors')) |
|
6 |
+ function(object, tbl.withMotifs, source, expand.rows) standardGeneric('associateTranscriptionFactors')) |
|
8 | 7 |
#------------------------------------------------------------------------------------------------------------------------ |
9 | 8 |
setClass ('MotifList', |
10 | 9 |
contains='SimpleList', |
... | ... |
@@ -400,23 +399,25 @@ setMethod ('geneToMotif', 'MotifList', |
400 | 399 |
#------------------------------------------------------------------------------- |
401 | 400 |
setMethod('associateTranscriptionFactors', 'MotifList', |
402 | 401 |
|
403 |
- function(object, tbl.withMotifs, motif.column.name, source, expand.rows){ |
|
402 |
+ function(object, tbl.withMotifs, source, expand.rows){ |
|
404 | 403 |
stopifnot(source %in% c("MotifDb", "TFClass")) |
405 | 404 |
tbl.out <- data.frame() |
406 | 405 |
if(source %in% c("MotifDb")){ |
407 |
- # "direct" means: lookup up in the object metadata, expect one TF geneSymbol per matrix name |
|
408 |
- pfm.ids <- tbl.withMotifs[, motif.column.name] |
|
406 |
+ # lookup up in the object metadata, expect one TF geneSymbol per matrix name |
|
407 |
+ pfm.ids <- tbl.withMotifs[, "motifName"] |
|
409 | 408 |
matched.rows <- match(pfm.ids, names(as.list(object))) |
410 | 409 |
#if(length(matched.rows) == nrow(tbl.withMotifs)) { |
411 | 410 |
tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")] |