... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: MotifDb |
2 | 2 |
Type: Package |
3 | 3 |
Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs |
4 |
-Version: 1.19.22 |
|
5 |
-Date: 2018-01-23 |
|
4 |
+Version: 1.21.2 |
|
5 |
+Date: 2018-01-31 |
|
6 | 6 |
Author: Paul Shannon, Matt Richards |
7 | 7 |
Maintainer: Paul Shannon <pshannon@systemsbiology.org> |
8 | 8 |
Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) |
2 | 2 |
standardGeneric ('query')) |
3 | 3 |
setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
4 |
-setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source) standardGeneric('geneToMotif')) |
|
4 |
+setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif')) |
|
5 | 5 |
setGeneric('associateTranscriptionFactors', signature='object', |
6 | 6 |
function(object, tbl.withMotifs, source, expand.rows) standardGeneric('associateTranscriptionFactors')) |
7 | 7 |
#------------------------------------------------------------------------------------------------------------------------ |
... | ... |
@@ -359,7 +359,8 @@ setMethod ('motifToGene', 'MotifList', |
359 | 359 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
360 | 360 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
361 | 361 |
tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
362 |
- tbl$source <- "MotifDb" |
|
362 |
+ if(nrow(tbl) > 0) |
|
363 |
+ tbl$source <- "MotifDb" |
|
363 | 364 |
} |
364 | 365 |
if(source %in% c("tfclass")){ |
365 | 366 |
motif <- NULL |
... | ... |
@@ -370,7 +371,8 @@ setMethod ('motifToGene', 'MotifList', |
370 | 371 |
tbl <- tbl[order(tbl$motif),] |
371 | 372 |
rownames(tbl) <- NULL |
372 | 373 |
colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
373 |
- tbl$source <- "TFClass" |
|
374 |
+ if(nrow(tbl) > 0) |
|
375 |
+ tbl$source <- "TFClass" |
|
374 | 376 |
} |
375 | 377 |
tbl |
376 | 378 |
}) |
... | ... |
@@ -379,12 +381,16 @@ setMethod ('motifToGene', 'MotifList', |
379 | 381 |
# returns a data.frame with motif, geneSymbol, source, pubmedID columns |
380 | 382 |
setMethod ('geneToMotif', 'MotifList', |
381 | 383 |
|
382 |
- function (object, geneSymbols, source) { |
|
384 |
+ function (object, geneSymbols, source, ignore.case=FALSE) { |
|
383 | 385 |
source <- tolower(source) |
384 | 386 |
stopifnot(source %in% c("motifdb", "tfclass")) |
385 | 387 |
extract.mdb <- function(gene){ |
386 | 388 |
geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable" |
387 |
- tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene)) |
|
389 |
+ if(ignore.case) |
|
390 |
+ tbl <- as.data.frame(subset(mcols(object), tolower(geneSymbol) == tolower(gene))) |
|
391 |
+ else |
|
392 |
+ tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene)) |
|
393 |
+ |
|
388 | 394 |
tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
389 | 395 |
colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
390 | 396 |
tbl |
... | ... |
@@ -392,17 +398,22 @@ setMethod ('geneToMotif', 'MotifList', |
392 | 398 |
if(source %in% c("motifdb")){ |
393 | 399 |
tbls <- lapply(geneSymbols, extract.mdb) |
394 | 400 |
result <- do.call(rbind, tbls) |
395 |
- result$source <- "MotifDb" |
|
401 |
+ if(nrow(result) > 0) |
|
402 |
+ result$source <- "MotifDb" |
|
396 | 403 |
} |
397 | 404 |
if(source %in% c("tfclass")){ |
398 |
- tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols) |
|
405 |
+ if(ignore.case) |
|
406 |
+ tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tolower(tf.gene) %in% tolower(geneSymbols)) |
|
407 |
+ else |
|
408 |
+ tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols) |
|
399 | 409 |
tf.gene <- NULL; motif <- NULL # workaround R CMD CHECK "no visible binding ..." bogus error |
400 | 410 |
tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
401 | 411 |
tbl <- tbl[order(tbl$tf.gene),] |
402 | 412 |
rownames(tbl) <- NULL |
403 | 413 |
colnames(tbl) <- c("motif", "geneSymbol", "pubmedID") |
404 | 414 |
result <- tbl[, c("geneSymbol", "motif", "pubmedID")] |
405 |
- result$source <- "TFClass" |
|
415 |
+ if(nrow(result) > 0) |
|
416 |
+ result$source <- "TFClass" |
|
406 | 417 |
} |
407 | 418 |
result |
408 | 419 |
}) |
... | ... |
@@ -43,6 +43,7 @@ runTests = function () |
43 | 43 |
|
44 | 44 |
test.geneToMotif() |
45 | 45 |
test.geneToMotif.ignore.jasparSuffixes() |
46 |
+ test.geneToMotif.oneGene.noMotifs |
|
46 | 47 |
test.motifToGene() |
47 | 48 |
|
48 | 49 |
test.associateTranscriptionFactors() |
... | ... |
@@ -762,13 +763,15 @@ test.geneToMotif <- function() |
762 | 763 |
printf("--- test.geneToMotif") |
763 | 764 |
mdb <- MotifDb |
764 | 765 |
|
765 |
- genes <- c("FOS", "ATF5", "bogus") |
|
766 |
- |
|
766 |
+ genes <- c("FOS", "ATF5", "bogus", "SATB2") |
|
767 |
+ good.genes <- genes[-which(genes=="bogus")] |
|
767 | 768 |
# use TFClass family classifcation |
768 | 769 |
tbl.tfClass <- geneToMotif(mdb, genes, source="TfClaSS") # intentional mis-capitalization |
769 |
- checkEquals(sort(tbl.tfClass$gene), sort(c("ATF5", "FOS", "FOS"))) |
|
770 |
- checkEquals(sort(tbl.tfClass$motif), sort(c("MA0833.1", "MA0099.2", "MA0476.1"))) |
|
771 |
- checkEquals(tbl.tfClass$source, rep("TFClass", 3)) |
|
770 |
+ checkTrue(all(good.genes %in% tbl.tfClass$gene)) |
|
771 |
+ |
|
772 |
+ expected.motifs <- c("MA0833.1", "MA0099.2", "MA0476.1", "MA0679.1", "MA0754.1", "MA0755.1", "MA0756.1", "MA0757.1") |
|
773 |
+ checkTrue(all(expected.motifs %in% tbl.tfClass$motif)) |
|
774 |
+ checkEquals(unique(tbl.tfClass$source), "TFClass") |
|
772 | 775 |
|
773 | 776 |
# MotifDb mode uses the MotifDb metadata, pulled from many sources |
774 | 777 |
tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb") # intentional mis-capitalization |
... | ... |
@@ -778,8 +781,30 @@ test.geneToMotif <- function() |
778 | 781 |
# MotifDb for ATF5 |
779 | 782 |
# todo: compare the MA0110596_1.02 matrix of cisp_1.02 to japar MA0833.1 |
780 | 783 |
|
784 |
+ # check use of ignore.case |
|
785 |
+ tbl.caseSensitive <- geneToMotif(MotifDb, "STAT4", source="MotifDb") |
|
786 |
+ checkEquals(length(grep("jaspar", tbl.caseSensitive$dataSource, ignore.case=TRUE)), 0) |
|
787 |
+ tbl.caseInsensitive <- geneToMotif(MotifDb, "STAT4", source="MotifDb", ignore.case=TRUE) |
|
788 |
+ checkTrue(length(grep("jaspar", tbl.caseInsensitive$dataSource, ignore.case=TRUE)) >= 3) |
|
789 |
+ |
|
790 |
+ tbl.caseSensitive <- geneToMotif(MotifDb, "stat4", source="TFclass") |
|
791 |
+ checkEquals(nrow(tbl.caseSensitive), 0) |
|
792 |
+ tbl.caseInsensitive <- geneToMotif(MotifDb, "stat4", source="TFclass", ignore.case=TRUE) |
|
793 |
+ checkTrue(nrow(tbl.caseInsensitive) >= 5) |
|
794 |
+ |
|
781 | 795 |
} # test.geneToMotif |
782 | 796 |
#------------------------------------------------------------------------------------------------------------------------ |
797 |
+# this case discovered (31 jan 2018). when called on a gene/source combination for which there are |
|
798 |
+# no motifs, i attempted to add the mapping source (either "MotifDb", "TFClass") as a column |
|
799 |
+# to an empty data.frame. check for that and its fix here |
|
800 |
+test.geneToMotif.oneGene.noMotifs <- function() |
|
801 |
+{ |
|
802 |
+ checkEquals(nrow(geneToMotif(MotifDb, "SATB2", "MotifDb")), 0) |
|
803 |
+ checkEquals(nrow(geneToMotif(MotifDb, "bogus-arandum", "MotifDb")), 0) |
|
804 |
+ checkEquals(nrow(geneToMotif(MotifDb, "bogus-arandum", "TFclass")), 0) |
|
805 |
+ |
|
806 |
+} # test.geneToMotif.oneGene.noMotifs |
|
807 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
783 | 808 |
# sad to say I do not recall what problem/fix is tested here (pshannon, 23 jan 2018). |
784 | 809 |
# however, it demonstrates the variety of results which can be returned by non-jaspar datasets |
785 | 810 |
# when using the MotifDb mapping source, and the relative paucity which is sometimes |
... | ... |
@@ -14,12 +14,13 @@ The MotifDb source is in fact the usually 1:1 gene/motif mapping |
14 | 14 |
provided by each of the data sources upon which MotifDb is built. |
15 | 15 |
} |
16 | 16 |
\usage{ |
17 |
-\S4method{geneToMotif}{MotifList}(object, geneSymbols, source) |
|
17 |
+\S4method{geneToMotif}{MotifList}(object, geneSymbols, source, ignore.case) |
|
18 | 18 |
} |
19 | 19 |
\arguments{ |
20 | 20 |
\item{object}{a \code{MotifList} object.} |
21 | 21 |
\item{geneSymbols}{a \code{character} string} |
22 |
- \item{source}{a \code{character} string, either 'MotifDb' or "TFclass' (case insensitive)} |
|
22 |
+ \item{source}{a \code{character} string, either 'MotifDb' or 'TFclass' (case insensitive)} |
|
23 |
+ \item{ignore.case}{a \code{logical} variable, default FALSE, guiding gene name matching} |
|
23 | 24 |
} |
24 | 25 |
|
25 | 26 |
\value{ |