Browse code

motifToGene and geneToMotif now guard against emtpy tables, ignore.cae option added

paul-shannon authored on 02/02/2018 00:04:51
Showing 4 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.19.22
5
-Date: 2018-01-23
4
+Version: 1.21.2
5
+Date: 2018-01-31
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8 8
 Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings
... ...
@@ -1,7 +1,7 @@
1 1
 setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE)
2 2
               standardGeneric ('query'))
3 3
 setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene'))
4
-setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source) standardGeneric('geneToMotif'))
4
+setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif'))
5 5
 setGeneric('associateTranscriptionFactors', signature='object',
6 6
            function(object, tbl.withMotifs,  source, expand.rows) standardGeneric('associateTranscriptionFactors'))
7 7
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -359,7 +359,8 @@ setMethod ('motifToGene', 'MotifList',
359 359
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
360 360
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
361 361
         tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
362
-        tbl$source <- "MotifDb"
362
+        if(nrow(tbl) > 0)
363
+           tbl$source <- "MotifDb"
363 364
         }
364 365
      if(source %in% c("tfclass")){
365 366
         motif <- NULL
... ...
@@ -370,7 +371,8 @@ setMethod ('motifToGene', 'MotifList',
370 371
         tbl <- tbl[order(tbl$motif),]
371 372
         rownames(tbl) <- NULL
372 373
         colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
373
-        tbl$source <- "TFClass"
374
+        if(nrow(tbl) > 0)
375
+           tbl$source <- "TFClass"
374 376
         }
375 377
      tbl
376 378
      })
... ...
@@ -379,12 +381,16 @@ setMethod ('motifToGene', 'MotifList',
379 381
 # returns a data.frame with motif, geneSymbol, source, pubmedID columns
380 382
 setMethod ('geneToMotif', 'MotifList',
381 383
 
382
-   function (object, geneSymbols, source) {
384
+   function (object, geneSymbols, source, ignore.case=FALSE) {
383 385
      source <- tolower(source)
384 386
      stopifnot(source %in% c("motifdb", "tfclass"))
385 387
      extract.mdb <- function(gene){
386 388
         geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable"
387
-        tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene))
389
+        if(ignore.case)
390
+           tbl <- as.data.frame(subset(mcols(object), tolower(geneSymbol) == tolower(gene)))
391
+        else
392
+           tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene))
393
+
388 394
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
389 395
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
390 396
         tbl
... ...
@@ -392,17 +398,22 @@ setMethod ('geneToMotif', 'MotifList',
392 398
      if(source %in% c("motifdb")){
393 399
         tbls <- lapply(geneSymbols, extract.mdb)
394 400
         result <- do.call(rbind, tbls)
395
-        result$source <- "MotifDb"
401
+        if(nrow(result) > 0)
402
+           result$source <- "MotifDb"
396 403
         }
397 404
      if(source %in% c("tfclass")){
398
-        tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols)
405
+        if(ignore.case)
406
+           tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tolower(tf.gene) %in% tolower(geneSymbols))
407
+        else
408
+           tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols)
399 409
         tf.gene <- NULL; motif <- NULL  # workaround R CMD CHECK "no visible binding ..." bogus error
400 410
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
401 411
         tbl <- tbl[order(tbl$tf.gene),]
402 412
         rownames(tbl) <- NULL
403 413
         colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
404 414
         result <- tbl[, c("geneSymbol", "motif", "pubmedID")]
405
-        result$source <- "TFClass"
415
+        if(nrow(result) > 0)
416
+           result$source <- "TFClass"
406 417
         }
407 418
      result
408 419
      })
... ...
@@ -43,6 +43,7 @@ runTests = function ()
43 43
 
44 44
   test.geneToMotif()
45 45
   test.geneToMotif.ignore.jasparSuffixes()
46
+  test.geneToMotif.oneGene.noMotifs
46 47
   test.motifToGene()
47 48
 
48 49
   test.associateTranscriptionFactors()
... ...
@@ -762,13 +763,15 @@ test.geneToMotif <- function()
762 763
    printf("--- test.geneToMotif")
763 764
    mdb <- MotifDb
764 765
 
765
-   genes <- c("FOS", "ATF5", "bogus")
766
-
766
+   genes <- c("FOS", "ATF5", "bogus", "SATB2")
767
+   good.genes <- genes[-which(genes=="bogus")]
767 768
       # use  TFClass family classifcation
768 769
    tbl.tfClass <- geneToMotif(mdb, genes, source="TfClaSS")   # intentional mis-capitalization
769
-   checkEquals(sort(tbl.tfClass$gene),  sort(c("ATF5", "FOS", "FOS")))
770
-   checkEquals(sort(tbl.tfClass$motif),  sort(c("MA0833.1", "MA0099.2", "MA0476.1")))
771
-   checkEquals(tbl.tfClass$source, rep("TFClass", 3))
770
+   checkTrue(all(good.genes %in% tbl.tfClass$gene))
771
+
772
+   expected.motifs <- c("MA0833.1", "MA0099.2", "MA0476.1", "MA0679.1", "MA0754.1", "MA0755.1", "MA0756.1", "MA0757.1")
773
+   checkTrue(all(expected.motifs %in% tbl.tfClass$motif))
774
+   checkEquals(unique(tbl.tfClass$source), "TFClass")
772 775
 
773 776
       # MotifDb mode uses the MotifDb metadata, pulled from many sources
774 777
    tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
... ...
@@ -778,8 +781,30 @@ test.geneToMotif <- function()
778 781
       # MotifDb for ATF5
779 782
       # todo: compare the MA0110596_1.02 matrix of cisp_1.02 to japar MA0833.1
780 783
 
784
+     # check use of ignore.case
785
+   tbl.caseSensitive <-  geneToMotif(MotifDb, "STAT4", source="MotifDb")
786
+   checkEquals(length(grep("jaspar", tbl.caseSensitive$dataSource, ignore.case=TRUE)), 0)
787
+   tbl.caseInsensitive <-  geneToMotif(MotifDb, "STAT4", source="MotifDb", ignore.case=TRUE)
788
+   checkTrue(length(grep("jaspar", tbl.caseInsensitive$dataSource, ignore.case=TRUE)) >= 3)
789
+
790
+   tbl.caseSensitive <-  geneToMotif(MotifDb, "stat4", source="TFclass")
791
+   checkEquals(nrow(tbl.caseSensitive), 0)
792
+   tbl.caseInsensitive <-  geneToMotif(MotifDb, "stat4", source="TFclass", ignore.case=TRUE)
793
+   checkTrue(nrow(tbl.caseInsensitive) >= 5)
794
+
781 795
 } # test.geneToMotif
782 796
 #------------------------------------------------------------------------------------------------------------------------
797
+# this case discovered (31 jan 2018). when called on a gene/source combination for which there are
798
+# no motifs, i attempted to add the mapping source (either "MotifDb", "TFClass") as a column
799
+# to an empty data.frame.  check for that and its fix here
800
+test.geneToMotif.oneGene.noMotifs <- function()
801
+{
802
+   checkEquals(nrow(geneToMotif(MotifDb, "SATB2", "MotifDb")), 0)
803
+   checkEquals(nrow(geneToMotif(MotifDb, "bogus-arandum", "MotifDb")), 0)
804
+   checkEquals(nrow(geneToMotif(MotifDb, "bogus-arandum", "TFclass")), 0)
805
+
806
+} # test.geneToMotif.oneGene.noMotifs
807
+#------------------------------------------------------------------------------------------------------------------------
783 808
 # sad to say I do not recall what problem/fix is tested here (pshannon, 23 jan 2018).
784 809
 # however, it demonstrates the variety of results which can be returned by non-jaspar datasets
785 810
 # when using the MotifDb mapping source, and the relative paucity which is sometimes
... ...
@@ -14,12 +14,13 @@ The MotifDb source is in fact the usually 1:1 gene/motif mapping
14 14
 provided by each of the data sources upon which MotifDb is built.
15 15
 }
16 16
 \usage{
17
-\S4method{geneToMotif}{MotifList}(object, geneSymbols, source)
17
+\S4method{geneToMotif}{MotifList}(object, geneSymbols, source, ignore.case)
18 18
 }
19 19
 \arguments{
20 20
   \item{object}{a \code{MotifList} object.}
21 21
   \item{geneSymbols}{a \code{character} string}
22
-  \item{source}{a \code{character} string, either 'MotifDb' or "TFclass' (case insensitive)}
22
+  \item{source}{a \code{character} string, either 'MotifDb' or 'TFclass' (case insensitive)}
23
+  \item{ignore.case}{a \code{logical} variable, default FALSE, guiding gene name matching}
23 24
 }
24 25
 
25 26
 \value{