Browse code

added unit test to clarify geneToMotif results

paul-shannon authored on 23/01/2018 16:54:36
Showing 2 changed files

... ...
@@ -1,14 +1,14 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.19.19
5
-Date: 2017-12-07
4
+Version: 1.19.20
5
+Date: 2018-01-23
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8 8
 Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings
9 9
 Suggests: RUnit, seqLogo, MotIV
10 10
 Imports: rtracklayer, splitstackshape
11
-Description: More than 8000 annotated position frequency matrices from 13 public sources, for multiple organisms.
11
+Description: More than 9000 annotated position frequency matrices from 14 public sources, for multiple organisms.
12 12
 License: Artistic-2.0 | file LICENSE
13 13
 License_is_FOSS: no
14 14
 License_restricts_use: yes
... ...
@@ -777,6 +777,40 @@ test.geneToMotif <- function()
777 777
       # MotifDb for ATF5
778 778
       # todo: compare the MA0110596_1.02 matrix of cisbp_1.02 to jaspar MA0833.1
779 779
 
780
+} # test.geneToMotif
781
+#------------------------------------------------------------------------------------------------------------------------
782
+# sad to say I do not recall what problem/fix is tested here (pshannon, 23 jan 2018).
783
+# however, it demonstrates the variety of results which can be returned by non-jaspar datasets
784
+# when using the MotifDb mapping source, and the relative paucity which is sometimes
785
+# seen with the TFclass mapper
786
+test.geneToMotif.ignore.jasparSuffixes <- function()
787
+{
788
+   printf("--- test.geneToMotif.ignore.jasparSuffixes")
789
+   mdb <- MotifDb
790
+
791
+   genes <- c("FOS", "ATF5", "bogus")
792
+
793
+      # use TFClass family classification
794
+   tbl.tfClass <- geneToMotif(mdb, genes, source="TfClaSS")   # intentional mis-capitalization
795
+   checkEquals(sort(tbl.tfClass$gene),  sort(c("ATF5", "FOS", "FOS")))
796
+   checkEquals(sort(tbl.tfClass$motif),  sort(c("MA0833.1", "MA0099.2", "MA0476.1")))
797
+   checkEquals(tbl.tfClass$source, rep("TFClass", 3))
798
+
799
+      # MotifDb mode uses the MotifDb metadata, pulled from many sources
800
+   tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
801
+   checkEquals(dim(tbl.mdb), c(13, 6))
802
+   checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
803
+      # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
804
+      # MotifDb for ATF5
805
+
806
+      # compare the MA0110599_1.02 matrix of cisbp_1.02 to jaspar MA0476.1: the identical matrix!
807
+      # 1         FOS    MA0110599_1.02   cisbp_1.02  Hsapiens 24194598 MotifDb
808
+      # 10        FOS          MA0476.1   jaspar2018  Hsapiens 17916232 MotifDb
809
+      # this establishes the need for careful scrutiny as one winnows a geneToMotif result into
810
+      # useful non-reduplicative sequence analysis
811
+
812
+   checkEquals(as.list(query(mdb, "MA0110599")), as.list(query(query(mdb, "MA0476.1"), "jaspar2018")))
813
+
780 814
 } # test.geneToMotif
781 815
 #------------------------------------------------------------------------------------------------------------------------
782 816
 test.motifToGene <- function()