Browse code

added DGF motifs from the stamlab

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@70836 bc3139a8-67e5-0310-9ffc-ced21a209358

p.shannon authored on 30/10/2012 03:57:35
Showing 6 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.1.0
5
-Date: 2012-08-12
4
+Version: 1.1.2
5
+Date: 2012-10-29
6 6
 Author: Paul Shannon
7 7
 Maintainer: Paul Shannon <pshannon@fhcrc.org>
8 8
 Depends: R (>= 2.15.0), methods, IRanges, Biostrings
9 9
new file mode 100644
... ...
@@ -0,0 +1,7 @@
1
+CHANGES IN VERSION 1.1.2
2
+-------------------------
3
+
4
+NEW FEATURES
5
+
6
+    o 683 new motifs derived from DGF (digital genome footprinting) 
7
+      from the stamlab (http://www.stamlab.org/) added.
... ...
@@ -1,36 +1,36 @@
1 1
 MotifDb <- NULL
2 2
 #-------------------------------------------------------------------------------
3
-.MotifDb = function (loadAllSources=TRUE, quiet=TRUE)
3
+.MotifDb = function(loadAllSources=TRUE, quiet=TRUE)
4 4
 {
5
-  mdb = MotifList ()
5
+  mdb = MotifList()
6 6
 
7
-  if (loadAllSources) {
8
-    data.path = system.file ('extdata', package='MotifDb') # e (system.file (package='MotifDb'), 'data', sep='/')
9
-    data.files = dir (data.path, full.names=TRUE)
7
+  if(loadAllSources) {
8
+    data.path = system.file('extdata', package='MotifDb')
9
+    data.files = dir(data.path, full.names=TRUE)
10 10
   
11
-    if (length (data.files) > 0)   
12
-      for (data.file in data.files) {
11
+    if(length(data.files) > 0)   
12
+      for(data.file in data.files) {
13 13
          # define these to keep 'check' happy.  they are loaded by 'load'
14 14
         tbl.md = NA; matrices = NA;  
15
-        variables = load (data.file)
16
-        mdb = append (mdb, MotifList (matrices, tbl.md))
17
-        if (!quiet) 
18
-          message (noquote (sprintf ('added %s (%d) matrices, length now: %d',
19
-                   basename (data.file), length (matrices), length (mdb))))
15
+        variables = load(data.file)
16
+        mdb = append(mdb, MotifList(matrices, tbl.md))
17
+        if(!quiet) 
18
+          message(noquote(sprintf('added %s(%d) matrices, length now: %d',
19
+                   basename(data.file), length(matrices), length(mdb))))
20 20
       } # for data.file
21 21
   
22
-    if (!quiet) {
23
-      print (table (values (mdb)$dataSource))
22
+    if(!quiet) {
23
+      print(table(values(mdb)$dataSource))
24 24
       }
25 25
     } # if loadAllSources
26 26
 
27
-  return (mdb)
27
+  return(mdb)
28 28
 
29 29
 } # MotifDb
30 30
 #-------------------------------------------------------------------------------
31 31
 .onLoad <- function(libname, pkgname)
32 32
 {
33
-    MotifDb <<- .MotifDb (loadAllSources=TRUE, quiet=TRUE)
33
+    MotifDb <<- .MotifDb(loadAllSources=TRUE, quiet=TRUE)
34 34
 }
35 35
 #-------------------------------------------------------------------------------
36 36
 
37 37
new file mode 100644
38 38
Binary files /dev/null and b/inst/extdata/stamlab.RData differ
... ...
@@ -121,12 +121,10 @@ test.allMatricesAreNormalized = function ()
121 121
   print ('--- test.allMatricesAreNormalized')
122 122
   mdb = MotifDb# (quiet=TRUE)
123 123
   matrices = mdb@listData
124
-  colsums = as.integer (sapply (matrices, function (mtx) as.integer (mean (round (colSums (mtx))))))
125
-  failures = which (colsums != 1)
126
-  if (length (failures > 0))
127
-    browser ()
128
-  checkTrue (length (failures) == 0)
129
-
124
+    # a lenient test required by "Cparvum-UniPROBE-Cgd2_3490.UP00395" and  "Hsapiens-UniPROBE-Sox4.UP00401"
125
+    # for reasons not yet explored.  10e-8 should be possible
126
+  checkTrue (all (sapply (matrices, function (m) all (abs (colSums (m) - 1.0) < 0.02))))
127
+             
130 128
 } # test.allMatricesAreNormalized
131 129
 #------------------------------------------------------------------------------------------------------------------------
132 130
 test.providerNames = function ()
... ...
@@ -144,7 +142,7 @@ test.geneSymbols = function ()
144 142
   print ('--- test.getGeneSymbols')
145 143
   mdb = MotifDb # ()
146 144
   syms = values (mdb)$geneSymbol
147
-  checkEquals (length (which (is.na (syms))), 0)
145
+  checkEquals (length (which (is.na (syms))), 683)  # no symbols yet for the dgf stamlab motifs
148 146
   checkEquals (length (which (syms == '')), 0)
149 147
 
150 148
 } # test.geneSymbols
... ...
@@ -157,10 +155,10 @@ test.geneIdsAndTypes = function ()
157 155
   geneIdTypes = values (mdb)$geneIdType
158 156
   tbl.types = as.data.frame (table (geneIdTypes, useNA='always'), stringsAsFactors=FALSE)
159 157
   checkEquals (tbl.types$geneIdTypes,  c ('ENTREZ', 'FLYBASE', 'SGD', NA))
160
-  checkEquals (tbl.types$Freq,  c (763, 614, 453, 256))
158
+  checkEquals (tbl.types$Freq,  c (763, 614, 453, 939))
161 159
 
162 160
   na.count = length (which (is.na (geneIds)))
163
-  checkEquals (na.count, 256)   # see geneIdTypes == NA, just above
161
+  checkEquals (na.count, 939)   # see geneIdTypes == NA, just above
164 162
   empty.count = length (which (geneIds == ''))
165 163
   checkEquals (empty.count, 0)
166 164
 
... ...
@@ -21,13 +21,14 @@ FlyFactorSurvey: \tab 614\cr
21 21
 hPDI: \tab 437\cr
22 22
 JASPAR_CORE: \tab 459\cr
23 23
 ScerTF: \tab 196\cr
24
+stamlab: \tab 683\cr
24 25
 UniPROBE: \tab 380\cr
25 26
 }
26 27
 
27 28
 Representing primarily four organisms:
28 29
 \tabular{ll}{
29 30
 Dmelanogaster: \tab 739\cr
30
-Hsapiens: \tab 505\cr
31
+Hsapiens: \tab 1188\cr
31 32
 Scerevisiae: \tab 464\cr
32 33
 Mmusculus: \tab 329\cr
33 34
 Rnorvegicus: \tab 8\cr
... ...
@@ -172,6 +173,10 @@ an example (see below for the associated position frequency matrix):
172 173
 
173 174
 \itemize{
174 175
 
176
+\item Neph S, Stergachis AB, Reynolds A, Sandstrom R, Borenstein E,
177
+Stamatoyannopoulos JA. Circuitry and dynamics of human transcription factor regulatory networks.
178
+Cell. 2012 Sep 14;150(6):1274-86.
179
+  
175 180
 \item Portales-Casamar E, Thongjuea S, Kwon AT, Arenillas D, Zhao X, Valen E, Yusuf D, Lenhard B, Wasserman WW, Sandelin A. JASPAR 2010: the greatly expanded open-access database of transcription factor binding profiles. Nucleic Acids Res. 2010 Jan;38(Database issue):D105-10. Epub 2009 Nov 11.
176 181
 
177 182
 \item Robasky K, Bulyk ML. UniPROBE, update 2011: expanded content and search tools in the online database of protein-binding microarray data on protein-DNA interactions. Nucleic Acids Res. 2011 Jan;39(Database issue):D124-8. Epub 2010 Oct 30.