git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@70836 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: MotifDb |
2 | 2 |
Type: Package |
3 | 3 |
Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs |
4 |
-Version: 1.1.0 |
|
5 |
-Date: 2012-08-12 |
|
4 |
+Version: 1.1.2 |
|
5 |
+Date: 2012-10-29 |
|
6 | 6 |
Author: Paul Shannon |
7 | 7 |
Maintainer: Paul Shannon <pshannon@fhcrc.org> |
8 | 8 |
Depends: R (>= 2.15.0), methods, IRanges, Biostrings |
... | ... |
@@ -1,36 +1,36 @@ |
1 | 1 |
MotifDb <- NULL |
2 | 2 |
#------------------------------------------------------------------------------- |
3 |
-.MotifDb = function (loadAllSources=TRUE, quiet=TRUE) |
|
3 |
+.MotifDb = function(loadAllSources=TRUE, quiet=TRUE) |
|
4 | 4 |
{ |
5 |
- mdb = MotifList () |
|
5 |
+ mdb = MotifList() |
|
6 | 6 |
|
7 |
- if (loadAllSources) { |
|
8 |
- data.path = system.file ('extdata', package='MotifDb') # e (system.file (package='MotifDb'), 'data', sep='/') |
|
9 |
- data.files = dir (data.path, full.names=TRUE) |
|
7 |
+ if(loadAllSources) { |
|
8 |
+ data.path = system.file('extdata', package='MotifDb') |
|
9 |
+ data.files = dir(data.path, full.names=TRUE) |
|
10 | 10 |
|
11 |
- if (length (data.files) > 0) |
|
12 |
- for (data.file in data.files) { |
|
11 |
+ if(length(data.files) > 0) |
|
12 |
+ for(data.file in data.files) { |
|
13 | 13 |
# define these to keep 'check' happy. they are loaded by 'load' |
14 | 14 |
tbl.md = NA; matrices = NA; |
15 |
- variables = load (data.file) |
|
16 |
- mdb = append (mdb, MotifList (matrices, tbl.md)) |
|
17 |
- if (!quiet) |
|
18 |
- message (noquote (sprintf ('added %s (%d) matrices, length now: %d', |
|
19 |
- basename (data.file), length (matrices), length (mdb)))) |
|
15 |
+ variables = load(data.file) |
|
16 |
+ mdb = append(mdb, MotifList(matrices, tbl.md)) |
|
17 |
+ if(!quiet) |
|
18 |
+ message(noquote(sprintf('added %s(%d) matrices, length now: %d', |
|
19 |
+ basename(data.file), length(matrices), length(mdb)))) |
|
20 | 20 |
} # for data.file |
21 | 21 |
|
22 |
- if (!quiet) { |
|
23 |
- print (table (values (mdb)$dataSource)) |
|
22 |
+ if(!quiet) { |
|
23 |
+ print(table(values(mdb)$dataSource)) |
|
24 | 24 |
} |
25 | 25 |
} # if loadAllSources |
26 | 26 |
|
27 |
- return (mdb) |
|
27 |
+ return(mdb) |
|
28 | 28 |
|
29 | 29 |
} # MotifDb |
30 | 30 |
#------------------------------------------------------------------------------- |
31 | 31 |
.onLoad <- function(libname, pkgname) |
32 | 32 |
{ |
33 |
- MotifDb <<- .MotifDb (loadAllSources=TRUE, quiet=TRUE) |
|
33 |
+ MotifDb <<- .MotifDb(loadAllSources=TRUE, quiet=TRUE) |
|
34 | 34 |
} |
35 | 35 |
#------------------------------------------------------------------------------- |
36 | 36 |
|
... | ... |
@@ -121,12 +121,10 @@ test.allMatricesAreNormalized = function () |
121 | 121 |
print ('--- test.allMatricesAreNormalized') |
122 | 122 |
mdb = MotifDb# (quiet=TRUE) |
123 | 123 |
matrices = mdb@listData |
124 |
- colsums = as.integer (sapply (matrices, function (mtx) as.integer (mean (round (colSums (mtx)))))) |
|
125 |
- failures = which (colsums != 1) |
|
126 |
- if (length (failures > 0)) |
|
127 |
- browser () |
|
128 |
- checkTrue (length (failures) == 0) |
|
129 |
- |
|
124 |
+ # a lenient test required by "Cparvum-UniPROBE-Cgd2_3490.UP00395" and "Hsapiens-UniPROBE-Sox4.UP00401" |
|
125 |
+ # for reasons not yet explored. 10e-8 should be possible |
|
126 |
+ checkTrue (all (sapply (matrices, function (m) all (abs (colSums (m) - 1.0) < 0.02)))) |
|
127 |
+ |
|
130 | 128 |
} # test.allMatricesAreNormalized |
131 | 129 |
#------------------------------------------------------------------------------------------------------------------------ |
132 | 130 |
test.providerNames = function () |
... | ... |
@@ -144,7 +142,7 @@ test.geneSymbols = function () |
144 | 142 |
print ('--- test.getGeneSymbols') |
145 | 143 |
mdb = MotifDb # () |
146 | 144 |
syms = values (mdb)$geneSymbol |
147 |
- checkEquals (length (which (is.na (syms))), 0) |
|
145 |
+ checkEquals (length (which (is.na (syms))), 683) # no symbols yet for the dgf stamlab motifs |
|
148 | 146 |
checkEquals (length (which (syms == '')), 0) |
149 | 147 |
|
150 | 148 |
} # test.geneSymbols |
... | ... |
@@ -157,10 +155,10 @@ test.geneIdsAndTypes = function () |
157 | 155 |
geneIdTypes = values (mdb)$geneIdType |
158 | 156 |
tbl.types = as.data.frame (table (geneIdTypes, useNA='always'), stringsAsFactors=FALSE) |
159 | 157 |
checkEquals (tbl.types$geneIdTypes, c ('ENTREZ', 'FLYBASE', 'SGD', NA)) |
160 |
- checkEquals (tbl.types$Freq, c (763, 614, 453, 256)) |
|
158 |
+ checkEquals (tbl.types$Freq, c (763, 614, 453, 939)) |
|
161 | 159 |
|
162 | 160 |
na.count = length (which (is.na (geneIds))) |
163 |
- checkEquals (na.count, 256) # see geneIdTypes == NA, just above |
|
161 |
+ checkEquals (na.count, 939) # see geneIdTypes == NA, just above |
|
164 | 162 |
empty.count = length (which (geneIds == '')) |
165 | 163 |
checkEquals (empty.count, 0) |
166 | 164 |
|
... | ... |
@@ -21,13 +21,14 @@ FlyFactorSurvey: \tab 614\cr |
21 | 21 |
hPDI: \tab 437\cr |
22 | 22 |
JASPAR_CORE: \tab 459\cr |
23 | 23 |
ScerTF: \tab 196\cr |
24 |
+stamlab: \tab 683\cr |
|
24 | 25 |
UniPROBE: \tab 380\cr |
25 | 26 |
} |
26 | 27 |
|
27 | 28 |
Representing primarily four organisms: |
28 | 29 |
\tabular{ll}{ |
29 | 30 |
Dmelanogaster: \tab 739\cr |
30 |
-Hsapiens: \tab 505\cr |
|
31 |
+Hsapiens: \tab 1188\cr |
|
31 | 32 |
Scerevisiae: \tab 464\cr |
32 | 33 |
Mmusculus: \tab 329\cr |
33 | 34 |
Rnorvegicus: \tab 8\cr |
... | ... |
@@ -172,6 +173,10 @@ an example (see below for the associated position frequency matrix): |
172 | 173 |
|
173 | 174 |
\itemize{ |
174 | 175 |
|
176 |
+\item Neph S, Stergachis AB, Reynolds A, Sandstrom R, Borenstein E, |
|
177 |
+Stamatoyannopoulos JA. Circuitry and dynamics of human transcription factor regulatory networks. |
|
178 |
+Cell. 2012 Sep 14;150(6):1274-86. |
|
179 |
+ |
|
175 | 180 |
\item Portales-Casamar E, Thongjuea S, Kwon AT, Arenillas D, Zhao X, Valen E, Yusuf D, Lenhard B, Wasserman WW, Sandelin A. JASPAR 2010: the greatly expanded open-access database of transcription factor binding profiles. Nucleic Acids Res. 2010 Jan;38(Database issue):D105-10. Epub 2009 Nov 11. |
176 | 181 |
|
177 | 182 |
\item Robasky K, Bulyk ML. UniPROBE, update 2011: expanded content and search tools in the online database of protein-binding microarray data on protein-DNA interactions. Nucleic Acids Res. 2011 Jan;39(Database issue):D124-8. Epub 2010 Oct 30. |