Browse code

provisional addition of hocomoco v1, with reliability scores, A-D

paul-shannon authored on 10/03/2020 00:06:57
Showing10 changed files

... ...
@@ -1,3 +1,7 @@
1
+misc/hocomoco-v11/HOCOMOCOv11_core_HUMAN_mono_jaspar_format.txt
2
+misc/hocomoco-v11/HOCOMOCOv11_core_annotation_HUMAN_mono.tsv
3
+misc/hocomoco-v11/hocomoco11.RData
4
+
1 5
 Meta
2 6
 doc
3 7
 # History files
... ...
@@ -1,13 +1,13 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.29.0
5
-Date: 2018-10-24
4
+Version: 1.30.1
5
+Date: 2020-03-09
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8
-Depends: R (>= 3.5.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings
8
+Depends: R (>= 3.5.0), methods, BiocGenerics, S4Vectors, IRanges, GenomicRanges, Biostrings
9 9
 Suggests: RUnit
10
-Imports: rtracklayer, splitstackshape
10
+Imports: rtracklayer, splitstackshape, TFBSTools, universalmotif, motifmatchr
11 11
 Description: More than 9900 annotated position frequency matrices from 14 public sources, for multiple organisms.
12 12
 License: Artistic-2.0 | file LICENSE
13 13
 License_is_FOSS: no
... ...
@@ -4,6 +4,7 @@ exportMethods (
4 4
    export,
5 5
    show,
6 6
    query,
7
+   matchMotif,
7 8
    motifToGene,
8 9
    geneToMotif,
9 10
    associateTranscriptionFactors
... ...
@@ -13,9 +14,15 @@ export(
13 14
     MotifDb
14 15
 )
15 16
 
17
+import(motifmatchr)
18
+import(TFBSTools)
19
+importFrom(universalmotif, convert_motifs)
20
+
21
+
16 22
 import(BiocGenerics)
17 23
 import(S4Vectors)
18 24
 import(IRanges)
25
+import(GenomicRanges)
19 26
 importFrom(rtracklayer, export)
20 27
 import(Biostrings)
21 28
 import(methods)
... ...
@@ -4,6 +4,8 @@ setGeneric('geneToMotif', signature='object', function(object, geneSymbols, sour
4 4
 setGeneric('associateTranscriptionFactors', signature='object',
5 5
            function(object, tbl.withMotifs,  source, expand.rows, motifColumnName="motifName")
6 6
               standardGeneric('associateTranscriptionFactors'))
7
+setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff,
8
+                                                      fimoDataFrameStyle=FALSE) standardGeneric('matchMotif'))
7 9
 #------------------------------------------------------------------------------------------------------------------------
8 10
 setClass ('MotifList',
9 11
           contains='SimpleList',
... ...
@@ -61,6 +63,30 @@ MotifList = function (matrices=list(), tbl.metadata=data.frame ())
61 63
   object
62 64
 
63 65
 } # ctor
66
+#-------------------------------------------------------------------------------
67
+setMethod('matchMotif', signature='MotifList',
68
+
69
+   function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){
70
+     motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix"))
71
+     motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix)
72
+     gr.list <- matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff)
73
+     result <- gr.list
74
+     if(fimoDataFrameStyle){
75
+        gr <- unlist(gr.list)
76
+        motif.names <- names(gr)
77
+        names(gr) <- NULL
78
+        tbl <- as.data.frame(gr)
79
+        tbl$motif_id <- motif.names
80
+        colnames(tbl)[1] <- "chrom"
81
+        tbl$chrom <- as.character(tbl$chrom)
82
+        colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score"
83
+        new.order <- order(tbl$start, decreasing=FALSE)
84
+        tbl <- tbl[new.order,]
85
+        result <- tbl
86
+        }
87
+     return(result)
88
+     })
89
+
64 90
 #-------------------------------------------------------------------------------
65 91
 setMethod ('subset', signature = 'MotifList',
66 92
 
67 93
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+quick: install
2
+
3
+all:  vig build check
4
+
5
+vig:
6
+	R -e "devtools::build_vignettes()"
7
+
8
+build:
9
+	(cd ..; R CMD build --no-build-vignettes MotifDb)
10
+
11
+install:
12
+	(cd ..; R CMD INSTALL --no-test-load MotifDb)
13
+
14
+check: build
15
+	(cd ..; R CMD check --no-manual --no-build-vignettes --ignore-vignettes `ls -t MotifDb_* | head -1`)
16
+
17
+biocCheck:
18
+	(cd ..; R CMD BiocCheck `ls -t MotifDb_* | head -1`)
19
+
20
+unitTests: test
21
+
22
+test:
23
+	 for x in inst/unitTests/test_*.R; do echo ============== $$x; R -f $$x; done
24
+
25
+site:
26
+	R -e "devtools::build_site()"
0 27
new file mode 100644
1 28
Binary files /dev/null and b/inst/extdata/hocomoco11.RData differ
... ...
@@ -1,43 +1,43 @@
1
-library (MotifDb)
2
-library (RUnit)
3
-library (MotIV)
4
-library (seqLogo)
1
+library(MotifDb)
2
+library(RUnit)
3
+library(MotIV)
4
+library(seqLogo)
5 5
 #----------------------------------------------------------------------------------------------------
6 6
 printf <- function(...) print(noquote(sprintf(...)))
7 7
 #----------------------------------------------------------------------------------------------------
8
-runTests = function ()
8
+runTests = function()
9 9
 {
10
-  test.emptyCtor ()
11
-  test.nonEmptyCtor ()
12
-  test.MotifDb.normalMode ()
13
-  test.MotifDb.emptyMode ()
14
-  test.allMatricesAreNormalized ()
15
-  test.providerNames ()
16
-  test.geneSymbols ()
17
-  test.geneIdsAndTypes ()
18
-  test.proteinIds ()
19
-  test.sequenceCount ()
20
-  test.longNames ()
21
-  test.organisms ()
22
-  test.bindingDomains ()
23
-  test.experimentTypes ()
24
-  test.tfFamilies ()
25
-  test.bindingSequences ()
26
-  test.flyBindingDomains ()
27
-  test.pubmedIDs ()
28
-  test.allFullNames ()
29
-  test.subset ()
30
-  test.subsetWithVariables ()
31
-  test.queryOldStyle ()
10
+  test.emptyCtor()
11
+  test.nonEmptyCtor()
12
+  test.MotifDb.normalMode()
13
+  test.MotifDb.emptyMode()
14
+  test.allMatricesAreNormalized()
15
+  test.providerNames()
16
+  test.geneSymbols()
17
+  test.geneIdsAndTypes()
18
+  test.proteinIds()
19
+  test.sequenceCount()
20
+  test.longNames()
21
+  test.organisms()
22
+  test.bindingDomains()
23
+  test.experimentTypes()
24
+  test.tfFamilies()
25
+  test.bindingSequences()
26
+  test.flyBindingDomains()
27
+  test.pubmedIDs()
28
+  test.allFullNames()
29
+  test.subset()
30
+  test.subsetWithVariables()
31
+  test.queryOldStyle()
32 32
   test.query()
33
-  test.transformMatrixToMemeRepresentation ()
34
-  test.matrixToMemeText ()
35
-  test.export_memeFormatStdOut ()
36
-  test.export_memeFormatToFile ()
37
-  test.export_memeFormatToFileDuplication ()
38
-  test.export_memeFormatToFile_run_tomtom ()
39
-  test.MotIV.toTable ()
40
-  test.run_MotIV.motifMatch()
33
+  test.transformMatrixToMemeRepresentation()
34
+  test.matrixToMemeText()
35
+  test.export_memeFormatStdOut()
36
+  test.export_memeFormatToFile()
37
+  test.export_memeFormatToFileDuplication()
38
+  test.export_memeFormatToFile_run_tomtom()
39
+  #test.MotIV.toTable()
40
+  #test.run_MotIV.motifMatch()
41 41
   test.flyFactorGeneSymbols()
42 42
   test.export_jasparFormatStdOut()
43 43
   test.export_jasparFormatToFile()
... ...
@@ -46,27 +46,30 @@ runTests = function ()
46 46
   test.geneToMotif.ignore.jasparSuffixes()
47 47
   test.geneToMotif.oneGene.noMotifs
48 48
   test.motifToGene()
49
-
50 49
   test.associateTranscriptionFactors()
51 50
 
51
+  test.match()
52
+
53
+  test.hocomoco11.with.reliabilityScores()
54
+
52 55
 } # runTests
53 56
 #------------------------------------------------------------------------------------------------------------------------
54
-test.emptyCtor = function ()
57
+test.emptyCtor = function()
55 58
 {
56
-  print ('--- test.emptyCtor')
57
-  motif.list = MotifDb:::MotifList ()
58
-  checkEquals (length (motif.list), 0)
59
+  print('--- test.emptyCtor')
60
+  motif.list = MotifDb:::MotifList()
61
+  checkEquals(length(motif.list), 0)
59 62
 
60 63
 } # test.emptyCtor
61 64
 #------------------------------------------------------------------------------------------------------------------------
62
-test.nonEmptyCtor = function ()
65
+test.nonEmptyCtor = function()
63 66
 {
64
-  print ('--- test.nonEmptyCtor')
65
-  mtx = matrix (runif (20), nrow=4, ncol=5, byrow=T, dimnames=list(c ('A', 'C', 'G', 'T'), as.character (1:5)))
66
-  mtx.normalized = apply (mtx, 2, function (colvector) colvector / sum (colvector))
67
-  matrixList = list (mtx.normalized)
67
+  print('--- test.nonEmptyCtor')
68
+  mtx = matrix(runif(20), nrow=4, ncol=5, byrow=T, dimnames=list(c('A', 'C', 'G', 'T'), as.character(1:5)))
69
+  mtx.normalized = apply(mtx, 2, function(colvector) colvector / sum(colvector))
70
+  matrixList = list(mtx.normalized)
68 71
 
69
-  tbl.md = data.frame (providerName='',
72
+  tbl.md = data.frame(providerName='',
70 73
                        providerId='',
71 74
                        dataSource='',
72 75
                        geneSymbol='',
... ...
@@ -82,36 +85,36 @@ test.nonEmptyCtor = function ()
82 85
                        experimentType='',
83 86
                        pubmedID='',
84 87
                        stringsAsFactors=FALSE)
85
-  names (matrixList) = 'test'
86
-  rownames (tbl.md) = 'test'
87
-  motif.list = MotifDb:::MotifList (matrixList, tbl.md)
88
-  checkEquals (length (motif.list), 1)
88
+  names(matrixList) = 'test'
89
+  rownames(tbl.md) = 'test'
90
+  motif.list = MotifDb:::MotifList(matrixList, tbl.md)
91
+  checkEquals(length(motif.list), 1)
89 92
 
90 93
 } # test.nonEmptyCtor
91 94
 #------------------------------------------------------------------------------------------------------------------------
92 95
 # 'normal' in that all included data sources are already loaded
93
-test.MotifDb.normalMode = function ()
96
+test.MotifDb.normalMode = function()
94 97
 {
95
-  print ('--- test.MotifDb.normalMode')
98
+  print('--- test.MotifDb.normalMode')
96 99
 
97
-  mdb = MotifDb #  (quiet=TRUE)
98
-    # (5 jun 2012)
100
+  mdb = MotifDb # (quiet=TRUE)
101
+    #(5 jun 2012)
99 102
     # JASPAR_CORE: 459
100 103
     # ScerTF: 196
101 104
     # UniPROBE: 380
102 105
     # FlyFactorSurvey: 614
103 106
     # hPDI: 437
104
-  checkTrue (length (mdb) > 2080)
107
+  checkTrue(length(mdb) > 2080)
105 108
 
106 109
 } # test.MotifDb.normalMode
107 110
 #------------------------------------------------------------------------------------------------------------------------
108 111
 # this mode is not intended for users, but may see use in the future.
109
-test.MotifDb.emptyMode = function ()
112
+test.MotifDb.emptyMode = function()
110 113
 {
111
-  print ('--- test.MotifDb.emptyMode')
114
+  print('--- test.MotifDb.emptyMode')
112 115
 
113
-  mdb = MotifDb:::.MotifDb (loadAllSources=FALSE, quiet=TRUE)
114
-  checkTrue (length (mdb) == 0)
116
+  mdb = MotifDb:::.MotifDb(loadAllSources=FALSE, quiet=TRUE)
117
+  checkTrue(length(mdb) == 0)
115 118
 
116 119
 } # test.MotifDb.emptyMode
117 120
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -122,86 +125,86 @@ test.MotifDb.emptyMode = function ()
122 125
 
123 126
 # Many more NA's exist...need to fix these; here's a quick fix for now
124 127
 
125
-test.noNAorganisms = function ()
128
+test.noNAorganisms = function()
126 129
 
127 130
 {
128
-  print ('--- test.noNAorganisms')
129
-  #checkEquals (which (is.na (mcols(MotifDb)$organism)), integer (0))
131
+  print('--- test.noNAorganisms')
132
+  #checkEquals(which(is.na(mcols(MotifDb)$organism)), integer(0))
130 133
 
131 134
   # There's a fair number of NA organisms, mostly due to including the homer DB
132
-  checkEquals(sum(is.na (mcols(MotifDb)$organism)), 366)
135
+  checkEquals(sum(is.na(mcols(MotifDb)$organism)), 366)
133 136
 
134 137
 } # test.noNAorganisms
135 138
 #------------------------------------------------------------------------------------------------------------------------
136
-test.allMatricesAreNormalized = function ()
139
+test.allMatricesAreNormalized = function()
137 140
 {
138
-  print ('--- test.allMatricesAreNormalized')
139
-  mdb = MotifDb# (quiet=TRUE)
141
+  print('--- test.allMatricesAreNormalized')
142
+  mdb = MotifDb#(quiet=TRUE)
140 143
   matrices = mdb@listData
141 144
     # a lenient test required by "Cparvum-UniPROBE-Cgd2_3490.UP00395" and  "Hsapiens-UniPROBE-Sox4.UP00401"
142 145
     # for reasons not yet explored.  10e-8 should be be possible
143
-  checkTrue(all(sapply(matrices, function (m) all (abs (colSums (m) - 1.0) < 0.02))))
146
+  checkTrue(all(sapply(matrices, function(m) all(abs(colSums(m) - 1.0) < 0.02))))
144 147
 
145 148
 } # test.allMatricesAreNormalized
146 149
 #------------------------------------------------------------------------------------------------------------------------
147
-test.providerNames = function ()
150
+test.providerNames = function()
148 151
 {
149
-  print ('--- test.getProviderNames')
150
-  mdb = MotifDb # ()
152
+  print('--- test.getProviderNames')
153
+  mdb = MotifDb #()
151 154
   pn = mcols(mdb)$providerName
152
-  checkEquals (length (which (is.na (pn))), 0)
153
-  checkEquals (length (which (pn == '')), 0)
155
+  checkEquals(length(which(is.na(pn))), 0)
156
+  checkEquals(length(which(pn == '')), 0)
154 157
 
155 158
 } # test.providerNames
156 159
 #------------------------------------------------------------------------------------------------------------------------
157
-test.geneSymbols = function ()
160
+test.geneSymbols = function()
158 161
 {
159
-  print ('--- test.getGeneSymbols')
160
-  mdb = MotifDb # ()
162
+  print('--- test.getGeneSymbols')
163
+  mdb = MotifDb #()
161 164
   syms = mcols(mdb)$geneSymbol
162
-  checkEquals (length (which (is.na (syms))), 683)  # no symols yet for the dgf stamlab motifs
163
-  checkEquals (length (which (syms == '')), 0)
165
+  checkEquals(length(which(is.na(syms))), 683)  # no symols yet for the dgf stamlab motifs
166
+  checkEquals(length(which(syms == '')), 0)
164 167
 
165 168
 } # test.geneSymbols
166 169
 #------------------------------------------------------------------------------------------------------------------------
167
-test.geneIdsAndTypes = function ()
170
+test.geneIdsAndTypes = function()
168 171
 {
169
-  print ('--- test.getGeneIdsAndTypes')
172
+  print('--- test.getGeneIdsAndTypes')
170 173
   mdb = MotifDb
171 174
   tbl <- mcols(mdb)
172 175
   geneIds = tbl$geneId
173 176
   geneIdTypes = tbl$geneIdType
174
-  typeCounts = as.list (table (geneIdTypes))
177
+  typeCounts = as.list(table(geneIdTypes))
175 178
 
176 179
   checkTrue(typeCounts$ENTREZ > 2300)
177 180
   checkTrue(typeCounts$FLYBASE >= 45)
178 181
   checkTrue(typeCounts$SGD >= 600)
179 182
   checkTrue(nrow(subset(tbl, is.na(geneIdType))) > 2000)
180 183
 
181
-  empty.count = length (which (geneIds == ''))
182
-  checkEquals (empty.count, 0)
184
+  empty.count = length(which(geneIds == ''))
185
+  checkEquals(empty.count, 0)
183 186
 
184 187
 
185 188
 } # test.geneIdsAndTypes
186 189
 #------------------------------------------------------------------------------------------------------------------------
187 190
 # make sure that all proteinIds have explicit values, either proper identifiers or NA
188 191
 # currently tested by looking for empty string assignments
189
-test.proteinIds = function ()
192
+test.proteinIds = function()
190 193
 {
191
-  print ('--- test.proteinIds')
192
-  mdb = MotifDb # (quiet=TRUE)
194
+  print('--- test.proteinIds')
195
+  mdb = MotifDb #(quiet=TRUE)
193 196
   NA.string.count <- sum(is.na(mcols(mdb)$proteinId))
194
-#  NA.string.count = length (grep ('NA', mcols(mdb)$proteinId))
197
+#  NA.string.count = length(grep('NA', mcols(mdb)$proteinId))
195 198
 
196 199
   checkEquals(NA.string.count, 2514)
197 200
   # FIX THIS; Currently 2514 don't have protein IDs
198
-  #checkEquals (NA.string.count, 0)
201
+  #checkEquals(NA.string.count, 0)
199 202
 
200
-  empty.count = length (which (mcols(mdb)$proteinId==""))
201
-  if (empty.count > 0)
202
-    browser ('test.proteinIds')
203
+  empty.count = length(which(mcols(mdb)$proteinId==""))
204
+  if(empty.count > 0)
205
+    browser('test.proteinIds')
203 206
 
204
-  checkEquals (empty.count, 0)
207
+  checkEquals(empty.count, 0)
205 208
 
206 209
      # FlyFactorSurvey, as digested by me, had a blanket assigment of UNIPROT to all proteinIds
207 210
      # Herve' pointed out that this applied also to entries with no proteinId.
... ...
@@ -209,86 +212,88 @@ test.proteinIds = function ()
209 212
 
210 213
   ### FIX THIS TOO! Currently have 913 entries with a proteinIdType and no proteinId
211 214
   x = mcols(mdb)
212
-  # checkEquals (nrow (subset (x, !is.na (proteinIdType) & is.na (proteinId))), 0)
215
+  # checkEquals(nrow(subset(x, !is.na(proteinIdType) & is.na(proteinId))), 0)
213 216
 
214 217
 
215 218
 } # test.proteinIds
216 219
 #------------------------------------------------------------------------------------------------------------------------
217 220
 # only for UniPROBE do we not have sequence count.  might be possible to get them along with 'insertion sequences'
218
-test.sequenceCount = function ()
221
+test.sequenceCount = function()
219 222
 {
220
-  print ('--- test.sequenceCount')
221
-  mdb = MotifDb # ()
223
+  print('--- test.sequenceCount')
224
+  mdb = MotifDb #()
222 225
   x = mcols(mdb)
223
-  if (interactive ()) {
224
-    x.up = subset (x, dataSource == 'UniPROBE')
225
-    checkTrue (all (is.na (x.up$sequenceCount)))
226
+  if(interactive()) {
227
+    x.up = subset(x, dataSource == 'UniPROBE')
228
+    checkTrue(all(is.na(x.up$sequenceCount)))
226 229
     }
227 230
   else {
228
-    uniprobe.indices = which (x$dataSource == 'UniPROBE')
229
-    checkTrue (all (is.na (x$sequenceCount [uniprobe.indices])))
231
+    uniprobe.indices = which(x$dataSource == 'UniPROBE')
232
+    checkTrue(all(is.na(x$sequenceCount [uniprobe.indices])))
230 233
     }
231 234
 
232 235
 } # test.sequenceCount
233 236
 #------------------------------------------------------------------------------------------------------------------------
234 237
 # make sure that a legitimate organism-dataSource-identifier is supplied for each matrix and as a rowname
235 238
 # of the corresponding DataFrame
236
-test.longNames = function ()
239
+test.longNames = function()
237 240
 {
238
-  print ('--- test.longNames')
241
+  print('--- test.longNames')
239 242
   mdb = MotifDb
240
-  longNames = strsplit (names (mdb), '-')
241
-  organisms = unique (sapply (longNames, '[', 1))
243
+  longNames = strsplit(names(mdb), '-')
244
+  organisms = unique(sapply(longNames, '[', 1))
242 245
 
243
-  dataSources = unique (lapply (longNames, '[', 2))
246
+  dataSources = unique(lapply(longNames, '[', 2))
244 247
 
245
-  recognized.dataSources = unique (mcols(mdb)$dataSource)
246
-  recognized.organisms = unique (mcols(mdb)$organism)
247
-    # a few (3) matrices from JASPAR core have NA organism.  make this into a character
248
+  recognized.dataSources = c(unique(mcols(mdb)$dataSource),
249
+                             c("HOCOMOCOv11B", "HOCOMOCOv11C", "HOCOMOCOv11A"))
250
+
251
+  recognized.organisms = unique(mcols(mdb)$organism)
252
+    # a few(3) matrices from JASPAR core have NA organism.  make this into a character
248 253
     # so that it can be matched up against the 'NA' extracted from longNames just above
249
-  na.indices = which (is.na (recognized.organisms))
250
-  if (length (na.indices) > 0)
254
+  na.indices = which(is.na(recognized.organisms))
255
+  if(length(na.indices) > 0)
251 256
      recognized.organisms [na.indices] = 'NA'
252 257
 
253
-  checkTrue (all (organisms %in% recognized.organisms))
254
-  checkTrue (all (dataSources %in% recognized.dataSources))
258
+  checkTrue(all(organisms %in% recognized.organisms))
259
+  checkTrue(all(dataSources %in% recognized.dataSources))
255 260
 
256 261
 } # test.longNames
257 262
 #------------------------------------------------------------------------------------------------------------------------
258 263
 # make sure that a legitimate organism is specified for each matrix
259
-test.organisms = function ()
264
+test.organisms = function()
260 265
 {
261
-  print ('--- test.organisms')
262
-  mdb = MotifDb # (quiet=TRUE)
266
+  print('--- test.organisms')
267
+  mdb = MotifDb #(quiet=TRUE)
263 268
   organisms = mcols(mdb)$organism
264 269
 
265
-     # jaspar_core has 3 NA speciesId: TBP, HNF4A and CEBPA (MA0108.2, MA0114.1, MA0102.2)
270
+     # jaspar_core has 3 NA speciesId: TBP, HNF4A and CEBPA(MA0108.2, MA0114.1, MA0102.2)
266 271
      # their website shows these as vertebrates, which I map to 'Vertebrata'.  An organismID of '-'
267 272
   # gets the same treatment, matching website also.
268 273
 
269 274
   ### Note: this failing test is the same as the test.noNAorganisms test!
270 275
   # As in case of noNA, need to add organisms for these
271
-  #checkEquals (which (is.na (mcols(MotifDb)$organism)), integer (0))
276
+  #checkEquals(which(is.na(mcols(MotifDb)$organism)), integer(0))
272 277
 
273
-  empty.count = length (which (mcols(mdb)$organism==""))
274
-  checkEquals (empty.count, 0)
278
+  empty.count = length(which(mcols(mdb)$organism==""))
279
+  checkEquals(empty.count, 0)
275 280
 
276 281
 } # test.organisms
277 282
 #------------------------------------------------------------------------------------------------------------------------
278
-test.bindingDomains = function ()
283
+test.bindingDomains = function()
279 284
 {
280
-  print ('--- test.bindingDomains')
281
-  mdb = MotifDb # (quiet=TRUE)
282
-  checkTrue (length (unique (mcols(mdb)$bindingDomain)) > 1)
285
+  print('--- test.bindingDomains')
286
+  mdb = MotifDb #(quiet=TRUE)
287
+  checkTrue(length(unique(mcols(mdb)$bindingDomain)) > 1)
283 288
 
284 289
 } # test.bindingDomains
285 290
 #------------------------------------------------------------------------------------------------------------------------
286
-test.flyBindingDomains = function ()
291
+test.flyBindingDomains = function()
287 292
 {
288
-  print ('--- test.flyBindingDomains')
293
+  print('--- test.flyBindingDomains')
289 294
 
290 295
   x = mcols(MotifDb)
291
-  tmp = as.list (head (sort (table (subset (x, organism=='Dmelanogaster')$bindingDomain), decreasing=TRUE), n=3))
296
+  tmp = as.list(head(sort(table(subset(x, organism=='Dmelanogaster')$bindingDomain), decreasing=TRUE), n=3))
292 297
 
293 298
     # these counts will likely change with a fresh load of data from FlyFactorSurvey.
294 299
 
... ...
@@ -299,38 +304,38 @@ test.flyBindingDomains = function ()
299 304
 
300 305
 } # test.flyBindingDomains
301 306
 #------------------------------------------------------------------------------------------------------------------------
302
-test.experimentTypes = function ()
307
+test.experimentTypes = function()
303 308
 {
304
-  print ('--- test.experimentTypes')
305
-  mdb = MotifDb # (quiet=TRUE)
309
+  print('--- test.experimentTypes')
310
+  mdb = MotifDb #(quiet=TRUE)
306 311
   x = mcols(mdb)
307
-  checkTrue (length (unique (x$experimentType)) >= 18)
308
-  checkEquals (length (which (x$experimentType=='')), 0)
312
+  checkTrue(length(unique(x$experimentType)) >= 18)
313
+  checkEquals(length(which(x$experimentType=='')), 0)
309 314
 
310 315
 } # test.experimentTypes
311 316
 #------------------------------------------------------------------------------------------------------------------------
312
-test.tfFamilies = function ()
317
+test.tfFamilies = function()
313 318
 {
314
-  print ('--- test.tfFamilies')
315
-  mdb = MotifDb # (quiet=TRUE)
316
-  checkTrue (length (unique (mcols(mdb)$tfFamily)) > 1)
319
+  print('--- test.tfFamilies')
320
+  mdb = MotifDb #(quiet=TRUE)
321
+  checkTrue(length(unique(mcols(mdb)$tfFamily)) > 1)
317 322
 
318 323
 } # test.tfFamilies
319 324
 #------------------------------------------------------------------------------------------------------------------------
320
-test.bindingSequences = function ()
325
+test.bindingSequences = function()
321 326
 {
322
-  print ('--- test.bindingSequences')
323
-  mdb = MotifDb # (quiet=TRUE)
324
-  checkTrue (length (unique (mcols(mdb)$bindingSequence)) > 1)
327
+  print('--- test.bindingSequences')
328
+  mdb = MotifDb #(quiet=TRUE)
329
+  checkTrue(length(unique(mcols(mdb)$bindingSequence)) > 1)
325 330
 
326 331
 } # test.tfFamilies
327 332
 #------------------------------------------------------------------------------------------------------------------------
328
-test.pubmedIDs = function ()
333
+test.pubmedIDs = function()
329 334
 {
330
-  print ('--- test.pubmedIDs')
331
-  x = mcols(MotifDb) # (quiet=TRUE))
332
-  checkTrue (length (unique (x$pubmedID)) >= 139)
333
-  checkEquals (length (which (x$pubmedID == '')), 0)
335
+  print('--- test.pubmedIDs')
336
+  x = mcols(MotifDb) #(quiet=TRUE))
337
+  checkTrue(length(unique(x$pubmedID)) >= 139)
338
+  checkEquals(length(which(x$pubmedID == '')), 0)
334 339
 
335 340
 } # test.pubmedIDs
336 341
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -345,96 +350,96 @@ test.pubmedIDs = function ()
345 350
 #          ScerTF-Scerevisiae-ABF2-badis
346 351
 #          JASPAR_CORE-Rrattus-Ar-MA0007.1
347 352
 #
348
-test.allFullNames = function ()
353
+test.allFullNames = function()
349 354
 {
350
-  print ('--- test.allFullNames')
351
-  mdb = MotifDb # (quiet=TRUE)
355
+  print('--- test.allFullNames')
356
+  mdb = MotifDb #(quiet=TRUE)
352 357
   matrices = mdb@listData
353
-  fullNames = names (matrices)
358
+  fullNames = names(matrices)
354 359
 
355
-  all.dataSources = unique (mcols(mdb)$dataSource)
356
-  checkTrue (length (all.dataSources) >= 4)
360
+  all.dataSources = unique(mcols(mdb)$dataSource)
361
+  checkTrue(length(all.dataSources) >= 4)
357 362
 
358
-  for (source in all.dataSources) {
363
+  for(source in all.dataSources) {
359 364
      this.dataSource <<- source
360
-     matrices.by.source = subset (mdb, dataSource==this.dataSource)
361
-     matrix.name = names (matrices.by.source)[1]
365
+     matrices.by.source = subset(mdb, dataSource==this.dataSource)
366
+     matrix.name = names(matrices.by.source)[1]
362 367
         #  FlyFactorSurvey: Dmelanogaster-FlyFactorSurvey-ab_SANGER_10_FBgn0259750
363 368
         #             hPDI: Hsapiens-hPDI-ABCF2
364 369
         #      JASPAR_CORE: JASPAR_CORE-Athaliana-AGL3-MA0001.1
365 370
         #         UniPROBE: Hsapiens-UniPROBE-Sox4.UP00401
366
-     checkTrue (grep (this.dataSource, matrix.name) == 1)
367
-     #printf ('%20s: %s', source, matrix.name)
371
+     checkTrue(grep(this.dataSource, matrix.name) == 1)
372
+     #printf('%20s: %s', source, matrix.name)
368 373
      }
369 374
 
370
-  return (TRUE)
375
+  return(TRUE)
371 376
 
372 377
 } # test.allFullNames
373 378
 #------------------------------------------------------------------------------------------------------------------------
374
-test.subset = function ()
379
+test.subset = function()
375 380
 {
376
-  if (interactive ()) {
377
-    print ('--- test.subset')
381
+  if(interactive()) {
382
+    print('--- test.subset')
378 383
     mdb = MotifDb
379
-    checkTrue ('geneSymbol' %in% colnames (elementMetadata (mdb)))
380
-    mdb.sub = subset (mdb, geneSymbol=='ABCF2')
381
-    checkEquals (length (mdb.sub), 1)
384
+    checkTrue('geneSymbol' %in% colnames(elementMetadata(mdb)))
385
+    mdb.sub = subset(mdb, geneSymbol=='ABCF2')
386
+    checkEquals(length(mdb.sub), 1)
382 387
     } # if interactive
383 388
 
384 389
 } # test.subset
385 390
 #------------------------------------------------------------------------------------------------------------------------
386
-test.subsetWithVariables = function ()
391
+test.subsetWithVariables = function()
387 392
 {
388
-  if (interactive ()) {
389
-    print ('--- test.subsetWithVariables')
393
+  if(interactive()) {
394
+    print('--- test.subsetWithVariables')
390 395
 
391
-    mdb = MotifDb # ()
392
-    checkTrue ('geneSymbol' %in% colnames (elementMetadata (mdb)))
396
+    mdb = MotifDb #()
397
+    checkTrue('geneSymbol' %in% colnames(elementMetadata(mdb)))
393 398
     target.gene <<- 'ABCF2'
394
-    mdb.sub = subset (mdb, geneSymbol==target.gene)
395
-    checkEquals (length (mdb.sub), 1)
399
+    mdb.sub = subset(mdb, geneSymbol==target.gene)
400
+    checkEquals(length(mdb.sub), 1)
396 401
     } # if interactive
397 402
 
398 403
 } # test.subsetWithVariables
399 404
 #------------------------------------------------------------------------------------------------------------------------
400 405
 # "old style": just one query term allowed
401
-test.queryOldStyle = function ()
406
+test.queryOldStyle = function()
402 407
 {
403
-  print ('--- test.queryOldStyle')
408
+  print('--- test.queryOldStyle')
404 409
   mdb = MotifDb
405 410
 
406 411
     # do queries on dataSource counts match those from a contingency table?
407
-  sources.list = as.list (table (mcols(mdb)$dataSource))
408
-  checkEquals (length (query (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
409
-  checkEquals (length (query (mdb, 'uniprobe')), sources.list$UniPROBE)
410
-  checkEquals (length (query (mdb, 'UniPROBE')), sources.list$UniPROBE)
412
+  sources.list = as.list(table(mcols(mdb)$dataSource))
413
+  checkEquals(length(query(mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
414
+  checkEquals(length(query(mdb, 'uniprobe')), sources.list$UniPROBE)
415
+  checkEquals(length(query(mdb, 'UniPROBE')), sources.list$UniPROBE)
411 416
 
412 417
     # gene symbols which begin with 'sox' are quite common.  can we them?
413
-    # there are currently (19 jul 2012) 18, but since this may change, our test is approximate
418
+    # there are currently(19 jul 2012) 18, but since this may change, our test is approximate
414 419
 
415 420
   # Change on 8/1/2017: increase top limit of sox entries as they've expanded
416
-  sox.entries = query (mdb, '^sox')
417
-  checkTrue (length (sox.entries) > 10)
418
-  checkTrue (length (sox.entries) < 200)
421
+  sox.entries = query(mdb, '^sox')
422
+  checkTrue(length(sox.entries) > 10)
423
+  checkTrue(length(sox.entries) < 200)
419 424
 
420 425
     # manual inspection reveals that some of these genes have names which are all capitalized.  test that.
421
-  checkTrue (length (query (mdb, '^sox', ignore.case=TRUE)) > length (query (mdb, '^SOX', ignore.case=FALSE)))
426
+  checkTrue(length(query(mdb, '^sox', ignore.case=TRUE)) > length(query(mdb, '^SOX', ignore.case=FALSE)))
422 427
 
423 428
     # make sure that queries can be stacked up, and that order of call does not affect the outcome
424
-   uniprobe.sox.matrices = query (query (mdb, 'uniprobe'), '^sox')
425
-   sox.uniprobe.matrices = query (query (mdb, '^sox'), 'uniprobe')
429
+   uniprobe.sox.matrices = query(query(mdb, 'uniprobe'), '^sox')
430
+   sox.uniprobe.matrices = query(query(mdb, '^sox'), 'uniprobe')
426 431
 
427
-   checkEquals (length (uniprobe.sox.matrices), length (sox.uniprobe.matrices))
432
+   checkEquals(length(uniprobe.sox.matrices), length(sox.uniprobe.matrices))
428 433
 
429 434
      # an approximate count check
430
-  checkTrue (length (uniprobe.sox.matrices) > 10)
431
-  checkTrue (length (uniprobe.sox.matrices) < 30)
435
+  checkTrue(length(uniprobe.sox.matrices) > 10)
436
+  checkTrue(length(uniprobe.sox.matrices) < 30)
432 437
 
433
-   checkEquals (unique (mcols(uniprobe.sox.matrices)$dataSource), 'UniPROBE')
434
-   checkEquals (unique (mcols(sox.uniprobe.matrices)$dataSource), 'UniPROBE')
435
-   gene.symbols = sort (unique (mcols(uniprobe.sox.matrices)$geneSymbol))
438
+   checkEquals(unique(mcols(uniprobe.sox.matrices)$dataSource), 'UniPROBE')
439
+   checkEquals(unique(mcols(sox.uniprobe.matrices)$dataSource), 'UniPROBE')
440
+   gene.symbols = sort(unique(mcols(uniprobe.sox.matrices)$geneSymbol))
436 441
 
437
-    # query on a string (in this case, an oddly named motif) which contains
442
+    # query on a string(in this case, an oddly named motif) which contains
438 443
     # regular expression characters.  no solution to this yet.
439 444
     # query uses base R's grep, in which
440 445
     #  x <- query(mdb, "ELK1,4_GABP{A,B1}.p3")
... ...
@@ -443,7 +448,7 @@ test.queryOldStyle = function ()
443 448
 #------------------------------------------------------------------------------------------------------------------------
444 449
 test.query <- function()
445 450
 {
446
-  print ('--- test.query')
451
+  print('--- test.query')
447 452
   mdb = MotifDb
448 453
 
449 454
   ors <- c("MA0511.1", "MA0057.1")
... ...
@@ -471,10 +476,10 @@ test.query <- function()
471 476
   #checkEquals(sort(names(x)),
472 477
 
473 478
     # do queries on dataSource counts match those from a contingency table?
474
-  sources.list = as.list (table (mcols(mdb)$dataSource))
475
-  checkEquals (length (query (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
476
-  checkEquals (length (query (mdb, 'uniprobe')), sources.list$UniPROBE)
477
-  checkEquals (length (query (mdb, 'UniPROBE')), sources.list$UniPROBE)
479
+  sources.list = as.list(table(mcols(mdb)$dataSource))
480
+  checkEquals(length(query(mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
481
+  checkEquals(length(query(mdb, 'uniprobe')), sources.list$UniPROBE)
482
+  checkEquals(length(query(mdb, 'UniPROBE')), sources.list$UniPROBE)
478 483
 
479 484
 } # test.query
480 485
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -486,7 +491,7 @@ test.query2 <- function()
486 491
 
487 492
   matrices.human.sox4 <- query(mdb, c("hsapiens", "sox4"))
488 493
   matrices.human.sox4.oldStyle <- query(matrices.human, "sox4")
489
-  checkTrue(length(matrices.human.sox4) > 0)   # 6 found on (24 oct 2018)
494
+  checkTrue(length(matrices.human.sox4) > 0)   # 6 found on(24 oct 2018)
490 495
   checkEquals(length(matrices.human.sox4), length(matrices.human.sox4.oldStyle))
491 496
 
492 497
   checkTrue(length(matrices.human.sox4) < length(matrices.human))
... ...
@@ -496,260 +501,264 @@ test.query2 <- function()
496 501
 
497 502
 } # test.query2
498 503
 #------------------------------------------------------------------------------------------------------------------------
499
-test.transformMatrixToMemeRepresentation = function ()
504
+test.transformMatrixToMemeRepresentation = function()
500 505
 {
501
-  print ('--- test.transformMatrixToMemeRepresentation')
502
-  mdb = MotifDb # ()
503
-  matrix.agl3 = subset (mdb, dataSource=='JASPAR_CORE' & organism=='Athaliana' & geneSymbol=='AGL3')[[1]]
504
-  checkEquals (dim (matrix.agl3), c (4, 10))
506
+  print('--- test.transformMatrixToMemeRepresentation')
507
+  mdb = MotifDb #()
508
+  matrix.agl3 = subset(mdb, dataSource=='JASPAR_CORE' & organism=='Athaliana' & geneSymbol=='AGL3')[[1]]
509
+  checkEquals(dim(matrix.agl3), c(4, 10))
505 510
 
506 511
     # a transposed frequency is needed for meme.  we store normalized frequency matrices, to just transposition is needed
507
-  tfm = MotifDb:::transformMatrixToMemeRepresentation (matrix.agl3)
508
-  checkEquals (dim (tfm), c (10, 4))
509
-  checkTrue (all (round (rowSums (tfm)) == 1.0))
512
+  tfm = MotifDb:::transformMatrixToMemeRepresentation(matrix.agl3)
513
+  checkEquals(dim(tfm), c(10, 4))
514
+  checkTrue(all(round(rowSums(tfm)) == 1.0))
510 515
 
511 516
 } # test.transformMatrixToMemeRepresentation
512 517
 #------------------------------------------------------------------------------------------------------------------------
513 518
 # translate a motif matrix from MotifList into text suitable for meme and tomtom input.
514 519
 # choose the top two from UniPROBE.  they reliably have high counts, and easily distinguished normalized frequencies
515
-test.matrixToMemeText = function ()
520
+test.matrixToMemeText = function()
516 521
 {
517
-  print ('--- test.matrixToMemeText')
518
-  sox4 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
519
-  rtg3 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
522
+  print('--- test.matrixToMemeText')
523
+  sox4 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
524
+  rtg3 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
520 525
 
521
-  text.sox4 = MotifDb:::matrixToMemeText (sox4)
522
-     # check these with t (sox4 [[1]])
526
+  text.sox4 = MotifDb:::matrixToMemeText(sox4)
527
+     # check these with t(sox4 [[1]])
523 528
   line1.sox4  = " 0.2457457130  0.1950426500  0.2287887620  0.3304228750"
524 529
   line14.sox4 = " 0.2821643030  0.2286132160  0.1585395830  0.3306828990"
525 530
 
526
-  checkEquals (length (text.sox4), 29)
527
-  checkEquals (text.sox4 [1], "MEME version 4")
528
-  checkEquals (text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
529
-  checkEquals (grep (line1.sox4, text.sox4), 12)
530
-  checkEquals (grep (line14.sox4, text.sox4), 25)
531
+  checkEquals(length(text.sox4), 29)
532
+  checkEquals(text.sox4 [1], "MEME version 4")
533
+  checkEquals(text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
534
+  checkEquals(grep(line1.sox4, text.sox4), 12)
535
+  checkEquals(grep(line14.sox4, text.sox4), 25)
531 536
 
532
-  text.rtg3 = MotifDb:::matrixToMemeText (rtg3)
537
+  text.rtg3 = MotifDb:::matrixToMemeText(rtg3)
533 538
   line1.rtg3  = " 0.3935122858  0.1453016447  0.3308830322  0.1303030373"
534 539
   line20.rtg3 = " 0.2490417648  0.3966478493  0.1083586569  0.2459517291"
535
-  checkEquals (length (text.rtg3), 35)         # 4 trailing empty lines
536
-  checkEquals (text.rtg3 [1], "MEME version 4")
537
-  checkEquals (text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
538
-  checkEquals (grep (line1.rtg3, text.rtg3), 12)
539
-  checkEquals (grep (line20.rtg3, text.rtg3), 31)
540
+  checkEquals(length(text.rtg3), 35)         # 4 trailing empty lines
541
+  checkEquals(text.rtg3 [1], "MEME version 4")
542
+  checkEquals(text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
543
+  checkEquals(grep(line1.rtg3, text.rtg3), 12)
544
+  checkEquals(grep(line20.rtg3, text.rtg3), 31)
540 545
 
541 546
     # now call with both matrices, and see if the right results are returned
542
-  text.both = MotifDb:::matrixToMemeText (c (sox4, rtg3))
543
-  checkEquals (text.both [1], "MEME version 4")
544
-  checkEquals (grep (line1.sox4, text.both), 12)
545
-  checkEquals (grep (line14.sox4, text.both), 25)
546
-  checkEquals (grep (line1.rtg3, text.both),  29)
547
-  checkEquals (grep (line20.rtg3, text.both), 48)
547
+  text.both = MotifDb:::matrixToMemeText(c(sox4, rtg3))
548
+  checkEquals(text.both [1], "MEME version 4")
549
+  checkEquals(grep(line1.sox4, text.both), 12)
550
+  checkEquals(grep(line14.sox4, text.both), 25)
551
+  checkEquals(grep(line1.rtg3, text.both),  29)
552
+  checkEquals(grep(line20.rtg3, text.both), 48)
548 553
 
549 554
 } # test.matrixToMemeText
550 555
 #------------------------------------------------------------------------------------------------------------------------
551 556
 # translate a motif matrix from MotifList into text suitable for meme and tomtom input.
552 557
 # choose the top two from UniPROBE.  they reliably have high counts, and easily distinguished normalized frequencies
553
-#test.matrixToMemeText_mapVersion = function ()
558
+#test.matrixToMemeText_mapVersion = function()
554 559
 #{
555
-#  print ('--- test.matrixToMemeText_mapVersion')
556
-#  sox4 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
557
-#  rtg3 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
560
+#  print('--- test.matrixToMemeText_mapVersion')
561
+#  sox4 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
562
+#  rtg3 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
558 563
 #
559
-#  text.sox4 = MotifDb:::mapped.broken.matrixToMemeText (sox4)
560
-#  text.rtg3 = MotifDb:::mapped.broken.matrixToMemeText (rtg3)
561
-#  text.both = MotifDb:::mapped.broken.matrixToMemeText (c (sox4, rtg3))
564
+#  text.sox4 = MotifDb:::mapped.broken.matrixToMemeText(sox4)
565
+#  text.rtg3 = MotifDb:::mapped.broken.matrixToMemeText(rtg3)
566
+#  text.both = MotifDb:::mapped.broken.matrixToMemeText(c(sox4, rtg3))
562 567
 #
563
-#     # check these with t (sox4 [[1]])
568
+#     # check these with t(sox4 [[1]])
564 569
 #  line1.sox4  = " 0.2457457130  0.1950426500  0.2287887620  0.3304228750"
565 570
 #  line14.sox4 = " 0.2821643030  0.2286132160  0.1585395830  0.3306828990"
566 571
 #
567
-#  checkEquals (length (text.sox4), 29)
568
-#  checkEquals (text.sox4 [1], "MEME version 4")
569
-#  checkEquals (text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
570
-#  checkEquals (grep (line1.sox4, text.sox4), 12)
571
-#  checkEquals (grep (line14.sox4, text.sox4), 25)
572
+#  checkEquals(length(text.sox4), 29)
573
+#  checkEquals(text.sox4 [1], "MEME version 4")
574
+#  checkEquals(text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
575
+#  checkEquals(grep(line1.sox4, text.sox4), 12)
576
+#  checkEquals(grep(line14.sox4, text.sox4), 25)
572 577
 #
573 578
 #  line1.rtg3  = " 0.3935122858  0.1453016447  0.3308830322  0.1303030373"
574 579
 #  line20.rtg3 = " 0.2490417648  0.3966478493  0.1083586569  0.2459517291"
575
-#  checkEquals (length (text.rtg3), 35)         # 4 trailing empty lines
576
-#  checkEquals (text.rtg3 [1], "MEME version 4")
577
-#  checkEquals (text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
578
-#  checkEquals (grep (line1.rtg3, text.rtg3), 12)
579
-#  checkEquals (grep (line20.rtg3, text.rtg3), 31)
580
+#  checkEquals(length(text.rtg3), 35)         # 4 trailing empty lines
581
+#  checkEquals(text.rtg3 [1], "MEME version 4")
582
+#  checkEquals(text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
583
+#  checkEquals(grep(line1.rtg3, text.rtg3), 12)
584
+#  checkEquals(grep(line20.rtg3, text.rtg3), 31)
580 585
 #
581 586
 #    # now call with both matrices, and see if the right results are returned
582
-#  checkEquals (text.both [1], "MEME version 4")
583
-#  checkEquals (grep (line1.sox4, text.both), 12)
584
-#  checkEquals (grep (line14.sox4, text.both), 25)
585
-#  checkEquals (grep (line1.rtg3, text.both),  29)
586
-#  checkEquals (grep (line20.rtg3, text.both), 48)
587
+#  checkEquals(text.both [1], "MEME version 4")
588
+#  checkEquals(grep(line1.sox4, text.both), 12)
589
+#  checkEquals(grep(line14.sox4, text.both), 25)
590
+#  checkEquals(grep(line1.rtg3, text.both),  29)
591
+#  checkEquals(grep(line20.rtg3, text.both), 48)
587 592
 #
588 593
 #} # test.matrixToMemeText_mapVersion
589 594
 #------------------------------------------------------------------------------------------------------------------------
590
-test.export_memeFormatStdOut = function ()
595
+test.export_memeFormatStdOut = function()
591 596
 {
592
-  print ('--- test.export_memeFormatStdOut')
593
-  mdb = MotifDb # ()
594
-  mdb.chicken = subset (mdb, organism=='Gallus')
595
-  checkEquals (length (mdb.chicken), 3)
597
+  print('--- test.export_memeFormatStdOut')
598
+  mdb = MotifDb #()
599
+  mdb.chicken = subset(mdb, organism=='Gallus')
600
+  checkEquals(length(mdb.chicken), 3)
596 601
     # text is cat-ed to stdout, so not avaialable here to check.
597 602
     # but just like print, export also returns the text invisibly.
598 603
     # so that CAN be checked.
599 604
 
600
-  meme.text = export (mdb.chicken, format='meme')
601
-  checkEquals (length (meme.text), 1)   # just one long string
602
-  checkTrue (is.character (meme.text))
603
-  checkTrue (nchar (meme.text) > 800)   # 1002 as of (10 aug 2012)
604
-  return (TRUE)
605
+  meme.text = export(mdb.chicken, format='meme')
606
+  checkEquals(length(meme.text), 1)   # just one long string
607
+  checkTrue(is.character(meme.text))
608
+  checkTrue(nchar(meme.text) > 800)   # 1002 as of(10 aug 2012)
609
+  return(TRUE)
605 610
 
606 611
 } # test.exportMemeFormatToStdOut
607 612
 #------------------------------------------------------------------------------------------------------------------------
608
-test.export_memeFormatToFile = function ()
613
+test.export_memeFormatToFile = function()
609 614
 {
610
-  print ('--- test.export_memeFormatToFile')
611
-  mdb = MotifDb # ()
612
-  mdb.chicken = subset (mdb, organism=='Gallus')
613
-  checkEquals (length (mdb.chicken), 3)
614
-  output.file = tempfile ()
615
-  meme.text = export (mdb.chicken, output.file, 'meme')
616
-  retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
617
-  invisible (retrieved)
615
+  print('--- test.export_memeFormatToFile')
616
+  mdb = MotifDb #()
617
+  mdb.chicken = subset(mdb, organism=='Gallus')
618
+  checkEquals(length(mdb.chicken), 3)
619
+  output.file = tempfile()
620
+  meme.text = export(mdb.chicken, output.file, 'meme')
621
+  retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
622
+  invisible(retrieved)
618 623
 
619 624
 } # test.exportMemeFormatToFile
620 625
 #------------------------------------------------------------------------------------------------------------------------
621
-test.export_memeFormatToFileDuplication = function ()
626
+test.export_memeFormatToFileDuplication = function()
622 627
 {
623
-  print ('--- test.export_memeFormatToFileDuplication')
624
-  mdb = MotifDb # ()
625
-  mdb.mouse = subset (mdb, organism=='Mmusculus')
628
+  print('--- test.export_memeFormatToFileDuplication')
629
+  mdb = MotifDb #()
630
+  mdb.mouse = subset(mdb, organism=='Mmusculus')
626 631
   checkTrue(length(mdb.mouse) > 1300)
627
-  output.file = 'mouse.txt' # tempfile ()
632
+  output.file = 'mouse.txt' # tempfile()
628 633
   max = 3
629
-  meme.text = export (mdb.mouse [1:max], output.file, 'meme')
630
-  retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
631
-  invisible (retrieved)
634
+  meme.text = export(mdb.mouse [1:max], output.file, 'meme')
635
+  retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
636
+  invisible(retrieved)
632 637
 
633 638
 } # test.exportMemeFormatToFileDuplication
634 639
 #------------------------------------------------------------------------------------------------------------------------
635
-test.export_memeFormatToFile_run_tomtom = function (max=50)
640
+test.export_memeFormatToFile_run_tomtom = function(max=50)
636 641
 {
637
-  if (interactive ()) {
638
-    print ('--- test.export_memeFormatToFile_run_tomtom')
642
+  if(interactive()) {
643
+    print('--- test.export_memeFormatToFile_run_tomtom')
639 644
     mdb = MotifDb
640
-    sox4.mouse = subset (mdb, organism=='Mmusculus' & geneSymbol=='Sox4')
641
-    all.human = subset (mdb, organism=='Hsapiens')
645
+    sox4.mouse = subset(mdb, organism=='Mmusculus' & geneSymbol=='Sox4')
646
+    all.human = subset(mdb, organism=='Hsapiens')
642 647
 
643
-    tomtom.tmp.dir = tempfile ()
644
-    print (tomtom.tmp.dir)
645
-    stopifnot (dir.create (tomtom.tmp.dir))
646
-    sox4.file.path = file.path (tomtom.tmp.dir, 'sox4.mouse.text')
647
-    all.human.file.path = file.path (tomtom.tmp.dir,'all.human.text')
648
-    export (sox4.mouse, sox4.file.path, 'meme')
649
-    export (all.human, all.human.file.path, 'meme')
648
+    tomtom.tmp.dir = tempfile()
649
+    print(tomtom.tmp.dir)
650
+    stopifnot(dir.create(tomtom.tmp.dir))
651
+    sox4.file.path = file.path(tomtom.tmp.dir, 'sox4.mouse.text')
652
+    all.human.file.path = file.path(tomtom.tmp.dir,'all.human.text')
653
+    export(sox4.mouse, sox4.file.path, 'meme')
654
+    export(all.human, all.human.file.path, 'meme')
650 655
 
651 656
        # find similarity of motif #1 to all the motifs in mdbMany
652 657
 
653 658
        # cannot rely upon tomtom being present
654 659
 
655
-    #cmd = sprintf ('tomtom -no-ssc -oc %s -verbosity 3 -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10 %s %s',
660
+    #cmd = sprintf('tomtom -no-ssc -oc %s -verbosity 3 -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10 %s %s',
656 661
     #                tomtom.tmp.dir, sox4.file.path, all.human.file.path)
657
-    #system (cmd)
658
-    #cmd = sprintf ('open %s/tomtom.html', tomtom.tmp.dir)
659
-    #system (cmd)
662
+    #system(cmd)
663
+    #cmd = sprintf('open %s/tomtom.html', tomtom.tmp.dir)
664
+    #system(cmd)
660 665
     } # if interactive
661 666
 
662 667
 } # test.export_memeFormatToFile_run_tomtom
663 668
 #------------------------------------------------------------------------------------------------------------------------
664 669
 # MotIV::motifMatch fails with MotIV_1.25.0.  will look into this in September, 2015, pshannon
665
-test.run_MotIV.motifMatch = function ()
670
+# MotIV abandoned, all of my own motif/sequence matching is done via an independently installed
671
+# FIMO from the meme suite.
672
+# another option is the MOODs motif matching capability provided by the bioc package "motifmatchr"
673
+# this possibility, and these tests, are deferred for now.
674
+disabled_test.run_MotIV.motifMatch = function()
666 675
 {
667
-  library (MotIV)
668
-  print ('--- test.run_MotIV.motifMatch')
669
-  mdb <- MotifDb # ()
676
+  require(MotIV)
677
+  print('--- test.run_MotIV.motifMatch')
678
+  mdb <- MotifDb #()
670 679
 
671 680
   db.tmp <- mdb@listData
672 681
 
673 682
      # match motif 1 against the entire MotifDb  collection
674
-  motif.hits <- motifMatch (db.tmp [1], database<-db.tmp)
683
+  motif.hits <- motifMatch(db.tmp [1], database<-db.tmp)
675 684
      # the long way to extract the matrix name.  see MotIV.toTable below for more convenient way
676
-  checkEquals (motif.hits@bestMatch[[1]]@aligns[[1]]@TF@name, names (db.tmp)[1])
685
+  checkEquals(motif.hits@bestMatch[[1]]@aligns[[1]]@TF@name, names(db.tmp)[1])
677 686
 
678 687
      # match the last motif against all
679 688
   last <- length(db.tmp)
680 689
      # MotIV:motifMatch works differently on linux and macos.  by asking for 50 matches,
681
-     # the search target (db.tmp[last]) is sure to be in the hit list.
682
-  motif.hits <-  motifMatch (db.tmp [last], database=db.tmp, top=50)
683
-  tbl.hits <- MotIV.toTable (motif.hits)
690
+     # the search target(db.tmp[last]) is sure to be in the hit list.
691
+  motif.hits <-  motifMatch(db.tmp [last], database=db.tmp, top=50)
692
+  tbl.hits <- MotIV.toTable(motif.hits)
684 693
     # the 5 hits return should include the one we tried to match, but the MotIV search strategy
685 694
     # may not place it first
686 695
   checkTrue(names(db.tmp[last]) %in% tbl.hits$name)
687
-  invisible (tbl.hits)
696
+  invisible(tbl.hits)
688 697
 
689
-} # test.run_MotIV.motifMatch
698
+} # distable_test.run_MotIV.motifMatch
690 699
 #------------------------------------------------------------------------------------------------------------------------
691
-MotIV.toTable = function (match)
700
+MotIV.toTable = function(match)
692 701
 {
693
-  stopifnot (length (match@bestMatch) >= 1)
702
+  stopifnot(length(match@bestMatch) >= 1)
694 703
   alignments = match@bestMatch[[1]]@aligns
695 704
 
696
-  df = data.frame (stringsAsFactors=FALSE)
697
-  for (alignment in alignments) {
705
+  df = data.frame(stringsAsFactors=FALSE)
706
+  for(alignment in alignments) {
698 707
     x = alignment
699 708
     name = x@TF@name
700 709
     eVal = x@evalue
701 710
     sequence = x@sequence
702 711
     match = x@match
703 712
     strand = x@strand
704
-    df = rbind (df, data.frame (name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
713
+    df = rbind(df, data.frame(name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
705 714
     } # for alignment
706 715
 
707
-  return (df)
716
+  return(df)
708 717
 
709 718
 } # MotIV.toTable
710 719
 #------------------------------------------------------------------------------------------------------------------------
711
-test.MotIV.toTable = function ()
720
+disabled_test.MotIV.toTable = function()
712 721
 {
713
-  print ('--- test.MotIVtoTable')
714
-  mdb = MotifDb # ()
715
-  test.hits = motifMatch (mdb[1]@listData, database=jaspar)
716
-  tbl.hits =  MotIV.toTable (test.hits)
717
-  checkEquals (dim (tbl.hits), c (5, 5))
718
-  checkEquals (sort(colnames (tbl.hits)), sort(c("name", "eVal", "sequence", "match", "strand")))
722
+  print('--- test.MotIVtoTable')
723
+  mdb = MotifDb #()
724
+  test.hits = motifMatch(mdb[1]@listData, database=jaspar)
725
+  tbl.hits =  MotIV.toTable(test.hits)
726
+  checkEquals(dim(tbl.hits), c(5, 5))
727
+  checkEquals(sort(colnames(tbl.hits)), sort(c("name", "eVal", "sequence", "match", "strand")))
719 728
 
720 729
 } # test.MotIV.toTable
721 730
 #------------------------------------------------------------------------------------------------------------------------
722
-pwmMatch.toTable = function (motifMatch) {
723
-   if (length (motifMatch@bestMatch) == 0)
724
-   return (NA)
731
+pwmMatch.toTable = function(motifMatch) {
732
+   if(length(motifMatch@bestMatch) == 0)
733
+   return(NA)
725 734
 
726 735
    df.list = vector("list", length(motifMatch@bestMatch))
727
-   for (k in seq(length(motifMatch@bestMatch)))
736
+   for(k in seq(length(motifMatch@bestMatch)))
728 737
    {
729 738
        alignments = motifMatch@bestMatch[[k]]@aligns
730
-       df = data.frame (stringsAsFactors=FALSE)
731
-       for (alignment in alignments) {
739
+       df = data.frame(stringsAsFactors=FALSE)
740
+       for(alignment in alignments) {
732 741
            x = alignment
733 742
            name = x@TF@name
734 743
            eVal = x@evalue
735 744
            sequence = x@sequence
736 745
            match = x@match
737 746
            strand = x@strand
738
-           df = rbind (df, data.frame (name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
747
+           df = rbind(df, data.frame(name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
739 748
        } # for alignment
740 749
        df.list[[k]]=df
741 750
    }
742 751
    names(df.list) <- names(motifMatch)
743
-   return (df.list)
752
+   return(df.list)
744 753
 
745 754
 } # pwmMatch.toTable
746 755
 #------------------------------------------------------------------------------
747 756
 # Robert Stojnic reports incorrect gene symbols for matrices obtained from
748 757
 # flyFactorSurvey.
749 758
 # the solution was to abandon the original strategy of extracting the
750
-# symbol from the matrix (and file) name.
751
-# now, the flybase importer ("inst/scripts/import/flyFactorSurvey/import.R")
752
-# uses FBgn id (which can be reliably extracted) and uses indpendent
759
+# symbol from the matrix(and file) name.
760
+# now, the flybase importer("inst/scripts/import/flyFactorSurvey/import.R")
761
+# uses FBgn id(which can be reliably extracted) and uses indpendent
753 762
 # data sources to learn the gene symbol.
754 763
 #
755 764
 # robert's email:
... ...
@@ -776,7 +785,7 @@ pwmMatch.toTable = function (motifMatch) {
776 785
 #
777 786
 test.flyFactorGeneSymbols <- function()
778 787
 {
779
-    print ("--- test.flyFactorGeneSymbols")
788
+    print("--- test.flyFactorGeneSymbols")
780 789
     mdb = MotifDb
781 790
     checkEquals(sort(mcols(query(mdb, "FBgn0259750"))$geneSymbol),
782 791
                 sort(c("FBgn0259750", "FBgn0259750")))
... ...
@@ -785,34 +794,34 @@ test.flyFactorGeneSymbols <- function()
785 794
 
786 795
 } # test.flyFactorGeneSymbols
787 796
 #-------------------------------------------------------------------------------
788
-test.export_jasparFormatStdOut = function ()
797
+test.export_jasparFormatStdOut = function()
789 798
 {
790
-  print ('--- test.export_jasparFormatStdOut')
791
-  mdb = MotifDb # ()
792
-  mdb.chicken = subset (mdb, organism=='Gallus')
793
-  checkEquals (length (mdb.chicken), 3)
799
+  print('--- test.export_jasparFormatStdOut')
800
+  mdb = MotifDb #()
801
+  mdb.chicken = subset(mdb, organism=='Gallus')
802
+  checkEquals(length(mdb.chicken), 3)
794 803
     # text is cat-ed to stdout, so not avaialable here to check.
795 804
     # but just like print, export also returns the text invisibly.
796 805
     # so that CAN be checked.
797 806
 
798
-  jaspar.text = export (mdb.chicken, format='jaspar')
799
-  checkEquals (length (jaspar.text), 1)   # just one long string
800
-  checkTrue (is.character (jaspar.text))
801
-  checkTrue (nchar (jaspar.text) > 800)   # 1002 as of (10 aug 2012)
802
-  return (TRUE)
807
+  jaspar.text = export(mdb.chicken, format='jaspar')
808
+  checkEquals(length(jaspar.text), 1)   # just one long string
809
+  checkTrue(is.character(jaspar.text))
810
+  checkTrue(nchar(jaspar.text) > 800)   # 1002 as of(10 aug 2012)
811
+  return(TRUE)
803 812
 
804 813
 } # test.exportjasparFormatToStdOut
805 814
 #------------------------------------------------------------------------------------------------------------------------
806
-test.export_jasparFormatToFile = function ()
815
+test.export_jasparFormatToFile = function()
807 816
 {
808
-  print ('--- test.export_jasparFormatToFile')
809
-  mdb = MotifDb # ()
810
-  mdb.chicken = subset (mdb, organism=='Gallus')
811
-  checkEquals (length (mdb.chicken), 3)
812
-  output.file = tempfile ()
813
-  jaspar.text = export (mdb.chicken, output.file, 'jaspar')
814
-  retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
815
-  invisible (retrieved)
817
+  print('--- test.export_jasparFormatToFile')
818
+  mdb = MotifDb #()
819
+  mdb.chicken = subset(mdb, organism=='Gallus')
820
+  checkEquals(length(mdb.chicken), 3)
821
+  output.file = tempfile()
822
+  jaspar.text = export(mdb.chicken, output.file, 'jaspar')
823
+  retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
824
+  invisible(retrieved)
816 825
 
817 826
 } # test.exportjasparFormatToFile
818 827
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -833,9 +842,9 @@ test.geneToMotif <- function()
833 842
 
834 843
       # MotifDb mode uses the MotifDb metadata, pulled from many sources
835 844
    tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
836
-   checkEquals(dim(tbl.mdb), c(13, 6))
845
+   checkEquals(dim(tbl.mdb), c(14, 6))
837 846
    checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
838
-      # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
847
+      # no recognizable(i.e., jaspar standard) motif name returned by MotifDb metadata
839 848
       # MotifDb for ATF5
840 849
       # todo: compare the MA0110596_1.02 matrix of cisp_1.02 to japar MA0833.1
841 850
 
... ...
@@ -852,8 +861,8 @@ test.geneToMotif <- function()
852 861
 
853 862
 } # test.geneToMotif
854 863
 #------------------------------------------------------------------------------------------------------------------------
855
-# this case discovered (31 jan 2018). when called on a gene/source combination for which there are
856
-# no motifs, i attempted to add the mapping source (either "MotifDb", "TFClass") as a column
864
+# this case discovered(31 jan 2018). when called on a gene/source combination for which there are
865
+# no motifs, i attempted to add the mapping source(either "MotifDb", "TFClass") as a column
857 866
 # to an empty data.frame.  check for that and its fix here
858 867
 test.geneToMotif.oneGene.noMotifs <- function()
859 868
 {
... ...
@@ -863,7 +872,7 @@ test.geneToMotif.oneGene.noMotifs <- function()
863 872
 
864 873
 } # test.geneToMotif.oneGene.noMotifs
865 874
 #------------------------------------------------------------------------------------------------------------------------
866
-# sad to say I do not recall what problem/fix is tested here (pshannon, 23 jan 2018).
875
+# sad to say I do not recall what problem/fix is tested here(pshannon, 23 jan 2018).
867 876
 # however, it demonstrates the variety of results which can be returned by non-jaspar datasets
868 877
 # when using the MotifDb mapping source, and the relative paucity which is sometimes
869 878
 # seen with the TFclass mapper
... ...
@@ -882,9 +891,9 @@ test.geneToMotif.ignore.jasparSuffixes <- function()
882 891
 
883 892
       # MotifDb mode uses the MotifDb metadata, pulled from many sources
884 893
    tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
885
-   checkEquals(dim(tbl.mdb), c(13, 6))
894
+   checkEquals(dim(tbl.mdb), c(14, 6))
886 895
    checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
887
-      # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
896
+      # no recognizable(i.e., jaspar standard) motif name returned by MotifDb metadata
888 897
       # MotifDb for ATF5
889 898
 
890 899
       # compare the MA0110599_1.02 matrix of cisp_1.02 to japar MA0476.1: the identical matrix!
... ...
@@ -972,7 +981,7 @@ test.motifToGene <- function()
972 981
    checkEquals(sort(unique(tbl$geneSymbol)),
973 982
                     c("AR", "RUNX1", "TFAP2A", "TFAP2A(var.3)", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E"))
974 983
 
975
-      # (23 may 2018) found that MotifDb works, but c("MotifDb", "TFClass") does not
984
+      #(23 may 2018) found that MotifDb works, but c("MotifDb", "TFClass") does not
976 985
       # test the fix here
977 986
 
978 987
    motifs <- c("Hsapiens-jolma2013-IRF5-2", "Hsapiens-SwissRegulon-IRF5.SwissRegulon")
... ...
@@ -1066,6 +1075,70 @@ test.associateTranscriptionFactors <- function()
1066 1075
       # now some motif names
1067 1076
 } # test.associateTranscriptionFactors
1068 1077
 #------------------------------------------------------------------------------------------------------------------------
1078
+test.match <- function()
1079
+{
1080
+   printf("--- test.match")
1081
+   gr.region <- GRanges(seqnames="chr1", IRanges(start=47229520, end=47229560))
1082
+   motifs <- query(MotifDb, c("jaspar2018", "ZNF263"))
1083
+   checkEquals(length(motifs), 1)
1084
+   gr.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5)
1085
+   checkEquals(length(gr.match), 1)  # just one motif
1086
+   checkEquals(names(gr.match), names(motifs))
1087
+   checkEquals(length(gr.match[[1]]), 3)
1088
+
1089
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
1090
+   checkEquals(dim(tbl.match), c(3, 7))
1091
+   checkTrue(all(tbl.match$motif == names(motifs)))
1092
+   checkEquals(class(tbl.match$chrom), "character")  # not a factor
1093
+
1094
+   motifs <- query(MotifDb, "ZNF263", c("jaspar2018", "swissregulon"))
1095
+   checkEquals(length(motifs), 2)
1096
+   gr.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5)
1097
+   checkEquals(names(gr.match), names(motifs))
1098
+   checkEquals(as.numeric(lapply(gr.match, length)), c(3, 1))
1099
+
1100
+   tbl.match <-matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
1101
+   checkEquals(dim(tbl.match), c(4, 7))
1102
+   checkEquals(length(unique(tbl.match$motif)), 2)
1103
+   checkEquals(unique(tbl.match$motif), names(motifs))
1104
+   checkEquals(colnames(tbl.match), c("chrom", "start", "end", "width", "strand", "mood.score", "motif_id"))
1105
+
1106
+
1107
+        #------------------------------------------------
1108
+        # now all jaspar2018 human motifs
1109
+        #------------------------------------------------
1110
+
1111
+   motifs <- query(MotifDb, c("jaspar2018", "hsapiens"))
1112
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
1113
+   checkEquals(dim(tbl.match), c(7, 7))
1114
+   checkEquals(sort(unique(tbl.match$motif)),
1115
+               c("Hsapiens-jaspar2018-EWSR1-FLI1-MA0149.1", "Hsapiens-jaspar2018-ZNF263-MA0528.1"))
1116
+
1117
+        #-----------------------------------------------------
1118
+        # now all jaspar2018 human motifs, loosen the pValue
1119
+        #-----------------------------------------------------
1120
+
1121
+   motifs <- query(MotifDb, c("jaspar2018", "hsapiens"))
1122
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-4, fimoDataFrameStyle=TRUE)
1123
+   checkTrue(nrow(tbl.match) > 15)
1124
+
1125
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-3, fimoDataFrameStyle=TRUE)
1126
+   checkTrue(nrow(tbl.match) > 50)
1127
+
1128
+       #-------------------------------------------------------------
1129
+       # now all jaspar2018 and hocomoco human motifs across 10kb
1130
+       #------------------------------------------------------------
1131
+
1132
+   motifs <- query(MotifDb, "hsapiens", orStrings=c("jaspar2018", "hocomoco"))
1133
+   checkTrue(length(motifs) > 1000)
1134
+   gr.region <- GRanges(seqnames="chr1", IRanges(start=47229000, end=47239000))
1135
+
1136
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-7, fimoDataFrameStyle=TRUE)
1137
+   checkTrue(nrow(tbl.match) > 200 && nrow(tbl.match) < 275)
1138
+   checkEquals(order(tbl.match$start), seq_len(nrow(tbl.match)))
1139
+
1140
+} # test.match
1141
+#------------------------------------------------------------------------------------------------------------------------
1069 1142
 findMotifsWithMutuallyExclusiveMappings <- function()
1070 1143
 {
1071 1144
    xtab <- as.data.frame(table(MotifDb@manuallyCuratedGeneMotifAssociationTable$motif))
... ...
@@ -1086,8 +1159,21 @@ findMotifsWithMutuallyExclusiveMappings <- function()
1086 1159
    #  [1]   mdb.genes: AP1, JUN::FOS, FOS::JUN, FOS::JUN
1087 1160
    #  [1]   tfc.genes: FOS, JUN
1088 1161
 
1089
-
1090 1162
 } # findMotifsWithMutuallyExclusiveMappings
1091 1163
 #------------------------------------------------------------------------------------------------------------------------
1164
+test.hocomoco11.with.reliabilityScores <- function()
1165
+{
1166
+   printf("--- test.hocomoco11.with.reliabilityScores")
1167
+
1168
+   checkEquals(length(query(MotifDb, "hocomoco")), 1466)
1169
+   checkEquals(length(query(MotifDb, "hocomocov10")), 1066)
1170
+   checkEquals(length(query(MotifDb, "hocomocov11")), 400)
1171
+
1172
+   checkEquals(length(query(MotifDb, "hocomocov11A")), 181)
1173
+   checkEquals(length(query(MotifDb, "hocomocov11B")),  84)
1174
+   checkEquals(length(query(MotifDb, "hocomocov11C")), 135)
1175
+
1176
+} # test.hocomoco11.with.reliabilityScores
1177
+#------------------------------------------------------------------------------------------------------------------------
1092 1178
 if(!interactive())
1093 1179
    runTests()
1094 1180
new file mode 100644
... ...
@@ -0,0 +1,45 @@
1
+\name{matchMotif}
2
+\alias{matchMotif,MotifList-method}
3
+\alias{matchMotif}
4
+\title{matchMotif}
5
+\description{
6
+A very general search tool, returning all matrices whose metadata, in ANY
7
+column, is matched by the query string.
8
+}
9
+\usage{
10
+\S4method{matchMotif}{MotifList}(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE)
11
+}
12
+\arguments{
13
+  \item{object}{a \code{MotifList} object.}
14
+  \item{motifs}{a \code{MotifList} typically the result of a MotifDb query}
15
+  \item{genomeName}{a \code{character} string vector, e.g. "hg38" or "mm10"}
16
+  \item{regions}{a \code{GRange} specifies regions in which to try to match the motifs}
17
+  \item{pval.cutoff}{a \code{numeric} value, for example, 1e-5}
18
+  \item{fimoDataFrameStyle}{a \code{logical} value, default FALSE}
19
+
20
+}
21
+
22
+\value{
23
+A list of the matrices
24
+}
25
+\author{Paul Shannon}
26
+
27
+\examples{
28
+   motifs <- query(MotifDb, "ZNF263", c("jaspar2018", "swissregulon"))
29
+   gr.region <- GRanges(seqnames="chr1", IRanges(start=47229520, end=47229560))
30
+   gr.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5)
31
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
32
+}
33
+\seealso{
34
+  MotifDb,
35
+  query,
36
+  subset,
37
+  export,
38
+  flyFactorSurvey,
39
+  hPDI,
40
+  jaspar,
41
+  ScerTF,
42
+  uniprobe
43
+}
44
+
45
+\keyword{utilities}
0 46
Binary files a/vignettes/MotifDb-egr1.pdf and b/vignettes/MotifDb-egr1.pdf differ
... ...
@@ -2,7 +2,7 @@
2 2
 %% %\VignetteIndexEntry{MotifDb Overview}
3 3
 %% %\VignettePackage{MotifDb}
4 4
 \usepackage[noae]{Sweave}
5
-\usepackage[left=0.5in,top=0.5in,right=0.5in,bottom=0.75in,nohead,nofoot]{geometry} 
5
+\usepackage[left=0.5in,top=0.5in,right=0.5in,bottom=0.75in,nohead,nofoot]{geometry}
6 6
 \usepackage{hyperref}
7 7
 \usepackage[noae]{Sweave}
8 8
 \usepackage{color}
... ...
@@ -30,16 +30,16 @@
30 30
 
31 31
 \renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
32 32
 \fvset{listparameters={\setlength{\topsep}{6pt}}}
33
-% These determine the rules used to place floating objects like figures 
33
+% These determine the rules used to place floating objects like figures
34 34
 % They are only guides, but read the manual to see the effect of each.
35 35
 \renewcommand{\topfraction}{.99}
36 36
 \renewcommand{\bottomfraction}{.99}
37 37
 \renewcommand{\textfraction}{0.0}
38 38
 
39
-\title{MotifDb} 
39
+\title{MotifDb}
40 40
 \author{Paul Shannon}
41 41
 
42
-\begin{document} 
42
+\begin{document}
43 43
 
44 44
 \maketitle
45 45
 \begin{abstract}
... ...
@@ -52,7 +52,7 @@ described as \emph{motifs} represented numerically as frequency matrices, and vi
52 52
 in current research, there has been until now no single, annotated, comprehensive collection of publicly available motifs.
53 53
 The current package provides such a collection, offering more than two thousand annotated matrices from multiple organisms, within the
54 54
 context of the Bioconductor project.  The matrices can be filtered and selected on the basis of their metadata, used with other
55
-Bioconductor packages (MotIV for motif comparison, seqLogo for visualization) or easily exported for use with 
55
+Bioconductor packages (MotIV for motif comparison, seqLogo for visualization) or easily exported for use with
56 56
 standard software and websites such as those provided by the MEME Suite\footnote{http://meme.sdsc.edu/meme/doc/meme.html}.
57 57
 \end{abstract}
58 58
 
... ...
@@ -75,25 +75,31 @@ The first step is to load the necessary packages:
75 75
 %% names, and the rownames of the metadata table, are identical, so it is easy to map back and forth between
76 76
 %% the two.  Some measure of convenience is gained by extracting these two kinds of data into separate variables,
77 77
 %% as we shall see.  The cost in extra memory should not significant.
78
-%% 
78
+%%
79 79
 %% <<all.matrices>>=
80 80
 %% matrices.all = as.list (MotifDb)
81 81
 %% metadata <- values (MotifDb)
82
-%% @ 
82
+%% @
83 83
 There are  more than two thousand  matrices, from five sources:
84 84
 \begin{Schunk}
85 85
 \begin{Sinput}
86 86
 > length (MotifDb)
87 87
 \end{Sinput}
88 88
 \begin{Soutput}
89
-[1] 2086
89
+[1] 10333
90 90
 \end{Soutput}
91 91
 \begin{Sinput}
92 92
 > sort (table (values (MotifDb)$dataSource), decreasing=TRUE)
93 93
 \end{Sinput}
94 94
 \begin{Soutput}
95
-FlyFactorSurvey     JASPAR_CORE            hPDI        UniPROBE          ScerTF 
96
-            614             459             437             380             196 
95
+     jaspar2018      jaspar2016     HOCOMOCOv10      cisbp_1.02       jolma2013 
96
+           1564            1209            1066             874             843 
97
+   SwissRegulon         stamlab FlyFactorSurvey     JASPAR_2014     JASPAR_CORE 
98
+            684             683             614             592             459 
99
+           hPDI        UniPROBE           HOMER          ScerTF    HOCOMOCOv11A 
100
+            437             380             332             196             181 
101
+   HOCOMOCOv11C    HOCOMOCOv11B 
102
+            135              84 
97 103
 \end{Soutput}
98 104
 \end{Schunk}
99 105
 And 22 organisms (though the majority of the matrices come from just four):
... ...
@@ -102,16 +108,126 @@ And 22 organisms (though the majority of the matrices come from just four):
102 108
 > sort (table (values (MotifDb)$organism), decreasing=TRUE)
103 109
 \end{Sinput}
104 110
 \begin{Soutput}
105
-Dmelanogaster      Hsapiens   Scerevisiae     Mmusculus   Rnorvegicus 
106
-          739           505           464           329             8 
107
-     Celegans         Zmays     Athaliana    Vertebrata        Amajus 
108
-            7             6             5             4             3 
109
-     Psativum        Gallus   Pfalciparum       Cparvum      Hroretzi 
110
-            3             2             2             1             1 
111
-     Hvulgare   Nsylvestris    Ocuniculus      Phybrida       Rrattus 
112
-            1             1             1             1             1 
113
-    Taestivam       Xlaevis 
114
-            1             1 
111
+                                                                     Hsapiens 
112
+                                                                         5016 
113
+                                                                    Mmusculus 
114
+                                                                         1411 
115
+                                                                Dmelanogaster 
116
+                                                                         1287 
117
+                                                                  Scerevisiae 
118
+                                                                         1051 
119
+                                                                    Athaliana 
120
+                                                                          803 
121
+                                                                     Celegans 
122
+                                                                           90 
123
+                                                                           NA 
124
+                                                                           40 
125
+                                                                  Rnorvegicus 
126
+                                                                           35 
127
+                                                                  Pfalciparum 
128
+                                                                           28 
129
+                                                                        Zmays 
130
+                                                                           27 
131
+                                                                   Vertebrata 
132
+                                                                           18 
133
+                                                                      Ncrassa 
134
+                                                                           15 
135
+                                                                     Psativum 
136
+                                                                           13 
137
+                                                                       Amajus 
138
+                                                                           12 
139
+                                                                  Ddiscoideum 
140
+                                                                            9 
141
+                                                                    Anidulans 
142
+                                                                            8 
143
+                                                                      Ggallus 
144
+                                                                            8 
145
+                                                                      Ppatens 
146
+                                                                            7 
147
+                                                                      Xlaevis 
148
+                                                                            7 
149
+                                               Mmusculus;Rnorvegicus;Hsapiens 
150
+                                                                            6 
151
+                                                                      Osativa 
152
+                                                                            5 
153
+                                                                     Hroretzi 
154
+                                                                            4 
155
+                                                                     Hvulgare 
156
+                                                                            4 
157
+                                                                   Ocuniculus 
158
+                                                                            4 
159
+                                                                     Phybrida 
160
+                                                                            4 
161
+                                                                      Rrattus 
162
+                                                                            4 
163
+                                                                    Taestivam 
164
+                                                                            4 
165
+                                                                       Drerio 
166
+                                                                            3 
167
+                                                                       Gallus 
168
+                                                                            3 
169
+                                                           Mmusculus;Hsapiens 
170
+                                                                            3 
171
+                                                                  Bdistachyon 
172
+                                                                            2 
173
+                                                                      Cparvum 
174
+                                                                            2 
175
+                                                                      Csativa 
176
+                                                                            2 
177
+                                                        Mmusculus;Rnorvegicus 
178
+                                                                            2 
179
+Mmusculus;Rnorvegicus;Xlaevis;Stropicalis;Ggallus;Hsapiens;Btaurus;Ocuniculus 
180
+                                                                            2 
181
+                                                                         Nsp. 
182
+                                                                            2 
183
+                                                                  Nsylvestris 
184
+                                                                            2 
185
+                                                                       Otauri 
186
+                                                                            2 
187
+                                                                Acarolinensis 
188
+                                                                            1 
189
+                                                                       Apisum 
190
+                                                                            1 
191
+                                                                     Aterreus 
192
+                                                                            1 
193
+                                                                   Gaculeatus 
194
+                                                                            1 
195
+                                                                  Hcapsulatum 
196
+                                                                            1 
197
+                                                                   Mdomestica 
198
+                                                                            1 
199
+                                                                   Mgallopavo 
200
+                                                                            1 
201
+                                                                     Mmurinus 
202
+                                                                            1 
203
+                                    Mmusculus;Rnorvegicus;Hsapiens;Ocuniculus 
204
+                                                                            1 
205
+                               Mmusculus;Rnorvegicus;Omykiss;Ggallus;Hsapiens 
206
+                                                                            1 
207
+                                        Mmusculus;Rrattus;Hsapiens;Ocuniculus 
208
+                                                                            1 
209
+                                                                  Mtruncatula 
210
+                                                                            1 
211
+                                                                     Ngruberi 
212
+                                                                            1 
213
+                                                                Nhaematococca 
214
+                                                                            1 
215
+                                                                   Nvectensis 
216
+                                                                            1 
217
+                                                                    Pcapensis 
218
+                                                                            1 
219
+                                                                    Ppygmaeus 
220
+                                                                            1 
221
+                                                                 Ptetraurelia 
222
+                                                                            1 
223
+                                                         Rnorvegicus;Hsapiens 
224
+                                                                            1 
225
+                                                                 Tthermophila 
226
+                                                                            1 
227
+                                                                    Vvinifera 
228
+                                                                            1 
229
+                                                                  Xtropicalis 
230
+                                                                            1 
115 231
 \end{Soutput}
116 232
 \end{Schunk}
117 233
 
... ...
@@ -130,34 +246,51 @@ With these categories of metadata
130 246
 \section{Selection}
131 247
 
132 248
 There are three ways to extract subsets of interest from the MotifDb collection.  All three operate upon the MotifDb metadata,
133
-matching values in one or more of those fifteen attributes (listed just above), and returning the subset of MotifDb  which 
249
+matching values in one or more of those fifteen attributes (listed just above), and returning the subset of MotifDb  which
134 250
 meet the specified criteria.  The three techniques:  \emph{query}, \emph{subset} and \emph{grep}
135 251
 
136 252
 \subsection{query}
137
-This is the simplest technique to use, and will suffice in many circumstances.  For example, if you want 
253
+This is the simplest technique to use, and will suffice in many circumstances.  For example, if you want
138 254
 all of the human matrices:
139 255
 \begin{Schunk}
140 256
 \begin{Sinput}
141 257
 > query (MotifDb, 'hsapiens')
142 258
 \end{Sinput}
143 259
 \begin{Soutput}
144
-MotifDb object of length 505
145
-| Created from downloaded public sources: 2012-Jul6
146
-| 505 position frequency matrices from 3 sources:
260
+MotifDb object of length 5031
261
+| Created from downloaded public sources: 2013-Aug-30
262
+| 5031 position frequency matrices from 14 sources:
263
+|         cisbp_1.02:  313
264
+|        HOCOMOCOv10:  640
265
+|       HOCOMOCOv11A:  181
266
+|       HOCOMOCOv11B:   84
267
+|       HOCOMOCOv11C:  135
268
+|               hPDI:  437
269
+|        JASPAR_2014:  117
147 270
 |        JASPAR_CORE:   66
271
+|         jaspar2016:  442
272
+|         jaspar2018:  537
273
+|          jolma2013:  710
274
+|            stamlab:  683
275
+|       SwissRegulon:  684
148 276
 |           UniPROBE:    2
149
-|               hPDI:  437
150
-| 1 organism/s
151
-|           Hsapiens:  505
152
-Hsapiens-hPDI-ABCF2 
153
-Hsapiens-hPDI-ACF 
154
-Hsapiens-hPDI-ACO1 
155
-Hsapiens-hPDI-ADARB1 
156
-Hsapiens-hPDI-AFF4 
277
+| 8 organism/s
278
+|           Hsapiens: 5016
279
+| Mmusculus;Rnorvegicus;Hsapiens:    6
280
+| Mmusculus;Hsapiens:    3
281
+| Mmusculus;Rnorvegicus;Xlaevis;Stropicalis;Ggallus;Hsapiens;Btaurus;Ocuniculus:    2
282
+| Mmusculus;Rnorvegicus;Hsapiens;Ocuniculus:    1
283
+| Mmusculus;Rnorvegicus;Omykiss;Ggallus;Hsapiens:    1
284
+|              other:    2
285
+Hsapiens-cisbp_1.02-M1838_1.02 
286
+Hsapiens-cisbp_1.02-M1857_1.02 
287
+Hsapiens-cisbp_1.02-M1875_1.02 
288
+Hsapiens-cisbp_1.02-M1880_1.02 
289
+Hsapiens-cisbp_1.02-M1889_1.02 
157 290
 ...
158
-Hsapiens-JASPAR_CORE-SP1-MA0079.2 
159
-Hsapiens-JASPAR_CORE-ESR2-MA0258.1 
160
-Hsapiens-JASPAR_CORE-HIF1A::ARNT-MA0259.1 
291
+Hsapiens-SwissRegulon-ZNF784.SwissRegulon 
292
+Hsapiens-SwissRegulon-ZNF8.SwissRegulon 
293
+Hsapiens-SwissRegulon-ZSCAN4.SwissRegulon 
161 294
 Hsapiens-UniPROBE-Sox4.UP00401 
162 295
 Hsapiens-UniPROBE-Oct_1.UP00399 
163 296
 \end{Soutput}
... ...
@@ -168,22 +301,36 @@ If you want all matrices associated with \textbf{\emph{Sox}} transcription facto
168 301
 > query (MotifDb, 'sox')
169 302
 \end{Sinput}
170 303
 \begin{Soutput}
171
-MotifDb object of length 24
172
-| Created from downloaded public sources: 2012-Jul6
173
-| 24 position frequency matrices from 4 sources:
304
+MotifDb object of length 184
305
+| Created from downloaded public sources: 2013-Aug-30
306
+| 184 position frequency matrices from 14 sources:
174 307
 |    FlyFactorSurvey:    2
308
+|        HOCOMOCOv10:   25
309
+|       HOCOMOCOv11A:    1
310
+|       HOCOMOCOv11B:    5
311
+|       HOCOMOCOv11C:    2
312
+|              HOMER:    9
313
+|               hPDI:    2
314
+|        JASPAR_2014:    8
175 315
 |        JASPAR_CORE:    5
316
+|         jaspar2016:   16
317
+|         jaspar2018:   19
318
+|          jolma2013:   56
319
+|       SwissRegulon:   19
176 320
 |           UniPROBE:   15
177
-|               hPDI:    2
178
-| 3 organism/s
179
-|          Mmusculus:   18
180
-|           Hsapiens:    4
321
+| 7 organism/s
322
+|           Hsapiens:  103
323
+|          Mmusculus:   67
181 324
 |      Dmelanogaster:    2
325
+| Mmusculus;Rnorvegicus;Hsapiens:    1
326
+|        Rnorvegicus:    1
327
+|         Vertebrata:    1
328
+|              other:    9
182 329
 Dmelanogaster-FlyFactorSurvey-Sox14_SANGER_10_FBgn0005612 
183 330
 Dmelanogaster-FlyFactorSurvey-Sox15_SANGER_5_FBgn0005613 
184
-Hsapiens-hPDI-SOX13 
185
-Hsapiens-hPDI-SOX14 
186
-Hsapiens-JASPAR_CORE-SOX9-MA0077.1 
331
+Hsapiens-HOCOMOCOv10-SOX10_HUMAN.H10MO.D 
332
+Hsapiens-HOCOMOCOv10-SOX11_HUMAN.H10MO.D 
333
+Hsapiens-HOCOMOCOv10-SOX13_HUMAN.H10MO.D 
187 334
 ...
188 335
 Mmusculus-UniPROBE-Sox30.UP00023 
189 336
 Mmusculus-UniPROBE-Sox4.UP00062 
... ...
@@ -198,28 +345,31 @@ For all yeast transcription factors with a homeo domain
198 345
 > query (query (MotifDb, 'cerevisiae'), 'homeo')
199 346
 \end{Sinput}
200 347
 \begin{Soutput}
201
-MotifDb object of length 14
202
-| Created from downloaded public sources: 2012-Jul6
203
-| 14 position frequency matrices from 2 sources:
348
+MotifDb object of length 32
349
+| Created from downloaded public sources: 2013-Aug-30
350
+| 32 position frequency matrices from 5 sources:
351
+|        JASPAR_2014:   10
204 352
 |        JASPAR_CORE:   10
353
+|         jaspar2016:    4
354
+|         jaspar2018:    4
205 355
 |           UniPROBE:    4
206 356
 | 1 organism/s
207
-|        Scerevisiae:   14
357
+|        Scerevisiae:   32
358
+Scerevisiae-JASPAR_CORE-CUP9-MA0288.1 
359
+Scerevisiae-JASPAR_CORE-HMRA2-MA0318.1 
360
+Scerevisiae-JASPAR_CORE-MATA1-MA0327.1 
361
+Scerevisiae-JASPAR_CORE-MATALPHA2-MA0328.1 
362
+Scerevisiae-JASPAR_CORE-PHO2-MA0356.1 
363
+...
364
+Scerevisiae-jaspar2018-TOS8-MA0408.1 
208 365
 Scerevisiae-UniPROBE-Cup9.UP00308 
209 366
 Scerevisiae-UniPROBE-Matalpha2.UP00307 
210 367
 Scerevisiae-UniPROBE-Pho2.UP00268 
211 368
 Scerevisiae-UniPROBE-Yox1.UP00274 
212
-Scerevisiae-JASPAR_CORE-CUP9-MA0288.1 
213
-...
214
-Scerevisiae-JASPAR_CORE-STE12-MA0393.1 
215
-Scerevisiae-JASPAR_CORE-TEC1-MA0406.1 
216
-Scerevisiae-JASPAR_CORE-TOS8-MA0408.1 
217
-Scerevisiae-JASPAR_CORE-YHP1-MA0426.1 
218
-Scerevisiae-JASPAR_CORE-YOX1-MA0433.1 
219 369
 \end{Soutput}
220 370
 \end{Schunk}
221 371
 The last example may inspire more confidence in the precision of the result than is justified, and for a couple
222
-of reasons.  First, the assignment of  protein binding domains to specific categories is, as of 2012, an ad hoc 
372
+of reasons.  First, the assignment of  protein binding domains to specific categories is, as of 2012, an ad hoc
223 373
 and incomplete process.  Second, the query commands matches the supplied character string to \emph{all} metadata
224 374
 columns.  In this case, 'homeo' appears both in the \emph{bindingDomain} column and the \emph{tfFamily} column,
225 375
 and the above \emph{query} will return matches from both.
... ...
@@ -241,8 +391,19 @@ illustrate:
241 391
 > unique (grep ('homeo', values(MotifDb)$tfFamily, ignore.case=T, v=T))
242 392
 \end{Sinput}
243 393
 \begin{Soutput}
244
-[1] "Homeo"                                
245
-[2] "Homeo::Nuclear Factor I-CCAAT-binding"
394
+ [1] "HOX-related factors{3.1.1}: CDX (Caudal type homeobox){3.1.1.9}"          
395
+ [2] "TALE-type homeo domain factors{3.1.4}: MEIS{3.1.4.2}"                     
396
+ [3] "Paired domain only{3.2.2}: PAX-2-like factors (partial homeobox){3.2.2.2}"
397
+ [4] "Paired plus homeo domain{3.2.1}: PAX-4/6{3.2.1.2}"                        
398
+ [5] "TALE-type homeo domain factors{3.1.4}: PBX{3.1.4.4}"                      
399
+ [6] "TALE-type homeo domain factors{3.1.4}: PKNOX{3.1.4.5}"                    
400
+ [7] "TALE-type homeo domain factors{3.1.4}: TGIF{3.1.4.6}"                     
401
+ [8] "Homeo"                                                                    
402
+ [9] "Homeo::Nuclear Factor I-CCAAT-binding"                                    
403
+[10] "Homeodomain"                                                              
404
+[11] "Paired plus homeo domain"                                                 
405
+[12] "TALE-type homeo domain factors"                                           
406
+[13] "homeodomain"                                                              
246 407
 \end{Soutput}
247 408
 \end{Schunk}
248 409
 \subsection{grep}
... ...
@@ -308,31 +469,50 @@ There are two of these in MotifDb, both from mouse, and each from a different da
308 469
 
309 470
 \begin{Schunk}
310 471
 \begin{Sinput}
311
->   # subset is convenient: 
472
+>   # subset is convenient:
312 473
 > if (interactive ())
313 474
 +   as.list (subset (MotifDb, tolower (geneSymbol) == 'egr1'))
314 475
 >   # grep returns indices which allow for more flexibility
315
-> indices = grep ('egr1', values (MotifDb)$geneSymbol, ignore.case=TRUE)  
476
+> indices = grep ('egr1', values (MotifDb)$geneSymbol, ignore.case=TRUE)
316 477
 > length (indices)
317 478
 \end{Sinput}
318 479
 \begin{Soutput}
319
-[1] 2
480
+[1] 17
320 481
 \end{Soutput}
321 482
 \end{Schunk}
322
-There are a variety of ways to examine and extract data from this object, a MotifList of length 2.  
483
+There are a variety of ways to examine and extract data from this object, a MotifList of length 2.
323 484
 \begin{Schunk}
324 485
 \begin{Sinput}
325 486
 > MotifDb [indices]
326 487
 \end{Sinput}
327 488
 \begin{Soutput}
328
-MotifDb object of length 2
329
-| Created from downloaded public sources: 2012-Jul6
330
-| 2 position frequency matrices from 2 sources:
489
+MotifDb object of length 17
490
+| Created from downloaded public sources: 2013-Aug-30
491
+| 17 position frequency matrices from 10 sources:
492
+|        HOCOMOCOv10:    3
493
+|       HOCOMOCOv11A:    1
494
+|              HOMER:    1
495
+|        JASPAR_2014:    1
331 496
 |        JASPAR_CORE:    1
497
+|         jaspar2016:    2
498
+|         jaspar2018:    3
499
+|          jolma2013:    3
500
+|       SwissRegulon:    1
332 501
 |           UniPROBE:    1
333
-| 1 organism/s
334
-|          Mmusculus:    2
335
-Mmusculus-JASPAR_CORE-Egr1-MA0162.1 
502
+| 3 organism/s
503
+|           Hsapiens:   10
504
+|          Mmusculus:    6
505
+|              other:    1
506
+Hsapiens-HOCOMOCOv10-EGR1_HUMAN.H10MO.A 
507
+Hsapiens-HOCOMOCOv10-EGR1_HUMAN.H10MO.S 
508
+Mmusculus-HOCOMOCOv10-EGR1_MOUSE.H10MO.A 
509
+Hsapiens-HOCOMOCOv11A-EGR1_HUMAN.H11MO.0.A 
510
+NA-HOMER-Egr1(Zf)/K562-Egr1-ChIP-Seq(GSE32465)/Homer 
511
+...
512
+Hsapiens-jolma2013-EGR1 
513
+Hsapiens-jolma2013-EGR1-2 
514
+Mmusculus-jolma2013-Egr1 
515
+Hsapiens-SwissRegulon-EGR1.SwissRegulon 
336 516
 Mmusculus-UniPROBE-Egr1.UP00007 
337 517
 \end{Soutput}
338 518
 \end{Schunk}
... ...
@@ -343,6 +523,61 @@ Now view the matrices as a named list:
343 523
 > as.list (MotifDb [indices])
344 524
 \end{Sinput}
345 525
 \begin{Soutput}
526
+$`Hsapiens-HOCOMOCOv10-EGR1_HUMAN.H10MO.A`
527
+      1     2     3     4     5     6     7     8     9    10    11    12    13
528
+A 0.190 0.208 0.212 0.270 0.222 0.116 0.168 0.042 0.034 0.160 0.008 0.032 0.262
529
+C 0.192 0.206 0.144 0.140 0.074 0.082 0.484 0.042 0.008 0.006 0.000 0.038 0.452
530
+G 0.438 0.446 0.452 0.468 0.380 0.756 0.050 0.808 0.452 0.804 0.976 0.914 0.006
531
+T 0.180 0.140 0.192 0.122 0.324 0.046 0.298 0.108 0.506 0.030 0.016 0.016 0.280
532
+     14    15    16    17    18
533
+A 0.180 0.072 0.236 0.278 0.218
534
+C 0.012 0.012 0.092 0.098 0.184
535
+G 0.750 0.774 0.534 0.458 0.490
536
+T 0.058 0.142 0.138 0.166 0.108
537
+
538
+$`Hsapiens-HOCOMOCOv10-EGR1_HUMAN.H10MO.S`
539
+          1          2          3           4           5          6
540
+A 0.1515633 0.04398516 0.05988341 0.003709592 0.009538951 0.07578166
541
+C 0.1886592 0.06041335 0.82829889 0.001059883 0.013248543 0.01907790
542
+G 0.3184950 0.88288288 0.01854796 0.993640700 0.490726020 0.90196078
543
+T 0.3412825 0.01271860 0.09326974 0.001589825 0.486486486 0.00317965
544
+            7          8           9          10         11
545
+A 0.025437202 0.01218866 0.089030207 0.021727610 0.09062003
546
+C 0.065712772 0.01006889 0.748277689 0.007419184 0.06518283
547
+G 0.900900901 0.97774245 0.007419184 0.955484897 0.60943296
548
+T 0.007949126 0.00000000 0.155272920 0.015368309 0.23476418
549
+
550
+$`Mmusculus-HOCOMOCOv10-EGR1_MOUSE.H10MO.A`
551
+          1          2          3           4           5          6
552
+A 0.1515633 0.04398516 0.05988341 0.003709592 0.009538951 0.07578166
553
+C 0.1886592 0.06041335 0.82829889 0.001059883 0.013248543 0.01907790
554
+G 0.3184950 0.88288288 0.01854796 0.993640700 0.490726020 0.90196078
555
+T 0.3412825 0.01271860 0.09326974 0.001589825 0.486486486 0.00317965
556
+            7          8           9          10         11
557
+A 0.025437202 0.01218866 0.089030207 0.021727610 0.09062003
558
+C 0.065712772 0.01006889 0.748277689 0.007419184 0.06518283
559
+G 0.900900901 0.97774245 0.007419184 0.955484897 0.60943296
560
+T 0.007949126 0.00000000 0.155272920 0.015368309 0.23476418
561
+
562
+$`Hsapiens-HOCOMOCOv11A-EGR1_HUMAN.H11MO.0.A`
563
+      1     2     3     4     5     6     7     8     9    10    11    12    13
564
+A 0.232 0.320 0.278 0.236 0.112 0.236 0.056 0.032 0.092 0.016 0.024 0.220 0.088
565
+C 0.248 0.128 0.180 0.112 0.030 0.506 0.034 0.004 0.002 0.004 0.032 0.476 0.010
566
+G 0.410 0.318 0.460 0.354 0.808 0.046 0.870 0.410 0.892 0.958 0.930 0.004 0.842
567
+T 0.110 0.234 0.082 0.298 0.050 0.212 0.040 0.554 0.014 0.022 0.014 0.300 0.060
568
+     14    15    16    17
569
+A 0.110 0.338 0.312 0.264
570
+C 0.022 0.128 0.082 0.182
571
+G 0.696 0.406 0.456 0.436
572
+T 0.172 0.128 0.150 0.118
573
+
574
+$`NA-HOMER-Egr1(Zf)/K562-Egr1-ChIP-Seq(GSE32465)/Homer`
575
+      1     2     3     4     5     6     7     8     9    10
576
+A 0.128 0.078 0.154 0.001 0.001 0.027 0.001 0.001 0.153 0.034
577
+C 0.072 0.036 0.523 0.001 0.001 0.001 0.002 0.001 0.415 0.002
578
+G 0.142 0.882 0.023 0.997 0.282 0.971 0.973 0.997 0.010 0.940
579
+T 0.658 0.004 0.300 0.001 0.716 0.001 0.024 0.001 0.422 0.024
580
+
346 581
 $`Mmusculus-JASPAR_CORE-Egr1-MA0162.1`
347 582
            1          2         3 4   5   6          7 8         9 10
348 583
 A 0.20000000 0.13333333 0.0000000 0 0.0 0.2 0.06666667 0 0.1333333  0
... ...
@@ -355,6 +590,146 @@ C 0.00000000
355 590
 G 0.46666667
356 591
 T 0.46666667
357 592
 
593
+$`Hsapiens-JASPAR_2014-EGR1-MA0162.2`
594
+           1         2          3          4          5         6 7          8