Browse code

provisional addition of hocomoco v1, with reliability scores, A-D

paul-shannon authored on 10/03/2020 00:06:57
Showing 10 changed files

... ...
@@ -1,3 +1,7 @@
1
+misc/hocomoco-v11/HOCOMOCOv11_core_HUMAN_mono_jaspar_format.txt
2
+misc/hocomoco-v11/HOCOMOCOv11_core_annotation_HUMAN_mono.tsv
3
+misc/hocomoco-v11/hocomoco11.RData
4
+
1 5
 Meta
2 6
 doc
3 7
 # History files
... ...
@@ -1,13 +1,13 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.29.0
5
-Date: 2018-10-24
4
+Version: 1.30.1
5
+Date: 2020-03-09
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8
-Depends: R (>= 3.5.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings
8
+Depends: R (>= 3.5.0), methods, BiocGenerics, S4Vectors, IRanges, GenomicRanges, Biostrings
9 9
 Suggests: RUnit
10
-Imports: rtracklayer, splitstackshape
10
+Imports: rtracklayer, splitstackshape, TFBSTools, universalmotif, motifmatchr
11 11
 Description: More than 9900 annotated position frequency matrices from 14 public sources, for multiple organisms.
12 12
 License: Artistic-2.0 | file LICENSE
13 13
 License_is_FOSS: no
... ...
@@ -4,6 +4,7 @@ exportMethods (
4 4
    export,
5 5
    show,
6 6
    query,
7
+   matchMotif,
7 8
    motifToGene,
8 9
    geneToMotif,
9 10
    associateTranscriptionFactors
... ...
@@ -13,9 +14,15 @@ export(
13 14
     MotifDb
14 15
 )
15 16
 
17
+import(motifmatchr)
18
+import(TFBSTools)
19
+importFrom(universalmotif, convert_motifs)
20
+
21
+
16 22
 import(BiocGenerics)
17 23
 import(S4Vectors)
18 24
 import(IRanges)
25
+import(GenomicRanges)
19 26
 importFrom(rtracklayer, export)
20 27
 import(Biostrings)
21 28
 import(methods)
... ...
@@ -4,6 +4,8 @@ setGeneric('geneToMotif', signature='object', function(object, geneSymbols, sour
4 4
 setGeneric('associateTranscriptionFactors', signature='object',
5 5
            function(object, tbl.withMotifs,  source, expand.rows, motifColumnName="motifName")
6 6
               standardGeneric('associateTranscriptionFactors'))
7
+setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff,
8
+                                                      fimoDataFrameStyle=FALSE) standardGeneric('matchMotif'))
7 9
 #------------------------------------------------------------------------------------------------------------------------
8 10
 setClass ('MotifList',
9 11
           contains='SimpleList',
... ...
@@ -61,6 +63,30 @@ MotifList = function (matrices=list(), tbl.metadata=data.frame ())
61 63
   object
62 64
 
63 65
 } # ctor
66
+#-------------------------------------------------------------------------------
67
+setMethod('matchMotif', signature='MotifList',
68
+
69
+   function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){
70
+     motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix"))
71
+     motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix)
72
+     gr.list <- matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff)
73
+     result <- gr.list
74
+     if(fimoDataFrameStyle){
75
+        gr <- unlist(gr.list)
76
+        motif.names <- names(gr)
77
+        names(gr) <- NULL
78
+        tbl <- as.data.frame(gr)
79
+        tbl$motif_id <- motif.names
80
+        colnames(tbl)[1] <- "chrom"
81
+        tbl$chrom <- as.character(tbl$chrom)
82
+        colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score"
83
+        new.order <- order(tbl$start, decreasing=FALSE)
84
+        tbl <- tbl[new.order,]
85
+        result <- tbl
86
+        }
87
+     return(result)
88
+     })
89
+
64 90
 #-------------------------------------------------------------------------------
65 91
 setMethod ('subset', signature = 'MotifList',
66 92
 
67 93
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+quick: install
2
+
3
+all:  vig build check
4
+
5
+vig:
6
+	R -e "devtools::build_vignettes()"
7
+
8
+build:
9
+	(cd ..; R CMD build --no-build-vignettes MotifDb)
10
+
11
+install:
12
+	(cd ..; R CMD INSTALL --no-test-load MotifDb)
13
+
14
+check: build
15
+	(cd ..; R CMD check --no-manual --no-build-vignettes --ignore-vignettes `ls -t MotifDb_* | head -1`)
16
+
17
+biocCheck:
18
+	(cd ..; R CMD BiocCheck `ls -t MotifDb_* | head -1`)
19
+
20
+unitTests: test
21
+
22
+test:
23
+	 for x in inst/unitTests/test_*.R; do echo ============== $$x; R -f $$x; done
24
+
25
+site:
26
+	R -e "devtools::build_site()"
0 27
new file mode 100644
1 28
Binary files /dev/null and b/inst/extdata/hocomoco11.RData differ
... ...
@@ -1,43 +1,43 @@
1
-library (MotifDb)
2
-library (RUnit)
3
-library (MotIV)
4
-library (seqLogo)
1
+library(MotifDb)
2
+library(RUnit)
3
+library(MotIV)
4
+library(seqLogo)
5 5
 #----------------------------------------------------------------------------------------------------
6 6
 printf <- function(...) print(noquote(sprintf(...)))
7 7
 #----------------------------------------------------------------------------------------------------
8
-runTests = function ()
8
+runTests = function()
9 9
 {
10
-  test.emptyCtor ()
11
-  test.nonEmptyCtor ()
12
-  test.MotifDb.normalMode ()
13
-  test.MotifDb.emptyMode ()
14
-  test.allMatricesAreNormalized ()
15
-  test.providerNames ()
16
-  test.geneSymbols ()
17
-  test.geneIdsAndTypes ()
18
-  test.proteinIds ()
19
-  test.sequenceCount ()
20
-  test.longNames ()
21
-  test.organisms ()
22
-  test.bindingDomains ()
23
-  test.experimentTypes ()
24
-  test.tfFamilies ()
25
-  test.bindingSequences ()
26
-  test.flyBindingDomains ()
27
-  test.pubmedIDs ()
28
-  test.allFullNames ()
29
-  test.subset ()
30
-  test.subsetWithVariables ()
31
-  test.queryOldStyle ()
10
+  test.emptyCtor()
11
+  test.nonEmptyCtor()
12
+  test.MotifDb.normalMode()
13
+  test.MotifDb.emptyMode()
14
+  test.allMatricesAreNormalized()
15
+  test.providerNames()
16
+  test.geneSymbols()
17
+  test.geneIdsAndTypes()
18
+  test.proteinIds()
19
+  test.sequenceCount()
20
+  test.longNames()
21
+  test.organisms()
22
+  test.bindingDomains()
23
+  test.experimentTypes()
24
+  test.tfFamilies()
25
+  test.bindingSequences()
26
+  test.flyBindingDomains()
27
+  test.pubmedIDs()
28
+  test.allFullNames()
29
+  test.subset()
30
+  test.subsetWithVariables()
31
+  test.queryOldStyle()
32 32
   test.query()
33
-  test.transformMatrixToMemeRepresentation ()
34
-  test.matrixToMemeText ()
35
-  test.export_memeFormatStdOut ()
36
-  test.export_memeFormatToFile ()
37
-  test.export_memeFormatToFileDuplication ()
38
-  test.export_memeFormatToFile_run_tomtom ()
39
-  test.MotIV.toTable ()
40
-  test.run_MotIV.motifMatch()
33
+  test.transformMatrixToMemeRepresentation()
34
+  test.matrixToMemeText()
35
+  test.export_memeFormatStdOut()
36
+  test.export_memeFormatToFile()
37
+  test.export_memeFormatToFileDuplication()
38
+  test.export_memeFormatToFile_run_tomtom()
39
+  #test.MotIV.toTable()
40
+  #test.run_MotIV.motifMatch()
41 41
   test.flyFactorGeneSymbols()
42 42
   test.export_jasparFormatStdOut()
43 43
   test.export_jasparFormatToFile()
... ...
@@ -46,27 +46,30 @@ runTests = function ()
46 46
   test.geneToMotif.ignore.jasparSuffixes()
47 47
   test.geneToMotif.oneGene.noMotifs
48 48
   test.motifToGene()
49
-
50 49
   test.associateTranscriptionFactors()
51 50
 
51
+  test.match()
52
+
53
+  test.hocomoco11.with.reliabilityScores()
54
+
52 55
 } # runTests
53 56
 #------------------------------------------------------------------------------------------------------------------------
54
-test.emptyCtor = function ()
57
+test.emptyCtor = function()
55 58
 {
56
-  print ('--- test.emptyCtor')
57
-  motif.list = MotifDb:::MotifList ()
58
-  checkEquals (length (motif.list), 0)
59
+  print('--- test.emptyCtor')
60
+  motif.list = MotifDb:::MotifList()
61
+  checkEquals(length(motif.list), 0)
59 62
 
60 63
 } # test.emptyCtor
61 64
 #------------------------------------------------------------------------------------------------------------------------
62
-test.nonEmptyCtor = function ()
65
+test.nonEmptyCtor = function()
63 66
 {
64
-  print ('--- test.nonEmptyCtor')
65
-  mtx = matrix (runif (20), nrow=4, ncol=5, byrow=T, dimnames=list(c ('A', 'C', 'G', 'T'), as.character (1:5)))
66
-  mtx.normalized = apply (mtx, 2, function (colvector) colvector / sum (colvector))
67
-  matrixList = list (mtx.normalized)
67
+  print('--- test.nonEmptyCtor')
68
+  mtx = matrix(runif(20), nrow=4, ncol=5, byrow=T, dimnames=list(c('A', 'C', 'G', 'T'), as.character(1:5)))
69
+  mtx.normalized = apply(mtx, 2, function(colvector) colvector / sum(colvector))
70
+  matrixList = list(mtx.normalized)
68 71
 
69
-  tbl.md = data.frame (providerName='',
72
+  tbl.md = data.frame(providerName='',
70 73
                        providerId='',
71 74
                        dataSource='',
72 75
                        geneSymbol='',
... ...
@@ -82,36 +85,36 @@ test.nonEmptyCtor = function ()
82 85
                        experimentType='',
83 86
                        pubmedID='',
84 87
                        stringsAsFactors=FALSE)
85
-  names (matrixList) = 'test'
86
-  rownames (tbl.md) = 'test'
87
-  motif.list = MotifDb:::MotifList (matrixList, tbl.md)
88
-  checkEquals (length (motif.list), 1)
88
+  names(matrixList) = 'test'
89
+  rownames(tbl.md) = 'test'
90
+  motif.list = MotifDb:::MotifList(matrixList, tbl.md)
91
+  checkEquals(length(motif.list), 1)
89 92
 
90 93
 } # test.nonEmptyCtor
91 94
 #------------------------------------------------------------------------------------------------------------------------
92 95
 # 'normal' in that all included data sources are already loaded
93
-test.MotifDb.normalMode = function ()
96
+test.MotifDb.normalMode = function()
94 97
 {
95
-  print ('--- test.MotifDb.normalMode')
98
+  print('--- test.MotifDb.normalMode')
96 99
 
97
-  mdb = MotifDb #  (quiet=TRUE)
98
-    # (5 jun 2012)
100
+  mdb = MotifDb # (quiet=TRUE)
101
+    #(5 jun 2012)
99 102
     # JASPAR_CORE: 459
100 103
     # ScerTF: 196
101 104
     # UniPROBE: 380
102 105
     # FlyFactorSurvey: 614
103 106
     # hPDI: 437
104
-  checkTrue (length (mdb) > 2080)
107
+  checkTrue(length(mdb) > 2080)
105 108
 
106 109
 } # test.MotifDb.normalMode
107 110
 #------------------------------------------------------------------------------------------------------------------------
108 111
 # this mode is not intended for users, but may see use in the future.
109
-test.MotifDb.emptyMode = function ()
112
+test.MotifDb.emptyMode = function()
110 113
 {
111
-  print ('--- test.MotifDb.emptyMode')
114
+  print('--- test.MotifDb.emptyMode')
112 115
 
113
-  mdb = MotifDb:::.MotifDb (loadAllSources=FALSE, quiet=TRUE)
114
-  checkTrue (length (mdb) == 0)
116
+  mdb = MotifDb:::.MotifDb(loadAllSources=FALSE, quiet=TRUE)
117
+  checkTrue(length(mdb) == 0)
115 118
 
116 119
 } # test.MotifDb.emptyMode
117 120
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -122,86 +125,86 @@ test.MotifDb.emptyMode = function ()
122 125
 
123 126
 # Many more NA's exist...need to fix these; here's a quick fix for now
124 127
 
125
-test.noNAorganisms = function ()
128
+test.noNAorganisms = function()
126 129
 
127 130
 {
128
-  print ('--- test.noNAorganisms')
129
-  #checkEquals (which (is.na (mcols(MotifDb)$organism)), integer (0))
131
+  print('--- test.noNAorganisms')
132
+  #checkEquals(which(is.na(mcols(MotifDb)$organism)), integer(0))
130 133
 
131 134
   # There's a fair number of NA organisms, mostly due to including the homer DB
132
-  checkEquals(sum(is.na (mcols(MotifDb)$organism)), 366)
135
+  checkEquals(sum(is.na(mcols(MotifDb)$organism)), 366)
133 136
 
134 137
 } # test.noNAorganisms
135 138
 #------------------------------------------------------------------------------------------------------------------------
136
-test.allMatricesAreNormalized = function ()
139
+test.allMatricesAreNormalized = function()
137 140
 {
138
-  print ('--- test.allMatricesAreNormalized')
139
-  mdb = MotifDb# (quiet=TRUE)
141
+  print('--- test.allMatricesAreNormalized')
142
+  mdb = MotifDb#(quiet=TRUE)
140 143
   matrices = mdb@listData
141 144
     # a lenient test required by "Cparvum-UniPROBE-Cgd2_3490.UP00395" and  "Hsapiens-UniPROBE-Sox4.UP00401"
142 145
     # for reasons not yet explored.  10e-8 should be be possible
143
-  checkTrue(all(sapply(matrices, function (m) all (abs (colSums (m) - 1.0) < 0.02))))
146
+  checkTrue(all(sapply(matrices, function(m) all(abs(colSums(m) - 1.0) < 0.02))))
144 147
 
145 148
 } # test.allMatricesAreNormalized
146 149
 #------------------------------------------------------------------------------------------------------------------------
147
-test.providerNames = function ()
150
+test.providerNames = function()
148 151
 {
149
-  print ('--- test.getProviderNames')
150
-  mdb = MotifDb # ()
152
+  print('--- test.getProviderNames')
153
+  mdb = MotifDb #()
151 154
   pn = mcols(mdb)$providerName
152
-  checkEquals (length (which (is.na (pn))), 0)
153
-  checkEquals (length (which (pn == '')), 0)
155
+  checkEquals(length(which(is.na(pn))), 0)
156
+  checkEquals(length(which(pn == '')), 0)
154 157
 
155 158
 } # test.providerNames
156 159
 #------------------------------------------------------------------------------------------------------------------------
157
-test.geneSymbols = function ()
160
+test.geneSymbols = function()
158 161
 {
159
-  print ('--- test.getGeneSymbols')
160
-  mdb = MotifDb # ()
162
+  print('--- test.getGeneSymbols')
163
+  mdb = MotifDb #()
161 164
   syms = mcols(mdb)$geneSymbol
162
-  checkEquals (length (which (is.na (syms))), 683)  # no symols yet for the dgf stamlab motifs
163
-  checkEquals (length (which (syms == '')), 0)
165
+  checkEquals(length(which(is.na(syms))), 683)  # no symols yet for the dgf stamlab motifs
166
+  checkEquals(length(which(syms == '')), 0)
164 167
 
165 168
 } # test.geneSymbols
166 169
 #------------------------------------------------------------------------------------------------------------------------
167
-test.geneIdsAndTypes = function ()
170
+test.geneIdsAndTypes = function()
168 171
 {
169
-  print ('--- test.getGeneIdsAndTypes')
172
+  print('--- test.getGeneIdsAndTypes')
170 173
   mdb = MotifDb
171 174
   tbl <- mcols(mdb)
172 175
   geneIds = tbl$geneId
173 176
   geneIdTypes = tbl$geneIdType
174
-  typeCounts = as.list (table (geneIdTypes))
177
+  typeCounts = as.list(table(geneIdTypes))
175 178
 
176 179
   checkTrue(typeCounts$ENTREZ > 2300)
177 180
   checkTrue(typeCounts$FLYBASE >= 45)
178 181
   checkTrue(typeCounts$SGD >= 600)
179 182
   checkTrue(nrow(subset(tbl, is.na(geneIdType))) > 2000)
180 183
 
181
-  empty.count = length (which (geneIds == ''))
182
-  checkEquals (empty.count, 0)
184
+  empty.count = length(which(geneIds == ''))
185
+  checkEquals(empty.count, 0)
183 186
 
184 187
 
185 188
 } # test.geneIdsAndTypes
186 189
 #------------------------------------------------------------------------------------------------------------------------
187 190
 # make sure that all proteinIds have explicit values, either proper identifiers or NA
188 191
 # currently tested by looking for empty string assignments
189
-test.proteinIds = function ()
192
+test.proteinIds = function()
190 193
 {
191
-  print ('--- test.proteinIds')
192
-  mdb = MotifDb # (quiet=TRUE)
194
+  print('--- test.proteinIds')
195
+  mdb = MotifDb #(quiet=TRUE)
193 196
   NA.string.count <- sum(is.na(mcols(mdb)$proteinId))
194
-#  NA.string.count = length (grep ('NA', mcols(mdb)$proteinId))
197
+#  NA.string.count = length(grep('NA', mcols(mdb)$proteinId))
195 198
 
196 199
   checkEquals(NA.string.count, 2514)
197 200
   # FIX THIS; Currently 2514 don't have protein IDs
198
-  #checkEquals (NA.string.count, 0)
201
+  #checkEquals(NA.string.count, 0)
199 202
 
200
-  empty.count = length (which (mcols(mdb)$proteinId==""))
201
-  if (empty.count > 0)
202
-    browser ('test.proteinIds')
203
+  empty.count = length(which(mcols(mdb)$proteinId==""))
204
+  if(empty.count > 0)
205
+    browser('test.proteinIds')
203 206
 
204
-  checkEquals (empty.count, 0)
207
+  checkEquals(empty.count, 0)
205 208
 
206 209
      # FlyFactorSurvey, as digested by me, had a blanket assigment of UNIPROT to all proteinIds
207 210
      # Herve' pointed out that this applied also to entries with no proteinId.
... ...
@@ -209,86 +212,88 @@ test.proteinIds = function ()
209 212
 
210 213
   ### FIX THIS TOO! Currently have 913 entries with a proteinIdType and no proteinId
211 214
   x = mcols(mdb)
212
-  # checkEquals (nrow (subset (x, !is.na (proteinIdType) & is.na (proteinId))), 0)
215
+  # checkEquals(nrow(subset(x, !is.na(proteinIdType) & is.na(proteinId))), 0)
213 216
 
214 217
 
215 218
 } # test.proteinIds
216 219
 #------------------------------------------------------------------------------------------------------------------------
217 220
 # only for UniPROBE do we not have sequence count.  might be possible to get them along with 'insertion sequences'
218
-test.sequenceCount = function ()
221
+test.sequenceCount = function()
219 222
 {
220
-  print ('--- test.sequenceCount')
221
-  mdb = MotifDb # ()
223
+  print('--- test.sequenceCount')
224
+  mdb = MotifDb #()
222 225
   x = mcols(mdb)
223
-  if (interactive ()) {
224
-    x.up = subset (x, dataSource == 'UniPROBE')
225
-    checkTrue (all (is.na (x.up$sequenceCount)))
226
+  if(interactive()) {
227
+    x.up = subset(x, dataSource == 'UniPROBE')
228
+    checkTrue(all(is.na(x.up$sequenceCount)))
226 229
     }
227 230
   else {
228
-    uniprobe.indices = which (x$dataSource == 'UniPROBE')
229
-    checkTrue (all (is.na (x$sequenceCount [uniprobe.indices])))
231
+    uniprobe.indices = which(x$dataSource == 'UniPROBE')
232
+    checkTrue(all(is.na(x$sequenceCount [uniprobe.indices])))
230 233
     }
231 234
 
232 235
 } # test.sequenceCount
233 236
 #------------------------------------------------------------------------------------------------------------------------
234 237
 # make sure that a legitimate organism-dataSource-identifier is supplied for each matrix and as a rowname
235 238
 # of the corresponding DataFrame
236
-test.longNames = function ()
239
+test.longNames = function()
237 240
 {
238
-  print ('--- test.longNames')
241
+  print('--- test.longNames')
239 242
   mdb = MotifDb
240
-  longNames = strsplit (names (mdb), '-')
241
-  organisms = unique (sapply (longNames, '[', 1))
243
+  longNames = strsplit(names(mdb), '-')
244
+  organisms = unique(sapply(longNames, '[', 1))
242 245
 
243
-  dataSources = unique (lapply (longNames, '[', 2))
246
+  dataSources = unique(lapply(longNames, '[', 2))
244 247
 
245
-  recognized.dataSources = unique (mcols(mdb)$dataSource)
246
-  recognized.organisms = unique (mcols(mdb)$organism)
247
-    # a few (3) matrices from JASPAR core have NA organism.  make this into a character
248
+  recognized.dataSources = c(unique(mcols(mdb)$dataSource),
249
+                             c("HOCOMOCOv11B", "HOCOMOCOv11C", "HOCOMOCOv11A"))
250
+
251
+  recognized.organisms = unique(mcols(mdb)$organism)
252
+    # a few(3) matrices from JASPAR core have NA organism.  make this into a character
248 253
     # so that it can be matched up against the 'NA' extracted from longNames just above
249
-  na.indices = which (is.na (recognized.organisms))
250
-  if (length (na.indices) > 0)
254
+  na.indices = which(is.na(recognized.organisms))
255
+  if(length(na.indices) > 0)
251 256
      recognized.organisms [na.indices] = 'NA'
252 257
 
253
-  checkTrue (all (organisms %in% recognized.organisms))
254
-  checkTrue (all (dataSources %in% recognized.dataSources))
258
+  checkTrue(all(organisms %in% recognized.organisms))
259
+  checkTrue(all(dataSources %in% recognized.dataSources))
255 260
 
256 261
 } # test.longNames
257 262
 #------------------------------------------------------------------------------------------------------------------------
258 263
 # make sure that a legitimate organism is specified for each matrix
259
-test.organisms = function ()
264
+test.organisms = function()
260 265
 {
261
-  print ('--- test.organisms')
262
-  mdb = MotifDb # (quiet=TRUE)
266
+  print('--- test.organisms')
267
+  mdb = MotifDb #(quiet=TRUE)
263 268
   organisms = mcols(mdb)$organism
264 269
 
265
-     # jaspar_core has 3 NA speciesId: TBP, HNF4A and CEBPA (MA0108.2, MA0114.1, MA0102.2)
270
+     # jaspar_core has 3 NA speciesId: TBP, HNF4A and CEBPA(MA0108.2, MA0114.1, MA0102.2)
266 271
      # their website shows these as vertebrates, which I map to 'Vertebrata'.  An organismID of '-'
267 272
   # gets the same treatment, matching website also.
268 273
 
269 274
   ### Note: this failing test is the same as the test.noNAorganisms test!
270 275
   # As in case of noNA, need to add organisms for these
271
-  #checkEquals (which (is.na (mcols(MotifDb)$organism)), integer (0))
276
+  #checkEquals(which(is.na(mcols(MotifDb)$organism)), integer(0))
272 277
 
273
-  empty.count = length (which (mcols(mdb)$organism==""))
274
-  checkEquals (empty.count, 0)
278
+  empty.count = length(which(mcols(mdb)$organism==""))
279
+  checkEquals(empty.count, 0)
275 280
 
276 281
 } # test.organisms
277 282
 #------------------------------------------------------------------------------------------------------------------------
278
-test.bindingDomains = function ()
283
+test.bindingDomains = function()
279 284
 {
280
-  print ('--- test.bindingDomains')
281
-  mdb = MotifDb # (quiet=TRUE)
282
-  checkTrue (length (unique (mcols(mdb)$bindingDomain)) > 1)
285
+  print('--- test.bindingDomains')
286
+  mdb = MotifDb #(quiet=TRUE)
287
+  checkTrue(length(unique(mcols(mdb)$bindingDomain)) > 1)
283 288
 
284 289
 } # test.bindingDomains
285 290
 #------------------------------------------------------------------------------------------------------------------------
286
-test.flyBindingDomains = function ()
291
+test.flyBindingDomains = function()
287 292
 {
288
-  print ('--- test.flyBindingDomains')
293
+  print('--- test.flyBindingDomains')
289 294
 
290 295
   x = mcols(MotifDb)
291
-  tmp = as.list (head (sort (table (subset (x, organism=='Dmelanogaster')$bindingDomain), decreasing=TRUE), n=3))
296
+  tmp = as.list(head(sort(table(subset(x, organism=='Dmelanogaster')$bindingDomain), decreasing=TRUE), n=3))
292 297
 
293 298
     # these counts will likely change with a fresh load of data from FlyFactorSurvey.
294 299
 
... ...
@@ -299,38 +304,38 @@ test.flyBindingDomains = function ()
299 304
 
300 305
 } # test.flyBindingDomains
301 306
 #------------------------------------------------------------------------------------------------------------------------
302
-test.experimentTypes = function ()
307
+test.experimentTypes = function()
303 308
 {
304
-  print ('--- test.experimentTypes')
305
-  mdb = MotifDb # (quiet=TRUE)
309
+  print('--- test.experimentTypes')
310
+  mdb = MotifDb #(quiet=TRUE)
306 311
   x = mcols(mdb)
307
-  checkTrue (length (unique (x$experimentType)) >= 18)
308
-  checkEquals (length (which (x$experimentType=='')), 0)
312
+  checkTrue(length(unique(x$experimentType)) >= 18)
313
+  checkEquals(length(which(x$experimentType=='')), 0)
309 314
 
310 315
 } # test.experimentTypes
311 316
 #------------------------------------------------------------------------------------------------------------------------
312
-test.tfFamilies = function ()
317
+test.tfFamilies = function()
313 318
 {
314
-  print ('--- test.tfFamilies')
315
-  mdb = MotifDb # (quiet=TRUE)
316
-  checkTrue (length (unique (mcols(mdb)$tfFamily)) > 1)
319
+  print('--- test.tfFamilies')
320
+  mdb = MotifDb #(quiet=TRUE)
321
+  checkTrue(length(unique(mcols(mdb)$tfFamily)) > 1)
317 322
 
318 323
 } # test.tfFamilies
319 324
 #------------------------------------------------------------------------------------------------------------------------
320
-test.bindingSequences = function ()
325
+test.bindingSequences = function()
321 326
 {
322
-  print ('--- test.bindingSequences')
323
-  mdb = MotifDb # (quiet=TRUE)
324
-  checkTrue (length (unique (mcols(mdb)$bindingSequence)) > 1)
327
+  print('--- test.bindingSequences')
328
+  mdb = MotifDb #(quiet=TRUE)
329
+  checkTrue(length(unique(mcols(mdb)$bindingSequence)) > 1)
325 330
 
326 331
 } # test.tfFamilies
327 332
 #------------------------------------------------------------------------------------------------------------------------
328
-test.pubmedIDs = function ()
333
+test.pubmedIDs = function()
329 334
 {
330
-  print ('--- test.pubmedIDs')
331
-  x = mcols(MotifDb) # (quiet=TRUE))
332
-  checkTrue (length (unique (x$pubmedID)) >= 139)
333
-  checkEquals (length (which (x$pubmedID == '')), 0)
335
+  print('--- test.pubmedIDs')
336
+  x = mcols(MotifDb) #(quiet=TRUE))
337
+  checkTrue(length(unique(x$pubmedID)) >= 139)
338
+  checkEquals(length(which(x$pubmedID == '')), 0)
334 339
 
335 340
 } # test.pubmedIDs
336 341
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -345,96 +350,96 @@ test.pubmedIDs = function ()
345 350
 #          ScerTF-Scerevisiae-ABF2-badis
346 351
 #          JASPAR_CORE-Rrattus-Ar-MA0007.1
347 352
 #
348
-test.allFullNames = function ()
353
+test.allFullNames = function()
349 354
 {
350
-  print ('--- test.allFullNames')
351
-  mdb = MotifDb # (quiet=TRUE)
355
+  print('--- test.allFullNames')
356
+  mdb = MotifDb #(quiet=TRUE)
352 357
   matrices = mdb@listData
353
-  fullNames = names (matrices)
358
+  fullNames = names(matrices)
354 359
 
355
-  all.dataSources = unique (mcols(mdb)$dataSource)
356
-  checkTrue (length (all.dataSources) >= 4)
360
+  all.dataSources = unique(mcols(mdb)$dataSource)
361
+  checkTrue(length(all.dataSources) >= 4)
357 362
 
358
-  for (source in all.dataSources) {
363
+  for(source in all.dataSources) {
359 364
      this.dataSource <<- source
360
-     matrices.by.source = subset (mdb, dataSource==this.dataSource)
361
-     matrix.name = names (matrices.by.source)[1]
365
+     matrices.by.source = subset(mdb, dataSource==this.dataSource)
366
+     matrix.name = names(matrices.by.source)[1]
362 367
         #  FlyFactorSurvey: Dmelanogaster-FlyFactorSurvey-ab_SANGER_10_FBgn0259750
363 368
         #             hPDI: Hsapiens-hPDI-ABCF2
364 369
         #      JASPAR_CORE: JASPAR_CORE-Athaliana-AGL3-MA0001.1
365 370
         #         UniPROBE: Hsapiens-UniPROBE-Sox4.UP00401
366
-     checkTrue (grep (this.dataSource, matrix.name) == 1)
367
-     #printf ('%20s: %s', source, matrix.name)
371
+     checkTrue(grep(this.dataSource, matrix.name) == 1)
372
+     #printf('%20s: %s', source, matrix.name)
368 373
      }
369 374
 
370
-  return (TRUE)
375
+  return(TRUE)
371 376
 
372 377
 } # test.allFullNames
373 378
 #------------------------------------------------------------------------------------------------------------------------
374
-test.subset = function ()
379
+test.subset = function()
375 380
 {
376
-  if (interactive ()) {
377
-    print ('--- test.subset')
381
+  if(interactive()) {
382
+    print('--- test.subset')
378 383
     mdb = MotifDb
379
-    checkTrue ('geneSymbol' %in% colnames (elementMetadata (mdb)))
380
-    mdb.sub = subset (mdb, geneSymbol=='ABCF2')
381
-    checkEquals (length (mdb.sub), 1)
384
+    checkTrue('geneSymbol' %in% colnames(elementMetadata(mdb)))
385
+    mdb.sub = subset(mdb, geneSymbol=='ABCF2')
386
+    checkEquals(length(mdb.sub), 1)
382 387
     } # if interactive
383 388
 
384 389
 } # test.subset
385 390
 #------------------------------------------------------------------------------------------------------------------------
386
-test.subsetWithVariables = function ()
391
+test.subsetWithVariables = function()
387 392
 {
388
-  if (interactive ()) {
389
-    print ('--- test.subsetWithVariables')
393
+  if(interactive()) {
394
+    print('--- test.subsetWithVariables')
390 395
 
391
-    mdb = MotifDb # ()
392
-    checkTrue ('geneSymbol' %in% colnames (elementMetadata (mdb)))
396
+    mdb = MotifDb #()
397
+    checkTrue('geneSymbol' %in% colnames(elementMetadata(mdb)))
393 398
     target.gene <<- 'ABCF2'
394
-    mdb.sub = subset (mdb, geneSymbol==target.gene)
395
-    checkEquals (length (mdb.sub), 1)
399
+    mdb.sub = subset(mdb, geneSymbol==target.gene)
400
+    checkEquals(length(mdb.sub), 1)
396 401
     } # if interactive
397 402
 
398 403
 } # test.subsetWithVariables
399 404
 #------------------------------------------------------------------------------------------------------------------------
400 405
 # "old style": just one query term allowed
401
-test.queryOldStyle = function ()
406
+test.queryOldStyle = function()
402 407
 {
403
-  print ('--- test.queryOldStyle')
408
+  print('--- test.queryOldStyle')
404 409
   mdb = MotifDb
405 410
 
406 411
     # do queries on dataSource counts match those from a contingency table?
407
-  sources.list = as.list (table (mcols(mdb)$dataSource))
408
-  checkEquals (length (query (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
409
-  checkEquals (length (query (mdb, 'uniprobe')), sources.list$UniPROBE)
410
-  checkEquals (length (query (mdb, 'UniPROBE')), sources.list$UniPROBE)
412
+  sources.list = as.list(table(mcols(mdb)$dataSource))
413
+  checkEquals(length(query(mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
414
+  checkEquals(length(query(mdb, 'uniprobe')), sources.list$UniPROBE)
415
+  checkEquals(length(query(mdb, 'UniPROBE')), sources.list$UniPROBE)
411 416
 
412 417
     # gene symbols which begin with 'sox' are quite common.  can we them?
413
-    # there are currently (19 jul 2012) 18, but since this may change, our test is approximate
418
+    # there are currently(19 jul 2012) 18, but since this may change, our test is approximate
414 419
 
415 420
   # Change on 8/1/2017: increase top limit of sox entries as they've expanded
416
-  sox.entries = query (mdb, '^sox')
417
-  checkTrue (length (sox.entries) > 10)
418
-  checkTrue (length (sox.entries) < 200)
421
+  sox.entries = query(mdb, '^sox')
422
+  checkTrue(length(sox.entries) > 10)
423
+  checkTrue(length(sox.entries) < 200)
419 424
 
420 425
     # manual inspection reveals that some of these genes have names which are all capitalized.  test that.
421
-  checkTrue (length (query (mdb, '^sox', ignore.case=TRUE)) > length (query (mdb, '^SOX', ignore.case=FALSE)))
426
+  checkTrue(length(query(mdb, '^sox', ignore.case=TRUE)) > length(query(mdb, '^SOX', ignore.case=FALSE)))
422 427
 
423 428
     # make sure that queries can be stacked up, and that order of call does not affect the outcome
424
-   uniprobe.sox.matrices = query (query (mdb, 'uniprobe'), '^sox')
425
-   sox.uniprobe.matrices = query (query (mdb, '^sox'), 'uniprobe')
429
+   uniprobe.sox.matrices = query(query(mdb, 'uniprobe'), '^sox')
430
+   sox.uniprobe.matrices = query(query(mdb, '^sox'), 'uniprobe')
426 431
 
427
-   checkEquals (length (uniprobe.sox.matrices), length (sox.uniprobe.matrices))
432
+   checkEquals(length(uniprobe.sox.matrices), length(sox.uniprobe.matrices))
428 433
 
429 434
      # an approximate count check
430
-  checkTrue (length (uniprobe.sox.matrices) > 10)
431
-  checkTrue (length (uniprobe.sox.matrices) < 30)
435
+  checkTrue(length(uniprobe.sox.matrices) > 10)
436
+  checkTrue(length(uniprobe.sox.matrices) < 30)
432 437
 
433
-   checkEquals (unique (mcols(uniprobe.sox.matrices)$dataSource), 'UniPROBE')
434
-   checkEquals (unique (mcols(sox.uniprobe.matrices)$dataSource), 'UniPROBE')
435
-   gene.symbols = sort (unique (mcols(uniprobe.sox.matrices)$geneSymbol))
438
+   checkEquals(unique(mcols(uniprobe.sox.matrices)$dataSource), 'UniPROBE')
439
+   checkEquals(unique(mcols(sox.uniprobe.matrices)$dataSource), 'UniPROBE')
440
+   gene.symbols = sort(unique(mcols(uniprobe.sox.matrices)$geneSymbol))
436 441
 
437
-    # query on a string (in this case, an oddly named motif) which contains
442
+    # query on a string(in this case, an oddly named motif) which contains
438 443
     # regular expression characters.  no solution to this yet.
439 444
     # query uses base R's grep, in which
440 445
     #  x <- query(mdb, "ELK1,4_GABP{A,B1}.p3")
... ...
@@ -443,7 +448,7 @@ test.queryOldStyle = function ()
443 448
 #------------------------------------------------------------------------------------------------------------------------
444 449
 test.query <- function()
445 450
 {
446
-  print ('--- test.query')
451
+  print('--- test.query')
447 452
   mdb = MotifDb
448 453
 
449 454
   ors <- c("MA0511.1", "MA0057.1")
... ...
@@ -471,10 +476,10 @@ test.query <- function()
471 476
   #checkEquals(sort(names(x)),
472 477
 
473 478
     # do queries on dataSource counts match those from a contingency table?
474
-  sources.list = as.list (table (mcols(mdb)$dataSource))
475
-  checkEquals (length (query (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
476
-  checkEquals (length (query (mdb, 'uniprobe')), sources.list$UniPROBE)
477
-  checkEquals (length (query (mdb, 'UniPROBE')), sources.list$UniPROBE)
479
+  sources.list = as.list(table(mcols(mdb)$dataSource))
480
+  checkEquals(length(query(mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
481
+  checkEquals(length(query(mdb, 'uniprobe')), sources.list$UniPROBE)
482
+  checkEquals(length(query(mdb, 'UniPROBE')), sources.list$UniPROBE)
478 483
 
479 484
 } # test.query
480 485
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -486,7 +491,7 @@ test.query2 <- function()
486 491
 
487 492
   matrices.human.sox4 <- query(mdb, c("hsapiens", "sox4"))
488 493
   matrices.human.sox4.oldStyle <- query(matrices.human, "sox4")
489
-  checkTrue(length(matrices.human.sox4) > 0)   # 6 found on (24 oct 2018)
494
+  checkTrue(length(matrices.human.sox4) > 0)   # 6 found on(24 oct 2018)
490 495
   checkEquals(length(matrices.human.sox4), length(matrices.human.sox4.oldStyle))
491 496
 
492 497
   checkTrue(length(matrices.human.sox4) < length(matrices.human))
... ...
@@ -496,260 +501,264 @@ test.query2 <- function()
496 501
 
497 502
 } # test.query2
498 503
 #------------------------------------------------------------------------------------------------------------------------
499
-test.transformMatrixToMemeRepresentation = function ()
504
+test.transformMatrixToMemeRepresentation = function()
500 505
 {
501
-  print ('--- test.transformMatrixToMemeRepresentation')
502
-  mdb = MotifDb # ()
503
-  matrix.agl3 = subset (mdb, dataSource=='JASPAR_CORE' & organism=='Athaliana' & geneSymbol=='AGL3')[[1]]
504
-  checkEquals (dim (matrix.agl3), c (4, 10))
506
+  print('--- test.transformMatrixToMemeRepresentation')
507
+  mdb = MotifDb #()
508
+  matrix.agl3 = subset(mdb, dataSource=='JASPAR_CORE' & organism=='Athaliana' & geneSymbol=='AGL3')[[1]]
509
+  checkEquals(dim(matrix.agl3), c(4, 10))
505 510
 
506 511
     # a transposed frequency is needed for meme.  we store normalized frequency matrices, to just transposition is needed
507
-  tfm = MotifDb:::transformMatrixToMemeRepresentation (matrix.agl3)
508
-  checkEquals (dim (tfm), c (10, 4))
509
-  checkTrue (all (round (rowSums (tfm)) == 1.0))
512
+  tfm = MotifDb:::transformMatrixToMemeRepresentation(matrix.agl3)
513
+  checkEquals(dim(tfm), c(10, 4))
514
+  checkTrue(all(round(rowSums(tfm)) == 1.0))
510 515
 
511 516
 } # test.transformMatrixToMemeRepresentation
512 517
 #------------------------------------------------------------------------------------------------------------------------
513 518
 # translate a motif matrix from MotifList into text suitable for meme and tomtom input.
514 519
 # choose the top two from UniPROBE.  they reliably have high counts, and easily distinguished normalized frequencies
515
-test.matrixToMemeText = function ()
520
+test.matrixToMemeText = function()
516 521
 {
517
-  print ('--- test.matrixToMemeText')
518
-  sox4 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
519
-  rtg3 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
522
+  print('--- test.matrixToMemeText')
523
+  sox4 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
524
+  rtg3 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
520 525
 
521
-  text.sox4 = MotifDb:::matrixToMemeText (sox4)
522
-     # check these with t (sox4 [[1]])
526
+  text.sox4 = MotifDb:::matrixToMemeText(sox4)
527
+     # check these with t(sox4 [[1]])
523 528
   line1.sox4  = " 0.2457457130  0.1950426500  0.2287887620  0.3304228750"
524 529
   line14.sox4 = " 0.2821643030  0.2286132160  0.1585395830  0.3306828990"
525 530
 
526
-  checkEquals (length (text.sox4), 29)
527
-  checkEquals (text.sox4 [1], "MEME version 4")
528
-  checkEquals (text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
529
-  checkEquals (grep (line1.sox4, text.sox4), 12)
530
-  checkEquals (grep (line14.sox4, text.sox4), 25)
531
+  checkEquals(length(text.sox4), 29)
532
+  checkEquals(text.sox4 [1], "MEME version 4")
533
+  checkEquals(text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
534
+  checkEquals(grep(line1.sox4, text.sox4), 12)
535
+  checkEquals(grep(line14.sox4, text.sox4), 25)
531 536
 
532
-  text.rtg3 = MotifDb:::matrixToMemeText (rtg3)
537
+  text.rtg3 = MotifDb:::matrixToMemeText(rtg3)
533 538
   line1.rtg3  = " 0.3935122858  0.1453016447  0.3308830322  0.1303030373"
534 539
   line20.rtg3 = " 0.2490417648  0.3966478493  0.1083586569  0.2459517291"
535
-  checkEquals (length (text.rtg3), 35)         # 4 trailing empty lines
536
-  checkEquals (text.rtg3 [1], "MEME version 4")
537
-  checkEquals (text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
538
-  checkEquals (grep (line1.rtg3, text.rtg3), 12)
539
-  checkEquals (grep (line20.rtg3, text.rtg3), 31)
540
+  checkEquals(length(text.rtg3), 35)         # 4 trailing empty lines
541
+  checkEquals(text.rtg3 [1], "MEME version 4")
542
+  checkEquals(text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
543
+  checkEquals(grep(line1.rtg3, text.rtg3), 12)
544
+  checkEquals(grep(line20.rtg3, text.rtg3), 31)
540 545
 
541 546
     # now call with both matrices, and see if the right results are returned
542
-  text.both = MotifDb:::matrixToMemeText (c (sox4, rtg3))
543
-  checkEquals (text.both [1], "MEME version 4")
544
-  checkEquals (grep (line1.sox4, text.both), 12)
545
-  checkEquals (grep (line14.sox4, text.both), 25)
546
-  checkEquals (grep (line1.rtg3, text.both),  29)
547
-  checkEquals (grep (line20.rtg3, text.both), 48)
547
+  text.both = MotifDb:::matrixToMemeText(c(sox4, rtg3))
548
+  checkEquals(text.both [1], "MEME version 4")
549
+  checkEquals(grep(line1.sox4, text.both), 12)
550
+  checkEquals(grep(line14.sox4, text.both), 25)
551
+  checkEquals(grep(line1.rtg3, text.both),  29)
552
+  checkEquals(grep(line20.rtg3, text.both), 48)
548 553
 
549 554
 } # test.matrixToMemeText
550 555
 #------------------------------------------------------------------------------------------------------------------------
551 556
 # translate a motif matrix from MotifList into text suitable for meme and tomtom input.
552 557
 # choose the top two from UniPROBE.  they reliably have high counts, and easily distinguished normalized frequencies
553
-#test.matrixToMemeText_mapVersion = function ()
558
+#test.matrixToMemeText_mapVersion = function()
554 559
 #{
555
-#  print ('--- test.matrixToMemeText_mapVersion')
556
-#  sox4 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
557
-#  rtg3 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
560
+#  print('--- test.matrixToMemeText_mapVersion')
561
+#  sox4 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
562
+#  rtg3 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
558 563
 #
559
-#  text.sox4 = MotifDb:::mapped.broken.matrixToMemeText (sox4)
560
-#  text.rtg3 = MotifDb:::mapped.broken.matrixToMemeText (rtg3)
561
-#  text.both = MotifDb:::mapped.broken.matrixToMemeText (c (sox4, rtg3))
564
+#  text.sox4 = MotifDb:::mapped.broken.matrixToMemeText(sox4)
565
+#  text.rtg3 = MotifDb:::mapped.broken.matrixToMemeText(rtg3)
566
+#  text.both = MotifDb:::mapped.broken.matrixToMemeText(c(sox4, rtg3))
562 567
 #
563
-#     # check these with t (sox4 [[1]])
568
+#     # check these with t(sox4 [[1]])
564 569
 #  line1.sox4  = " 0.2457457130  0.1950426500  0.2287887620  0.3304228750"
565 570
 #  line14.sox4 = " 0.2821643030  0.2286132160  0.1585395830  0.3306828990"
566 571
 #
567
-#  checkEquals (length (text.sox4), 29)
568
-#  checkEquals (text.sox4 [1], "MEME version 4")
569
-#  checkEquals (text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
570
-#  checkEquals (grep (line1.sox4, text.sox4), 12)
571
-#  checkEquals (grep (line14.sox4, text.sox4), 25)
572
+#  checkEquals(length(text.sox4), 29)
573
+#  checkEquals(text.sox4 [1], "MEME version 4")
574
+#  checkEquals(text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
575
+#  checkEquals(grep(line1.sox4, text.sox4), 12)
576
+#  checkEquals(grep(line14.sox4, text.sox4), 25)
572 577
 #
573 578
 #  line1.rtg3  = " 0.3935122858  0.1453016447  0.3308830322  0.1303030373"
574 579
 #  line20.rtg3 = " 0.2490417648  0.3966478493  0.1083586569  0.2459517291"
575
-#  checkEquals (length (text.rtg3), 35)         # 4 trailing empty lines
576
-#  checkEquals (text.rtg3 [1], "MEME version 4")
577
-#  checkEquals (text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
578
-#  checkEquals (grep (line1.rtg3, text.rtg3), 12)
579
-#  checkEquals (grep (line20.rtg3, text.rtg3), 31)
580
+#  checkEquals(length(text.rtg3), 35)         # 4 trailing empty lines
581
+#  checkEquals(text.rtg3 [1], "MEME version 4")
582
+#  checkEquals(text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
583
+#  checkEquals(grep(line1.rtg3, text.rtg3), 12)
584
+#  checkEquals(grep(line20.rtg3, text.rtg3), 31)
580 585
 #
581 586
 #    # now call with both matrices, and see if the right results are returned
582
-#  checkEquals (text.both [1], "MEME version 4")
583
-#  checkEquals (grep (line1.sox4, text.both), 12)
584
-#  checkEquals (grep (line14.sox4, text.both), 25)
585
-#  checkEquals (grep (line1.rtg3, text.both),  29)
586
-#  checkEquals (grep (line20.rtg3, text.both), 48)
587
+#  checkEquals(text.both [1], "MEME version 4")
588
+#  checkEquals(grep(line1.sox4, text.both), 12)
589
+#  checkEquals(grep(line14.sox4, text.both), 25)
590
+#  checkEquals(grep(line1.rtg3, text.both),  29)
591
+#  checkEquals(grep(line20.rtg3, text.both), 48)
587 592
 #
588 593
 #} # test.matrixToMemeText_mapVersion
589 594
 #------------------------------------------------------------------------------------------------------------------------
590
-test.export_memeFormatStdOut = function ()
595
+test.export_memeFormatStdOut = function()
591 596
 {
592
-  print ('--- test.export_memeFormatStdOut')
593
-  mdb = MotifDb # ()
594
-  mdb.chicken = subset (mdb, organism=='Gallus')
595
-  checkEquals (length (mdb.chicken), 3)
597
+  print('--- test.export_memeFormatStdOut')
598
+  mdb = MotifDb #()
599
+  mdb.chicken = subset(mdb, organism=='Gallus')
600
+  checkEquals(length(mdb.chicken), 3)
596 601
     # text is cat-ed to stdout, so not avaialable here to check.
597 602
     # but just like print, export also returns the text invisibly.
598 603
     # so that CAN be checked.
599 604
 
600
-  meme.text = export (mdb.chicken, format='meme')
601
-  checkEquals (length (meme.text), 1)   # just one long string
602
-  checkTrue (is.character (meme.text))
603
-  checkTrue (nchar (meme.text) > 800)   # 1002 as of (10 aug 2012)
604
-  return (TRUE)
605
+  meme.text = export(mdb.chicken, format='meme')
606
+  checkEquals(length(meme.text), 1)   # just one long string
607
+  checkTrue(is.character(meme.text))
608
+  checkTrue(nchar(meme.text) > 800)   # 1002 as of(10 aug 2012)
609
+  return(TRUE)
605 610
 
606 611
 } # test.exportMemeFormatToStdOut
607 612
 #------------------------------------------------------------------------------------------------------------------------
608
-test.export_memeFormatToFile = function ()
613
+test.export_memeFormatToFile = function()
609 614
 {
610
-  print ('--- test.export_memeFormatToFile')
611
-  mdb = MotifDb # ()
612
-  mdb.chicken = subset (mdb, organism=='Gallus')
613
-  checkEquals (length (mdb.chicken), 3)
614
-  output.file = tempfile ()
615
-  meme.text = export (mdb.chicken, output.file, 'meme')
616
-  retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
617
-  invisible (retrieved)
615
+  print('--- test.export_memeFormatToFile')
616
+  mdb = MotifDb #()
617
+  mdb.chicken = subset(mdb, organism=='Gallus')
618
+  checkEquals(length(mdb.chicken), 3)
619
+  output.file = tempfile()
620
+  meme.text = export(mdb.chicken, output.file, 'meme')
621
+  retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
622
+  invisible(retrieved)
618 623
 
619 624
 } # test.exportMemeFormatToFile
620 625
 #------------------------------------------------------------------------------------------------------------------------
621
-test.export_memeFormatToFileDuplication = function ()
626
+test.export_memeFormatToFileDuplication = function()
622 627
 {
623
-  print ('--- test.export_memeFormatToFileDuplication')
624
-  mdb = MotifDb # ()
625
-  mdb.mouse = subset (mdb, organism=='Mmusculus')
628
+  print('--- test.export_memeFormatToFileDuplication')
629
+  mdb = MotifDb #()
630
+  mdb.mouse = subset(mdb, organism=='Mmusculus')
626 631
   checkTrue(length(mdb.mouse) > 1300)
627
-  output.file = 'mouse.txt' # tempfile ()
632
+  output.file = 'mouse.txt' # tempfile()
628 633
   max = 3
629
-  meme.text = export (mdb.mouse [1:max], output.file, 'meme')
630
-  retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
631
-  invisible (retrieved)
634
+  meme.text = export(mdb.mouse [1:max], output.file, 'meme')
635
+  retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
636
+  invisible(retrieved)
632 637
 
633 638
 } # test.exportMemeFormatToFileDuplication
634 639
 #------------------------------------------------------------------------------------------------------------------------
635
-test.export_memeFormatToFile_run_tomtom = function (max=50)
640
+test.export_memeFormatToFile_run_tomtom = function(max=50)
636 641
 {
637
-  if (interactive ()) {
638
-    print ('--- test.export_memeFormatToFile_run_tomtom')
642
+  if(interactive()) {
643
+    print('--- test.export_memeFormatToFile_run_tomtom')
639 644
     mdb = MotifDb
640
-    sox4.mouse = subset (mdb, organism=='Mmusculus' & geneSymbol=='Sox4')
641
-    all.human = subset (mdb, organism=='Hsapiens')
645
+    sox4.mouse = subset(mdb, organism=='Mmusculus' & geneSymbol=='Sox4')
646
+    all.human = subset(mdb, organism=='Hsapiens')
642 647
 
643
-    tomtom.tmp.dir = tempfile ()
644
-    print (tomtom.tmp.dir)
645
-    stopifnot (dir.create (tomtom.tmp.dir))
646
-    sox4.file.path = file.path (tomtom.tmp.dir, 'sox4.mouse.text')
647
-    all.human.file.path = file.path (tomtom.tmp.dir,'all.human.text')
648
-    export (sox4.mouse, sox4.file.path, 'meme')
649
-    export (all.human, all.human.file.path, 'meme')
648
+    tomtom.tmp.dir = tempfile()
649
+    print(tomtom.tmp.dir)
650
+    stopifnot(dir.create(tomtom.tmp.dir))
651
+    sox4.file.path = file.path(tomtom.tmp.dir, 'sox4.mouse.text')
652
+    all.human.file.path = file.path(tomtom.tmp.dir,'all.human.text')
653
+    export(sox4.mouse, sox4.file.path, 'meme')
654
+    export(all.human, all.human.file.path, 'meme')
650 655
 
651 656
        # find similarity of motif #1 to all the motifs in mdbMany
652 657
 
653 658
        # cannot rely upon tomtom being present
654 659
 
655
-    #cmd = sprintf ('tomtom -no-ssc -oc %s -verbosity 3 -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10 %s %s',
660
+    #cmd = sprintf('tomtom -no-ssc -oc %s -verbosity 3 -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10 %s %s',
656 661
     #                tomtom.tmp.dir, sox4.file.path, all.human.file.path)
657
-    #system (cmd)
658
-    #cmd = sprintf ('open %s/tomtom.html', tomtom.tmp.dir)
659
-    #system (cmd)
662
+    #system(cmd)
663
+    #cmd = sprintf('open %s/tomtom.html', tomtom.tmp.dir)
664
+    #system(cmd)
660 665
     } # if interactive
661 666
 
662 667
 } # test.export_memeFormatToFile_run_tomtom
663 668
 #------------------------------------------------------------------------------------------------------------------------
664 669
 # MotIV::motifMatch fails with MotIV_1.25.0.  will look into this in September, 2015, pshannon
665
-test.run_MotIV.motifMatch = function ()
670
+# MotIV abandoned, all of my own motif/sequence matching is done via an independently installed
671
+# FIMO from the meme suite.
672
+# another option is the MOODs motif matching capability provided by the bioc package "motifmatchr"
673
+# this possibility, and these tests, are deferred for now.
674
+disabled_test.run_MotIV.motifMatch = function()
666 675
 {
667
-  library (MotIV)
668
-  print ('--- test.run_MotIV.motifMatch')
669
-  mdb <- MotifDb # ()
676
+  require(MotIV)
677
+  print('--- test.run_MotIV.motifMatch')
678
+  mdb <- MotifDb #()
670 679
 
671 680
   db.tmp <- mdb@listData
672 681
 
673 682
      # match motif 1 against the entire MotifDb  collection
674
-  motif.hits <- motifMatch (db.tmp [1], database<-db.tmp)
683
+  motif.hits <- motifMatch(db.tmp [1], database<-db.tmp)
675 684
      # the long way to extract the matrix name.  see MotIV.toTable below for more convenient way
676
-  checkEquals (motif.hits@bestMatch[[1]]@aligns[[1]]@TF@name, names (db.tmp)[1])
685
+  checkEquals(motif.hits@bestMatch[[1]]@aligns[[1]]@TF@name, names(db.tmp)[1])
677 686
 
678 687
      # match the last motif against all
679 688
   last <- length(db.tmp)
680 689
      # MotIV:motifMatch works differently on linux and macos.  by asking for 50 matches,
681
-     # the search target (db.tmp[last]) is sure to be in the hit list.
682
-  motif.hits <-  motifMatch (db.tmp [last], database=db.tmp, top=50)
683
-  tbl.hits <- MotIV.toTable (motif.hits)
690
+     # the search target(db.tmp[last]) is sure to be in the hit list.
691
+  motif.hits <-  motifMatch(db.tmp [last], database=db.tmp, top=50)
692
+  tbl.hits <- MotIV.toTable(motif.hits)
684 693
     # the 5 hits return should include the one we tried to match, but the MotIV search strategy
685 694
     # may not place it first
686 695
   checkTrue(names(db.tmp[last]) %in% tbl.hits$name)
687
-  invisible (tbl.hits)
696
+  invisible(tbl.hits)
688 697
 
689
-} # test.run_MotIV.motifMatch
698
+} # distable_test.run_MotIV.motifMatch
690 699
 #------------------------------------------------------------------------------------------------------------------------
691
-MotIV.toTable = function (match)
700
+MotIV.toTable = function(match)
692 701
 {
693
-  stopifnot (length (match@bestMatch) >= 1)
702
+  stopifnot(length(match@bestMatch) >= 1)
694 703
   alignments = match@bestMatch[[1]]@aligns
695 704
 
696
-  df = data.frame (stringsAsFactors=FALSE)
697
-  for (alignment in alignments) {
705
+  df = data.frame(stringsAsFactors=FALSE)
706
+  for(alignment in alignments) {
698 707
     x = alignment
699 708
     name = x@TF@name
700 709
     eVal = x@evalue
701 710
     sequence = x@sequence
702 711
     match = x@match
703 712
     strand = x@strand
704
-    df = rbind (df, data.frame (name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
713
+    df = rbind(df, data.frame(name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
705 714
     } # for alignment
706 715
 
707
-  return (df)
716
+  return(df)
708 717
 
709 718
 } # MotIV.toTable
710 719
 #------------------------------------------------------------------------------------------------------------------------
711
-test.MotIV.toTable = function ()
720
+disabled_test.MotIV.toTable = function()
712 721
 {
713
-  print ('--- test.MotIVtoTable')
714
-  mdb = MotifDb # ()
715
-  test.hits = motifMatch (mdb[1]@listData, database=jaspar)
716
-  tbl.hits =  MotIV.toTable (test.hits)
717
-  checkEquals (dim (tbl.hits), c (5, 5))
718
-  checkEquals (sort(colnames (tbl.hits)), sort(c("name", "eVal", "sequence", "match", "strand")))
722
+  print('--- test.MotIVtoTable')
723
+  mdb = MotifDb #()
724
+  test.hits = motifMatch(mdb[1]@listData, database=jaspar)
725
+  tbl.hits =  MotIV.toTable(test.hits)
726
+  checkEquals(dim(tbl.hits), c(5, 5))
727
+  checkEquals(sort(colnames(tbl.hits)), sort(c("name", "eVal", "sequence", "match", "strand")))
719 728
 
720 729
 } # test.MotIV.toTable
721 730
 #------------------------------------------------------------------------------------------------------------------------
722
-pwmMatch.toTable = function (motifMatch) {
723
-   if (length (motifMatch@bestMatch) == 0)
724
-   return (NA)
731
+pwmMatch.toTable = function(motifMatch) {
732
+   if(length(motifMatch@bestMatch) == 0)
733
+   return(NA)
725 734
 
726 735
    df.list = vector("list", length(motifMatch@bestMatch))
727
-   for (k in seq(length(motifMatch@bestMatch)))
736
+   for(k in seq(length(motifMatch@bestMatch)))
728 737
    {
729 738
        alignments = motifMatch@bestMatch[[k]]@aligns
730
-       df = data.frame (stringsAsFactors=FALSE)
731
-       for (alignment in alignments) {
739
+       df = data.frame(stringsAsFactors=FALSE)
740
+       for(alignment in alignments) {
732 741
            x = alignment
733 742
            name = x@TF@name
734 743
            eVal = x@evalue
735 744
            sequence = x@sequence
736 745
            match = x@match
737 746
            strand = x@strand
738
-           df = rbind (df, data.frame (name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
747
+           df = rbind(df, data.frame(name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
739 748
        } # for alignment
740 749
        df.list[[k]]=df
741 750
    }
742 751
    names(df.list) <- names(motifMatch)
743
-   return (df.list)
752
+   return(df.list)
744 753
 
745 754
 } # pwmMatch.toTable
746 755
 #------------------------------------------------------------------------------
747 756
 # Robert Stojnic reports incorrect gene symbols for matrices obtained from
748 757
 # flyFactorSurvey.
749 758
 # the solution was to abandon the original strategy of extracting the
750
-# symbol from the matrix (and file) name.
751
-# now, the flybase importer ("inst/scripts/import/flyFactorSurvey/import.R")
752
-# uses FBgn id (which can be reliably extracted) and uses indpendent
759
+# symbol from the matrix(and file) name.
760
+# now, the flybase importer("inst/scripts/import/flyFactorSurvey/import.R")
761
+# uses FBgn id(which can be reliably extracted) and uses indpendent
753 762
 # data sources to learn the gene symbol.
754 763
 #
755 764
 # robert's email:
... ...
@@ -776,7 +785,7 @@ pwmMatch.toTable = function (motifMatch) {
776 785
 #
777 786
 test.flyFactorGeneSymbols <- function()
778 787
 {
779
-    print ("--- test.flyFactorGeneSymbols")
788
+    print("--- test.flyFactorGeneSymbols")
780 789
     mdb = MotifDb
781 790
     checkEquals(sort(mcols(query(mdb, "FBgn0259750"))$geneSymbol),
782 791
                 sort(c("FBgn0259750", "FBgn0259750")))
... ...
@@ -785,34 +794,34 @@ test.flyFactorGeneSymbols <- function()
785 794
 
786 795
 } # test.flyFactorGeneSymbols
787 796
 #-------------------------------------------------------------------------------
788
-test.export_jasparFormatStdOut = function ()
797
+test.export_jasparFormatStdOut = function()
789 798
 {
790
-  print ('--- test.export_jasparFormatStdOut')
791
-  mdb = MotifDb # ()
792
-  mdb.chicken = subset (mdb, organism=='Gallus')
793
-  checkEquals (length (mdb.chicken), 3)
799
+  print('--- test.export_jasparFormatStdOut')
800
+  mdb = MotifDb #()
801
+  mdb.chicken = subset(mdb, organism=='Gallus')
802
+  checkEquals(length(mdb.chicken), 3)
794 803
     # text is cat-ed to stdout, so not avaialable here to check.
795 804
     # but just like print, export also returns the text invisibly.
796 805
     # so that CAN be checked.
797 806
 
798
-  jaspar.text = export (mdb.chicken, format='jaspar')
799
-  checkEquals (length (jaspar.text), 1)   # just one long string
800
-  checkTrue (is.character (jaspar.text))
801
-  checkTrue (nchar (jaspar.text) > 800)   # 1002 as of (10 aug 2012)
802
-  return (TRUE)
807
+  jaspar.text = export(mdb.chicken, format='jaspar')
808
+  checkEquals(length(jaspar.text), 1)   # just one long string
809
+  checkTrue(is.character(jaspar.text))
810
+  checkTrue(nchar(jaspar.text) > 800)   # 1002 as of(10 aug 2012)
811
+  return(TRUE)
803 812
 
804 813
 } # test.exportjasparFormatToStdOut
805 814
 #------------------------------------------------------------------------------------------------------------------------
806
-test.export_jasparFormatToFile = function ()
815
+test.export_jasparFormatToFile = function()
807 816
 {
808
-  print ('--- test.export_jasparFormatToFile')
809
-  mdb = MotifDb # ()
810
-  mdb.chicken = subset (mdb, organism=='Gallus')
811
-  checkEquals (length (mdb.chicken), 3)
812
-  output.file = tempfile ()
813
-  jaspar.text = export (mdb.chicken, output.file, 'jaspar')
814
-  retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
815
-  invisible (retrieved)
817
+  print('--- test.export_jasparFormatToFile')
818
+  mdb = MotifDb #()
819
+  mdb.chicken = subset(mdb, organism=='Gallus')
820
+  checkEquals(length(mdb.chicken), 3)
821
+  output.file = tempfile()
822
+  jaspar.text = export(mdb.chicken, output.file, 'jaspar')
823
+  retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
824
+  invisible(retrieved)
816 825
 
817 826
 } # test.exportjasparFormatToFile
818 827
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -833,9 +842,9 @@ test.geneToMotif <- function()
833 842
 
834 843
       # MotifDb mode uses the MotifDb metadata, pulled from many sources
835 844
    tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
836
-   checkEquals(dim(tbl.mdb), c(13, 6))
845
+   checkEquals(dim(tbl.mdb), c(14, 6))
837 846
    checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
838
-      # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
847
+      # no recognizable(i.e., jaspar standard) motif name returned by MotifDb metadata
839 848
       # MotifDb for ATF5
840 849
       # todo: compare the MA0110596_1.02 matrix of cisp_1.02 to japar MA0833.1
841 850
 
... ...
@@ -852,8 +861,8 @@ test.geneToMotif <- function()
852 861
 
853 862
 } # test.geneToMotif
854 863
 #------------------------------------------------------------------------------------------------------------------------
855
-# this case discovered (31 jan 2018). when called on a gene/source combination for which there are
856
-# no motifs, i attempted to add the mapping source (either "MotifDb", "TFClass") as a column
864
+# this case discovered(31 jan 2018). when called on a gene/source combination for which there are
865
+# no motifs, i attempted to add the mapping source(either "MotifDb", "TFClass") as a column
857 866
 # to an empty data.frame.  check for that and its fix here
858 867
 test.geneToMotif.oneGene.noMotifs <- function()
859 868
 {
... ...
@@ -863,7 +872,7 @@ test.geneToMotif.oneGene.noMotifs <- function()
863 872
 
864 873
 } # test.geneToMotif.oneGene.noMotifs
865 874
 #------------------------------------------------------------------------------------------------------------------------
866
-# sad to say I do not recall what problem/fix is tested here (pshannon, 23 jan 2018).
875
+# sad to say I do not recall what problem/fix is tested here(pshannon, 23 jan 2018).
867 876
 # however, it demonstrates the variety of results which can be returned by non-jaspar datasets
868 877
 # when using the MotifDb mapping source, and the relative paucity which is sometimes
869 878
 # seen with the TFclass mapper
... ...
@@ -882,9 +891,9 @@ test.geneToMotif.ignore.jasparSuffixes <- function()
882 891
 
883 892
       # MotifDb mode uses the MotifDb metadata, pulled from many sources
884 893
    tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
885
-   checkEquals(dim(tbl.mdb), c(13, 6))
894
+   checkEquals(dim(tbl.mdb), c(14, 6))
886 895
    checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
887
-      # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
896
+      # no recognizable(i.e., jaspar standard) motif name returned by MotifDb metadata
888 897
       # MotifDb for ATF5
889 898
 
890 899
       # compare the MA0110599_1.02 matrix of cisp_1.02 to japar MA0476.1: the identical matrix!
... ...
@@ -972,7 +981,7 @@ test.motifToGene <- function()
972 981
    checkEquals(sort(unique(tbl$geneSymbol)),
973 982
                     c("AR", "RUNX1", "TFAP2A", "TFAP2A(var.3)", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E"))
974 983
 
975
-      # (23 may 2018) found that MotifDb works, but c("MotifDb", "TFClass") does not
984
+      #(23 may 2018) found that MotifDb works, but c("MotifDb", "TFClass") does not
976 985
       # test the fix here
977 986
 
978 987
    motifs <- c("Hsapiens-jolma2013-IRF5-2", "Hsapiens-SwissRegulon-IRF5.SwissRegulon")
... ...
@@ -1066,6 +1075,70 @@ test.associateTranscriptionFactors <- function()
1066 1075
       # now some motif names
1067 1076
 } # test.associateTranscriptionFactors
1068 1077
 #------------------------------------------------------------------------------------------------------------------------
1078
+test.match <- function()
1079
+{
1080
+   printf("--- test.match")
1081
+   gr.region <- GRanges(seqnames="chr1", IRanges(start=47229520, end=47229560))
1082
+   motifs <- query(MotifDb, c("jaspar2018", "ZNF263"))
1083
+   checkEquals(length(motifs), 1)
1084
+   gr.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5)
1085
+   checkEquals(length(gr.match), 1)  # just one motif
1086
+   checkEquals(names(gr.match), names(motifs))
1087
+   checkEquals(length(gr.match[[1]]), 3)
1088
+
1089
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
1090
+   checkEquals(dim(tbl.match), c(3, 7))
1091
+   checkTrue(all(tbl.match$motif == names(motifs)))
1092
+   checkEquals(class(tbl.match$chrom), "character")  # not a factor
1093
+
1094
+   motifs <- query(MotifDb, "ZNF263", c("jaspar2018", "swissregulon"))
1095
+   checkEquals(length(motifs), 2)
1096
+   gr.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5)
1097
+   checkEquals(names(gr.match), names(motifs))
1098
+   checkEquals(as.numeric(lapply(gr.match, length)), c(3, 1))
1099
+
1100
+   tbl.match <-matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
1101
+   checkEquals(dim(tbl.match), c(4, 7))
1102
+   checkEquals(length(unique(tbl.match$motif)), 2)
1103
+   checkEquals(unique(tbl.match$motif), names(motifs))
1104
+   checkEquals(colnames(tbl.match), c("chrom", "start", "end", "width", "strand", "mood.score", "motif_id"))
1105
+
1106
+
1107
+        #------------------------------------------------
1108
+        # now all jaspar2018 human motifs
1109
+        #------------------------------------------------
1110
+
1111
+   motifs <- query(MotifDb, c("jaspar2018", "hsapiens"))
1112
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
1113
+   checkEquals(dim(tbl.match), c(7, 7))
1114
+   checkEquals(sort(unique(tbl.match$motif)),
1115
+               c("Hsapiens-jaspar2018-EWSR1-FLI1-MA0149.1", "Hsapiens-jaspar2018-ZNF263-MA0528.1"))
1116
+
1117
+        #-----------------------------------------------------
1118
+        # now all jaspar2018 human motifs, loosen the pValue
1119
+        #-----------------------------------------------------
1120
+
1121
+   motifs <- query(MotifDb, c("jaspar2018", "hsapiens"))
1122
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-4, fimoDataFrameStyle=TRUE)
1123
+   checkTrue(nrow(tbl.match) > 15)
1124
+
1125
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-3, fimoDataFrameStyle=TRUE)
1126
+   checkTrue(nrow(tbl.match) > 50)
1127
+
1128
+       #-------------------------------------------------------------
1129
+       # now all jaspar2018 and hocomoco human motifs across 10kb
1130
+       #------------------------------------------------------------
1131
+
1132
+   motifs <- query(MotifDb, "hsapiens", orStrings=c("jaspar2018", "hocomoco"))
1133
+   checkTrue(length(motifs) > 1000)
1134
+   gr.region <- GRanges(seqnames="chr1", IRanges(start=47229000, end=47239000))
1135
+
1136
+   tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-7, fimoDataFrameStyle=TRUE)
1137
+   checkTrue(nrow(tbl.match) > 200 && nrow(tbl.match) < 275)
1138
+   checkEquals(order(tbl.match$start), seq_len(nrow(tbl.match)))
1139
+
1140
+} # test.match
1141
+#------------------------------------------------------------------------------------------------------------------------
1069 1142
 findMotifsWithMutuallyExclusiveMappings <- function()
1070 1143
 {
1071 1144
    xtab <- as.data.frame(table(MotifDb@manuallyCuratedGeneMotifAssociationTable$motif))
... ...
@@ -1086,8 +1159,21 @@ findMotifsWithMutuallyExclusiveMappings <- function()
1086 1159
    #  [1]   mdb.genes: AP1, JUN::FOS, FOS::JUN, FOS::JUN
1087 1160
    #  [1]   tfc.genes: FOS, JUN
1088 1161
 
1089
-
1090 1162
 } # findMotifsWithMutuallyExclusiveMappings
1091 1163
 #------------------------------------------------------------------------------------------------------------------------
1164
+test.hocomoco11.with.reliabilityScores <- function()
1165
+{
1166
+   printf("--- test.hocomoco11.with.reliabilityScores")
1167
+
1168
+   checkEquals(length(query(MotifDb, "hocomoco")), 1466)
1169
+   checkEquals(length(query(MotifDb, "hocomocov10")), 1066)
1170
+   checkEquals(length(query(MotifDb, "hocomocov11")), 400)
1171
+
1172
+   checkEquals(length(query(MotifDb, "hocomocov11A")), 181)
1173
+   checkEquals(length(query(MotifDb, "hocomocov11B")),  84)
1174
+   checkEquals(length(query(MotifDb, "hocomocov11C")), 135)
1175
+
1176
+} # test.hocomoco11.with.reliabilityScores
1177
+#------------------------------------------------------------------------------------------------------------------------
1092 1178
 if(!interactive())
1093 1179
    runTests()
1094 1180
new file mode 100644
... ...
@@ -0,0 +1,45 @@
1
+\name{matchMotif}
2
+\alias{matchMotif,MotifList-method}
3
+\alias{matchMotif}
4
+\title{matchMotif}
5
+\description{
6
+A very general search tool, returning all matrices whose metadata, in ANY
7
+column, is matched by the query string.
8
+}
9
+\usage{
10
+\S4method{matchMotif}{MotifList}(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE)
11
+}
12