...
|
...
|
@@ -1,43 +1,43 @@
|
1
|
|
-library (MotifDb)
|
2
|
|
-library (RUnit)
|
3
|
|
-library (MotIV)
|
4
|
|
-library (seqLogo)
|
|
1
|
+library(MotifDb)
|
|
2
|
+library(RUnit)
|
|
3
|
+library(MotIV)
|
|
4
|
+library(seqLogo)
|
5
|
5
|
#----------------------------------------------------------------------------------------------------
|
6
|
6
|
printf <- function(...) print(noquote(sprintf(...)))
|
7
|
7
|
#----------------------------------------------------------------------------------------------------
|
8
|
|
-runTests = function ()
|
|
8
|
+runTests = function()
|
9
|
9
|
{
|
10
|
|
- test.emptyCtor ()
|
11
|
|
- test.nonEmptyCtor ()
|
12
|
|
- test.MotifDb.normalMode ()
|
13
|
|
- test.MotifDb.emptyMode ()
|
14
|
|
- test.allMatricesAreNormalized ()
|
15
|
|
- test.providerNames ()
|
16
|
|
- test.geneSymbols ()
|
17
|
|
- test.geneIdsAndTypes ()
|
18
|
|
- test.proteinIds ()
|
19
|
|
- test.sequenceCount ()
|
20
|
|
- test.longNames ()
|
21
|
|
- test.organisms ()
|
22
|
|
- test.bindingDomains ()
|
23
|
|
- test.experimentTypes ()
|
24
|
|
- test.tfFamilies ()
|
25
|
|
- test.bindingSequences ()
|
26
|
|
- test.flyBindingDomains ()
|
27
|
|
- test.pubmedIDs ()
|
28
|
|
- test.allFullNames ()
|
29
|
|
- test.subset ()
|
30
|
|
- test.subsetWithVariables ()
|
31
|
|
- test.queryOldStyle ()
|
|
10
|
+ test.emptyCtor()
|
|
11
|
+ test.nonEmptyCtor()
|
|
12
|
+ test.MotifDb.normalMode()
|
|
13
|
+ test.MotifDb.emptyMode()
|
|
14
|
+ test.allMatricesAreNormalized()
|
|
15
|
+ test.providerNames()
|
|
16
|
+ test.geneSymbols()
|
|
17
|
+ test.geneIdsAndTypes()
|
|
18
|
+ test.proteinIds()
|
|
19
|
+ test.sequenceCount()
|
|
20
|
+ test.longNames()
|
|
21
|
+ test.organisms()
|
|
22
|
+ test.bindingDomains()
|
|
23
|
+ test.experimentTypes()
|
|
24
|
+ test.tfFamilies()
|
|
25
|
+ test.bindingSequences()
|
|
26
|
+ test.flyBindingDomains()
|
|
27
|
+ test.pubmedIDs()
|
|
28
|
+ test.allFullNames()
|
|
29
|
+ test.subset()
|
|
30
|
+ test.subsetWithVariables()
|
|
31
|
+ test.queryOldStyle()
|
32
|
32
|
test.query()
|
33
|
|
- test.transformMatrixToMemeRepresentation ()
|
34
|
|
- test.matrixToMemeText ()
|
35
|
|
- test.export_memeFormatStdOut ()
|
36
|
|
- test.export_memeFormatToFile ()
|
37
|
|
- test.export_memeFormatToFileDuplication ()
|
38
|
|
- test.export_memeFormatToFile_run_tomtom ()
|
39
|
|
- test.MotIV.toTable ()
|
40
|
|
- test.run_MotIV.motifMatch()
|
|
33
|
+ test.transformMatrixToMemeRepresentation()
|
|
34
|
+ test.matrixToMemeText()
|
|
35
|
+ test.export_memeFormatStdOut()
|
|
36
|
+ test.export_memeFormatToFile()
|
|
37
|
+ test.export_memeFormatToFileDuplication()
|
|
38
|
+ test.export_memeFormatToFile_run_tomtom()
|
|
39
|
+ #test.MotIV.toTable()
|
|
40
|
+ #test.run_MotIV.motifMatch()
|
41
|
41
|
test.flyFactorGeneSymbols()
|
42
|
42
|
test.export_jasparFormatStdOut()
|
43
|
43
|
test.export_jasparFormatToFile()
|
...
|
...
|
@@ -46,27 +46,30 @@ runTests = function ()
|
46
|
46
|
test.geneToMotif.ignore.jasparSuffixes()
|
47
|
47
|
test.geneToMotif.oneGene.noMotifs
|
48
|
48
|
test.motifToGene()
|
49
|
|
-
|
50
|
49
|
test.associateTranscriptionFactors()
|
51
|
50
|
|
|
51
|
+ test.match()
|
|
52
|
+
|
|
53
|
+ test.hocomoco11.with.reliabilityScores()
|
|
54
|
+
|
52
|
55
|
} # runTests
|
53
|
56
|
#------------------------------------------------------------------------------------------------------------------------
|
54
|
|
-test.emptyCtor = function ()
|
|
57
|
+test.emptyCtor = function()
|
55
|
58
|
{
|
56
|
|
- print ('--- test.emptyCtor')
|
57
|
|
- motif.list = MotifDb:::MotifList ()
|
58
|
|
- checkEquals (length (motif.list), 0)
|
|
59
|
+ print('--- test.emptyCtor')
|
|
60
|
+ motif.list = MotifDb:::MotifList()
|
|
61
|
+ checkEquals(length(motif.list), 0)
|
59
|
62
|
|
60
|
63
|
} # test.emptyCtor
|
61
|
64
|
#------------------------------------------------------------------------------------------------------------------------
|
62
|
|
-test.nonEmptyCtor = function ()
|
|
65
|
+test.nonEmptyCtor = function()
|
63
|
66
|
{
|
64
|
|
- print ('--- test.nonEmptyCtor')
|
65
|
|
- mtx = matrix (runif (20), nrow=4, ncol=5, byrow=T, dimnames=list(c ('A', 'C', 'G', 'T'), as.character (1:5)))
|
66
|
|
- mtx.normalized = apply (mtx, 2, function (colvector) colvector / sum (colvector))
|
67
|
|
- matrixList = list (mtx.normalized)
|
|
67
|
+ print('--- test.nonEmptyCtor')
|
|
68
|
+ mtx = matrix(runif(20), nrow=4, ncol=5, byrow=T, dimnames=list(c('A', 'C', 'G', 'T'), as.character(1:5)))
|
|
69
|
+ mtx.normalized = apply(mtx, 2, function(colvector) colvector / sum(colvector))
|
|
70
|
+ matrixList = list(mtx.normalized)
|
68
|
71
|
|
69
|
|
- tbl.md = data.frame (providerName='',
|
|
72
|
+ tbl.md = data.frame(providerName='',
|
70
|
73
|
providerId='',
|
71
|
74
|
dataSource='',
|
72
|
75
|
geneSymbol='',
|
...
|
...
|
@@ -82,36 +85,36 @@ test.nonEmptyCtor = function ()
|
82
|
85
|
experimentType='',
|
83
|
86
|
pubmedID='',
|
84
|
87
|
stringsAsFactors=FALSE)
|
85
|
|
- names (matrixList) = 'test'
|
86
|
|
- rownames (tbl.md) = 'test'
|
87
|
|
- motif.list = MotifDb:::MotifList (matrixList, tbl.md)
|
88
|
|
- checkEquals (length (motif.list), 1)
|
|
88
|
+ names(matrixList) = 'test'
|
|
89
|
+ rownames(tbl.md) = 'test'
|
|
90
|
+ motif.list = MotifDb:::MotifList(matrixList, tbl.md)
|
|
91
|
+ checkEquals(length(motif.list), 1)
|
89
|
92
|
|
90
|
93
|
} # test.nonEmptyCtor
|
91
|
94
|
#------------------------------------------------------------------------------------------------------------------------
|
92
|
95
|
# 'normal' in that all included data sources are already loaded
|
93
|
|
-test.MotifDb.normalMode = function ()
|
|
96
|
+test.MotifDb.normalMode = function()
|
94
|
97
|
{
|
95
|
|
- print ('--- test.MotifDb.normalMode')
|
|
98
|
+ print('--- test.MotifDb.normalMode')
|
96
|
99
|
|
97
|
|
- mdb = MotifDb # (quiet=TRUE)
|
98
|
|
- # (5 jun 2012)
|
|
100
|
+ mdb = MotifDb # (quiet=TRUE)
|
|
101
|
+ #(5 jun 2012)
|
99
|
102
|
# JASPAR_CORE: 459
|
100
|
103
|
# ScerTF: 196
|
101
|
104
|
# UniPROBE: 380
|
102
|
105
|
# FlyFactorSurvey: 614
|
103
|
106
|
# hPDI: 437
|
104
|
|
- checkTrue (length (mdb) > 2080)
|
|
107
|
+ checkTrue(length(mdb) > 2080)
|
105
|
108
|
|
106
|
109
|
} # test.MotifDb.normalMode
|
107
|
110
|
#------------------------------------------------------------------------------------------------------------------------
|
108
|
111
|
# this mode is not intended for users, but may see use in the future.
|
109
|
|
-test.MotifDb.emptyMode = function ()
|
|
112
|
+test.MotifDb.emptyMode = function()
|
110
|
113
|
{
|
111
|
|
- print ('--- test.MotifDb.emptyMode')
|
|
114
|
+ print('--- test.MotifDb.emptyMode')
|
112
|
115
|
|
113
|
|
- mdb = MotifDb:::.MotifDb (loadAllSources=FALSE, quiet=TRUE)
|
114
|
|
- checkTrue (length (mdb) == 0)
|
|
116
|
+ mdb = MotifDb:::.MotifDb(loadAllSources=FALSE, quiet=TRUE)
|
|
117
|
+ checkTrue(length(mdb) == 0)
|
115
|
118
|
|
116
|
119
|
} # test.MotifDb.emptyMode
|
117
|
120
|
#------------------------------------------------------------------------------------------------------------------------
|
...
|
...
|
@@ -122,86 +125,86 @@ test.MotifDb.emptyMode = function ()
|
122
|
125
|
|
123
|
126
|
# Many more NA's exist...need to fix these; here's a quick fix for now
|
124
|
127
|
|
125
|
|
-test.noNAorganisms = function ()
|
|
128
|
+test.noNAorganisms = function()
|
126
|
129
|
|
127
|
130
|
{
|
128
|
|
- print ('--- test.noNAorganisms')
|
129
|
|
- #checkEquals (which (is.na (mcols(MotifDb)$organism)), integer (0))
|
|
131
|
+ print('--- test.noNAorganisms')
|
|
132
|
+ #checkEquals(which(is.na(mcols(MotifDb)$organism)), integer(0))
|
130
|
133
|
|
131
|
134
|
# There's a fair number of NA organisms, mostly due to including the homer DB
|
132
|
|
- checkEquals(sum(is.na (mcols(MotifDb)$organism)), 366)
|
|
135
|
+ checkEquals(sum(is.na(mcols(MotifDb)$organism)), 366)
|
133
|
136
|
|
134
|
137
|
} # test.noNAorganisms
|
135
|
138
|
#------------------------------------------------------------------------------------------------------------------------
|
136
|
|
-test.allMatricesAreNormalized = function ()
|
|
139
|
+test.allMatricesAreNormalized = function()
|
137
|
140
|
{
|
138
|
|
- print ('--- test.allMatricesAreNormalized')
|
139
|
|
- mdb = MotifDb# (quiet=TRUE)
|
|
141
|
+ print('--- test.allMatricesAreNormalized')
|
|
142
|
+ mdb = MotifDb#(quiet=TRUE)
|
140
|
143
|
matrices = mdb@listData
|
141
|
144
|
# a lenient test required by "Cparvum-UniPROBE-Cgd2_3490.UP00395" and "Hsapiens-UniPROBE-Sox4.UP00401"
|
142
|
145
|
# for reasons not yet explored. 10e-8 should be be possible
|
143
|
|
- checkTrue(all(sapply(matrices, function (m) all (abs (colSums (m) - 1.0) < 0.02))))
|
|
146
|
+ checkTrue(all(sapply(matrices, function(m) all(abs(colSums(m) - 1.0) < 0.02))))
|
144
|
147
|
|
145
|
148
|
} # test.allMatricesAreNormalized
|
146
|
149
|
#------------------------------------------------------------------------------------------------------------------------
|
147
|
|
-test.providerNames = function ()
|
|
150
|
+test.providerNames = function()
|
148
|
151
|
{
|
149
|
|
- print ('--- test.getProviderNames')
|
150
|
|
- mdb = MotifDb # ()
|
|
152
|
+ print('--- test.getProviderNames')
|
|
153
|
+ mdb = MotifDb #()
|
151
|
154
|
pn = mcols(mdb)$providerName
|
152
|
|
- checkEquals (length (which (is.na (pn))), 0)
|
153
|
|
- checkEquals (length (which (pn == '')), 0)
|
|
155
|
+ checkEquals(length(which(is.na(pn))), 0)
|
|
156
|
+ checkEquals(length(which(pn == '')), 0)
|
154
|
157
|
|
155
|
158
|
} # test.providerNames
|
156
|
159
|
#------------------------------------------------------------------------------------------------------------------------
|
157
|
|
-test.geneSymbols = function ()
|
|
160
|
+test.geneSymbols = function()
|
158
|
161
|
{
|
159
|
|
- print ('--- test.getGeneSymbols')
|
160
|
|
- mdb = MotifDb # ()
|
|
162
|
+ print('--- test.getGeneSymbols')
|
|
163
|
+ mdb = MotifDb #()
|
161
|
164
|
syms = mcols(mdb)$geneSymbol
|
162
|
|
- checkEquals (length (which (is.na (syms))), 683) # no symols yet for the dgf stamlab motifs
|
163
|
|
- checkEquals (length (which (syms == '')), 0)
|
|
165
|
+ checkEquals(length(which(is.na(syms))), 683) # no symols yet for the dgf stamlab motifs
|
|
166
|
+ checkEquals(length(which(syms == '')), 0)
|
164
|
167
|
|
165
|
168
|
} # test.geneSymbols
|
166
|
169
|
#------------------------------------------------------------------------------------------------------------------------
|
167
|
|
-test.geneIdsAndTypes = function ()
|
|
170
|
+test.geneIdsAndTypes = function()
|
168
|
171
|
{
|
169
|
|
- print ('--- test.getGeneIdsAndTypes')
|
|
172
|
+ print('--- test.getGeneIdsAndTypes')
|
170
|
173
|
mdb = MotifDb
|
171
|
174
|
tbl <- mcols(mdb)
|
172
|
175
|
geneIds = tbl$geneId
|
173
|
176
|
geneIdTypes = tbl$geneIdType
|
174
|
|
- typeCounts = as.list (table (geneIdTypes))
|
|
177
|
+ typeCounts = as.list(table(geneIdTypes))
|
175
|
178
|
|
176
|
179
|
checkTrue(typeCounts$ENTREZ > 2300)
|
177
|
180
|
checkTrue(typeCounts$FLYBASE >= 45)
|
178
|
181
|
checkTrue(typeCounts$SGD >= 600)
|
179
|
182
|
checkTrue(nrow(subset(tbl, is.na(geneIdType))) > 2000)
|
180
|
183
|
|
181
|
|
- empty.count = length (which (geneIds == ''))
|
182
|
|
- checkEquals (empty.count, 0)
|
|
184
|
+ empty.count = length(which(geneIds == ''))
|
|
185
|
+ checkEquals(empty.count, 0)
|
183
|
186
|
|
184
|
187
|
|
185
|
188
|
} # test.geneIdsAndTypes
|
186
|
189
|
#------------------------------------------------------------------------------------------------------------------------
|
187
|
190
|
# make sure that all proteinIds have explicit values, either proper identifiers or NA
|
188
|
191
|
# currently tested by looking for empty string assignments
|
189
|
|
-test.proteinIds = function ()
|
|
192
|
+test.proteinIds = function()
|
190
|
193
|
{
|
191
|
|
- print ('--- test.proteinIds')
|
192
|
|
- mdb = MotifDb # (quiet=TRUE)
|
|
194
|
+ print('--- test.proteinIds')
|
|
195
|
+ mdb = MotifDb #(quiet=TRUE)
|
193
|
196
|
NA.string.count <- sum(is.na(mcols(mdb)$proteinId))
|
194
|
|
-# NA.string.count = length (grep ('NA', mcols(mdb)$proteinId))
|
|
197
|
+# NA.string.count = length(grep('NA', mcols(mdb)$proteinId))
|
195
|
198
|
|
196
|
199
|
checkEquals(NA.string.count, 2514)
|
197
|
200
|
# FIX THIS; Currently 2514 don't have protein IDs
|
198
|
|
- #checkEquals (NA.string.count, 0)
|
|
201
|
+ #checkEquals(NA.string.count, 0)
|
199
|
202
|
|
200
|
|
- empty.count = length (which (mcols(mdb)$proteinId==""))
|
201
|
|
- if (empty.count > 0)
|
202
|
|
- browser ('test.proteinIds')
|
|
203
|
+ empty.count = length(which(mcols(mdb)$proteinId==""))
|
|
204
|
+ if(empty.count > 0)
|
|
205
|
+ browser('test.proteinIds')
|
203
|
206
|
|
204
|
|
- checkEquals (empty.count, 0)
|
|
207
|
+ checkEquals(empty.count, 0)
|
205
|
208
|
|
206
|
209
|
# FlyFactorSurvey, as digested by me, had a blanket assigment of UNIPROT to all proteinIds
|
207
|
210
|
# Herve' pointed out that this applied also to entries with no proteinId.
|
...
|
...
|
@@ -209,86 +212,88 @@ test.proteinIds = function ()
|
209
|
212
|
|
210
|
213
|
### FIX THIS TOO! Currently have 913 entries with a proteinIdType and no proteinId
|
211
|
214
|
x = mcols(mdb)
|
212
|
|
- # checkEquals (nrow (subset (x, !is.na (proteinIdType) & is.na (proteinId))), 0)
|
|
215
|
+ # checkEquals(nrow(subset(x, !is.na(proteinIdType) & is.na(proteinId))), 0)
|
213
|
216
|
|
214
|
217
|
|
215
|
218
|
} # test.proteinIds
|
216
|
219
|
#------------------------------------------------------------------------------------------------------------------------
|
217
|
220
|
# only for UniPROBE do we not have sequence count. might be possible to get them along with 'insertion sequences'
|
218
|
|
-test.sequenceCount = function ()
|
|
221
|
+test.sequenceCount = function()
|
219
|
222
|
{
|
220
|
|
- print ('--- test.sequenceCount')
|
221
|
|
- mdb = MotifDb # ()
|
|
223
|
+ print('--- test.sequenceCount')
|
|
224
|
+ mdb = MotifDb #()
|
222
|
225
|
x = mcols(mdb)
|
223
|
|
- if (interactive ()) {
|
224
|
|
- x.up = subset (x, dataSource == 'UniPROBE')
|
225
|
|
- checkTrue (all (is.na (x.up$sequenceCount)))
|
|
226
|
+ if(interactive()) {
|
|
227
|
+ x.up = subset(x, dataSource == 'UniPROBE')
|
|
228
|
+ checkTrue(all(is.na(x.up$sequenceCount)))
|
226
|
229
|
}
|
227
|
230
|
else {
|
228
|
|
- uniprobe.indices = which (x$dataSource == 'UniPROBE')
|
229
|
|
- checkTrue (all (is.na (x$sequenceCount [uniprobe.indices])))
|
|
231
|
+ uniprobe.indices = which(x$dataSource == 'UniPROBE')
|
|
232
|
+ checkTrue(all(is.na(x$sequenceCount [uniprobe.indices])))
|
230
|
233
|
}
|
231
|
234
|
|
232
|
235
|
} # test.sequenceCount
|
233
|
236
|
#------------------------------------------------------------------------------------------------------------------------
|
234
|
237
|
# make sure that a legitimate organism-dataSource-identifier is supplied for each matrix and as a rowname
|
235
|
238
|
# of the corresponding DataFrame
|
236
|
|
-test.longNames = function ()
|
|
239
|
+test.longNames = function()
|
237
|
240
|
{
|
238
|
|
- print ('--- test.longNames')
|
|
241
|
+ print('--- test.longNames')
|
239
|
242
|
mdb = MotifDb
|
240
|
|
- longNames = strsplit (names (mdb), '-')
|
241
|
|
- organisms = unique (sapply (longNames, '[', 1))
|
|
243
|
+ longNames = strsplit(names(mdb), '-')
|
|
244
|
+ organisms = unique(sapply(longNames, '[', 1))
|
242
|
245
|
|
243
|
|
- dataSources = unique (lapply (longNames, '[', 2))
|
|
246
|
+ dataSources = unique(lapply(longNames, '[', 2))
|
244
|
247
|
|
245
|
|
- recognized.dataSources = unique (mcols(mdb)$dataSource)
|
246
|
|
- recognized.organisms = unique (mcols(mdb)$organism)
|
247
|
|
- # a few (3) matrices from JASPAR core have NA organism. make this into a character
|
|
248
|
+ recognized.dataSources = c(unique(mcols(mdb)$dataSource),
|
|
249
|
+ c("HOCOMOCOv11B", "HOCOMOCOv11C", "HOCOMOCOv11A"))
|
|
250
|
+
|
|
251
|
+ recognized.organisms = unique(mcols(mdb)$organism)
|
|
252
|
+ # a few(3) matrices from JASPAR core have NA organism. make this into a character
|
248
|
253
|
# so that it can be matched up against the 'NA' extracted from longNames just above
|
249
|
|
- na.indices = which (is.na (recognized.organisms))
|
250
|
|
- if (length (na.indices) > 0)
|
|
254
|
+ na.indices = which(is.na(recognized.organisms))
|
|
255
|
+ if(length(na.indices) > 0)
|
251
|
256
|
recognized.organisms [na.indices] = 'NA'
|
252
|
257
|
|
253
|
|
- checkTrue (all (organisms %in% recognized.organisms))
|
254
|
|
- checkTrue (all (dataSources %in% recognized.dataSources))
|
|
258
|
+ checkTrue(all(organisms %in% recognized.organisms))
|
|
259
|
+ checkTrue(all(dataSources %in% recognized.dataSources))
|
255
|
260
|
|
256
|
261
|
} # test.longNames
|
257
|
262
|
#------------------------------------------------------------------------------------------------------------------------
|
258
|
263
|
# make sure that a legitimate organism is specified for each matrix
|
259
|
|
-test.organisms = function ()
|
|
264
|
+test.organisms = function()
|
260
|
265
|
{
|
261
|
|
- print ('--- test.organisms')
|
262
|
|
- mdb = MotifDb # (quiet=TRUE)
|
|
266
|
+ print('--- test.organisms')
|
|
267
|
+ mdb = MotifDb #(quiet=TRUE)
|
263
|
268
|
organisms = mcols(mdb)$organism
|
264
|
269
|
|
265
|
|
- # jaspar_core has 3 NA speciesId: TBP, HNF4A and CEBPA (MA0108.2, MA0114.1, MA0102.2)
|
|
270
|
+ # jaspar_core has 3 NA speciesId: TBP, HNF4A and CEBPA(MA0108.2, MA0114.1, MA0102.2)
|
266
|
271
|
# their website shows these as vertebrates, which I map to 'Vertebrata'. An organismID of '-'
|
267
|
272
|
# gets the same treatment, matching website also.
|
268
|
273
|
|
269
|
274
|
### Note: this failing test is the same as the test.noNAorganisms test!
|
270
|
275
|
# As in case of noNA, need to add organisms for these
|
271
|
|
- #checkEquals (which (is.na (mcols(MotifDb)$organism)), integer (0))
|
|
276
|
+ #checkEquals(which(is.na(mcols(MotifDb)$organism)), integer(0))
|
272
|
277
|
|
273
|
|
- empty.count = length (which (mcols(mdb)$organism==""))
|
274
|
|
- checkEquals (empty.count, 0)
|
|
278
|
+ empty.count = length(which(mcols(mdb)$organism==""))
|
|
279
|
+ checkEquals(empty.count, 0)
|
275
|
280
|
|
276
|
281
|
} # test.organisms
|
277
|
282
|
#------------------------------------------------------------------------------------------------------------------------
|
278
|
|
-test.bindingDomains = function ()
|
|
283
|
+test.bindingDomains = function()
|
279
|
284
|
{
|
280
|
|
- print ('--- test.bindingDomains')
|
281
|
|
- mdb = MotifDb # (quiet=TRUE)
|
282
|
|
- checkTrue (length (unique (mcols(mdb)$bindingDomain)) > 1)
|
|
285
|
+ print('--- test.bindingDomains')
|
|
286
|
+ mdb = MotifDb #(quiet=TRUE)
|
|
287
|
+ checkTrue(length(unique(mcols(mdb)$bindingDomain)) > 1)
|
283
|
288
|
|
284
|
289
|
} # test.bindingDomains
|
285
|
290
|
#------------------------------------------------------------------------------------------------------------------------
|
286
|
|
-test.flyBindingDomains = function ()
|
|
291
|
+test.flyBindingDomains = function()
|
287
|
292
|
{
|
288
|
|
- print ('--- test.flyBindingDomains')
|
|
293
|
+ print('--- test.flyBindingDomains')
|
289
|
294
|
|
290
|
295
|
x = mcols(MotifDb)
|
291
|
|
- tmp = as.list (head (sort (table (subset (x, organism=='Dmelanogaster')$bindingDomain), decreasing=TRUE), n=3))
|
|
296
|
+ tmp = as.list(head(sort(table(subset(x, organism=='Dmelanogaster')$bindingDomain), decreasing=TRUE), n=3))
|
292
|
297
|
|
293
|
298
|
# these counts will likely change with a fresh load of data from FlyFactorSurvey.
|
294
|
299
|
|
...
|
...
|
@@ -299,38 +304,38 @@ test.flyBindingDomains = function ()
|
299
|
304
|
|
300
|
305
|
} # test.flyBindingDomains
|
301
|
306
|
#------------------------------------------------------------------------------------------------------------------------
|
302
|
|
-test.experimentTypes = function ()
|
|
307
|
+test.experimentTypes = function()
|
303
|
308
|
{
|
304
|
|
- print ('--- test.experimentTypes')
|
305
|
|
- mdb = MotifDb # (quiet=TRUE)
|
|
309
|
+ print('--- test.experimentTypes')
|
|
310
|
+ mdb = MotifDb #(quiet=TRUE)
|
306
|
311
|
x = mcols(mdb)
|
307
|
|
- checkTrue (length (unique (x$experimentType)) >= 18)
|
308
|
|
- checkEquals (length (which (x$experimentType=='')), 0)
|
|
312
|
+ checkTrue(length(unique(x$experimentType)) >= 18)
|
|
313
|
+ checkEquals(length(which(x$experimentType=='')), 0)
|
309
|
314
|
|
310
|
315
|
} # test.experimentTypes
|
311
|
316
|
#------------------------------------------------------------------------------------------------------------------------
|
312
|
|
-test.tfFamilies = function ()
|
|
317
|
+test.tfFamilies = function()
|
313
|
318
|
{
|
314
|
|
- print ('--- test.tfFamilies')
|
315
|
|
- mdb = MotifDb # (quiet=TRUE)
|
316
|
|
- checkTrue (length (unique (mcols(mdb)$tfFamily)) > 1)
|
|
319
|
+ print('--- test.tfFamilies')
|
|
320
|
+ mdb = MotifDb #(quiet=TRUE)
|
|
321
|
+ checkTrue(length(unique(mcols(mdb)$tfFamily)) > 1)
|
317
|
322
|
|
318
|
323
|
} # test.tfFamilies
|
319
|
324
|
#------------------------------------------------------------------------------------------------------------------------
|
320
|
|
-test.bindingSequences = function ()
|
|
325
|
+test.bindingSequences = function()
|
321
|
326
|
{
|
322
|
|
- print ('--- test.bindingSequences')
|
323
|
|
- mdb = MotifDb # (quiet=TRUE)
|
324
|
|
- checkTrue (length (unique (mcols(mdb)$bindingSequence)) > 1)
|
|
327
|
+ print('--- test.bindingSequences')
|
|
328
|
+ mdb = MotifDb #(quiet=TRUE)
|
|
329
|
+ checkTrue(length(unique(mcols(mdb)$bindingSequence)) > 1)
|
325
|
330
|
|
326
|
331
|
} # test.tfFamilies
|
327
|
332
|
#------------------------------------------------------------------------------------------------------------------------
|
328
|
|
-test.pubmedIDs = function ()
|
|
333
|
+test.pubmedIDs = function()
|
329
|
334
|
{
|
330
|
|
- print ('--- test.pubmedIDs')
|
331
|
|
- x = mcols(MotifDb) # (quiet=TRUE))
|
332
|
|
- checkTrue (length (unique (x$pubmedID)) >= 139)
|
333
|
|
- checkEquals (length (which (x$pubmedID == '')), 0)
|
|
335
|
+ print('--- test.pubmedIDs')
|
|
336
|
+ x = mcols(MotifDb) #(quiet=TRUE))
|
|
337
|
+ checkTrue(length(unique(x$pubmedID)) >= 139)
|
|
338
|
+ checkEquals(length(which(x$pubmedID == '')), 0)
|
334
|
339
|
|
335
|
340
|
} # test.pubmedIDs
|
336
|
341
|
#------------------------------------------------------------------------------------------------------------------------
|
...
|
...
|
@@ -345,96 +350,96 @@ test.pubmedIDs = function ()
|
345
|
350
|
# ScerTF-Scerevisiae-ABF2-badis
|
346
|
351
|
# JASPAR_CORE-Rrattus-Ar-MA0007.1
|
347
|
352
|
#
|
348
|
|
-test.allFullNames = function ()
|
|
353
|
+test.allFullNames = function()
|
349
|
354
|
{
|
350
|
|
- print ('--- test.allFullNames')
|
351
|
|
- mdb = MotifDb # (quiet=TRUE)
|
|
355
|
+ print('--- test.allFullNames')
|
|
356
|
+ mdb = MotifDb #(quiet=TRUE)
|
352
|
357
|
matrices = mdb@listData
|
353
|
|
- fullNames = names (matrices)
|
|
358
|
+ fullNames = names(matrices)
|
354
|
359
|
|
355
|
|
- all.dataSources = unique (mcols(mdb)$dataSource)
|
356
|
|
- checkTrue (length (all.dataSources) >= 4)
|
|
360
|
+ all.dataSources = unique(mcols(mdb)$dataSource)
|
|
361
|
+ checkTrue(length(all.dataSources) >= 4)
|
357
|
362
|
|
358
|
|
- for (source in all.dataSources) {
|
|
363
|
+ for(source in all.dataSources) {
|
359
|
364
|
this.dataSource <<- source
|
360
|
|
- matrices.by.source = subset (mdb, dataSource==this.dataSource)
|
361
|
|
- matrix.name = names (matrices.by.source)[1]
|
|
365
|
+ matrices.by.source = subset(mdb, dataSource==this.dataSource)
|
|
366
|
+ matrix.name = names(matrices.by.source)[1]
|
362
|
367
|
# FlyFactorSurvey: Dmelanogaster-FlyFactorSurvey-ab_SANGER_10_FBgn0259750
|
363
|
368
|
# hPDI: Hsapiens-hPDI-ABCF2
|
364
|
369
|
# JASPAR_CORE: JASPAR_CORE-Athaliana-AGL3-MA0001.1
|
365
|
370
|
# UniPROBE: Hsapiens-UniPROBE-Sox4.UP00401
|
366
|
|
- checkTrue (grep (this.dataSource, matrix.name) == 1)
|
367
|
|
- #printf ('%20s: %s', source, matrix.name)
|
|
371
|
+ checkTrue(grep(this.dataSource, matrix.name) == 1)
|
|
372
|
+ #printf('%20s: %s', source, matrix.name)
|
368
|
373
|
}
|
369
|
374
|
|
370
|
|
- return (TRUE)
|
|
375
|
+ return(TRUE)
|
371
|
376
|
|
372
|
377
|
} # test.allFullNames
|
373
|
378
|
#------------------------------------------------------------------------------------------------------------------------
|
374
|
|
-test.subset = function ()
|
|
379
|
+test.subset = function()
|
375
|
380
|
{
|
376
|
|
- if (interactive ()) {
|
377
|
|
- print ('--- test.subset')
|
|
381
|
+ if(interactive()) {
|
|
382
|
+ print('--- test.subset')
|
378
|
383
|
mdb = MotifDb
|
379
|
|
- checkTrue ('geneSymbol' %in% colnames (elementMetadata (mdb)))
|
380
|
|
- mdb.sub = subset (mdb, geneSymbol=='ABCF2')
|
381
|
|
- checkEquals (length (mdb.sub), 1)
|
|
384
|
+ checkTrue('geneSymbol' %in% colnames(elementMetadata(mdb)))
|
|
385
|
+ mdb.sub = subset(mdb, geneSymbol=='ABCF2')
|
|
386
|
+ checkEquals(length(mdb.sub), 1)
|
382
|
387
|
} # if interactive
|
383
|
388
|
|
384
|
389
|
} # test.subset
|
385
|
390
|
#------------------------------------------------------------------------------------------------------------------------
|
386
|
|
-test.subsetWithVariables = function ()
|
|
391
|
+test.subsetWithVariables = function()
|
387
|
392
|
{
|
388
|
|
- if (interactive ()) {
|
389
|
|
- print ('--- test.subsetWithVariables')
|
|
393
|
+ if(interactive()) {
|
|
394
|
+ print('--- test.subsetWithVariables')
|
390
|
395
|
|
391
|
|
- mdb = MotifDb # ()
|
392
|
|
- checkTrue ('geneSymbol' %in% colnames (elementMetadata (mdb)))
|
|
396
|
+ mdb = MotifDb #()
|
|
397
|
+ checkTrue('geneSymbol' %in% colnames(elementMetadata(mdb)))
|
393
|
398
|
target.gene <<- 'ABCF2'
|
394
|
|
- mdb.sub = subset (mdb, geneSymbol==target.gene)
|
395
|
|
- checkEquals (length (mdb.sub), 1)
|
|
399
|
+ mdb.sub = subset(mdb, geneSymbol==target.gene)
|
|
400
|
+ checkEquals(length(mdb.sub), 1)
|
396
|
401
|
} # if interactive
|
397
|
402
|
|
398
|
403
|
} # test.subsetWithVariables
|
399
|
404
|
#------------------------------------------------------------------------------------------------------------------------
|
400
|
405
|
# "old style": just one query term allowed
|
401
|
|
-test.queryOldStyle = function ()
|
|
406
|
+test.queryOldStyle = function()
|
402
|
407
|
{
|
403
|
|
- print ('--- test.queryOldStyle')
|
|
408
|
+ print('--- test.queryOldStyle')
|
404
|
409
|
mdb = MotifDb
|
405
|
410
|
|
406
|
411
|
# do queries on dataSource counts match those from a contingency table?
|
407
|
|
- sources.list = as.list (table (mcols(mdb)$dataSource))
|
408
|
|
- checkEquals (length (query (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
|
409
|
|
- checkEquals (length (query (mdb, 'uniprobe')), sources.list$UniPROBE)
|
410
|
|
- checkEquals (length (query (mdb, 'UniPROBE')), sources.list$UniPROBE)
|
|
412
|
+ sources.list = as.list(table(mcols(mdb)$dataSource))
|
|
413
|
+ checkEquals(length(query(mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
|
|
414
|
+ checkEquals(length(query(mdb, 'uniprobe')), sources.list$UniPROBE)
|
|
415
|
+ checkEquals(length(query(mdb, 'UniPROBE')), sources.list$UniPROBE)
|
411
|
416
|
|
412
|
417
|
# gene symbols which begin with 'sox' are quite common. can we them?
|
413
|
|
- # there are currently (19 jul 2012) 18, but since this may change, our test is approximate
|
|
418
|
+ # there are currently(19 jul 2012) 18, but since this may change, our test is approximate
|
414
|
419
|
|
415
|
420
|
# Change on 8/1/2017: increase top limit of sox entries as they've expanded
|
416
|
|
- sox.entries = query (mdb, '^sox')
|
417
|
|
- checkTrue (length (sox.entries) > 10)
|
418
|
|
- checkTrue (length (sox.entries) < 200)
|
|
421
|
+ sox.entries = query(mdb, '^sox')
|
|
422
|
+ checkTrue(length(sox.entries) > 10)
|
|
423
|
+ checkTrue(length(sox.entries) < 200)
|
419
|
424
|
|
420
|
425
|
# manual inspection reveals that some of these genes have names which are all capitalized. test that.
|
421
|
|
- checkTrue (length (query (mdb, '^sox', ignore.case=TRUE)) > length (query (mdb, '^SOX', ignore.case=FALSE)))
|
|
426
|
+ checkTrue(length(query(mdb, '^sox', ignore.case=TRUE)) > length(query(mdb, '^SOX', ignore.case=FALSE)))
|
422
|
427
|
|
423
|
428
|
# make sure that queries can be stacked up, and that order of call does not affect the outcome
|
424
|
|
- uniprobe.sox.matrices = query (query (mdb, 'uniprobe'), '^sox')
|
425
|
|
- sox.uniprobe.matrices = query (query (mdb, '^sox'), 'uniprobe')
|
|
429
|
+ uniprobe.sox.matrices = query(query(mdb, 'uniprobe'), '^sox')
|
|
430
|
+ sox.uniprobe.matrices = query(query(mdb, '^sox'), 'uniprobe')
|
426
|
431
|
|
427
|
|
- checkEquals (length (uniprobe.sox.matrices), length (sox.uniprobe.matrices))
|
|
432
|
+ checkEquals(length(uniprobe.sox.matrices), length(sox.uniprobe.matrices))
|
428
|
433
|
|
429
|
434
|
# an approximate count check
|
430
|
|
- checkTrue (length (uniprobe.sox.matrices) > 10)
|
431
|
|
- checkTrue (length (uniprobe.sox.matrices) < 30)
|
|
435
|
+ checkTrue(length(uniprobe.sox.matrices) > 10)
|
|
436
|
+ checkTrue(length(uniprobe.sox.matrices) < 30)
|
432
|
437
|
|
433
|
|
- checkEquals (unique (mcols(uniprobe.sox.matrices)$dataSource), 'UniPROBE')
|
434
|
|
- checkEquals (unique (mcols(sox.uniprobe.matrices)$dataSource), 'UniPROBE')
|
435
|
|
- gene.symbols = sort (unique (mcols(uniprobe.sox.matrices)$geneSymbol))
|
|
438
|
+ checkEquals(unique(mcols(uniprobe.sox.matrices)$dataSource), 'UniPROBE')
|
|
439
|
+ checkEquals(unique(mcols(sox.uniprobe.matrices)$dataSource), 'UniPROBE')
|
|
440
|
+ gene.symbols = sort(unique(mcols(uniprobe.sox.matrices)$geneSymbol))
|
436
|
441
|
|
437
|
|
- # query on a string (in this case, an oddly named motif) which contains
|
|
442
|
+ # query on a string(in this case, an oddly named motif) which contains
|
438
|
443
|
# regular expression characters. no solution to this yet.
|
439
|
444
|
# query uses base R's grep, in which
|
440
|
445
|
# x <- query(mdb, "ELK1,4_GABP{A,B1}.p3")
|
...
|
...
|
@@ -443,7 +448,7 @@ test.queryOldStyle = function ()
|
443
|
448
|
#------------------------------------------------------------------------------------------------------------------------
|
444
|
449
|
test.query <- function()
|
445
|
450
|
{
|
446
|
|
- print ('--- test.query')
|
|
451
|
+ print('--- test.query')
|
447
|
452
|
mdb = MotifDb
|
448
|
453
|
|
449
|
454
|
ors <- c("MA0511.1", "MA0057.1")
|
...
|
...
|
@@ -471,10 +476,10 @@ test.query <- function()
|
471
|
476
|
#checkEquals(sort(names(x)),
|
472
|
477
|
|
473
|
478
|
# do queries on dataSource counts match those from a contingency table?
|
474
|
|
- sources.list = as.list (table (mcols(mdb)$dataSource))
|
475
|
|
- checkEquals (length (query (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
|
476
|
|
- checkEquals (length (query (mdb, 'uniprobe')), sources.list$UniPROBE)
|
477
|
|
- checkEquals (length (query (mdb, 'UniPROBE')), sources.list$UniPROBE)
|
|
479
|
+ sources.list = as.list(table(mcols(mdb)$dataSource))
|
|
480
|
+ checkEquals(length(query(mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
|
|
481
|
+ checkEquals(length(query(mdb, 'uniprobe')), sources.list$UniPROBE)
|
|
482
|
+ checkEquals(length(query(mdb, 'UniPROBE')), sources.list$UniPROBE)
|
478
|
483
|
|
479
|
484
|
} # test.query
|
480
|
485
|
#------------------------------------------------------------------------------------------------------------------------
|
...
|
...
|
@@ -486,7 +491,7 @@ test.query2 <- function()
|
486
|
491
|
|
487
|
492
|
matrices.human.sox4 <- query(mdb, c("hsapiens", "sox4"))
|
488
|
493
|
matrices.human.sox4.oldStyle <- query(matrices.human, "sox4")
|
489
|
|
- checkTrue(length(matrices.human.sox4) > 0) # 6 found on (24 oct 2018)
|
|
494
|
+ checkTrue(length(matrices.human.sox4) > 0) # 6 found on(24 oct 2018)
|
490
|
495
|
checkEquals(length(matrices.human.sox4), length(matrices.human.sox4.oldStyle))
|
491
|
496
|
|
492
|
497
|
checkTrue(length(matrices.human.sox4) < length(matrices.human))
|
...
|
...
|
@@ -496,260 +501,264 @@ test.query2 <- function()
|
496
|
501
|
|
497
|
502
|
} # test.query2
|
498
|
503
|
#------------------------------------------------------------------------------------------------------------------------
|
499
|
|
-test.transformMatrixToMemeRepresentation = function ()
|
|
504
|
+test.transformMatrixToMemeRepresentation = function()
|
500
|
505
|
{
|
501
|
|
- print ('--- test.transformMatrixToMemeRepresentation')
|
502
|
|
- mdb = MotifDb # ()
|
503
|
|
- matrix.agl3 = subset (mdb, dataSource=='JASPAR_CORE' & organism=='Athaliana' & geneSymbol=='AGL3')[[1]]
|
504
|
|
- checkEquals (dim (matrix.agl3), c (4, 10))
|
|
506
|
+ print('--- test.transformMatrixToMemeRepresentation')
|
|
507
|
+ mdb = MotifDb #()
|
|
508
|
+ matrix.agl3 = subset(mdb, dataSource=='JASPAR_CORE' & organism=='Athaliana' & geneSymbol=='AGL3')[[1]]
|
|
509
|
+ checkEquals(dim(matrix.agl3), c(4, 10))
|
505
|
510
|
|
506
|
511
|
# a transposed frequency is needed for meme. we store normalized frequency matrices, to just transposition is needed
|
507
|
|
- tfm = MotifDb:::transformMatrixToMemeRepresentation (matrix.agl3)
|
508
|
|
- checkEquals (dim (tfm), c (10, 4))
|
509
|
|
- checkTrue (all (round (rowSums (tfm)) == 1.0))
|
|
512
|
+ tfm = MotifDb:::transformMatrixToMemeRepresentation(matrix.agl3)
|
|
513
|
+ checkEquals(dim(tfm), c(10, 4))
|
|
514
|
+ checkTrue(all(round(rowSums(tfm)) == 1.0))
|
510
|
515
|
|
511
|
516
|
} # test.transformMatrixToMemeRepresentation
|
512
|
517
|
#------------------------------------------------------------------------------------------------------------------------
|
513
|
518
|
# translate a motif matrix from MotifList into text suitable for meme and tomtom input.
|
514
|
519
|
# choose the top two from UniPROBE. they reliably have high counts, and easily distinguished normalized frequencies
|
515
|
|
-test.matrixToMemeText = function ()
|
|
520
|
+test.matrixToMemeText = function()
|
516
|
521
|
{
|
517
|
|
- print ('--- test.matrixToMemeText')
|
518
|
|
- sox4 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
|
519
|
|
- rtg3 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
|
|
522
|
+ print('--- test.matrixToMemeText')
|
|
523
|
+ sox4 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
|
|
524
|
+ rtg3 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
|
520
|
525
|
|
521
|
|
- text.sox4 = MotifDb:::matrixToMemeText (sox4)
|
522
|
|
- # check these with t (sox4 [[1]])
|
|
526
|
+ text.sox4 = MotifDb:::matrixToMemeText(sox4)
|
|
527
|
+ # check these with t(sox4 [[1]])
|
523
|
528
|
line1.sox4 = " 0.2457457130 0.1950426500 0.2287887620 0.3304228750"
|
524
|
529
|
line14.sox4 = " 0.2821643030 0.2286132160 0.1585395830 0.3306828990"
|
525
|
530
|
|
526
|
|
- checkEquals (length (text.sox4), 29)
|
527
|
|
- checkEquals (text.sox4 [1], "MEME version 4")
|
528
|
|
- checkEquals (text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
|
529
|
|
- checkEquals (grep (line1.sox4, text.sox4), 12)
|
530
|
|
- checkEquals (grep (line14.sox4, text.sox4), 25)
|
|
531
|
+ checkEquals(length(text.sox4), 29)
|
|
532
|
+ checkEquals(text.sox4 [1], "MEME version 4")
|
|
533
|
+ checkEquals(text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
|
|
534
|
+ checkEquals(grep(line1.sox4, text.sox4), 12)
|
|
535
|
+ checkEquals(grep(line14.sox4, text.sox4), 25)
|
531
|
536
|
|
532
|
|
- text.rtg3 = MotifDb:::matrixToMemeText (rtg3)
|
|
537
|
+ text.rtg3 = MotifDb:::matrixToMemeText(rtg3)
|
533
|
538
|
line1.rtg3 = " 0.3935122858 0.1453016447 0.3308830322 0.1303030373"
|
534
|
539
|
line20.rtg3 = " 0.2490417648 0.3966478493 0.1083586569 0.2459517291"
|
535
|
|
- checkEquals (length (text.rtg3), 35) # 4 trailing empty lines
|
536
|
|
- checkEquals (text.rtg3 [1], "MEME version 4")
|
537
|
|
- checkEquals (text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
|
538
|
|
- checkEquals (grep (line1.rtg3, text.rtg3), 12)
|
539
|
|
- checkEquals (grep (line20.rtg3, text.rtg3), 31)
|
|
540
|
+ checkEquals(length(text.rtg3), 35) # 4 trailing empty lines
|
|
541
|
+ checkEquals(text.rtg3 [1], "MEME version 4")
|
|
542
|
+ checkEquals(text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
|
|
543
|
+ checkEquals(grep(line1.rtg3, text.rtg3), 12)
|
|
544
|
+ checkEquals(grep(line20.rtg3, text.rtg3), 31)
|
540
|
545
|
|
541
|
546
|
# now call with both matrices, and see if the right results are returned
|
542
|
|
- text.both = MotifDb:::matrixToMemeText (c (sox4, rtg3))
|
543
|
|
- checkEquals (text.both [1], "MEME version 4")
|
544
|
|
- checkEquals (grep (line1.sox4, text.both), 12)
|
545
|
|
- checkEquals (grep (line14.sox4, text.both), 25)
|
546
|
|
- checkEquals (grep (line1.rtg3, text.both), 29)
|
547
|
|
- checkEquals (grep (line20.rtg3, text.both), 48)
|
|
547
|
+ text.both = MotifDb:::matrixToMemeText(c(sox4, rtg3))
|
|
548
|
+ checkEquals(text.both [1], "MEME version 4")
|
|
549
|
+ checkEquals(grep(line1.sox4, text.both), 12)
|
|
550
|
+ checkEquals(grep(line14.sox4, text.both), 25)
|
|
551
|
+ checkEquals(grep(line1.rtg3, text.both), 29)
|
|
552
|
+ checkEquals(grep(line20.rtg3, text.both), 48)
|
548
|
553
|
|
549
|
554
|
} # test.matrixToMemeText
|
550
|
555
|
#------------------------------------------------------------------------------------------------------------------------
|
551
|
556
|
# translate a motif matrix from MotifList into text suitable for meme and tomtom input.
|
552
|
557
|
# choose the top two from UniPROBE. they reliably have high counts, and easily distinguished normalized frequencies
|
553
|
|
-#test.matrixToMemeText_mapVersion = function ()
|
|
558
|
+#test.matrixToMemeText_mapVersion = function()
|
554
|
559
|
#{
|
555
|
|
-# print ('--- test.matrixToMemeText_mapVersion')
|
556
|
|
-# sox4 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
|
557
|
|
-# rtg3 = subset (MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
|
|
560
|
+# print('--- test.matrixToMemeText_mapVersion')
|
|
561
|
+# sox4 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Hsapiens' & geneSymbol=='Sox4')
|
|
562
|
+# rtg3 = subset(MotifDb, dataSource=='UniPROBE' & organism=='Scerevisiae' & geneSymbol=='Rtg3')
|
558
|
563
|
#
|
559
|
|
-# text.sox4 = MotifDb:::mapped.broken.matrixToMemeText (sox4)
|
560
|
|
-# text.rtg3 = MotifDb:::mapped.broken.matrixToMemeText (rtg3)
|
561
|
|
-# text.both = MotifDb:::mapped.broken.matrixToMemeText (c (sox4, rtg3))
|
|
564
|
+# text.sox4 = MotifDb:::mapped.broken.matrixToMemeText(sox4)
|
|
565
|
+# text.rtg3 = MotifDb:::mapped.broken.matrixToMemeText(rtg3)
|
|
566
|
+# text.both = MotifDb:::mapped.broken.matrixToMemeText(c(sox4, rtg3))
|
562
|
567
|
#
|
563
|
|
-# # check these with t (sox4 [[1]])
|
|
568
|
+# # check these with t(sox4 [[1]])
|
564
|
569
|
# line1.sox4 = " 0.2457457130 0.1950426500 0.2287887620 0.3304228750"
|
565
|
570
|
# line14.sox4 = " 0.2821643030 0.2286132160 0.1585395830 0.3306828990"
|
566
|
571
|
#
|
567
|
|
-# checkEquals (length (text.sox4), 29)
|
568
|
|
-# checkEquals (text.sox4 [1], "MEME version 4")
|
569
|
|
-# checkEquals (text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
|
570
|
|
-# checkEquals (grep (line1.sox4, text.sox4), 12)
|
571
|
|
-# checkEquals (grep (line14.sox4, text.sox4), 25)
|
|
572
|
+# checkEquals(length(text.sox4), 29)
|
|
573
|
+# checkEquals(text.sox4 [1], "MEME version 4")
|
|
574
|
+# checkEquals(text.sox4 [10], "MOTIF Hsapiens-UniPROBE-Sox4.UP00401")
|
|
575
|
+# checkEquals(grep(line1.sox4, text.sox4), 12)
|
|
576
|
+# checkEquals(grep(line14.sox4, text.sox4), 25)
|
572
|
577
|
#
|
573
|
578
|
# line1.rtg3 = " 0.3935122858 0.1453016447 0.3308830322 0.1303030373"
|
574
|
579
|
# line20.rtg3 = " 0.2490417648 0.3966478493 0.1083586569 0.2459517291"
|
575
|
|
-# checkEquals (length (text.rtg3), 35) # 4 trailing empty lines
|
576
|
|
-# checkEquals (text.rtg3 [1], "MEME version 4")
|
577
|
|
-# checkEquals (text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
|
578
|
|
-# checkEquals (grep (line1.rtg3, text.rtg3), 12)
|
579
|
|
-# checkEquals (grep (line20.rtg3, text.rtg3), 31)
|
|
580
|
+# checkEquals(length(text.rtg3), 35) # 4 trailing empty lines
|
|
581
|
+# checkEquals(text.rtg3 [1], "MEME version 4")
|
|
582
|
+# checkEquals(text.rtg3 [10], "MOTIF Scerevisiae-UniPROBE-Rtg3.UP00356")
|
|
583
|
+# checkEquals(grep(line1.rtg3, text.rtg3), 12)
|
|
584
|
+# checkEquals(grep(line20.rtg3, text.rtg3), 31)
|
580
|
585
|
#
|
581
|
586
|
# # now call with both matrices, and see if the right results are returned
|
582
|
|
-# checkEquals (text.both [1], "MEME version 4")
|
583
|
|
-# checkEquals (grep (line1.sox4, text.both), 12)
|
584
|
|
-# checkEquals (grep (line14.sox4, text.both), 25)
|
585
|
|
-# checkEquals (grep (line1.rtg3, text.both), 29)
|
586
|
|
-# checkEquals (grep (line20.rtg3, text.both), 48)
|
|
587
|
+# checkEquals(text.both [1], "MEME version 4")
|
|
588
|
+# checkEquals(grep(line1.sox4, text.both), 12)
|
|
589
|
+# checkEquals(grep(line14.sox4, text.both), 25)
|
|
590
|
+# checkEquals(grep(line1.rtg3, text.both), 29)
|
|
591
|
+# checkEquals(grep(line20.rtg3, text.both), 48)
|
587
|
592
|
#
|
588
|
593
|
#} # test.matrixToMemeText_mapVersion
|
589
|
594
|
#------------------------------------------------------------------------------------------------------------------------
|
590
|
|
-test.export_memeFormatStdOut = function ()
|
|
595
|
+test.export_memeFormatStdOut = function()
|
591
|
596
|
{
|
592
|
|
- print ('--- test.export_memeFormatStdOut')
|
593
|
|
- mdb = MotifDb # ()
|
594
|
|
- mdb.chicken = subset (mdb, organism=='Gallus')
|
595
|
|
- checkEquals (length (mdb.chicken), 3)
|
|
597
|
+ print('--- test.export_memeFormatStdOut')
|
|
598
|
+ mdb = MotifDb #()
|
|
599
|
+ mdb.chicken = subset(mdb, organism=='Gallus')
|
|
600
|
+ checkEquals(length(mdb.chicken), 3)
|
596
|
601
|
# text is cat-ed to stdout, so not avaialable here to check.
|
597
|
602
|
# but just like print, export also returns the text invisibly.
|
598
|
603
|
# so that CAN be checked.
|
599
|
604
|
|
600
|
|
- meme.text = export (mdb.chicken, format='meme')
|
601
|
|
- checkEquals (length (meme.text), 1) # just one long string
|
602
|
|
- checkTrue (is.character (meme.text))
|
603
|
|
- checkTrue (nchar (meme.text) > 800) # 1002 as of (10 aug 2012)
|
604
|
|
- return (TRUE)
|
|
605
|
+ meme.text = export(mdb.chicken, format='meme')
|
|
606
|
+ checkEquals(length(meme.text), 1) # just one long string
|
|
607
|
+ checkTrue(is.character(meme.text))
|
|
608
|
+ checkTrue(nchar(meme.text) > 800) # 1002 as of(10 aug 2012)
|
|
609
|
+ return(TRUE)
|
605
|
610
|
|
606
|
611
|
} # test.exportMemeFormatToStdOut
|
607
|
612
|
#------------------------------------------------------------------------------------------------------------------------
|
608
|
|
-test.export_memeFormatToFile = function ()
|
|
613
|
+test.export_memeFormatToFile = function()
|
609
|
614
|
{
|
610
|
|
- print ('--- test.export_memeFormatToFile')
|
611
|
|
- mdb = MotifDb # ()
|
612
|
|
- mdb.chicken = subset (mdb, organism=='Gallus')
|
613
|
|
- checkEquals (length (mdb.chicken), 3)
|
614
|
|
- output.file = tempfile ()
|
615
|
|
- meme.text = export (mdb.chicken, output.file, 'meme')
|
616
|
|
- retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
|
617
|
|
- invisible (retrieved)
|
|
615
|
+ print('--- test.export_memeFormatToFile')
|
|
616
|
+ mdb = MotifDb #()
|
|
617
|
+ mdb.chicken = subset(mdb, organism=='Gallus')
|
|
618
|
+ checkEquals(length(mdb.chicken), 3)
|
|
619
|
+ output.file = tempfile()
|
|
620
|
+ meme.text = export(mdb.chicken, output.file, 'meme')
|
|
621
|
+ retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
|
|
622
|
+ invisible(retrieved)
|
618
|
623
|
|
619
|
624
|
} # test.exportMemeFormatToFile
|
620
|
625
|
#------------------------------------------------------------------------------------------------------------------------
|
621
|
|
-test.export_memeFormatToFileDuplication = function ()
|
|
626
|
+test.export_memeFormatToFileDuplication = function()
|
622
|
627
|
{
|
623
|
|
- print ('--- test.export_memeFormatToFileDuplication')
|
624
|
|
- mdb = MotifDb # ()
|
625
|
|
- mdb.mouse = subset (mdb, organism=='Mmusculus')
|
|
628
|
+ print('--- test.export_memeFormatToFileDuplication')
|
|
629
|
+ mdb = MotifDb #()
|
|
630
|
+ mdb.mouse = subset(mdb, organism=='Mmusculus')
|
626
|
631
|
checkTrue(length(mdb.mouse) > 1300)
|
627
|
|
- output.file = 'mouse.txt' # tempfile ()
|
|
632
|
+ output.file = 'mouse.txt' # tempfile()
|
628
|
633
|
max = 3
|
629
|
|
- meme.text = export (mdb.mouse [1:max], output.file, 'meme')
|
630
|
|
- retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
|
631
|
|
- invisible (retrieved)
|
|
634
|
+ meme.text = export(mdb.mouse [1:max], output.file, 'meme')
|
|
635
|
+ retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
|
|
636
|
+ invisible(retrieved)
|
632
|
637
|
|
633
|
638
|
} # test.exportMemeFormatToFileDuplication
|
634
|
639
|
#------------------------------------------------------------------------------------------------------------------------
|
635
|
|
-test.export_memeFormatToFile_run_tomtom = function (max=50)
|
|
640
|
+test.export_memeFormatToFile_run_tomtom = function(max=50)
|
636
|
641
|
{
|
637
|
|
- if (interactive ()) {
|
638
|
|
- print ('--- test.export_memeFormatToFile_run_tomtom')
|
|
642
|
+ if(interactive()) {
|
|
643
|
+ print('--- test.export_memeFormatToFile_run_tomtom')
|
639
|
644
|
mdb = MotifDb
|
640
|
|
- sox4.mouse = subset (mdb, organism=='Mmusculus' & geneSymbol=='Sox4')
|
641
|
|
- all.human = subset (mdb, organism=='Hsapiens')
|
|
645
|
+ sox4.mouse = subset(mdb, organism=='Mmusculus' & geneSymbol=='Sox4')
|
|
646
|
+ all.human = subset(mdb, organism=='Hsapiens')
|
642
|
647
|
|
643
|
|
- tomtom.tmp.dir = tempfile ()
|
644
|
|
- print (tomtom.tmp.dir)
|
645
|
|
- stopifnot (dir.create (tomtom.tmp.dir))
|
646
|
|
- sox4.file.path = file.path (tomtom.tmp.dir, 'sox4.mouse.text')
|
647
|
|
- all.human.file.path = file.path (tomtom.tmp.dir,'all.human.text')
|
648
|
|
- export (sox4.mouse, sox4.file.path, 'meme')
|
649
|
|
- export (all.human, all.human.file.path, 'meme')
|
|
648
|
+ tomtom.tmp.dir = tempfile()
|
|
649
|
+ print(tomtom.tmp.dir)
|
|
650
|
+ stopifnot(dir.create(tomtom.tmp.dir))
|
|
651
|
+ sox4.file.path = file.path(tomtom.tmp.dir, 'sox4.mouse.text')
|
|
652
|
+ all.human.file.path = file.path(tomtom.tmp.dir,'all.human.text')
|
|
653
|
+ export(sox4.mouse, sox4.file.path, 'meme')
|
|
654
|
+ export(all.human, all.human.file.path, 'meme')
|
650
|
655
|
|
651
|
656
|
# find similarity of motif #1 to all the motifs in mdbMany
|
652
|
657
|
|
653
|
658
|
# cannot rely upon tomtom being present
|
654
|
659
|
|
655
|
|
- #cmd = sprintf ('tomtom -no-ssc -oc %s -verbosity 3 -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10 %s %s',
|
|
660
|
+ #cmd = sprintf('tomtom -no-ssc -oc %s -verbosity 3 -min-overlap 5 -mi 1 -dist pearson -evalue -thresh 10 %s %s',
|
656
|
661
|
# tomtom.tmp.dir, sox4.file.path, all.human.file.path)
|
657
|
|
- #system (cmd)
|
658
|
|
- #cmd = sprintf ('open %s/tomtom.html', tomtom.tmp.dir)
|
659
|
|
- #system (cmd)
|
|
662
|
+ #system(cmd)
|
|
663
|
+ #cmd = sprintf('open %s/tomtom.html', tomtom.tmp.dir)
|
|
664
|
+ #system(cmd)
|
660
|
665
|
} # if interactive
|
661
|
666
|
|
662
|
667
|
} # test.export_memeFormatToFile_run_tomtom
|
663
|
668
|
#------------------------------------------------------------------------------------------------------------------------
|
664
|
669
|
# MotIV::motifMatch fails with MotIV_1.25.0. will look into this in September, 2015, pshannon
|
665
|
|
-test.run_MotIV.motifMatch = function ()
|
|
670
|
+# MotIV abandoned, all of my own motif/sequence matching is done via an independently installed
|
|
671
|
+# FIMO from the meme suite.
|
|
672
|
+# another option is the MOODs motif matching capability provided by the bioc package "motifmatchr"
|
|
673
|
+# this possibility, and these tests, are deferred for now.
|
|
674
|
+disabled_test.run_MotIV.motifMatch = function()
|
666
|
675
|
{
|
667
|
|
- library (MotIV)
|
668
|
|
- print ('--- test.run_MotIV.motifMatch')
|
669
|
|
- mdb <- MotifDb # ()
|
|
676
|
+ require(MotIV)
|
|
677
|
+ print('--- test.run_MotIV.motifMatch')
|
|
678
|
+ mdb <- MotifDb #()
|
670
|
679
|
|
671
|
680
|
db.tmp <- mdb@listData
|
672
|
681
|
|
673
|
682
|
# match motif 1 against the entire MotifDb collection
|
674
|
|
- motif.hits <- motifMatch (db.tmp [1], database<-db.tmp)
|
|
683
|
+ motif.hits <- motifMatch(db.tmp [1], database<-db.tmp)
|
675
|
684
|
# the long way to extract the matrix name. see MotIV.toTable below for more convenient way
|
676
|
|
- checkEquals (motif.hits@bestMatch[[1]]@aligns[[1]]@TF@name, names (db.tmp)[1])
|
|
685
|
+ checkEquals(motif.hits@bestMatch[[1]]@aligns[[1]]@TF@name, names(db.tmp)[1])
|
677
|
686
|
|
678
|
687
|
# match the last motif against all
|
679
|
688
|
last <- length(db.tmp)
|
680
|
689
|
# MotIV:motifMatch works differently on linux and macos. by asking for 50 matches,
|
681
|
|
- # the search target (db.tmp[last]) is sure to be in the hit list.
|
682
|
|
- motif.hits <- motifMatch (db.tmp [last], database=db.tmp, top=50)
|
683
|
|
- tbl.hits <- MotIV.toTable (motif.hits)
|
|
690
|
+ # the search target(db.tmp[last]) is sure to be in the hit list.
|
|
691
|
+ motif.hits <- motifMatch(db.tmp [last], database=db.tmp, top=50)
|
|
692
|
+ tbl.hits <- MotIV.toTable(motif.hits)
|
684
|
693
|
# the 5 hits return should include the one we tried to match, but the MotIV search strategy
|
685
|
694
|
# may not place it first
|
686
|
695
|
checkTrue(names(db.tmp[last]) %in% tbl.hits$name)
|
687
|
|
- invisible (tbl.hits)
|
|
696
|
+ invisible(tbl.hits)
|
688
|
697
|
|
689
|
|
-} # test.run_MotIV.motifMatch
|
|
698
|
+} # distable_test.run_MotIV.motifMatch
|
690
|
699
|
#------------------------------------------------------------------------------------------------------------------------
|
691
|
|
-MotIV.toTable = function (match)
|
|
700
|
+MotIV.toTable = function(match)
|
692
|
701
|
{
|
693
|
|
- stopifnot (length (match@bestMatch) >= 1)
|
|
702
|
+ stopifnot(length(match@bestMatch) >= 1)
|
694
|
703
|
alignments = match@bestMatch[[1]]@aligns
|
695
|
704
|
|
696
|
|
- df = data.frame (stringsAsFactors=FALSE)
|
697
|
|
- for (alignment in alignments) {
|
|
705
|
+ df = data.frame(stringsAsFactors=FALSE)
|
|
706
|
+ for(alignment in alignments) {
|
698
|
707
|
x = alignment
|
699
|
708
|
name = x@TF@name
|
700
|
709
|
eVal = x@evalue
|
701
|
710
|
sequence = x@sequence
|
702
|
711
|
match = x@match
|
703
|
712
|
strand = x@strand
|
704
|
|
- df = rbind (df, data.frame (name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
|
|
713
|
+ df = rbind(df, data.frame(name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
|
705
|
714
|
} # for alignment
|
706
|
715
|
|
707
|
|
- return (df)
|
|
716
|
+ return(df)
|
708
|
717
|
|
709
|
718
|
} # MotIV.toTable
|
710
|
719
|
#------------------------------------------------------------------------------------------------------------------------
|
711
|
|
-test.MotIV.toTable = function ()
|
|
720
|
+disabled_test.MotIV.toTable = function()
|
712
|
721
|
{
|
713
|
|
- print ('--- test.MotIVtoTable')
|
714
|
|
- mdb = MotifDb # ()
|
715
|
|
- test.hits = motifMatch (mdb[1]@listData, database=jaspar)
|
716
|
|
- tbl.hits = MotIV.toTable (test.hits)
|
717
|
|
- checkEquals (dim (tbl.hits), c (5, 5))
|
718
|
|
- checkEquals (sort(colnames (tbl.hits)), sort(c("name", "eVal", "sequence", "match", "strand")))
|
|
722
|
+ print('--- test.MotIVtoTable')
|
|
723
|
+ mdb = MotifDb #()
|
|
724
|
+ test.hits = motifMatch(mdb[1]@listData, database=jaspar)
|
|
725
|
+ tbl.hits = MotIV.toTable(test.hits)
|
|
726
|
+ checkEquals(dim(tbl.hits), c(5, 5))
|
|
727
|
+ checkEquals(sort(colnames(tbl.hits)), sort(c("name", "eVal", "sequence", "match", "strand")))
|
719
|
728
|
|
720
|
729
|
} # test.MotIV.toTable
|
721
|
730
|
#------------------------------------------------------------------------------------------------------------------------
|
722
|
|
-pwmMatch.toTable = function (motifMatch) {
|
723
|
|
- if (length (motifMatch@bestMatch) == 0)
|
724
|
|
- return (NA)
|
|
731
|
+pwmMatch.toTable = function(motifMatch) {
|
|
732
|
+ if(length(motifMatch@bestMatch) == 0)
|
|
733
|
+ return(NA)
|
725
|
734
|
|
726
|
735
|
df.list = vector("list", length(motifMatch@bestMatch))
|
727
|
|
- for (k in seq(length(motifMatch@bestMatch)))
|
|
736
|
+ for(k in seq(length(motifMatch@bestMatch)))
|
728
|
737
|
{
|
729
|
738
|
alignments = motifMatch@bestMatch[[k]]@aligns
|
730
|
|
- df = data.frame (stringsAsFactors=FALSE)
|
731
|
|
- for (alignment in alignments) {
|
|
739
|
+ df = data.frame(stringsAsFactors=FALSE)
|
|
740
|
+ for(alignment in alignments) {
|
732
|
741
|
x = alignment
|
733
|
742
|
name = x@TF@name
|
734
|
743
|
eVal = x@evalue
|
735
|
744
|
sequence = x@sequence
|
736
|
745
|
match = x@match
|
737
|
746
|
strand = x@strand
|
738
|
|
- df = rbind (df, data.frame (name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
|
|
747
|
+ df = rbind(df, data.frame(name=name, eVal=eVal, sequence=sequence, match=match, strand=strand, stringsAsFactors=FALSE))
|
739
|
748
|
} # for alignment
|
740
|
749
|
df.list[[k]]=df
|
741
|
750
|
}
|
742
|
751
|
names(df.list) <- names(motifMatch)
|
743
|
|
- return (df.list)
|
|
752
|
+ return(df.list)
|
744
|
753
|
|
745
|
754
|
} # pwmMatch.toTable
|
746
|
755
|
#------------------------------------------------------------------------------
|
747
|
756
|
# Robert Stojnic reports incorrect gene symbols for matrices obtained from
|
748
|
757
|
# flyFactorSurvey.
|
749
|
758
|
# the solution was to abandon the original strategy of extracting the
|
750
|
|
-# symbol from the matrix (and file) name.
|
751
|
|
-# now, the flybase importer ("inst/scripts/import/flyFactorSurvey/import.R")
|
752
|
|
-# uses FBgn id (which can be reliably extracted) and uses indpendent
|
|
759
|
+# symbol from the matrix(and file) name.
|
|
760
|
+# now, the flybase importer("inst/scripts/import/flyFactorSurvey/import.R")
|
|
761
|
+# uses FBgn id(which can be reliably extracted) and uses indpendent
|
753
|
762
|
# data sources to learn the gene symbol.
|
754
|
763
|
#
|
755
|
764
|
# robert's email:
|
...
|
...
|
@@ -776,7 +785,7 @@ pwmMatch.toTable = function (motifMatch) {
|
776
|
785
|
#
|
777
|
786
|
test.flyFactorGeneSymbols <- function()
|
778
|
787
|
{
|
779
|
|
- print ("--- test.flyFactorGeneSymbols")
|
|
788
|
+ print("--- test.flyFactorGeneSymbols")
|
780
|
789
|
mdb = MotifDb
|
781
|
790
|
checkEquals(sort(mcols(query(mdb, "FBgn0259750"))$geneSymbol),
|
782
|
791
|
sort(c("FBgn0259750", "FBgn0259750")))
|
...
|
...
|
@@ -785,34 +794,34 @@ test.flyFactorGeneSymbols <- function()
|
785
|
794
|
|
786
|
795
|
} # test.flyFactorGeneSymbols
|
787
|
796
|
#-------------------------------------------------------------------------------
|
788
|
|
-test.export_jasparFormatStdOut = function ()
|
|
797
|
+test.export_jasparFormatStdOut = function()
|
789
|
798
|
{
|
790
|
|
- print ('--- test.export_jasparFormatStdOut')
|
791
|
|
- mdb = MotifDb # ()
|
792
|
|
- mdb.chicken = subset (mdb, organism=='Gallus')
|
793
|
|
- checkEquals (length (mdb.chicken), 3)
|
|
799
|
+ print('--- test.export_jasparFormatStdOut')
|
|
800
|
+ mdb = MotifDb #()
|
|
801
|
+ mdb.chicken = subset(mdb, organism=='Gallus')
|
|
802
|
+ checkEquals(length(mdb.chicken), 3)
|
794
|
803
|
# text is cat-ed to stdout, so not avaialable here to check.
|
795
|
804
|
# but just like print, export also returns the text invisibly.
|
796
|
805
|
# so that CAN be checked.
|
797
|
806
|
|
798
|
|
- jaspar.text = export (mdb.chicken, format='jaspar')
|
799
|
|
- checkEquals (length (jaspar.text), 1) # just one long string
|
800
|
|
- checkTrue (is.character (jaspar.text))
|
801
|
|
- checkTrue (nchar (jaspar.text) > 800) # 1002 as of (10 aug 2012)
|
802
|
|
- return (TRUE)
|
|
807
|
+ jaspar.text = export(mdb.chicken, format='jaspar')
|
|
808
|
+ checkEquals(length(jaspar.text), 1) # just one long string
|
|
809
|
+ checkTrue(is.character(jaspar.text))
|
|
810
|
+ checkTrue(nchar(jaspar.text) > 800) # 1002 as of(10 aug 2012)
|
|
811
|
+ return(TRUE)
|
803
|
812
|
|
804
|
813
|
} # test.exportjasparFormatToStdOut
|
805
|
814
|
#------------------------------------------------------------------------------------------------------------------------
|
806
|
|
-test.export_jasparFormatToFile = function ()
|
|
815
|
+test.export_jasparFormatToFile = function()
|
807
|
816
|
{
|
808
|
|
- print ('--- test.export_jasparFormatToFile')
|
809
|
|
- mdb = MotifDb # ()
|
810
|
|
- mdb.chicken = subset (mdb, organism=='Gallus')
|
811
|
|
- checkEquals (length (mdb.chicken), 3)
|
812
|
|
- output.file = tempfile ()
|
813
|
|
- jaspar.text = export (mdb.chicken, output.file, 'jaspar')
|
814
|
|
- retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
|
815
|
|
- invisible (retrieved)
|
|
817
|
+ print('--- test.export_jasparFormatToFile')
|
|
818
|
+ mdb = MotifDb #()
|
|
819
|
+ mdb.chicken = subset(mdb, organism=='Gallus')
|
|
820
|
+ checkEquals(length(mdb.chicken), 3)
|
|
821
|
+ output.file = tempfile()
|
|
822
|
+ jaspar.text = export(mdb.chicken, output.file, 'jaspar')
|
|
823
|
+ retrieved = scan(output.file, what=character(0), sep='\n', quiet=TRUE)
|
|
824
|
+ invisible(retrieved)
|
816
|
825
|
|
817
|
826
|
} # test.exportjasparFormatToFile
|
818
|
827
|
#------------------------------------------------------------------------------------------------------------------------
|
...
|
...
|
@@ -833,9 +842,9 @@ test.geneToMotif <- function()
|
833
|
842
|
|
834
|
843
|
# MotifDb mode uses the MotifDb metadata, pulled from many sources
|
835
|
844
|
tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb") # intentional mis-capitalization
|
836
|
|
- checkEquals(dim(tbl.mdb), c(13, 6))
|
|
845
|
+ checkEquals(dim(tbl.mdb), c(14, 6))
|
837
|
846
|
checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
|
838
|
|
- # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
|
|
847
|
+ # no recognizable(i.e., jaspar standard) motif name returned by MotifDb metadata
|
839
|
848
|
# MotifDb for ATF5
|
840
|
849
|
# todo: compare the MA0110596_1.02 matrix of cisp_1.02 to japar MA0833.1
|
841
|
850
|
|
...
|
...
|
@@ -852,8 +861,8 @@ test.geneToMotif <- function()
|
852
|
861
|
|
853
|
862
|
} # test.geneToMotif
|
854
|
863
|
#------------------------------------------------------------------------------------------------------------------------
|
855
|
|
-# this case discovered (31 jan 2018). when called on a gene/source combination for which there are
|
856
|
|
-# no motifs, i attempted to add the mapping source (either "MotifDb", "TFClass") as a column
|
|
864
|
+# this case discovered(31 jan 2018). when called on a gene/source combination for which there are
|
|
865
|
+# no motifs, i attempted to add the mapping source(either "MotifDb", "TFClass") as a column
|
857
|
866
|
# to an empty data.frame. check for that and its fix here
|
858
|
867
|
test.geneToMotif.oneGene.noMotifs <- function()
|
859
|
868
|
{
|
...
|
...
|
@@ -863,7 +872,7 @@ test.geneToMotif.oneGene.noMotifs <- function()
|
863
|
872
|
|
864
|
873
|
} # test.geneToMotif.oneGene.noMotifs
|
865
|
874
|
#------------------------------------------------------------------------------------------------------------------------
|
866
|
|
-# sad to say I do not recall what problem/fix is tested here (pshannon, 23 jan 2018).
|
|
875
|
+# sad to say I do not recall what problem/fix is tested here(pshannon, 23 jan 2018).
|
867
|
876
|
# however, it demonstrates the variety of results which can be returned by non-jaspar datasets
|
868
|
877
|
# when using the MotifDb mapping source, and the relative paucity which is sometimes
|
869
|
878
|
# seen with the TFclass mapper
|
...
|
...
|
@@ -882,9 +891,9 @@ test.geneToMotif.ignore.jasparSuffixes <- function()
|
882
|
891
|
|
883
|
892
|
# MotifDb mode uses the MotifDb metadata, pulled from many sources
|
884
|
893
|
tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb") # intentional mis-capitalization
|
885
|
|
- checkEquals(dim(tbl.mdb), c(13, 6))
|
|
894
|
+ checkEquals(dim(tbl.mdb), c(14, 6))
|
886
|
895
|
checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
|
887
|
|
- # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
|
|
896
|
+ # no recognizable(i.e., jaspar standard) motif name returned by MotifDb metadata
|
888
|
897
|
# MotifDb for ATF5
|
889
|
898
|
|
890
|
899
|
# compare the MA0110599_1.02 matrix of cisp_1.02 to japar MA0476.1: the identical matrix!
|
...
|
...
|
@@ -972,7 +981,7 @@ test.motifToGene <- function()
|
972
|
981
|
checkEquals(sort(unique(tbl$geneSymbol)),
|
973
|
982
|
c("AR", "RUNX1", "TFAP2A", "TFAP2A(var.3)", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E"))
|
974
|
983
|
|
975
|
|
- # (23 may 2018) found that MotifDb works, but c("MotifDb", "TFClass") does not
|
|
984
|
+ #(23 may 2018) found that MotifDb works, but c("MotifDb", "TFClass") does not
|
976
|
985
|
# test the fix here
|
977
|
986
|
|
978
|
987
|
motifs <- c("Hsapiens-jolma2013-IRF5-2", "Hsapiens-SwissRegulon-IRF5.SwissRegulon")
|
...
|
...
|
@@ -1066,6 +1075,70 @@ test.associateTranscriptionFactors <- function()
|
1066
|
1075
|
# now some motif names
|
1067
|
1076
|
} # test.associateTranscriptionFactors
|
1068
|
1077
|
#------------------------------------------------------------------------------------------------------------------------
|
|
1078
|
+test.match <- function()
|
|
1079
|
+{
|
|
1080
|
+ printf("--- test.match")
|
|
1081
|
+ gr.region <- GRanges(seqnames="chr1", IRanges(start=47229520, end=47229560))
|
|
1082
|
+ motifs <- query(MotifDb, c("jaspar2018", "ZNF263"))
|
|
1083
|
+ checkEquals(length(motifs), 1)
|
|
1084
|
+ gr.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5)
|
|
1085
|
+ checkEquals(length(gr.match), 1) # just one motif
|
|
1086
|
+ checkEquals(names(gr.match), names(motifs))
|
|
1087
|
+ checkEquals(length(gr.match[[1]]), 3)
|
|
1088
|
+
|
|
1089
|
+ tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
|
|
1090
|
+ checkEquals(dim(tbl.match), c(3, 7))
|
|
1091
|
+ checkTrue(all(tbl.match$motif == names(motifs)))
|
|
1092
|
+ checkEquals(class(tbl.match$chrom), "character") # not a factor
|
|
1093
|
+
|
|
1094
|
+ motifs <- query(MotifDb, "ZNF263", c("jaspar2018", "swissregulon"))
|
|
1095
|
+ checkEquals(length(motifs), 2)
|
|
1096
|
+ gr.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5)
|
|
1097
|
+ checkEquals(names(gr.match), names(motifs))
|
|
1098
|
+ checkEquals(as.numeric(lapply(gr.match, length)), c(3, 1))
|
|
1099
|
+
|
|
1100
|
+ tbl.match <-matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
|
|
1101
|
+ checkEquals(dim(tbl.match), c(4, 7))
|
|
1102
|
+ checkEquals(length(unique(tbl.match$motif)), 2)
|
|
1103
|
+ checkEquals(unique(tbl.match$motif), names(motifs))
|
|
1104
|
+ checkEquals(colnames(tbl.match), c("chrom", "start", "end", "width", "strand", "mood.score", "motif_id"))
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+ #------------------------------------------------
|
|
1108
|
+ # now all jaspar2018 human motifs
|
|
1109
|
+ #------------------------------------------------
|
|
1110
|
+
|
|
1111
|
+ motifs <- query(MotifDb, c("jaspar2018", "hsapiens"))
|
|
1112
|
+ tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-5, fimoDataFrameStyle=TRUE)
|
|
1113
|
+ checkEquals(dim(tbl.match), c(7, 7))
|
|
1114
|
+ checkEquals(sort(unique(tbl.match$motif)),
|
|
1115
|
+ c("Hsapiens-jaspar2018-EWSR1-FLI1-MA0149.1", "Hsapiens-jaspar2018-ZNF263-MA0528.1"))
|
|
1116
|
+
|
|
1117
|
+ #-----------------------------------------------------
|
|
1118
|
+ # now all jaspar2018 human motifs, loosen the pValue
|
|
1119
|
+ #-----------------------------------------------------
|
|
1120
|
+
|
|
1121
|
+ motifs <- query(MotifDb, c("jaspar2018", "hsapiens"))
|
|
1122
|
+ tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-4, fimoDataFrameStyle=TRUE)
|
|
1123
|
+ checkTrue(nrow(tbl.match) > 15)
|
|
1124
|
+
|
|
1125
|
+ tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-3, fimoDataFrameStyle=TRUE)
|
|
1126
|
+ checkTrue(nrow(tbl.match) > 50)
|
|
1127
|
+
|
|
1128
|
+ #-------------------------------------------------------------
|
|
1129
|
+ # now all jaspar2018 and hocomoco human motifs across 10kb
|
|
1130
|
+ #------------------------------------------------------------
|
|
1131
|
+
|
|
1132
|
+ motifs <- query(MotifDb, "hsapiens", orStrings=c("jaspar2018", "hocomoco"))
|
|
1133
|
+ checkTrue(length(motifs) > 1000)
|
|
1134
|
+ gr.region <- GRanges(seqnames="chr1", IRanges(start=47229000, end=47239000))
|
|
1135
|
+
|
|
1136
|
+ tbl.match <- matchMotif(MotifDb, motifs, "hg38", gr.region, 1e-7, fimoDataFrameStyle=TRUE)
|
|
1137
|
+ checkTrue(nrow(tbl.match) > 200 && nrow(tbl.match) < 275)
|
|
1138
|
+ checkEquals(order(tbl.match$start), seq_len(nrow(tbl.match)))
|
|
1139
|
+
|
|
1140
|
+} # test.match
|
|
1141
|
+#------------------------------------------------------------------------------------------------------------------------
|
1069
|
1142
|
findMotifsWithMutuallyExclusiveMappings <- function()
|
1070
|
1143
|
{
|
1071
|
1144
|
xtab <- as.data.frame(table(MotifDb@manuallyCuratedGeneMotifAssociationTable$motif))
|
...
|
...
|
@@ -1086,8 +1159,21 @@ findMotifsWithMutuallyExclusiveMappings <- function()
|
1086
|
1159
|
# [1] mdb.genes: AP1, JUN::FOS, FOS::JUN, FOS::JUN
|
1087
|
1160
|
# [1] tfc.genes: FOS, JUN
|
1088
|
1161
|
|
1089
|
|
-
|
1090
|
1162
|
} # findMotifsWithMutuallyExclusiveMappings
|
1091
|
1163
|
#------------------------------------------------------------------------------------------------------------------------
|
|
1164
|
+test.hocomoco11.with.reliabilityScores <- function()
|
|
1165
|
+{
|
|
1166
|
+ printf("--- test.hocomoco11.with.reliabilityScores")
|
|
1167
|
+
|
|
1168
|
+ checkEquals(length(query(MotifDb, "hocomoco")), 1466)
|
|
1169
|
+ checkEquals(length(query(MotifDb, "hocomocov10")), 1066)
|
|
1170
|
+ checkEquals(length(query(MotifDb, "hocomocov11")), 400)
|
|
1171
|
+
|
|
1172
|
+ checkEquals(length(query(MotifDb, "hocomocov11A")), 181)
|
|
1173
|
+ checkEquals(length(query(MotifDb, "hocomocov11B")), 84)
|
|
1174
|
+ checkEquals(length(query(MotifDb, "hocomocov11C")), 135)
|
|
1175
|
+
|
|
1176
|
+} # test.hocomoco11.with.reliabilityScores
|
|
1177
|
+#------------------------------------------------------------------------------------------------------------------------
|
1092
|
1178
|
if(!interactive())
|
1093
|
1179
|
runTests()
|