git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@86069 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -102,7 +102,7 @@ test.createMetadataTable = function (x.matrices, x.novels) |
102 | 102 |
"bindingDomain", "tfFamily", "experimentType", "pubmedID")) |
103 | 103 |
checkEquals (tbl.md$providerName [1:2], c ('UW.Motif.0001', 'UW.Motif.0002')) |
104 | 104 |
checkEquals (tbl.md$providerId [1:2], c ('UW.Motif.0001', 'UW.Motif.0002')) |
105 |
- checkEquals (tbl.md$pubmedID [1:2], c ('22959076', '22959076')) |
|
105 |
+ checkEquals (tbl.md$pubmedID [1:2], c ("22955618", "22955618")) |
|
106 | 106 |
checkEquals (tbl.md$dataSource [1:2], c ('stamlab', 'stamlab')) |
107 | 107 |
checkEquals (tbl.md$organism [1:2], c ('Hsapiens', 'Hsapiens')) |
108 | 108 |
checkEquals (tbl.md$experimentType [1:2], c ('digital genomic footprinting', 'digital genomic footprinting')) |
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@86014 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -93,13 +93,13 @@ test.createAnnotationTable = function () |
93 | 93 |
#------------------------------------------------------------------------------------------------------------------------ |
94 | 94 |
test.createMetadataTable = function (x.matrices, x.novels) |
95 | 95 |
{ |
96 |
- print ('--- test.createMetadataTable') |
|
97 |
- # try it first with just two matrices |
|
98 |
- tbl.md = createMetadataTable (x.matrices [1:12], x.novels [1:12]) |
|
99 |
- checkEquals (dim (tbl.md), c (12, 15)) |
|
100 |
- checkEquals (colnames (tbl.md), c ("providerName", "providerId", "dataSource", "geneSymbol", "geneId", "geneIdType", |
|
101 |
- "proteinId", "proteinIdType", "organism", "sequenceCount", "bindingSequence", |
|
102 |
- "bindingDomain", "tfFamily", "experimentType", "pubmedID")) |
|
96 |
+ print ('--- test.createMetadataTable') |
|
97 |
+ # try it first with just two matrices |
|
98 |
+ tbl.md = createMetadataTable (x.matrices [1:12], x.novels [1:12]) |
|
99 |
+ checkEquals (dim (tbl.md), c (12, 15)) |
|
100 |
+ checkEquals (colnames (tbl.md), c ("providerName", "providerId", "dataSource", "geneSymbol", "geneId", "geneIdType", |
|
101 |
+ "proteinId", "proteinIdType", "organism", "sequenceCount", "bindingSequence", |
|
102 |
+ "bindingDomain", "tfFamily", "experimentType", "pubmedID")) |
|
103 | 103 |
checkEquals (tbl.md$providerName [1:2], c ('UW.Motif.0001', 'UW.Motif.0002')) |
104 | 104 |
checkEquals (tbl.md$providerId [1:2], c ('UW.Motif.0001', 'UW.Motif.0002')) |
105 | 105 |
checkEquals (tbl.md$pubmedID [1:2], c ('22959076', '22959076')) |
... | ... |
@@ -107,8 +107,9 @@ test.createMetadataTable = function (x.matrices, x.novels) |
107 | 107 |
checkEquals (tbl.md$organism [1:2], c ('Hsapiens', 'Hsapiens')) |
108 | 108 |
checkEquals (tbl.md$experimentType [1:2], c ('digital genomic footprinting', 'digital genomic footprinting')) |
109 | 109 |
checkEquals (tbl.md$geneId, c (rep ('knownMotif', 11), 'novelMotif')) |
110 |
+ checkTrue(all(is.na(tbl.md$geneIdType))) |
|
110 | 111 |
|
111 |
- invisible (tbl.md) |
|
112 |
+ invisible (tbl.md) |
|
112 | 113 |
|
113 | 114 |
} # test.createMetadataTable |
114 | 115 |
#------------------------------------------------------------------------------------------------------------------------ |
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@75382 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -6,7 +6,7 @@ library (RUnit) |
6 | 6 |
#------------------------------------------------------------------------------------------------------------------------ |
7 | 7 |
source("import.R") |
8 | 8 |
#------------------------------------------------------------------------------------------------------------------------ |
9 |
-run.tests = function (dataDir=kDataDir) |
|
9 |
+run.tests = function (dataDir) |
|
10 | 10 |
{ |
11 | 11 |
dataDir <- file.path(dataDir, "stamlab") |
12 | 12 |
x.rawMatrixList <<- test.readRawMatrices (dataDir) |
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@75368 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -8,6 +8,7 @@ source("import.R") |
8 | 8 |
#------------------------------------------------------------------------------------------------------------------------ |
9 | 9 |
run.tests = function (dataDir=kDataDir) |
10 | 10 |
{ |
11 |
+ dataDir <- file.path(dataDir, "stamlab") |
|
11 | 12 |
x.rawMatrixList <<- test.readRawMatrices (dataDir) |
12 | 13 |
x.novels <<- test.readNovelStatus (dataDir) |
13 | 14 |
x.matrices <<- test.extractAndNormalizeMatrices (x.rawMatrixList) |
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@75345 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,210 @@ |
1 |
+# stamlab/test.R |
|
2 |
+# notes: 3 matrices come w/o speciesID, tax = 'vertebrates'. not our problem to fix, at least not yet. |
|
3 |
+# TBP, HNF4A and CEBPA (MA0108.2, MA0114.1, MA0102.2) |
|
4 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
5 |
+library (RUnit) |
|
6 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
7 |
+source("import.R") |
|
8 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
9 |
+run.tests = function (dataDir=kDataDir) |
|
10 |
+{ |
|
11 |
+ x.rawMatrixList <<- test.readRawMatrices (dataDir) |
|
12 |
+ x.novels <<- test.readNovelStatus (dataDir) |
|
13 |
+ x.matrices <<- test.extractAndNormalizeMatrices (x.rawMatrixList) |
|
14 |
+ x.tbl.md <<- test.createMetadataTable (x.matrices, x.novels) |
|
15 |
+ x.matrices.renamed <<- test.renameMatrices (x.matrices, x.tbl.md) |
|
16 |
+ |
|
17 |
+} # run.tests |
|
18 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
19 |
+test.readRawMatrices = function (dataDir) |
|
20 |
+{ |
|
21 |
+ print ('--- test.readMatrices') |
|
22 |
+ list.pwms = readRawMatrices (dataDir) |
|
23 |
+ checkEquals (length (list.pwms), 683) |
|
24 |
+ checkEquals (names (list.pwms [[1]]), c ("title", "consensus.sequence", "matrix")) |
|
25 |
+ checkEquals (rownames (list.pwms[[1]]$matrix), c ("A", "C", "G", "T")) |
|
26 |
+ invisible (list.pwms) |
|
27 |
+ |
|
28 |
+} # test.readRawMatrices |
|
29 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
30 |
+test.readNovelStatus = function (dataDir) |
|
31 |
+{ |
|
32 |
+ print ('--- test.readNovelStatus') |
|
33 |
+ novel.status = readNovelStatus (dataDir) |
|
34 |
+ checkEquals (length (novel.status), 683) |
|
35 |
+ checkEquals (length (which (novel.status == TRUE)), 289) |
|
36 |
+ # do a spot check around first novel in novels.txt |
|
37 |
+ checkEquals (as.logical (novel.status [c ('UW.Motif.0010', 'UW.Motif.0011', 'UW.Motif.0012')]), |
|
38 |
+ c (FALSE, FALSE, TRUE)) |
|
39 |
+ invisible (novel.status) |
|
40 |
+ |
|
41 |
+} # readNovelStatus |
|
42 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
43 |
+test.extractAndNormalizeMatrices = function (x.rawMatrixList) |
|
44 |
+{ |
|
45 |
+ print ('--- test.extractAndNormalizeMatrices') |
|
46 |
+ matrices.fixed <<- extractAndNormalizeMatrices (x.rawMatrixList) |
|
47 |
+ # make sure a UW.Motif.0nnn name accompanies each matrix |
|
48 |
+ checkEquals (length (grep ('UW.Motif.0', names (matrices.fixed))), length (matrices.fixed)) |
|
49 |
+ # make sure all columns in all matrices sum to 1.0 |
|
50 |
+ checkTrue (all (sapply (matrices.fixed, function (m) all (abs (colSums (m) - 1.0) < 1e-10)))) |
|
51 |
+ invisible (matrices.fixed) |
|
52 |
+ |
|
53 |
+} # test.extractAndNormalizeMatrices |
|
54 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
55 |
+test.convertRawMatricesToStandard = function (tbl.rmat) |
|
56 |
+{ |
|
57 |
+ print ('--- test.convertRawMatricesToStandard') |
|
58 |
+ # get just the first two raw matrices |
|
59 |
+ |
|
60 |
+ first.two.ids = head (unique (tbl.rmat$id), n=2) |
|
61 |
+ rows = nrow (subset (tbl.rmat, id %in% first.two.ids)) |
|
62 |
+ matrices = convertRawMatricesToStandard (tbl.rmat [1:rows,]) |
|
63 |
+ checkEquals (length (matrices), 2) |
|
64 |
+ checkEquals (names (matrices), first.two.ids) |
|
65 |
+ |
|
66 |
+ # it will not always be true, but IS true for the first two matrices, currently "9229" and "9231", that there |
|
67 |
+ # are an equal number of nucleotides at each position. |
|
68 |
+ checkTrue (all (colSums (matrices [[1]]) == 97)) |
|
69 |
+ checkTrue (all (colSums (matrices [[2]]) == 185)) |
|
70 |
+ |
|
71 |
+ # now run all the matrices through |
|
72 |
+ matrices = convertRawMatricesToStandard (tbl.rmat) |
|
73 |
+ checkEquals (length (matrices), 459) |
|
74 |
+ checkEquals (names (matrices)[1:2], first.two.ids) |
|
75 |
+ |
|
76 |
+ invisible (matrices) |
|
77 |
+ |
|
78 |
+} # test.convertRawMatricesToStandard |
|
79 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
80 |
+test.createAnnotationTable = function () |
|
81 |
+{ |
|
82 |
+ print ('--- test.createAnnotationTable') |
|
83 |
+ tbl.anno = createAnnotationTable () |
|
84 |
+ checkEquals (dim (tbl.anno), c (513, 13)) |
|
85 |
+ expected = c ("fullID", "id", "category", "mID", "version", "binder", "speciesID", "proteinID", "family", "tax", "class", "pubmed", "type") |
|
86 |
+ checkEquals (colnames (tbl.anno), expected) |
|
87 |
+ |
|
88 |
+ checkEquals (head (tbl.anno$fullID), c ("MA0001.1", "MA0003.1", "MA0004.1", "MA0005.1", "MA0006.1", "MA0006.1")) |
|
89 |
+ invisible (tbl.anno) |
|
90 |
+ |
|
91 |
+} # test.createAnnotationTable |
|
92 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
93 |
+test.createMetadataTable = function (x.matrices, x.novels) |
|
94 |
+{ |
|
95 |
+ print ('--- test.createMetadataTable') |
|
96 |
+ # try it first with just two matrices |
|
97 |
+ tbl.md = createMetadataTable (x.matrices [1:12], x.novels [1:12]) |
|
98 |
+ checkEquals (dim (tbl.md), c (12, 15)) |
|
99 |
+ checkEquals (colnames (tbl.md), c ("providerName", "providerId", "dataSource", "geneSymbol", "geneId", "geneIdType", |
|
100 |
+ "proteinId", "proteinIdType", "organism", "sequenceCount", "bindingSequence", |
|
101 |
+ "bindingDomain", "tfFamily", "experimentType", "pubmedID")) |
|
102 |
+ checkEquals (tbl.md$providerName [1:2], c ('UW.Motif.0001', 'UW.Motif.0002')) |
|
103 |
+ checkEquals (tbl.md$providerId [1:2], c ('UW.Motif.0001', 'UW.Motif.0002')) |
|
104 |
+ checkEquals (tbl.md$pubmedID [1:2], c ('22959076', '22959076')) |
|
105 |
+ checkEquals (tbl.md$dataSource [1:2], c ('stamlab', 'stamlab')) |
|
106 |
+ checkEquals (tbl.md$organism [1:2], c ('Hsapiens', 'Hsapiens')) |
|
107 |
+ checkEquals (tbl.md$experimentType [1:2], c ('digital genomic footprinting', 'digital genomic footprinting')) |
|
108 |
+ checkEquals (tbl.md$geneId, c (rep ('knownMotif', 11), 'novelMotif')) |
|
109 |
+ |
|
110 |
+ invisible (tbl.md) |
|
111 |
+ |
|
112 |
+} # test.createMetadataTable |
|
113 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
114 |
+test.renameMatrices = function (matrices, tbl.md) |
|
115 |
+{ |
|
116 |
+ print("--- test.renameMatrices") |
|
117 |
+ |
|
118 |
+ # try it with just the first two matrices |
|
119 |
+ matrix.pair = matrices [1:2] |
|
120 |
+ tbl.pair = tbl.md [1:2,] |
|
121 |
+ matrix.pair.renamed = renameMatrices (matrix.pair, tbl.pair) |
|
122 |
+ checkEquals (names (matrix.pair.renamed), c ("Hsapiens-stamlab-UW.Motif.0001", "Hsapiens-stamlab-UW.Motif.0002")) |
|
123 |
+ |
|
124 |
+} # test.renameMatrices |
|
125 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
126 |
+test.convertTaxonCode = function () |
|
127 |
+{ |
|
128 |
+ print ('--- test.convertTaxonCode') |
|
129 |
+ |
|
130 |
+ checkEquals (convertTaxonCode ('9606'), 'Hsapiens') |
|
131 |
+ checkEquals (convertTaxonCode (9606), 'Hsapiens') |
|
132 |
+ # anomalous codes, which an examination of the jaspar website reveals as 'vertebrates' |
|
133 |
+ checkEquals (convertTaxonCode (NA), 'Vertebrata') |
|
134 |
+ checkEquals (convertTaxonCode ('NA'), 'Vertebrata') |
|
135 |
+ checkEquals (convertTaxonCode (NA_character_), 'Vertebrata') |
|
136 |
+ checkEquals (convertTaxonCode ('-'), 'Vertebrata') |
|
137 |
+ |
|
138 |
+} # test.convertTaxonCode |
|
139 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
140 |
+test.guessProteinIdentifierType = function (moleculeName) |
|
141 |
+{ |
|
142 |
+ print ('--- test.guessProteinIdentifierType') |
|
143 |
+ checkEquals (guessProteinIdentifierType ('P29383'), 'UNIPROT') |
|
144 |
+ |
|
145 |
+ all.types = sapply (x.tbl.anno$proteinID, guessProteinIdentifierType) |
|
146 |
+ checkTrue (length (which (is.na (all.types))) < 12) # got most of them. |
|
147 |
+ |
|
148 |
+} # test.guessProteinIdentifierType |
|
149 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
150 |
+test.normalizeMatrices = function (matrices) |
|
151 |
+{ |
|
152 |
+ print ('--- test.normalizeMatrices') |
|
153 |
+ |
|
154 |
+ colsums = as.integer (sapply (matrices, function (mtx) as.integer (mean (round (colSums (mtx)))))) |
|
155 |
+ #checkTrue (all (colsums > 1)) |
|
156 |
+ |
|
157 |
+ matrices.norm = normalizeMatrices (matrices) |
|
158 |
+ |
|
159 |
+ colsums = as.integer (sapply (matrices.norm, function (mtx) as.integer (mean (round (colSums (mtx)))))) |
|
160 |
+ checkTrue (all (colsums == 1)) |
|
161 |
+ |
|
162 |
+ invisible (matrices.norm) |
|
163 |
+ |
|
164 |
+} # test.normalizeMatrices |
|
165 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
166 |
+test.assignGeneId = function (proteinId) |
|
167 |
+{ |
|
168 |
+ print ('--- test.assignGeneId') |
|
169 |
+ uniprot.ids = c ('Q9GRA5', 'P31314', 'AAC18941', 'O49397') |
|
170 |
+ refseq.ids = c ('NP_995315.1', 'NP_032840', 'NP_599022') |
|
171 |
+ yeast.ids = c ('YKL112W', 'YMR072W', 'YLR131C') |
|
172 |
+ |
|
173 |
+ checkEquals (assignGeneId ('NP_995315.1'), list (geneId='4782', type='ENTREZ')) |
|
174 |
+ checkEquals (assignGeneId ('NP_599022'), list (geneId='6095', type='ENTREZ')) |
|
175 |
+ |
|
176 |
+ checkEquals (assignGeneId ('P31314'), list (geneId='3195', type='ENTREZ')) |
|
177 |
+ |
|
178 |
+ checkEquals (assignGeneId ('YKL112W'), list (geneId='YKL112W', type='SGD')) |
|
179 |
+ |
|
180 |
+ # see how successful this is over all 513 proteinIds |
|
181 |
+ |
|
182 |
+ tbl.anno = createAnnotationTable () |
|
183 |
+ mtx.geneId = as.data.frame (t (sapply (tbl.anno$proteinID, assignGeneId))) |
|
184 |
+ tbl.types = as.data.frame (table (as.character (mtx.geneId$type), useNA='always'), stringsAsFactors=FALSE) |
|
185 |
+ checkEquals (tbl.types$Var1, c ("ENTREZ", "SGD", NA)) |
|
186 |
+ checkEquals (tbl.types$Freq, c (141, 177, 195)) |
|
187 |
+ |
|
188 |
+} # test.assignGeneId |
|
189 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
190 |
+test.parsePwm = function () |
|
191 |
+{ |
|
192 |
+ print ('--- test.parsePwm') |
|
193 |
+ lines = c ('UW.Motif.0006 aggaaatg', |
|
194 |
+ '0.890585 0.007855 0.051323 0.050237', |
|
195 |
+ '0.060732 0.004506 0.894170 0.040593', |
|
196 |
+ '0.072765 0.037935 0.860704 0.028596', |
|
197 |
+ '0.929585 0.024037 0.034914 0.011464', |
|
198 |
+ '0.931220 0.023231 0.029078 0.016471', |
|
199 |
+ '0.857934 0.044211 0.072594 0.025261', |
|
200 |
+ '0.065840 0.013777 0.058013 0.862370', |
|
201 |
+ '0.049937 0.036238 0.861871 0.051953') |
|
202 |
+ m6 = parsePwm (lines) |
|
203 |
+ checkEquals (names (m6), c ("title", "consensus.sequence", "matrix")) |
|
204 |
+ pwm = m6$matrix |
|
205 |
+ checkEquals (dim (pwm), c (4, 8)) |
|
206 |
+ checkEquals (rownames (pwm), c ('A', 'C', 'G', 'T')) |
|
207 |
+ invisible (m6) |
|
208 |
+ |
|
209 |
+} # test.parsePwm |
|
210 |
+#------------------------------------------------------------------------------------------------------------------------ |