Browse code

fixed pubmed id for stamlab data

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@86069 bc3139a8-67e5-0310-9ffc-ced21a209358

p.shannon authored on 04/02/2014 19:33:01
Showing 1 changed files
... ...
@@ -102,7 +102,7 @@ test.createMetadataTable = function (x.matrices, x.novels)
102 102
                                       "bindingDomain", "tfFamily", "experimentType", "pubmedID"))
103 103
    checkEquals (tbl.md$providerName [1:2], c ('UW.Motif.0001', 'UW.Motif.0002'))
104 104
    checkEquals (tbl.md$providerId [1:2], c ('UW.Motif.0001', 'UW.Motif.0002'))
105
-   checkEquals (tbl.md$pubmedID [1:2], c ('22959076', '22959076'))
105
+   checkEquals (tbl.md$pubmedID [1:2], c ("22955618", "22955618"))
106 106
    checkEquals (tbl.md$dataSource [1:2], c ('stamlab', 'stamlab'))
107 107
    checkEquals (tbl.md$organism [1:2], c ('Hsapiens', 'Hsapiens'))
108 108
    checkEquals (tbl.md$experimentType [1:2], c ('digital genomic footprinting', 'digital genomic footprinting'))
Browse code

stamlab geneIdType is now NA. unit test adjusted. motivated by macos failure of 'test.geneIdsAndTypes'

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@86014 bc3139a8-67e5-0310-9ffc-ced21a209358

p.shannon authored on 03/02/2014 19:40:10
Showing 1 changed files
... ...
@@ -93,13 +93,13 @@ test.createAnnotationTable = function ()
93 93
 #------------------------------------------------------------------------------------------------------------------------
94 94
 test.createMetadataTable = function (x.matrices, x.novels)
95 95
 {
96
-  print ('--- test.createMetadataTable')
97
-   # try it first with just two matrices
98
-  tbl.md = createMetadataTable (x.matrices [1:12], x.novels [1:12])
99
-  checkEquals (dim (tbl.md), c (12, 15))
100
-  checkEquals (colnames (tbl.md), c ("providerName", "providerId", "dataSource", "geneSymbol", "geneId", "geneIdType", 
101
-                                     "proteinId", "proteinIdType", "organism", "sequenceCount", "bindingSequence",
102
-                                     "bindingDomain", "tfFamily", "experimentType", "pubmedID"))
96
+   print ('--- test.createMetadataTable')
97
+    # try it first with just two matrices
98
+   tbl.md = createMetadataTable (x.matrices [1:12], x.novels [1:12])
99
+   checkEquals (dim (tbl.md), c (12, 15))
100
+   checkEquals (colnames (tbl.md), c ("providerName", "providerId", "dataSource", "geneSymbol", "geneId", "geneIdType", 
101
+                                      "proteinId", "proteinIdType", "organism", "sequenceCount", "bindingSequence",
102
+                                      "bindingDomain", "tfFamily", "experimentType", "pubmedID"))
103 103
    checkEquals (tbl.md$providerName [1:2], c ('UW.Motif.0001', 'UW.Motif.0002'))
104 104
    checkEquals (tbl.md$providerId [1:2], c ('UW.Motif.0001', 'UW.Motif.0002'))
105 105
    checkEquals (tbl.md$pubmedID [1:2], c ('22959076', '22959076'))
... ...
@@ -107,8 +107,9 @@ test.createMetadataTable = function (x.matrices, x.novels)
107 107
    checkEquals (tbl.md$organism [1:2], c ('Hsapiens', 'Hsapiens'))
108 108
    checkEquals (tbl.md$experimentType [1:2], c ('digital genomic footprinting', 'digital genomic footprinting'))
109 109
    checkEquals (tbl.md$geneId, c (rep ('knownMotif', 11), 'novelMotif'))
110
+   checkTrue(all(is.na(tbl.md$geneIdType)))
110 111
 
111
-  invisible (tbl.md)
112
+   invisible (tbl.md)
112 113
 
113 114
 } # test.createMetadataTable
114 115
 #------------------------------------------------------------------------------------------------------------------------
Browse code

removed all explicit repo paths

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@75382 bc3139a8-67e5-0310-9ffc-ced21a209358

p.shannon authored on 05/04/2013 20:36:31
Showing 1 changed files
... ...
@@ -6,7 +6,7 @@ library (RUnit)
6 6
 #------------------------------------------------------------------------------------------------------------------------
7 7
 source("import.R")
8 8
 #------------------------------------------------------------------------------------------------------------------------
9
-run.tests = function (dataDir=kDataDir)
9
+run.tests = function (dataDir)
10 10
 {
11 11
   dataDir <- file.path(dataDir, "stamlab")
12 12
   x.rawMatrixList <<- test.readRawMatrices (dataDir)
Browse code

changed kDataDir to be repo root

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@75368 bc3139a8-67e5-0310-9ffc-ced21a209358

p.shannon authored on 05/04/2013 17:35:26
Showing 1 changed files
... ...
@@ -8,6 +8,7 @@ source("import.R")
8 8
 #------------------------------------------------------------------------------------------------------------------------
9 9
 run.tests = function (dataDir=kDataDir)
10 10
 {
11
+  dataDir <- file.path(dataDir, "stamlab")
11 12
   x.rawMatrixList <<- test.readRawMatrices (dataDir)
12 13
   x.novels <<- test.readNovelStatus (dataDir)
13 14
   x.matrices <<- test.extractAndNormalizeMatrices (x.rawMatrixList)
Browse code

first version

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@75345 bc3139a8-67e5-0310-9ffc-ced21a209358

p.shannon authored on 05/04/2013 13:35:38
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,210 @@
1
+# stamlab/test.R
2
+# notes:  3 matrices come w/o speciesID, tax = 'vertebrates'.  not our problem to fix, at least not yet.
3
+#         TBP, HNF4A and CEBPA (MA0108.2, MA0114.1, MA0102.2)
4
+#------------------------------------------------------------------------------------------------------------------------
5
+library (RUnit)
6
+#------------------------------------------------------------------------------------------------------------------------
7
+source("import.R")
8
+#------------------------------------------------------------------------------------------------------------------------
9
+run.tests = function (dataDir=kDataDir)
10
+{
11
+  x.rawMatrixList <<- test.readRawMatrices (dataDir)
12
+  x.novels <<- test.readNovelStatus (dataDir)
13
+  x.matrices <<- test.extractAndNormalizeMatrices (x.rawMatrixList)
14
+  x.tbl.md <<- test.createMetadataTable (x.matrices, x.novels)
15
+  x.matrices.renamed <<- test.renameMatrices (x.matrices, x.tbl.md)
16
+
17
+} # run.tests
18
+#------------------------------------------------------------------------------------------------------------------------
19
+test.readRawMatrices = function (dataDir)
20
+{
21
+  print ('--- test.readMatrices')
22
+  list.pwms = readRawMatrices (dataDir)
23
+  checkEquals (length (list.pwms), 683)
24
+  checkEquals (names (list.pwms [[1]]), c ("title", "consensus.sequence", "matrix"))
25
+  checkEquals (rownames (list.pwms[[1]]$matrix),  c ("A", "C", "G", "T"))
26
+  invisible (list.pwms)
27
+
28
+} # test.readRawMatrices
29
+#------------------------------------------------------------------------------------------------------------------------
30
+test.readNovelStatus = function (dataDir)
31
+{
32
+  print ('--- test.readNovelStatus')
33
+  novel.status = readNovelStatus (dataDir)
34
+  checkEquals (length (novel.status), 683)
35
+  checkEquals (length (which (novel.status == TRUE)), 289)
36
+    # do a spot check around first novel in novels.txt
37
+  checkEquals (as.logical (novel.status [c ('UW.Motif.0010', 'UW.Motif.0011', 'UW.Motif.0012')]),
38
+                             c (FALSE, FALSE, TRUE))
39
+  invisible (novel.status)
40
+
41
+} # readNovelStatus
42
+#------------------------------------------------------------------------------------------------------------------------
43
+test.extractAndNormalizeMatrices = function (x.rawMatrixList)
44
+{
45
+  print ('--- test.extractAndNormalizeMatrices')
46
+  matrices.fixed <<- extractAndNormalizeMatrices (x.rawMatrixList)
47
+    # make sure a UW.Motif.0nnn name accompanies each matrix
48
+  checkEquals (length (grep ('UW.Motif.0', names (matrices.fixed))), length (matrices.fixed))
49
+    # make sure all columns in all matrices sum to 1.0
50
+  checkTrue (all (sapply (matrices.fixed, function (m) all (abs (colSums (m) - 1.0) < 1e-10))))
51
+  invisible (matrices.fixed)
52
+
53
+} # test.extractAndNormalizeMatrices
54
+#------------------------------------------------------------------------------------------------------------------------
55
+test.convertRawMatricesToStandard = function (tbl.rmat)
56
+{
57
+  print ('--- test.convertRawMatricesToStandard')
58
+   # get just the first two raw matrices
59
+
60
+  first.two.ids = head (unique (tbl.rmat$id), n=2)
61
+  rows = nrow (subset (tbl.rmat, id %in% first.two.ids))
62
+  matrices = convertRawMatricesToStandard (tbl.rmat [1:rows,])
63
+  checkEquals (length (matrices), 2)
64
+  checkEquals (names (matrices), first.two.ids)
65
+
66
+    # it will not always be true, but IS true for the first two matrices, currently "9229" and  "9231", that there
67
+    # are an equal number of nucleotides at each position.
68
+  checkTrue (all (colSums (matrices [[1]]) == 97))
69
+  checkTrue (all (colSums (matrices [[2]]) == 185))
70
+
71
+    # now run all the matrices through
72
+  matrices = convertRawMatricesToStandard (tbl.rmat)
73
+  checkEquals (length (matrices), 459)
74
+  checkEquals (names (matrices)[1:2], first.two.ids)
75
+  
76
+  invisible (matrices)
77
+  
78
+} # test.convertRawMatricesToStandard 
79
+#------------------------------------------------------------------------------------------------------------------------
80
+test.createAnnotationTable = function ()
81
+{
82
+  print ('--- test.createAnnotationTable')
83
+  tbl.anno = createAnnotationTable ()
84
+  checkEquals (dim (tbl.anno), c (513, 13))
85
+  expected = c ("fullID", "id", "category", "mID", "version", "binder", "speciesID", "proteinID", "family", "tax", "class", "pubmed", "type")
86
+  checkEquals (colnames (tbl.anno), expected)
87
+
88
+  checkEquals (head (tbl.anno$fullID),  c ("MA0001.1", "MA0003.1", "MA0004.1", "MA0005.1", "MA0006.1", "MA0006.1"))
89
+  invisible (tbl.anno)
90
+
91
+} # test.createAnnotationTable
92
+#------------------------------------------------------------------------------------------------------------------------
93
+test.createMetadataTable = function (x.matrices, x.novels)
94
+{
95
+  print ('--- test.createMetadataTable')
96
+   # try it first with just two matrices
97
+  tbl.md = createMetadataTable (x.matrices [1:12], x.novels [1:12])
98
+  checkEquals (dim (tbl.md), c (12, 15))
99
+  checkEquals (colnames (tbl.md), c ("providerName", "providerId", "dataSource", "geneSymbol", "geneId", "geneIdType", 
100
+                                     "proteinId", "proteinIdType", "organism", "sequenceCount", "bindingSequence",
101
+                                     "bindingDomain", "tfFamily", "experimentType", "pubmedID"))
102
+   checkEquals (tbl.md$providerName [1:2], c ('UW.Motif.0001', 'UW.Motif.0002'))
103
+   checkEquals (tbl.md$providerId [1:2], c ('UW.Motif.0001', 'UW.Motif.0002'))
104
+   checkEquals (tbl.md$pubmedID [1:2], c ('22959076', '22959076'))
105
+   checkEquals (tbl.md$dataSource [1:2], c ('stamlab', 'stamlab'))
106
+   checkEquals (tbl.md$organism [1:2], c ('Hsapiens', 'Hsapiens'))
107
+   checkEquals (tbl.md$experimentType [1:2], c ('digital genomic footprinting', 'digital genomic footprinting'))
108
+   checkEquals (tbl.md$geneId, c (rep ('knownMotif', 11), 'novelMotif'))
109
+
110
+  invisible (tbl.md)
111
+
112
+} # test.createMetadataTable
113
+#------------------------------------------------------------------------------------------------------------------------
114
+test.renameMatrices = function (matrices, tbl.md)
115
+{
116
+  print("--- test.renameMatrices")
117
+  
118
+    # try it with just the first two matrices
119
+  matrix.pair = matrices [1:2]
120
+  tbl.pair = tbl.md [1:2,]
121
+  matrix.pair.renamed = renameMatrices (matrix.pair, tbl.pair)
122
+  checkEquals (names (matrix.pair.renamed), c ("Hsapiens-stamlab-UW.Motif.0001", "Hsapiens-stamlab-UW.Motif.0002"))
123
+
124
+} # test.renameMatrices
125
+#------------------------------------------------------------------------------------------------------------------------
126
+test.convertTaxonCode = function ()
127
+{
128
+  print ('--- test.convertTaxonCode')
129
+
130
+  checkEquals (convertTaxonCode ('9606'), 'Hsapiens')
131
+  checkEquals (convertTaxonCode (9606), 'Hsapiens')
132
+     # anomalous codes, which an examination of the jaspar website reveals as 'vertebrates'
133
+  checkEquals (convertTaxonCode (NA), 'Vertebrata')
134
+  checkEquals (convertTaxonCode ('NA'), 'Vertebrata')
135
+  checkEquals (convertTaxonCode (NA_character_), 'Vertebrata')
136
+  checkEquals (convertTaxonCode ('-'), 'Vertebrata')
137
+
138
+} # test.convertTaxonCode
139
+#------------------------------------------------------------------------------------------------------------------------
140
+test.guessProteinIdentifierType = function (moleculeName)
141
+{
142
+  print ('--- test.guessProteinIdentifierType')
143
+  checkEquals (guessProteinIdentifierType ('P29383'), 'UNIPROT')
144
+
145
+  all.types = sapply (x.tbl.anno$proteinID, guessProteinIdentifierType)
146
+  checkTrue (length (which (is.na (all.types))) < 12)   # got most of them.
147
+
148
+} # test.guessProteinIdentifierType
149
+#------------------------------------------------------------------------------------------------------------------------
150
+test.normalizeMatrices = function (matrices)
151
+{
152
+  print ('--- test.normalizeMatrices')
153
+
154
+  colsums = as.integer (sapply (matrices, function (mtx) as.integer (mean (round (colSums (mtx))))))
155
+  #checkTrue (all (colsums > 1))
156
+
157
+  matrices.norm = normalizeMatrices (matrices)
158
+
159
+  colsums = as.integer (sapply (matrices.norm, function (mtx) as.integer (mean (round (colSums (mtx))))))
160
+  checkTrue (all (colsums == 1))
161
+
162
+  invisible (matrices.norm)
163
+
164
+} # test.normalizeMatrices
165
+#------------------------------------------------------------------------------------------------------------------------
166
+test.assignGeneId = function (proteinId)
167
+{
168
+  print ('--- test.assignGeneId')
169
+  uniprot.ids = c ('Q9GRA5', 'P31314', 'AAC18941', 'O49397')
170
+  refseq.ids  = c ('NP_995315.1', 'NP_032840', 'NP_599022')
171
+  yeast.ids   = c ('YKL112W', 'YMR072W', 'YLR131C')
172
+
173
+  checkEquals (assignGeneId ('NP_995315.1'), list (geneId='4782', type='ENTREZ'))
174
+  checkEquals (assignGeneId ('NP_599022'),   list (geneId='6095', type='ENTREZ'))
175
+
176
+  checkEquals (assignGeneId ('P31314'),      list (geneId='3195', type='ENTREZ'))
177
+
178
+  checkEquals (assignGeneId ('YKL112W'),     list (geneId='YKL112W', type='SGD'))
179
+
180
+    # see how successful this is over all 513 proteinIds
181
+
182
+  tbl.anno = createAnnotationTable ()
183
+  mtx.geneId = as.data.frame (t (sapply (tbl.anno$proteinID, assignGeneId)))
184
+  tbl.types = as.data.frame (table (as.character (mtx.geneId$type), useNA='always'), stringsAsFactors=FALSE)
185
+  checkEquals (tbl.types$Var1, c ("ENTREZ", "SGD", NA))
186
+  checkEquals (tbl.types$Freq, c (141, 177, 195))
187
+
188
+} # test.assignGeneId
189
+#------------------------------------------------------------------------------------------------------------------------
190
+test.parsePwm = function ()
191
+{
192
+  print ('--- test.parsePwm')
193
+  lines = c ('UW.Motif.0006	aggaaatg',
194
+             '0.890585	0.007855	0.051323	0.050237',
195
+             '0.060732	0.004506	0.894170	0.040593',
196
+             '0.072765	0.037935	0.860704	0.028596',
197
+             '0.929585	0.024037	0.034914	0.011464',
198
+             '0.931220	0.023231	0.029078	0.016471',
199
+             '0.857934	0.044211	0.072594	0.025261',
200
+             '0.065840	0.013777	0.058013	0.862370',
201
+             '0.049937	0.036238	0.861871	0.051953')
202
+  m6 = parsePwm (lines)
203
+  checkEquals (names (m6), c ("title", "consensus.sequence", "matrix"))
204
+  pwm = m6$matrix
205
+  checkEquals (dim (pwm), c (4, 8))
206
+  checkEquals (rownames (pwm), c ('A', 'C', 'G', 'T'))
207
+  invisible (m6)
208
+
209
+} # test.parsePwm
210
+#------------------------------------------------------------------------------------------------------------------------