Browse code

Added jaspar output format and unit tests

Matthew Richards authored on 15/06/2017 22:50:54
Showing 2 changed files

... ...
@@ -164,7 +164,7 @@ setMethod ('export',  signature=c(object='MotifList', con='connection',
164 164
 
165 165
   function (object, con, format, ...) {
166 166
 
167
-    fmt = match.arg (tolower (format), c ('meme', 'transfac'))
167
+    fmt = match.arg (tolower (format), c ('meme', 'transfac','jaspar'))
168 168
     ## match.arg fails if !fmt %in% c('meme', 'transfac', 'jaspar'), so no need
169 169
     ## for test
170 170
     ## let the user manage opened cons
... ...
@@ -172,10 +172,14 @@ setMethod ('export',  signature=c(object='MotifList', con='connection',
172 172
         open(con)
173 173
         on.exit(close(con))
174 174
     }
175
-    if (fmt == 'meme') 
176
-      text = matrixToMemeText (object)
175
+    if (fmt == 'meme') {
176
+        text = matrixToMemeText (object)
177
+    } else if (fmt == 'jaspar') {
178
+        text = matrixToJasparText (object)
179
+    }
177 180
     cat (text, sep='\n', file=con)
178
-    })
181
+  })
182
+
179 183
 
180 184
 #-------------------------------------------------------------------------------
181 185
 # write to connection, using default format,  ??? for matrix list, tsv for
... ...
@@ -185,13 +189,18 @@ setMethod ('export',  signature=c(object='MotifList',  con='missing',
185 189
 
186 190
   function (object, con, format,  ...) {
187 191
 
188
-    fmt = match.arg (tolower (format), c ('meme')) # , 'transfac'
192
+    fmt = match.arg (tolower (format), c ('meme','jaspar')) # , 'transfac'
189 193
     if (fmt == 'meme') {
190
-      text = paste (matrixToMemeText (object), collapse='\n')
194
+        text = paste (matrixToMemeText (object), collapse='\n')
191 195
       cat (text)
192 196
       invisible (text)
193
-      }
194
-    })
197
+    } else if (fmt == 'jaspar') {
198
+        text = paste (matrixToJasparText (object), collapse='\n')
199
+      cat (text)
200
+      invisible (text)
201
+    }
202
+      
203
+  })
195 204
 
196 205
 #-------------------------------------------------------------------------------
197 206
 setMethod('show', 'MotifList',
... ...
@@ -262,3 +271,61 @@ setMethod ('query', 'MotifList',
262 271
         object [indices]
263 272
       })
264 273
 #-------------------------------------------------------------------------------
274
+# Addition on 2017/06/15 from Matt Richards
275
+
276
+# This will not exactly match JASPAR because units are PFM and JASPAR uses PCM
277
+# General JASPAR Format:
278
+
279
+# > "Motif Name"\t"Transcription Factor"
280
+# A [ PCMS ]
281
+# C [ PCMS ]
282
+# G [ PCMS ]
283
+# T [ PCMS ]
284
+#
285
+# ...
286
+
287
+# Note: the PCMs are space-delimited
288
+
289
+matrixToJasparText <- function (matrices)
290
+{
291
+  matrix.count <- length (matrices)
292
+
293
+  # Incoming matrices have nucleotide rows, position columns.
294
+  # This is the correct orientation for JASPAR; however, we need to also
295
+  # add brackets and letters to them
296
+
297
+  # Calculate the number of lines of text by counting matrices and assuming
298
+  # 6 lines per matrix
299
+  
300
+  predicted.line.count <- 6*matrix.count
301
+
302
+  #s = vector ('character', predicted.line.count)
303
+  s <- character (predicted.line.count)
304
+
305
+  index <- 1
306
+  
307
+  for (name in names (matrices)) {
308
+
309
+      # Print the name with an arrow, followed by the motif
310
+      s[index] <- sprintf('>%s',name)
311
+      index <- index + 1
312
+
313
+      # For each line of the matrix, print the correct letter and the
314
+      # matrix row surrounded by brackets
315
+      motif.matrix <- matrices[name][[1]]
316
+      for (r in 1:nrow(motif.matrix)) {
317
+          s[index] <- sprintf("%s [ %s ]",
318
+                              rownames(motif.matrix)[r],
319
+                              paste(motif.matrix[r,],collapse=" "))
320
+          index <- index + 1
321
+      }
322
+
323
+      s[index] <- ""
324
+      index <- index + 1
325
+          
326
+    } # for name
327
+
328
+  invisible (s)
329
+
330
+} # matrixToJasparText
331
+#-------------------------------------------------------------------------------
... ...
@@ -36,6 +36,8 @@ run.tests = function ()
36 36
   test.MotIV.toTable ()
37 37
   test.run_MotIV.motifMatch()
38 38
   test.flyFactorGeneSymbols()
39
+  test.export_jasparFormatStdOut ()
40
+  test.export_jasparFormatToFile ()
39 41
 
40 42
 } # run.tests
41 43
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -697,4 +699,33 @@ test.flyFactorGeneSymbols <- function()
697 699
 
698 700
 } # test.flyFactorGeneSymbols
699 701
 #-------------------------------------------------------------------------------
702
+test.export_jasparFormatStdOut = function ()
703
+{
704
+  print ('--- test.export_jasparFormatStdOut')
705
+  mdb = MotifDb # ()
706
+  mdb.chicken = subset (mdb, organism=='Gallus')
707
+  checkEquals (length (mdb.chicken), 3)
708
+    # text is cat-ed to stdout, so not available here to check.
709
+    # but just like print, export also returns the text invisibly.
710
+    # so that CAN be checked.
711
+  
712
+  jaspar.text = export (mdb.chicken, format='jaspar')
713
+  checkEquals (length (jaspar.text), 1)   # just one long string
714
+  checkTrue (is.character (jaspar.text))
715
+  checkTrue (nchar (jaspar.text) > 800)   # 1002 as of (10 aug 2012)
716
+  return (TRUE)
717
+
718
+} # test.export_jasparFormatStdOut
719
+#------------------------------------------------------------------------------------------------------------------------
720
+test.export_jasparFormatToFile = function ()
721
+{
722
+  print ('--- test.export_jasparFormatToFile')
723
+  mdb = MotifDb # ()
724
+  mdb.chicken = subset (mdb, organism=='Gallus')
725
+  checkEquals (length (mdb.chicken), 3)
726
+  output.file = tempfile ()
727
+  jaspar.text = export (mdb.chicken, output.file, 'jaspar')
728
+  retrieved = scan (output.file, what=character (0), sep='\n', quiet=TRUE)
729
+  invisible (retrieved)
700 730
 
731
+} # test.export_jasparFormatToFile