Browse code

added query2(mdb, andTerms, orTerms=c(), notTerms=c())

paul-shannon authored on 10/05/2018 23:52:12
Showing 4 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.21.3
5
-Date: 2018-04-08
4
+Version: 1.21.4
5
+Date: 2018-05-10
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8 8
 Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings
... ...
@@ -4,6 +4,7 @@ exportMethods (
4 4
    export,
5 5
    show,
6 6
    query,
7
+   query2,
7 8
    motifToGene,
8 9
    geneToMotif,
9 10
    associateTranscriptionFactors
... ...
@@ -1,5 +1,5 @@
1
-setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE)
2
-              standardGeneric ('query'))
1
+setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) standardGeneric ('query'))
2
+setGeneric('query2', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query2'))
3 3
 setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene'))
4 4
 setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif'))
5 5
 setGeneric('associateTranscriptionFactors', signature='object',
... ...
@@ -281,6 +281,45 @@ setMethod ('query', 'MotifList',
281 281
         object [indices]
282 282
       })
283 283
 #-------------------------------------------------------------------------------
284
+setMethod ('query2', 'MotifList',
285
+
286
+   function (object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) {
287
+      find.indices <- function(queryString)
288
+         {unique(as.integer(unlist(sapply(colnames(mcols(object)),
289
+                 function(colname) grep(queryString, mcols(object)[,colname],ignore.case=ignore.case)))))
290
+             }
291
+          # setup defaults
292
+       and.indices <- list(seq_len(length(object)))
293
+       or.indices <- list(seq_len(length(object)))
294
+       not.indices <- list(c())
295
+
296
+       if(length(andStrings) > 0)
297
+          and.indices <- lapply(andStrings, find.indices)
298
+
299
+       if(length(orStrings) > 0)
300
+          or.indices <- lapply(orStrings, find.indices)
301
+
302
+       if(length(notStrings) > 0)
303
+          not.indices <- lapply(notStrings, find.indices)
304
+
305
+          # start with the indices of all elements
306
+       final.indices <- seq_len(length(object))
307
+
308
+         # get the cumulative intersection of all the "and" terms
309
+         # this steadily dimishes the set of indices
310
+       for(indices in and.indices)
311
+         final.indices <- intersect(final.indices, indices)
312
+
313
+         # lump all of the "or" terms together: they all get included
314
+       final.indices <- intersect(unlist(or.indices), final.indices)
315
+
316
+         # finally reduce the set to exclude all indices of all "not" terms
317
+       for(indices in not.indices)
318
+         final.indices <- setdiff(final.indices, indices)
319
+
320
+       object [final.indices]
321
+       })
322
+#-------------------------------------------------------------------------------
284 323
 # Addition on 2017/06/15 from Matt Richards
285 324
 
286 325
 # This will not exactly match JASPAR because units are PFM and JASPAR uses PCM
... ...
@@ -29,6 +29,7 @@ runTests = function ()
29 29
   test.subset ()
30 30
   test.subsetWithVariables ()
31 31
   test.query ()
32
+  test.query2()
32 33
   test.transformMatrixToMemeRepresentation ()
33 34
   test.matrixToMemeText ()
34 35
   test.export_memeFormatStdOut ()
... ...
@@ -438,6 +439,42 @@ test.query = function ()
438 439
 
439 440
 } # test.query
440 441
 #------------------------------------------------------------------------------------------------------------------------
442
+test.query2 <- function()
443
+{
444
+  print ('--- test.query2')
445
+  mdb = MotifDb
446
+
447
+  ors <- c("MA0511.1", "MA0057.1")
448
+  ands <- c("jaspar2018", "sapiens")
449
+  nots <- "cisbp"
450
+  x <- query2(mdb, andStrings=ands, orStrings=ors)
451
+  checkEquals(length(x), 2)
452
+  checkEquals(sort(names(x)),
453
+             c("Hsapiens-jaspar2018-MZF1(var.2)-MA0057.1", "Hsapiens-jaspar2018-RUNX2-MA0511.1"))
454
+
455
+  x <- query2(mdb, andStrings="MA0057.1")
456
+  checkEquals(length(x), 15)
457
+
458
+  x <- query2(mdb, andStrings=c("MA0057.1", "cisbp"))
459
+  checkEquals(length(x), 11)
460
+
461
+  x <- query2(mdb, andStrings=c("MA0057.1"), notStrings="cisbp")
462
+  checkEquals(length(x), 4)
463
+
464
+  x <- query2(mdb, andStrings=c("MA0057.1"), notStrings=c("cisbp", "JASPAR_2014"))
465
+  checkEquals(length(x), 3)
466
+
467
+  x <- query2(mdb, orStrings=c("mus", "sapiens"), andStrings="MA0057.1")
468
+  #checkEquals(sort(names(x)),
469
+
470
+    # do queries on dataSource counts match those from a contingency table?
471
+  sources.list = as.list (table (mcols(mdb)$dataSource))
472
+  checkEquals (length (query2 (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey)
473
+  checkEquals (length (query2 (mdb, 'uniprobe')), sources.list$UniPROBE)
474
+  checkEquals (length (query2 (mdb, 'UniPROBE')), sources.list$UniPROBE)
475
+
476
+} # test.query2
477
+#------------------------------------------------------------------------------------------------------------------------
441 478
 test.transformMatrixToMemeRepresentation = function ()
442 479
 {
443 480
   print ('--- test.transformMatrixToMemeRepresentation')