... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: MotifDb |
2 | 2 |
Type: Package |
3 | 3 |
Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs |
4 |
-Version: 1.21.3 |
|
5 |
-Date: 2018-04-08 |
|
4 |
+Version: 1.21.4 |
|
5 |
+Date: 2018-05-10 |
|
6 | 6 |
Author: Paul Shannon, Matt Richards |
7 | 7 |
Maintainer: Paul Shannon <pshannon@systemsbiology.org> |
8 | 8 |
Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings |
... | ... |
@@ -1,5 +1,5 @@ |
1 |
-setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) |
|
2 |
- standardGeneric ('query')) |
|
1 |
+setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) standardGeneric ('query')) |
|
2 |
+setGeneric('query2', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query2')) |
|
3 | 3 |
setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
4 | 4 |
setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif')) |
5 | 5 |
setGeneric('associateTranscriptionFactors', signature='object', |
... | ... |
@@ -281,6 +281,45 @@ setMethod ('query', 'MotifList', |
281 | 281 |
object [indices] |
282 | 282 |
}) |
283 | 283 |
#------------------------------------------------------------------------------- |
284 |
+setMethod ('query2', 'MotifList', |
|
285 |
+ |
|
286 |
+ function (object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) { |
|
287 |
+ find.indices <- function(queryString) |
|
288 |
+ {unique(as.integer(unlist(sapply(colnames(mcols(object)), |
|
289 |
+ function(colname) grep(queryString, mcols(object)[,colname],ignore.case=ignore.case))))) |
|
290 |
+ } |
|
291 |
+ # set up defaults: with no search strings, "and" and "or" each match every element, and "not" excludes nothing |
|
292 |
+ and.indices <- list(seq_len(length(object))) |
|
293 |
+ or.indices <- list(seq_len(length(object))) |
|
294 |
+ not.indices <- list(c()) |
|
295 |
+ |
|
296 |
+ if(length(andStrings) > 0) |
|
297 |
+ and.indices <- lapply(andStrings, find.indices) |
|
298 |
+ |
|
299 |
+ if(length(orStrings) > 0) |
|
300 |
+ or.indices <- lapply(orStrings, find.indices) |
|
301 |
+ |
|
302 |
+ if(length(notStrings) > 0) |
|
303 |
+ not.indices <- lapply(notStrings, find.indices) |
|
304 |
+ |
|
305 |
+ # start with the indices of all elements |
|
306 |
+ final.indices <- seq_len(length(object)) |
|
307 |
+ |
|
308 |
+ # get the cumulative intersection of all the "and" terms |
|
309 |
+ # this steadily diminishes the set of indices |
|
310 |
+ for(indices in and.indices) |
|
311 |
+ final.indices <- intersect(final.indices, indices) |
|
312 |
+ |
|
313 |
+ # lump all of the "or" terms together (a match on any one of them suffices), then keep only those indices which also survived the "and" terms |
|
314 |
+ final.indices <- intersect(unlist(or.indices), final.indices) |
|
315 |
+ |
|
316 |
+ # finally reduce the set to exclude all indices of all "not" terms |
|
317 |
+ for(indices in not.indices) |
|
318 |
+ final.indices <- setdiff(final.indices, indices) |
|
319 |
+ |
|
320 |
+ object [final.indices] |
|
321 |
+ }) |
|
322 |
+#------------------------------------------------------------------------------- |
|
284 | 323 |
# Addition on 2017/06/15 from Matt Richards |
285 | 324 |
|
286 | 325 |
# This will not exactly match JASPAR because units are PFM and JASPAR uses PCM |
... | ... |
@@ -29,6 +29,7 @@ runTests = function () |
29 | 29 |
test.subset () |
30 | 30 |
test.subsetWithVariables () |
31 | 31 |
test.query () |
32 |
+ test.query2() |
|
32 | 33 |
test.transformMatrixToMemeRepresentation () |
33 | 34 |
test.matrixToMemeText () |
34 | 35 |
test.export_memeFormatStdOut () |
... | ... |
@@ -438,6 +439,42 @@ test.query = function () |
438 | 439 |
|
439 | 440 |
} # test.query |
440 | 441 |
#------------------------------------------------------------------------------------------------------------------------ |
442 |
+test.query2 <- function() |
|
443 |
+{ |
|
444 |
+ print ('--- test.query2') |
|
445 |
+ mdb = MotifDb |
|
446 |
+ |
|
447 |
+ ors <- c("MA0511.1", "MA0057.1") |
|
448 |
+ ands <- c("jaspar2018", "sapiens") |
|
449 |
+ nots <- "cisbp" |
|
450 |
+ x <- query2(mdb, andStrings=ands, orStrings=ors) |
|
451 |
+ checkEquals(length(x), 2) |
|
452 |
+ checkEquals(sort(names(x)), |
|
453 |
+ c("Hsapiens-jaspar2018-MZF1(var.2)-MA0057.1", "Hsapiens-jaspar2018-RUNX2-MA0511.1")) |
|
454 |
+ |
|
455 |
+ x <- query2(mdb, andStrings="MA0057.1") |
|
456 |
+ checkEquals(length(x), 15) |
|
457 |
+ |
|
458 |
+ x <- query2(mdb, andStrings=c("MA0057.1", "cisbp")) |
|
459 |
+ checkEquals(length(x), 11) |
|
460 |
+ |
|
461 |
+ x <- query2(mdb, andStrings=c("MA0057.1"), notStrings="cisbp") |
|
462 |
+ checkEquals(length(x), 4) |
|
463 |
+ |
|
464 |
+ x <- query2(mdb, andStrings=c("MA0057.1"), notStrings=c("cisbp", "JASPAR_2014")) |
|
465 |
+ checkEquals(length(x), 3) |
|
466 |
+ |
|
467 |
+ x <- query2(mdb, orStrings=c("mus", "sapiens"), andStrings="MA0057.1") |
|
468 |
+ # TODO: the result of this query is not yet checked; add a checkEquals on sort(names(x)) |
|
469 |
+ |
|
470 |
+ # do queries on dataSource counts match those from a contingency table? |
|
471 |
+ sources.list = as.list (table (mcols(mdb)$dataSource)) |
|
472 |
+ checkEquals (length (query2 (mdb, 'flyfactorsurvey')), sources.list$FlyFactorSurvey) |
|
473 |
+ checkEquals (length (query2 (mdb, 'uniprobe')), sources.list$UniPROBE) |
|
474 |
+ checkEquals (length (query2 (mdb, 'UniPROBE')), sources.list$UniPROBE) |
|
475 |
+ |
|
476 |
+} # test.query2 |
|
477 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
441 | 478 |
test.transformMatrixToMemeRepresentation = function () |
442 | 479 |
{ |
443 | 480 |
print ('--- test.transformMatrixToMemeRepresentation') |