... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: MotifDb |
2 | 2 |
Type: Package |
3 | 3 |
Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs |
4 |
-Version: 1.19.3 |
|
5 |
-Date: 2017-08-12 |
|
4 |
+Version: 1.19.6 |
|
5 |
+Date: 2017-09-07 |
|
6 | 6 |
Author: Paul Shannon, Matt Richards |
7 | 7 |
Maintainer: Paul Shannon <pshannon@systemsbiology.org> |
8 | 8 |
Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings |
... | ... |
@@ -4,8 +4,9 @@ exportMethods ( |
4 | 4 |
export, |
5 | 5 |
show, |
6 | 6 |
query, |
7 |
- mapMotifToTranscriptionFactorGeneSymbol, |
|
8 |
- mapTranscriptionFactorGeneSymbolToMotif |
|
7 |
+ motifToGene, |
|
8 |
+ geneToMotif, |
|
9 |
+ associateTranscriptionFactors |
|
9 | 10 |
) |
10 | 11 |
|
11 | 12 |
export( |
... | ... |
@@ -18,3 +19,4 @@ import(IRanges) |
18 | 19 |
importFrom(rtracklayer, export) |
19 | 20 |
import(Biostrings) |
20 | 21 |
import(methods) |
22 |
+importFrom(splitstackshape, expandRows) |
... | ... |
@@ -1,9 +1,10 @@ |
1 | 1 |
setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) |
2 | 2 |
standardGeneric ('query')) |
3 |
-setGeneric('mapMotifToTranscriptionFactorGeneSymbol', signature='object', function(object, motifs, mode) |
|
4 |
- standardGeneric('mapMotifToTranscriptionFactorGeneSymbol')) |
|
5 |
-setGeneric('mapTranscriptionFactorGeneSymbolToMotif', signature='object', function(object, geneSymbols, mode) |
|
6 |
- standardGeneric('mapTranscriptionFactorGeneSymbolToMotif')) |
|
3 |
+setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene')) |
|
4 |
+setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source) standardGeneric('geneToMotif')) |
|
5 |
+setGeneric('associateTranscriptionFactors', signature='object', |
|
6 |
+ function(object, tbl.withMotifs, motif.column.name, source, expand.rows) |
|
7 |
+ standardGeneric('associateTranscriptionFactors')) |
|
7 | 8 |
#------------------------------------------------------------------------------------------------------------------------ |
8 | 9 |
setClass ('MotifList', |
9 | 10 |
contains='SimpleList', |
... | ... |
@@ -323,7 +324,7 @@ matrixToJasparText <- function (matrices) |
323 | 324 |
# For each line of the matrix, print the correct letter and the |
324 | 325 |
# matrix row surrounded by brackets |
325 | 326 |
motif.matrix <- matrices[name][[1]] |
326 |
- |
|
327 |
+ |
|
327 | 328 |
|
328 | 329 |
for (r in 1:nrow(motif.matrix)) { |
329 | 330 |
s[index] <- sprintf("%s [ %s ]", |
... | ... |
@@ -342,35 +343,105 @@ matrixToJasparText <- function (matrices) |
342 | 343 |
} # matrixToJasparText |
343 | 344 |
#------------------------------------------------------------------------------- |
344 | 345 |
# returns a data.frame with motif, geneSymbol, source, pubmedID columns |
345 |
-setMethod ('mapMotifToTranscriptionFactorGeneSymbol', 'MotifList', |
|
346 |
- |
|
347 |
- function (object, motifs, mode) { |
|
348 |
- stopifnot(mode %in% c("direct", "indirect", "both")) |
|
349 |
- if(mode %in% c("direct", "both")){ |
|
350 |
- # result <- rbind(result, newRows |
|
346 |
+setMethod ('motifToGene', 'MotifList', |
|
347 |
+ |
|
348 |
+ function (object, motifs, source) { |
|
349 |
+ stopifnot(source %in% c("MotifDb", "TFClass")) |
|
350 |
+ tbl <- data.frame() |
|
351 |
+ if(source %in% c("MotifDb")){ |
|
352 |
+ tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs)) |
|
353 |
+ tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
|
354 |
+ colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
|
355 |
+ tbl |
|
356 |
+ tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")] |
|
357 |
+ tbl$from <- "MotifDb" |
|
351 | 358 |
} |
352 |
- if(mode %in% c("indirect", "both")){ |
|
353 |
- # draw on object@manuallyCuratedGeneMotifAssociationTable |
|
354 |
- # result <- rbind(result, newRows |
|
359 |
+ if(source %in% c("TFClass")){ |
|
360 |
+ tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs) |
|
361 |
+ tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
|
362 |
+ tbl <- tbl[order(tbl$motif),] |
|
363 |
+ rownames(tbl) <- NULL |
|
364 |
+ colnames(tbl) <- c("motif", "gene", "pubmedID") |
|
365 |
+ tbl$from <- "TFClass" |
|
355 | 366 |
} |
356 |
- result |
|
367 |
+ tbl |
|
357 | 368 |
}) |
358 | 369 |
|
359 | 370 |
#------------------------------------------------------------------------------- |
360 | 371 |
# returns a data.frame with motif, geneSymbol, source, pubmedID columns |
361 |
-setMethod ('mapTranscriptionFactorGeneSymbolToMotif', 'MotifList', |
|
362 |
- |
|
363 |
- function (object, geneSymbols, mode) { |
|
364 |
- stopifnot(mode %in% c("direct", "indirect")) |
|
365 |
- result <- data.frame() |
|
366 |
- if(mode %in% c("direct", "both")){ |
|
367 |
- # result <- rbind(result, newRows |
|
372 |
+setMethod ('geneToMotif', 'MotifList', |
|
373 |
+ |
|
374 |
+ function (object, geneSymbols, source) { |
|
375 |
+ stopifnot(source %in% c("MotifDb", "TFClass")) |
|
376 |
+ #browser() |
|
377 |
+ extract.mdb <- function(gene){ |
|
378 |
+ tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene)) |
|
379 |
+ tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")]) |
|
380 |
+ colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID") |
|
381 |
+ tbl |
|
368 | 382 |
} |
369 |
- if(mode %in% c("indirect", "both")){ |
|
370 |
- # draw on object@manuallyCuratedGeneMotifAssociationTable |
|
371 |
- # result <- rbind(result, newRows |
|
383 |
+ if(source %in% c("MotifDb")){ |
|
384 |
+ tbls <- lapply(geneSymbols, extract.mdb) |
|
385 |
+ result <- do.call(rbind, tbls) |
|
386 |
+ result$from <- "MotifDb" |
|
387 |
+ } |
|
388 |
+ if(source %in% c("TFClass")){ |
|
389 |
+ tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols) |
|
390 |
+ tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")]) |
|
391 |
+ tbl <- tbl[order(tbl$tf.gene),] |
|
392 |
+ rownames(tbl) <- NULL |
|
393 |
+ colnames(tbl) <- c("motif", "gene", "pubmedID") |
|
394 |
+ result <- tbl[, c("gene", "motif", "pubmedID")] |
|
395 |
+ result$from <- "TFClass" |
|
372 | 396 |
} |
373 | 397 |
result |
374 | 398 |
}) |
375 | 399 |
|
376 | 400 |
#------------------------------------------------------------------------------- |
401 |
+setMethod('associateTranscriptionFactors', 'MotifList', |
|
402 |
+ |
|
403 |
+ function(object, tbl.withMotifs, motif.column.name, source, expand.rows){ |
|
404 |
+ stopifnot(source %in% c("MotifDb", "TFClass")) |
|
405 |
+ tbl.out <- data.frame() |
|
406 |
+ if(source %in% c("MotifDb")){ |
|
407 |
+ # "direct" means: lookup up in the object metadata, expect one TF geneSymbol per matrix name |
|
408 |
+ pfm.ids <- tbl.withMotifs[, motif.column.name] |
|
409 |
+ matched.rows <- match(pfm.ids, names(as.list(object))) |
|
410 |
+ #if(length(matched.rows) == nrow(tbl.withMotifs)) { |
|
411 |
+ tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")] |
|
412 |
+ tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
413 |
+ tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
414 |
+ tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
415 |
+ #} |
|
416 |
+ } # direct |
|
417 |
+ if(source %in% c("TFClass")){ |
|
418 |
+ tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE) |
|
419 |
+ motif.ids <- tbl.withMotifs[, motif.column.name] |
|
420 |
+ geneSymbols <- lapply(motif.ids, function(id) paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif)], collapse=";")) |
|
421 |
+ geneSymbols <- unlist(geneSymbols) |
|
422 |
+ pubmedIds <- lapply(motif.ids, function(id) unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif)])) |
|
423 |
+ pubmedIds <- as.character(pubmedIds) |
|
424 |
+ pubmedIds <- gsub("integer(0)", "", pubmedIds, fixed=TRUE) |
|
425 |
+ tbl.new <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE) |
|
426 |
+ tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
427 |
+ tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
428 |
+ tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
429 |
+ |
|
430 |
+ if(expand.rows){ |
|
431 |
+ rows.with.na <- which(is.na(tbl.out$geneSymbol)) |
|
432 |
+ rows.with.geneSymbol <- setdiff(1:nrow(tbl.out), rows.with.na) |
|
433 |
+ tbl.asIs <- tbl.out[rows.with.na,] |
|
434 |
+ tbl.toExpand <- tbl.out[rows.with.geneSymbol,] |
|
435 |
+ geneSymbols.split <- strsplit(tbl.toExpand$geneSymbol, ";") |
|
436 |
+ counts <- unlist(lapply(geneSymbols.split, length)) |
|
437 |
+ geneSymbols.split.vec <- unlist(geneSymbols.split) |
|
438 |
+ tbl.expanded <- splitstackshape::expandRows(tbl.toExpand, counts, count.is.col=FALSE, drop=FALSE) |
|
439 |
+ stopifnot(length(geneSymbols.split.vec) == nrow(tbl.expanded)) |
|
440 |
+ tbl.expanded$geneSymbol <- geneSymbols.split.vec |
|
441 |
+ tbl.out <- rbind(tbl.expanded, tbl.asIs) |
|
442 |
+ } |
|
443 |
+ } # indirect |
|
444 |
+ tbl.out |
|
445 |
+ }) |
|
446 |
+ |
|
447 |
+#------------------------------------------------------------------------------- |
... | ... |
@@ -3,7 +3,7 @@ library (RUnit) |
3 | 3 |
library (MotIV) |
4 | 4 |
library (seqLogo) |
5 | 5 |
#------------------------------------------------------------------------------------------------------------------------ |
6 |
-run.tests = function () |
|
6 |
+runTests = function () |
|
7 | 7 |
{ |
8 | 8 |
test.emptyCtor () |
9 | 9 |
test.nonEmptyCtor () |
... | ... |
@@ -36,10 +36,14 @@ run.tests = function () |
36 | 36 |
test.MotIV.toTable () |
37 | 37 |
test.run_MotIV.motifMatch() |
38 | 38 |
test.flyFactorGeneSymbols() |
39 |
- test.export_jasparFormatStdOut () |
|
40 |
- test.export_jasparFormatToFile () |
|
39 |
+ test.export_jasparFormatStdOut() |
|
40 |
+ test.export_jasparFormatToFile() |
|
41 | 41 |
|
42 |
-} # run.tests |
|
42 |
+ test.geneToMotif() |
|
43 |
+ test.motifToGene() |
|
44 |
+ test.associateTranscriptionFactors() |
|
45 |
+ |
|
46 |
+} # runTests |
|
43 | 47 |
#------------------------------------------------------------------------------------------------------------------------ |
44 | 48 |
test.emptyCtor = function () |
45 | 49 |
{ |
... | ... |
@@ -743,26 +747,144 @@ test.export_jasparFormatToFile = function () |
743 | 747 |
|
744 | 748 |
} # test.exportjasparFormatToFile |
745 | 749 |
#------------------------------------------------------------------------------------------------------------------------ |
746 |
-test.motifTfGeneMapping <- function() |
|
750 |
+test.geneToMotif <- function() |
|
747 | 751 |
{ |
748 |
- printf("--- test.motifTfGeneMapping") |
|
752 |
+ printf("--- test.geneToMotif") |
|
749 | 753 |
mdb <- MotifDb |
750 | 754 |
|
751 |
- set.seed(17) |
|
752 |
- random.10 <- sample(seq_len(nrow(mcols(mdb))), 10) |
|
753 |
- random.10.b <- sample(seq_len(nrow(mcols(mdb))), 10) |
|
755 |
+ genes <- c("FOS", "ATF5", "bogus") |
|
756 |
+ |
|
757 |
+ # use TFClass family classifcation |
|
758 |
+ tbl.i <- geneToMotif(mdb, genes, source="TFClass") |
|
759 |
+ checkEquals(tbl.i$gene, c("ATF5", "FOS", "FOS")) |
|
760 |
+ checkEquals(tbl.i$motif, c("MA0833.1", "MA0099.2", "MA0476.1")) |
|
761 |
+ checkEquals(tbl.i$from, rep("TFClass", 3)) |
|
762 |
+ |
|
763 |
+ # MotifDb mode uses the MotifDb metadata, pulled from many sources |
|
764 |
+ tbl.d <- geneToMotif(mdb, genes, source="MotifDb") |
|
765 |
+ checkEquals(dim(tbl.d), c(12, 6)) |
|
766 |
+ checkEquals(subset(tbl.d, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1") |
|
767 |
+ # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata |
|
768 |
+ # MotifDb for ATF5 |
|
769 |
+ # todo: compare the MA0110596_1.02 matrix of cisp_1.02 to japar MA0833.1 |
|
770 |
+ |
|
771 |
+ # now try motifs to genes |
|
754 | 772 |
|
755 |
- motifs <- mcols(mdb)[random.10, "providerId"] |
|
756 |
- genes <- mcols(mdb)[random.10, "geneSymbol"] |
|
773 |
+} # test.geneToMotif |
|
774 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
775 |
+test.motifToGene <- function() |
|
776 |
+{ |
|
777 |
+ printf("--- test.motifToGene") |
|
778 |
+ mdb <- MotifDb |
|
757 | 779 |
|
758 |
- tbl.m2tf <- mapMotifToTranscriptionFactorGeneSymbol(mdb, motifs, mode="direct") |
|
759 |
- tbl.m2tf <- mapMotifToTranscriptionFactorGeneSymbol(mdb, motifs, mode="indirect") |
|
780 |
+ motifs <- c("MA0592.2", "UP00022", "ELF1.SwissRegulon") |
|
760 | 781 |
|
761 |
- tbl.tf2m <- mapTranscriptionFactorGeneSymbolToMotif(mdb, genes, mode="direct") |
|
762 |
- tbl.tf2m <- mapTranscriptionFactorGeneSymbolToMotif(mdb, genes, mode="indirect") |
|
782 |
+ # TFClass mode uses TF family classifcation |
|
783 |
+ tbl.d <- motifToGene(mdb, motifs, source="MotifDb") |
|
784 |
+ checkEquals(dim(tbl.d), c(3, 6)) |
|
785 |
+ checkEquals(tbl.d$motif, c("MA0592.2", "ELF1.SwissRegulon", "UP00022")) |
|
786 |
+ checkEquals(tbl.d$geneSymbol, c("Esrra", "ELF1", "Zfp740")) |
|
787 |
+ checkEquals(tbl.d$dataSource, c("jaspar2016", "SwissRegulon", "UniPROBE")) |
|
788 |
+ checkEquals(tbl.d$organism, c("Mmusculus", "Hsapiens", "Mmusculus")) |
|
789 |
+ checkEquals(tbl.d$from, rep("MotifDb", 3)) |
|
790 |
+ |
|
791 |
+ # MotifDb mode uses the MotifDb metadata, pulled from many sources |
|
792 |
+ tbl.i <- motifToGene(mdb, motifs, source="TFClass") |
|
793 |
+ checkEquals(dim(tbl.i), c(9,4)) |
|
794 |
+ checkEquals(tbl.i$motif, rep("MA0592.2", 9)) |
|
795 |
+ checkEquals(sort(tbl.i$gene), c("AR", "ESR1", "ESR2", "ESRRA", "ESRRB", "ESRRG", "NR3C1", "NR3C2", "PGR")) |
|
796 |
+ checkEquals(tbl.i$from, rep("TFClass", 9)) |
|
797 |
+ |
|
798 |
+} # test.geneToMotif |
|
799 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
800 |
+test.associateTranscriptionFactors <- function() |
|
801 |
+{ |
|
802 |
+ printf("--- test.associateTranscriptionFactors") |
|
803 |
+ |
|
804 |
+ mdb <- MotifDb |
|
805 |
+ pfms <- query(query(mdb, "jaspar2016"), "sapiens") |
|
763 | 806 |
|
764 |
- # check these 4 tables for agreement |
|
807 |
+ # first check motifs with MotifDb-style long names, using MotifDb lookup, in the |
|
808 |
+ # metadata of MotifDb: |
|
809 |
+ # "Hsapiens-jaspar2016-RUNX1-MA0002.1" "Hsapiens-jaspar2016-TFAP2A-MA0003.1" |
|
765 | 810 |
|
811 |
+ motif.names <- names(pfms[1:5]) |
|
812 |
+ tbl <- data.frame(motifLongName=motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
813 |
+ tbl.anno <- associateTranscriptionFactors(mdb, tbl, "motifLongName", source="MotifDb", expand.rows=FALSE) |
|
814 |
+ checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
815 |
+ checkTrue(all(c("geneSymbol", "pubmedID") %in% colnames(tbl.anno))) |
|
766 | 816 |
|
767 |
-} # test.motifTfGeneMapping |
|
817 |
+ # now add in a bogus motif name, one for which there cannot possibly be a TF |
|
818 |
+ |
|
819 |
+ motif.names[3] <- "bogus" |
|
820 |
+ tbl <- data.frame(motifLongName=motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
821 |
+ tbl.anno <- associateTranscriptionFactors(mdb, tbl, "motifLongName", source="MotifDb", expand.rows=FALSE) |
|
822 |
+ checkTrue(is.na(tbl.anno$geneSymbol[3])) |
|
823 |
+ checkTrue(is.na(tbl.anno$pubmedID[3])) |
|
824 |
+ |
|
825 |
+ # now check motifs with short, traditional names, in "TFClass" mode, which uses |
|
826 |
+ # the tfFamily |
|
827 |
+ # "MA0002.1" "MA0003.1" "MA0003.2" "MA0003.3" "MA0007.2" |
|
828 |
+ |
|
829 |
+ short.motif.names <- unlist(lapply(strsplit(motif.names, "-"), function(tokens) return(tokens[length(tokens)]))) |
|
830 |
+ tbl <- data.frame(motif=short.motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
831 |
+ |
|
832 |
+ tbl.anno <- associateTranscriptionFactors(mdb, tbl, "motif", source="TFClass", expand.rows=FALSE) |
|
833 |
+ checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
834 |
+ |
|
835 |
+ # now add in a bogus motif name, one for which there cannot possibly be a TF |
|
836 |
+ |
|
837 |
+ motif.names <- names(pfms[1:5]) |
|
838 |
+ short.motif.names <- unlist(lapply(strsplit(motif.names, "-"), function(tokens) return(tokens[length(tokens)]))) |
|
839 |
+ short.motif.names[3] <- "bogus" |
|
840 |
+ tbl <- data.frame(motif=short.motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
841 |
+ |
|
842 |
+ tbl.anno <- associateTranscriptionFactors(mdb, tbl, "motif", source="TFClass", expand.rows=FALSE) |
|
843 |
+ checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
844 |
+ |
|
845 |
+} # test.associateTranscriptionFactors |
|
846 |
+#------------------------------------------------------------------------------------------------------------------------ |
|
847 |
+# MA0003.3 is mapped by TFClass to TFAP2A;TFAP2B;TFAP2C;TFAP2D;TFAP2E (pumbedID 23180794) |
|
848 |
+# use it as a first test of the "exapand.rows" argument to associateTranscriptionFactors method |
|
849 |
+# MA0017.1 is unmapped by TFClass |
|
850 |
+# MA0017.2 is mapped to NR2F1 |
|
851 |
+test.associateTranscriptionFactors_expandRows <- function() |
|
852 |
+{ |
|
853 |
+ printf("--- test.assoicateTranscriptionFactors") |
|
854 |
+ |
|
855 |
+ mdb <- MotifDb |
|
856 |
+ motifs <- c("MA0003.3", "MA0017.1", "MA0017.2") |
|
857 |
+ tbl <- data.frame(motif=motifs, stringsAsFactors=FALSE) |
|
858 |
+ tbl.anno <- associateTranscriptionFactors(mdb, tbl, motif.column.name="motif", source="TFClass", expand.rows=FALSE) |
|
859 |
+ checkEquals(dim(tbl.anno), c(3,3)) |
|
860 |
+ checkEquals(tbl.anno$geneSymbol[c(1,3)], c("TFAP2A;TFAP2B;TFAP2C;TFAP2D;TFAP2E", "NR2F1")) |
|
861 |
+ checkTrue(is.na(tbl.anno$geneSymbol[2])) |
|
862 |
+ |
|
863 |
+ tbl.annoX <- associateTranscriptionFactors(mdb, tbl, motif.column.name="motif", source="TFClass", expand.rows=TRUE) |
|
864 |
+ checkEquals(dim(tbl.annoX), c(7,3)) |
|
865 |
+ checkEquals(tbl.annoX$motif, c(rep("MA0003.3", 5), "MA0017.2", "MA0017.1")) |
|
866 |
+ checkEquals(tbl.annoX$geneSymbol[1:6], c("TFAP2A", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E", "NR2F1")) |
|
867 |
+ checkTrue(is.na(tbl.annoX$geneSymbol[7])) |
|
868 |
+ |
|
869 |
+ # now a large scale test |
|
870 |
+ set.seed(37) |
|
871 |
+ mdb.human <- query(mdb, "sapiens") # > 4000 human matrices |
|
872 |
+ indices <- sample(1:length(mdb.human), size=250) |
|
873 |
+ motif.long.names <- names(mdb.human)[indices] |
|
874 |
+ motif.short.names <- mcols(mdb.human)[indices, "providerId"] |
|
875 |
+ tbl <- data.frame(motif.short=motif.short.names, motif.long=motif.long.names, stringsAsFactors=FALSE) |
|
876 |
+ |
|
877 |
+ tbl.anno.mdb <- associateTranscriptionFactors(mdb, tbl, motif.column.name="motif.long", source="MotifDb", expand.rows=FALSE) |
|
878 |
+ tbl.anno.mdbX <- associateTranscriptionFactors(mdb, tbl, motif.column.name="motif.long", source="MotifDb", expand.rows=TRUE) |
|
879 |
+ |
|
880 |
+ tbl.anno.tfc <- associateTranscriptionFactors(mdb, tbl, motif.column.name="motif.short", source="TFClass", expand.rows=FALSE) |
|
881 |
+ checkEquals(nrow(tbl.anno.tfc), length(motif.short.names)) |
|
882 |
+ checkTrue(length(grep(";", tbl.anno.tfc$geneSymbol)) > 20) |
|
883 |
+ |
|
884 |
+ tbl.anno.tfcX <- associateTranscriptionFactors(mdb, tbl, motif.column.name="motif.short", source="TFClass", expand.rows=TRUE) |
|
885 |
+ checkTrue(nrow(tbl.anno.tfcX) > nrow(tbl.anno.tfc)) # 250 vs 779 |
|
886 |
+ checkEquals(length(grep(";", tbl.anno.tfcX$geneSymbol)), 0) |
|
887 |
+ |
|
888 |
+ |
|
889 |
+} # test.associateTranscriptionFactors_expandRows |
|
768 | 890 |
#------------------------------------------------------------------------------------------------------------------------ |