... | ... |
@@ -1,14 +1,14 @@ |
1 | 1 |
Package: MotifDb |
2 | 2 |
Type: Package |
3 | 3 |
Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs |
4 |
-Version: 1.21.6 |
|
4 |
+Version: 1.21.7 |
|
5 | 5 |
Date: 2018-05-18 |
6 | 6 |
Author: Paul Shannon, Matt Richards |
7 | 7 |
Maintainer: Paul Shannon <pshannon@systemsbiology.org> |
8 | 8 |
Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings |
9 | 9 |
Suggests: RUnit, seqLogo, MotIV |
10 | 10 |
Imports: rtracklayer, splitstackshape |
11 |
-Description: More than 9000 annotated position frequency matrices from 14 public sources, for multiple organisms. |
|
11 |
+Description: More than 9900 annotated position frequency matrices from 14 public sources, for multiple organisms. |
|
12 | 12 |
License: Artistic-2.0 | file LICENSE |
13 | 13 |
License_is_FOSS: no |
14 | 14 |
License_restricts_use: yes |
... | ... |
@@ -475,6 +475,9 @@ setMethod ('motifToGene', 'MotifList', |
475 | 475 |
} |
476 | 476 |
} |
477 | 477 |
tbl.out <- rbind(tbl.mdb, tbl.tfc) |
478 |
+ dups <- which(duplicated(tbl.out[, c("motif", "geneSymbol", "organism", "source")])) |
|
479 |
+ if(length(dups) > 0) |
|
480 |
+ tbl.out <- tbl.out[-dups,] |
|
478 | 481 |
if(length(name.map) > 0) |
479 | 482 |
tbl.out$motif <- as.character(name.map[tbl.out$motif]) |
480 | 483 |
#browser() |
... | ... |
@@ -525,53 +528,62 @@ setMethod ('geneToMotif', 'MotifList', |
525 | 528 |
#------------------------------------------------------------------------------- |
526 | 529 |
setMethod('associateTranscriptionFactors', 'MotifList', |
527 | 530 |
|
528 |
- function(object, tbl.withMotifs, source, expand.rows){ |
|
529 |
- source <- tolower(source) |
|
530 |
- stopifnot(source %in% c("motifdb", "tfclass")) |
|
531 |
- tbl.out <- data.frame() |
|
532 |
- if(source %in% c("motifdb")){ |
|
533 |
- # lookup up in the object metadata, expect one TF geneSymbol per matrix name |
|
534 |
- pfm.ids <- tbl.withMotifs[, "motifName"] |
|
535 |
- matched.rows <- match(pfm.ids, names(as.list(object))) |
|
536 |
- #if(length(matched.rows) == nrow(tbl.withMotifs)) { |
|
537 |
- tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")] |
|
538 |
- tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
539 |
- tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
540 |
- tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
541 |
- } # direct |
|
542 |
- if(source %in% c("tfclass")){ |
|
543 |
- if(! "shortMotif" %in% colnames(tbl.withMotifs)){ |
|
544 |
- stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source") |
|
545 |
- } |
|
546 |
- tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE) |
|
547 |
- motif.ids <- tbl.withMotifs[, "shortMotif"] |
|
548 |
- geneSymbols <- lapply(motif.ids, function(id) |
|
549 |
- paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";")) |
|
550 |
- geneSymbols <- unlist(geneSymbols) |
|
551 |
- pubmedIds <- lapply(motif.ids, function(id) |
|
552 |
- unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)])) |
|
553 |
- pubmedIds <- as.character(pubmedIds) |
|
554 |
- pubmedIds <- gsub("integer(0)", "", pubmedIds, fixed=TRUE) |
|
555 |
- tbl.new <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE) |
|
556 |
- tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
557 |
- tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
558 |
- tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
559 |
- |
|
560 |
- if(expand.rows){ |
|
561 |
- rows.with.na <- which(is.na(tbl.out$geneSymbol)) |
|
562 |
- rows.with.geneSymbol <- setdiff(1:nrow(tbl.out), rows.with.na) |
|
563 |
- tbl.asIs <- tbl.out[rows.with.na,] |
|
564 |
- tbl.toExpand <- tbl.out[rows.with.geneSymbol,] |
|
565 |
- geneSymbols.split <- strsplit(tbl.toExpand$geneSymbol, ";") |
|
566 |
- counts <- unlist(lapply(geneSymbols.split, length)) |
|
567 |
- geneSymbols.split.vec <- unlist(geneSymbols.split) |
|
568 |
- tbl.expanded <- splitstackshape::expandRows(tbl.toExpand, counts, count.is.col=FALSE, drop=FALSE) |
|
569 |
- stopifnot(length(geneSymbols.split.vec) == nrow(tbl.expanded)) |
|
570 |
- tbl.expanded$geneSymbol <- geneSymbols.split.vec |
|
571 |
- tbl.out <- rbind(tbl.expanded, tbl.asIs) |
|
572 |
- } |
|
573 |
- } # indirect |
|
574 |
- tbl.out |
|
575 |
- }) |
|
531 |
+ function(object, tbl.withMotifs, source, expand.rows){ |
|
532 |
+ stopifnot("motifName" %in% colnames(tbl.withMotifs)) |
|
533 |
+ tbl.tf <- motifToGene(object, tbl.withMotifs$motifName, source) |
|
534 |
+ merge(tbl.withMotifs, tbl.tf, by.x="motifName", by.y="motif", all.x=TRUE) |
|
535 |
+ }) |
|
576 | 536 |
|
577 | 537 |
#------------------------------------------------------------------------------- |
538 |
+# setMethod('associateTranscriptionFactors', 'MotifList', |
|
539 |
+# |
|
540 |
+# function(object, tbl.withMotifs, source, expand.rows){ |
|
541 |
+# source <- tolower(source) |
|
542 |
+# stopifnot(source %in% c("motifdb", "tfclass")) |
|
543 |
+# tbl.out <- data.frame() |
|
544 |
+# if(source %in% c("motifdb")){ |
|
545 |
+# # lookup up in the object metadata, expect one TF geneSymbol per matrix name |
|
546 |
+# pfm.ids <- tbl.withMotifs[, "motifName"] |
|
547 |
+# matched.rows <- match(pfm.ids, names(as.list(object))) |
|
548 |
+# #if(length(matched.rows) == nrow(tbl.withMotifs)) { |
|
549 |
+# tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")] |
|
550 |
+# tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
551 |
+# tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
552 |
+# tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
553 |
+# } # direct |
|
554 |
+# if(source %in% c("tfclass")){ |
|
555 |
+# if(! "shortMotif" %in% colnames(tbl.withMotifs)){ |
|
556 |
+# stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source") |
|
557 |
+# } |
|
558 |
+# tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE) |
|
559 |
+# motif.ids <- tbl.withMotifs[, "shortMotif"] |
|
560 |
+# geneSymbols <- lapply(motif.ids, function(id) |
|
561 |
+# paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";")) |
|
562 |
+# geneSymbols <- unlist(geneSymbols) |
|
563 |
+# pubmedIds <- lapply(motif.ids, function(id) |
|
564 |
+# unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)])) |
|
565 |
+# pubmedIds <- as.character(pubmedIds) |
|
566 |
+# pubmedIds <- gsub("integer(0)", "", pubmedIds, fixed=TRUE) |
|
567 |
+# tbl.new <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE) |
|
568 |
+# tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA |
|
569 |
+# tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA |
|
570 |
+# tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new)) |
|
571 |
+# |
|
572 |
+# if(expand.rows){ |
|
573 |
+# rows.with.na <- which(is.na(tbl.out$geneSymbol)) |
|
574 |
+# rows.with.geneSymbol <- setdiff(1:nrow(tbl.out), rows.with.na) |
|
575 |
+# tbl.asIs <- tbl.out[rows.with.na,] |
|
576 |
+# tbl.toExpand <- tbl.out[rows.with.geneSymbol,] |
|
577 |
+# geneSymbols.split <- strsplit(tbl.toExpand$geneSymbol, ";") |
|
578 |
+# counts <- unlist(lapply(geneSymbols.split, length)) |
|
579 |
+# geneSymbols.split.vec <- unlist(geneSymbols.split) |
|
580 |
+# tbl.expanded <- splitstackshape::expandRows(tbl.toExpand, counts, count.is.col=FALSE, drop=FALSE) |
|
581 |
+# stopifnot(length(geneSymbols.split.vec) == nrow(tbl.expanded)) |
|
582 |
+# tbl.expanded$geneSymbol <- geneSymbols.split.vec |
|
583 |
+# tbl.out <- rbind(tbl.expanded, tbl.asIs) |
|
584 |
+# } |
|
585 |
+# } # indirect |
|
586 |
+# tbl.out |
|
587 |
+# }) |
|
588 |
+# |
|
589 |
+#------------------------------------------------------------------------------- |
... | ... |
@@ -896,12 +896,12 @@ test.motifToGene <- function() |
896 | 896 |
|
897 | 897 |
# MotifDb mode uses the MotifDb metadata "providerId", |
898 | 898 |
tbl.mdb <- motifToGene(MotifDb, motifs, source="MotifDb") |
899 |
- checkEquals(dim(tbl.mdb), c(4, 5)) |
|
900 |
- expected <- sort(c("MA0592.2", "MA0592.2", "ELF1.SwissRegulon", "UP00022")) |
|
899 |
+ checkEquals(dim(tbl.mdb), c(3, 5)) |
|
900 |
+ expected <- sort(c("MA0592.2", "ELF1.SwissRegulon", "UP00022")) |
|
901 | 901 |
actual <- sort(tbl.mdb$motif) |
902 | 902 |
checkEquals(actual, expected) |
903 |
- checkEquals(sort(tbl.mdb$geneSymbol), sort(c("ELF1", "Esrra", "Esrra", "Zfp740"))) |
|
904 |
- checkEquals(tbl.mdb$source, rep("MotifDb", 4)) |
|
903 |
+ checkEquals(sort(tbl.mdb$geneSymbol), sort(c("ELF1", "Esrra", "Zfp740"))) |
|
904 |
+ checkEquals(tbl.mdb$source, rep("MotifDb", 3)) |
|
905 | 905 |
|
906 | 906 |
# TFClass mode uses TF family classifcation |
907 | 907 |
tbl.tfClass <- motifToGene(MotifDb, motifs, source="TFClass") |
... | ... |
@@ -948,7 +948,10 @@ test.motifToGene <- function() |
948 | 948 |
checkEquals(sort(unique(tbl$motif)), c("Hsapiens-jaspar2016-TFAP2A-MA0003.3", "MA0872.1")) |
949 | 949 |
checkEquals(sort(unique(tbl$geneSymbol)), c("TFAP2A", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E")) |
950 | 950 |
|
951 |
- tbl <- motifToGene(MotifDb, motifs, source="TFClass") |
|
951 |
+ tbl <- motifToGene(MotifDb, motifs, source=c("MotifDb", "TFClass")) |
|
952 |
+ checkTrue(all(motifs %in% tbl$motif)) |
|
953 |
+ checkEquals(sort(unique(tbl$geneSymbol)), |
|
954 |
+ c("AR", "RUNX1", "TFAP2A", "TFAP2A(var.3)", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E")) |
|
952 | 955 |
|
953 | 956 |
} # test.motifToGene |
954 | 957 |
#------------------------------------------------------------------------------------------------------------------------ |
... | ... |
@@ -958,66 +961,80 @@ test.associateTranscriptionFactors <- function() |
958 | 961 |
|
959 | 962 |
mdb <- MotifDb |
960 | 963 |
pfms <- query(mdb, andStrings=c("sapiens", "jaspar2016")) |
961 |
- # query(mdb, "jaspar2016", "sapiens") |
|
962 | 964 |
|
965 |
+ # query(mdb, "jaspar2016", "sapiens") |
|
963 | 966 |
# first check motifs with MotifDb-style long names, using MotifDb lookup, in the |
964 | 967 |
# metadata of MotifDb: |
965 | 968 |
# "Hsapiens-jaspar2016-RUNX1-MA0002.1" "Hsapiens-jaspar2016-TFAP2A-MA0003.1" |
966 | 969 |
|
967 |
- motif.names <- names(pfms[1:5]) |
|
968 |
- tbl <- data.frame(motifName=motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
969 |
- tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="MotifDb", expand.rows=FALSE) |
|
970 |
- checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
971 |
- checkTrue(all(c("geneSymbol", "pubmedID") %in% colnames(tbl.anno))) |
|
972 |
- checkEquals(sort(tbl.anno$geneSymbol), sort(c("RUNX1", "TFAP2A", "TFAP2A", "TFAP2A", "AR"))) |
|
973 |
- |
|
974 |
- # now add in a bogus motif name, one for which there cannot possibly be a TF |
|
975 |
- |
|
976 |
- motif.names[3] <- "bogus" |
|
977 |
- tbl <- data.frame(motifName=motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
978 |
- tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="MotifDb", expand.rows=FALSE) |
|
979 |
- checkTrue(is.na(tbl.anno$geneSymbol[3])) |
|
980 |
- checkTrue(is.na(tbl.anno$pubmedID[3])) |
|
981 |
- |
|
982 |
- # now check motifs with short, traditional names, in "TFClass" mode, which uses |
|
983 |
- # the tfFamily |
|
984 |
- # "MA0002.1" "MA0003.1" "MA0003.2" "MA0003.3" "MA0007.2" |
|
985 |
- |
|
986 |
- motif.names <- names(pfms[1:5]) |
|
987 |
- short.motif.names <- unlist(lapply(strsplit(motif.names, "-"), function(tokens) return(tokens[length(tokens)]))) |
|
988 |
- tbl <- data.frame(motifName=motif.names, shortMotif=short.motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
989 |
- |
|
990 |
- tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=FALSE) |
|
991 |
- checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
992 |
- |
|
993 |
- # TFClass only annotates MA0003.3, none of the others |
|
994 |
- checkTrue(all(is.na(tbl.anno$geneSymbol[-4]))) |
|
995 |
- checkTrue(all(is.na(tbl.anno$pubmedID[-4]))) |
|
996 |
- checkEquals(tbl.anno$geneSymbol[4], "TFAP2A;TFAP2B;TFAP2C;TFAP2D;TFAP2E") |
|
997 |
- checkEquals(tbl.anno$pubmedID[4], "23180794") |
|
998 |
- |
|
999 |
- # now ask for expandsion of the semicolon separated list |
|
1000 |
- tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=TRUE) |
|
1001 |
- checkEquals(dim(tbl.anno), c(nrow(tbl) + 4, ncol(tbl) + 2)) |
|
1002 |
- checkTrue(all(c("TFAP2A", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E") %in% tbl.anno$geneSymbol)) |
|
1003 |
- |
|
1004 |
- # now add in a bogus motif name, one for which there cannot possibly be a TF |
|
1005 |
- |
|
1006 |
- motif.names <- names(pfms[1:5]) |
|
1007 |
- short.motif.names <- unlist(lapply(strsplit(motif.names, "-"), function(tokens) return(tokens[length(tokens)]))) |
|
1008 |
- short.motif.names[4] <- "bogus" |
|
1009 |
- tbl <- data.frame(shortMotif=short.motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
1010 |
- |
|
1011 |
- tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=FALSE) |
|
1012 |
- checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
1013 |
- # after adding bogus to the only mapped motif name, all geneSymbol and pubmedID values should be NA |
|
1014 |
- checkTrue(all(is.na(tbl.anno$geneSymbol))) |
|
1015 |
- checkTrue(all(is.na(tbl.anno$pubmedID))) |
|
1016 |
- |
|
1017 |
- # now make sure that the absence of the TFClass-specific "shortMotif" field is detected |
|
1018 |
- motif.names <- names(pfms[1:5]) |
|
1019 |
- tbl <- data.frame(motifName=motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
1020 |
- checkException(tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=FALSE), silent=TRUE) |
|
970 |
+ motif.names <- c(names(pfms[1:5]), "MA0872.1", "hocus.pocus") |
|
971 |
+ tbl <- data.frame(motifName=motif.names, score=runif(7), stringsAsFactors=FALSE) |
|
972 |
+ |
|
973 |
+ tbl.anno.mdb <- associateTranscriptionFactors(mdb, tbl, source="MotifDb", expand.rows=TRUE) |
|
974 |
+ checkEquals(nrow(tbl), nrow(tbl.anno.mdb)) |
|
975 |
+ checkTrue(is.na(tbl.anno.mdb$geneSymbol[grep("hocus.pocus", tbl.anno.mdb$motifName)])) |
|
976 |
+ checkTrue(all(c("AR", "RUNX1", "TFAP2A", "TFAP2A", "TFAP2A", "TFAP2A(var.3)") %in% tbl.anno.mdb$geneSymbol)) |
|
977 |
+ |
|
978 |
+ tbl.anno.tfc <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=TRUE) |
|
979 |
+ checkTrue(nrow(tbl) < nrow(tbl.anno.tfc)) |
|
980 |
+ checkEquals(sort(unique(tbl.anno.tfc$geneSymbol)), c("TFAP2A", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E")) |
|
981 |
+ |
|
982 |
+ tbl.anno.both <- associateTranscriptionFactors(mdb, tbl, source=c("MotifDb", "TFClass"), expand.rows=TRUE) |
|
983 |
+ checkEquals(length(grep("MotifDb", tbl.anno.both$source)), 6) |
|
984 |
+ checkEquals(length(grep("TFClass", tbl.anno.both$source)), 10) |
|
985 |
+ |
|
986 |
+ # checkEquals(dim(tbl.anno.mdb), c(nrow(tbl), ncol(tbl) + 4)) |
|
987 |
+ # checkTrue(all(c("geneSymbol", "pubmedID") %in% colnames(tbl.anno.mdb))) |
|
988 |
+ # checkEquals(sort(tbl.anno.mdb$geneSymbol), |
|
989 |
+ # sort(c("AR", "RUNX1", "TFAP2A", "TFAP2A", "TFAP2A", "TFAP2A(var.3)"))) |
|
990 |
+ # |
|
991 |
+ # # now add in a bogus motif name, one for which there cannot possibly be a TF |
|
992 |
+ # |
|
993 |
+ # motif.names[3] <- "bogus" |
|
994 |
+ # tbl <- data.frame(motifName=motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
995 |
+ # tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="MotifDb", expand.rows=FALSE) |
|
996 |
+ # checkTrue(is.na(tbl.anno$geneSymbol[3])) |
|
997 |
+ # checkTrue(is.na(tbl.anno$pubmedID[3])) |
|
998 |
+ # |
|
999 |
+ # # now check motifs with short, traditional names, in "TFClass" mode, which uses |
|
1000 |
+ # # the tfFamily |
|
1001 |
+ # # "MA0002.1" "MA0003.1" "MA0003.2" "MA0003.3" "MA0007.2" |
|
1002 |
+ # |
|
1003 |
+ # motif.names <- names(pfms[1:5]) |
|
1004 |
+ # short.motif.names <- unlist(lapply(strsplit(motif.names, "-"), function(tokens) return(tokens[length(tokens)]))) |
|
1005 |
+ # tbl <- data.frame(motifName=motif.names, shortMotif=short.motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
1006 |
+ # |
|
1007 |
+ # tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=FALSE) |
|
1008 |
+ # checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
1009 |
+ # |
|
1010 |
+ # # TFClass only annotates MA0003.3, none of the others |
|
1011 |
+ # checkTrue(all(is.na(tbl.anno$geneSymbol[-4]))) |
|
1012 |
+ # checkTrue(all(is.na(tbl.anno$pubmedID[-4]))) |
|
1013 |
+ # checkEquals(tbl.anno$geneSymbol[4], "TFAP2A;TFAP2B;TFAP2C;TFAP2D;TFAP2E") |
|
1014 |
+ # checkEquals(tbl.anno$pubmedID[4], "23180794") |
|
1015 |
+ # |
|
1016 |
+ # # now ask for expandsion of the semicolon separated list |
|
1017 |
+ # tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=TRUE) |
|
1018 |
+ # checkEquals(dim(tbl.anno), c(nrow(tbl) + 4, ncol(tbl) + 2)) |
|
1019 |
+ # checkTrue(all(c("TFAP2A", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E") %in% tbl.anno$geneSymbol)) |
|
1020 |
+ # |
|
1021 |
+ # # now add in a bogus motif name, one for which there cannot possibly be a TF |
|
1022 |
+ # |
|
1023 |
+ # motif.names <- names(pfms[1:5]) |
|
1024 |
+ # short.motif.names <- unlist(lapply(strsplit(motif.names, "-"), function(tokens) return(tokens[length(tokens)]))) |
|
1025 |
+ # short.motif.names[4] <- "bogus" |
|
1026 |
+ # tbl <- data.frame(shortMotif=short.motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
1027 |
+ # |
|
1028 |
+ # tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=FALSE) |
|
1029 |
+ # checkEquals(dim(tbl.anno), c(nrow(tbl), ncol(tbl) + 2)) |
|
1030 |
+ # # after adding bogus to the only mapped motif name, all geneSymbol and pubmedID values should be NA |
|
1031 |
+ # checkTrue(all(is.na(tbl.anno$geneSymbol))) |
|
1032 |
+ # checkTrue(all(is.na(tbl.anno$pubmedID))) |
|
1033 |
+ # |
|
1034 |
+ # # now make sure that the absence of the TFClass-specific "shortMotif" field is detected |
|
1035 |
+ # motif.names <- names(pfms[1:5]) |
|
1036 |
+ # tbl <- data.frame(motifName=motif.names, score=runif(5), stringsAsFactors=FALSE) |
|
1037 |
+ # checkException(tbl.anno <- associateTranscriptionFactors(mdb, tbl, source="TFClass", expand.rows=FALSE), silent=TRUE) |
|
1021 | 1038 |
|
1022 | 1039 |
# now some motif names |
1023 | 1040 |
} # test.associateTranscriptionFactors |