git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MotifDb@75491 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -35,7 +35,7 @@ run.tests = function () |
35 | 35 |
test.export_memeFormatToFile_run_tomtom () |
36 | 36 |
test.run_MotIV () |
37 | 37 |
test.MotIV.toTable () |
38 |
- |
|
38 |
+ test.flyFactorGeneSymbols() |
|
39 | 39 |
|
40 | 40 |
} # run.tests |
41 | 41 |
#------------------------------------------------------------------------------------------------------------------------ |
... | ... |
@@ -644,5 +644,47 @@ pwmMatch.toTable = function (motifMatch) { |
644 | 644 |
} |
645 | 645 |
names(df.list) <- names(motifMatch) |
646 | 646 |
return (df.list) |
647 |
-} |
|
648 |
-#------------------------------------------------------------------------------------------------------------------------ |
|
647 |
+ |
|
648 |
+} # pwmMatch.toTable |
|
649 |
+#------------------------------------------------------------------------------ |
|
650 |
+# Robert Stojnic reports incorrect gene symbols for matrices obtained from |
|
651 |
+# flyFactorSurvey. |
|
652 |
+# the solution was to abandon the original strategy of extracting the |
|
653 |
+# symbol from the matrix (and file) name. |
|
654 |
+# now, the flybase importer ("inst/scripts/import/flyFactorSurvey/import.R") |
|
655 |
+# uses FBgn id (which can be reliably extracted) and uses independent |
|
656 |
+# data sources to learn the gene symbol. |
|
657 |
+# |
|
658 |
+# robert's email: |
|
659 |
+# I'm working on using MotifDb motifs in my PWMEnrich package and I |
|
660 |
+# have noticed that there is a slight problem with gene symbols for |
|
661 |
+# Drosophila. In particular, the gene symbols do not always correspond |
|
662 |
+# to the gene ID and are frequently mis-capitalized. In Drosophila z |
|
663 |
+# and Z are two different genes and capitalization does matter if |
|
664 |
+# someone is to use the gene symbols. Also, in some cases the symbols |
|
665 |
+# are missing hyphens or parenthesis. I have used the gene IDs and the |
|
666 |
+# Flybase annotation database to set the correct gene symbols for |
|
667 |
+# Drosophila, please find attached the result of my re-annotation. |
|
668 |
+# |
|
669 |
+# looking at his correctedMotifDbDmel.csv |
|
670 |
+# |
|
671 |
+# head(read.table("correctedMotifDbDmel.csv", sep=",", header=TRUE, stringsAsFactors=FALSE)) |
|
672 |
+# providerName oldGeneSymbol newGeneSymbol |
|
673 |
+# 1 ab_SANGER_10_FBgn0259750 Ab ab |
|
674 |
+# 2 ab_SOLEXA_5_FBgn0259750 Ab ab |
|
675 |
+# 3 Abd-A_FlyReg_FBgn0000014 Abd-a abd-A |
|
676 |
+# 4 Abd-B_FlyReg_FBgn0000015 Abd-b Abd-B |
|
677 |
+# 5 AbdA_Cell_FBgn0000014 Abda abd-A |
|
678 |
+# 6 AbdA_SOLEXA_FBgn0000014 Abda abd-A |
|
679 |
+# |
|
680 |
+test.flyFactorGeneSymbols <- function() |
|
681 |
+{ |
|
682 |
+ print ("--- test.flyFactorGeneSymbols") |
|
683 |
+ mdb = MotifDb |
|
684 |
+ checkEquals(mcols(query(mdb, "FBgn0259750"))$geneSymbol, c("ab", "ab")) |
|
685 |
+ checkEquals(mcols(query(mdb, "FBgn0000014"))$geneSymbol, rep("abd-A", 3)) |
|
686 |
+ checkEquals(mcols(query(mdb, "FBgn0000015"))$geneSymbol, rep("Abd-B", 3)) |
|
687 |
+ |
|
688 |
+} # test.flyFactorGeneSymbols |
|
689 |
+#------------------------------------------------------------------------------- |
|
690 |
+ |