Browse code

documentation for geneToMotif, motifToGene, associateT...

paul-shannon authored on 04/10/2017 19:49:25
Showing 7 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.19.12
4
+Version: 1.19.13
5 5
 Date: 2017-10-04
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
... ...
@@ -352,23 +352,25 @@ setMethod ('motifToGene', 'MotifList',
352 352
      stopifnot(source %in% c("motifdb", "tfclass"))
353 353
      tbl <- data.frame()
354 354
      if(source %in% c("motifdb")){
355
+        providerId <- NULL   # avoid R CMD check note
355 356
         tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs))
356 357
         if(nrow(tbl) == 0)
357 358
            return(data.frame())
358 359
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
359 360
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
360 361
         tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
361
-        tbl$from <- "MotifDb"
362
+        tbl$source <- "MotifDb"
362 363
         }
363 364
      if(source %in% c("tfclass")){
365
+        motif <- NULL
364 366
         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
365 367
         if(nrow(tbl) == 0)
366 368
            return(data.frame())
367 369
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
368 370
         tbl <- tbl[order(tbl$motif),]
369 371
         rownames(tbl) <- NULL
370
-        colnames(tbl) <- c("motif", "gene", "pubmedID")
371
-        tbl$from <- "TFClass"
372
+        colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
373
+        tbl$source <- "TFClass"
372 374
         }
373 375
      tbl
374 376
      })
... ...
@@ -380,8 +382,8 @@ setMethod ('geneToMotif', 'MotifList',
380 382
    function (object, geneSymbols, source) {
381 383
      source <- tolower(source)
382 384
      stopifnot(source %in% c("motifdb", "tfclass"))
383
-     #browser()
384 385
      extract.mdb <- function(gene){
386
+        geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable"
385 387
         tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene))
386 388
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
387 389
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
... ...
@@ -390,16 +392,17 @@ setMethod ('geneToMotif', 'MotifList',
390 392
      if(source %in% c("motifdb")){
391 393
         tbls <- lapply(geneSymbols, extract.mdb)
392 394
         result <- do.call(rbind, tbls)
393
-        result$from <- "MotifDb"
395
+        result$source <- "MotifDb"
394 396
         }
395 397
      if(source %in% c("tfclass")){
396 398
         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols)
399
+        tf.gene <- NULL; motif <- NULL  # workaround R CMD CHECK "no visible binding ..." bogus error
397 400
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
398 401
         tbl <- tbl[order(tbl$tf.gene),]
399 402
         rownames(tbl) <- NULL
400
-        colnames(tbl) <- c("motif", "gene", "pubmedID")
401
-        result <- tbl[, c("gene", "motif", "pubmedID")]
402
-        result$from <- "TFClass"
403
+        colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
404
+        result <- tbl[, c("geneSymbol", "motif", "pubmedID")]
405
+        result$source <- "TFClass"
403 406
         }
404 407
      result
405 408
      })
... ...
@@ -759,15 +759,15 @@ test.geneToMotif <- function()
759 759
    genes <- c("FOS", "ATF5", "bogus")
760 760
 
761 761
       # use TFClass family classification
762
-   tbl.i <- geneToMotif(mdb, genes, source="TfClaSS")   # intentional mis-capitalization
763
-   checkEquals(tbl.i$gene,  c("ATF5", "FOS", "FOS"))
764
-   checkEquals(tbl.i$motif,  c("MA0833.1", "MA0099.2", "MA0476.1"))
765
-   checkEquals(tbl.i$from, rep("TFClass", 3))
762
+   tbl.tfClass <- geneToMotif(mdb, genes, source="TfClaSS")   # intentional mis-capitalization
763
+   checkEquals(tbl.tfClass$gene,  c("ATF5", "FOS", "FOS"))
764
+   checkEquals(tbl.tfClass$motif,  c("MA0833.1", "MA0099.2", "MA0476.1"))
765
+   checkEquals(tbl.tfClass$source, rep("TFClass", 3))
766 766
 
767 767
       # MotifDb mode uses the MotifDb metadata, pulled from many sources
768
-   tbl.d <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
769
-   checkEquals(dim(tbl.d), c(12, 6))
770
-   checkEquals(subset(tbl.d, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
768
+   tbl.mdb <- geneToMotif(mdb, genes, source="mOtifdb")     # intentional mis-capitalization
769
+   checkEquals(dim(tbl.mdb), c(12, 6))
770
+   checkEquals(subset(tbl.mdb, dataSource=="jaspar2016" & geneSymbol== "FOS")$motif, "MA0476.1")
771 771
       # no recognizable (i.e., jaspar standard) motif name returned by MotifDb metadata
772 772
       # MotifDb for ATF5
773 773
       # todo: compare the MA0110596_1.02 matrix of cisp_1.02 to jaspar MA0833.1
... ...
@@ -782,20 +782,19 @@ test.motifToGene <- function()
782 782
 
783 783
       # MotifDb mode uses the MotifDb metadata "providerId",
784 784
    tbl.mdb <- motifToGene(MotifDb, motifs, source="MotifDb")
785
-   checkEquals(dim(tbl.d), c(3, 6))
786
-   checkEquals(tbl.d$motif, c("MA0592.2", "ELF1.SwissRegulon", "UP00022"))
787
-   checkEquals(tbl.d$geneSymbol, c("Esrra", "ELF1", "Zfp740"))
788
-   checkEquals(tbl.d$dataSource, c("jaspar2016", "SwissRegulon", "UniPROBE"))
789
-   checkEquals(tbl.d$organism,   c("Mmusculus", "Hsapiens", "Mmusculus"))
790
-   checkEquals(tbl.d$from,       rep("MotifDb", 3))
791
-
785
+   checkEquals(dim(tbl.mdb), c(3, 6))
786
+   checkEquals(tbl.mdb$motif, c("MA0592.2", "ELF1.SwissRegulon", "UP00022"))
787
+   checkEquals(tbl.mdb$geneSymbol, c("Esrra", "ELF1", "Zfp740"))
788
+   checkEquals(tbl.mdb$dataSource, c("jaspar2016", "SwissRegulon", "UniPROBE"))
789
+   checkEquals(tbl.mdb$organism,   c("Mmusculus", "Hsapiens", "Mmusculus"))
790
+   checkEquals(tbl.mdb$source,     rep("MotifDb", 3))
792 791
 
793 792
       # TFClass mode uses TF family classification
794 793
    tbl.tfClass <- motifToGene(MotifDb, motifs, source="TFClass")
795
-   checkEquals(dim(tbl.i), c(9,4))
796
-   checkEquals(tbl.i$motif, rep("MA0592.2", 9))
797
-   checkEquals(sort(tbl.i$gene), c("AR", "ESR1", "ESR2", "ESRRA", "ESRRB", "ESRRG", "NR3C1", "NR3C2", "PGR"))
798
-   checkEquals(tbl.i$from,       rep("TFClass", 9))
794
+   checkEquals(dim(tbl.tfClass), c(9,4))
795
+   checkEquals(tbl.tfClass$motif, rep("MA0592.2", 9))
796
+   checkEquals(sort(tbl.tfClass$gene), c("AR", "ESR1", "ESR2", "ESRRA", "ESRRB", "ESRRG", "NR3C1", "NR3C2", "PGR"))
797
+   checkEquals(tbl.tfClass$source,       rep("TFClass", 9))
799 798
 
800 799
      # test motifs with regex characters in them, or other characters neither letter nor number
801 800
    motifs <- sort(c("DMAP1_NCOR{1,2}_SMARC.p2", "ELK1,4_GABP{A,B1}.p3", "SNAI1..3.p2", "EWSR1-FLI1.p2", "ETS1,2.p2"))
802 801
new file mode 100644
... ...
@@ -0,0 +1,77 @@
1
+\name{associateTranscriptionFactors}
2
+\alias{associateTranscriptionFactors,MotifList-method}
3
+\alias{associateTranscriptionFactors}
4
+\title{associateTranscriptionFactors}
5
+\description{
6
+In the analysis of, or exploration of, gene regulatory networks, one
7
+often creates a data.frame of possible genomic regulatory sites, genomic
8
+locations where a TF binding motif matches some DNA sequence.  A common
9
+next step is to associate each of these motifs with its related
10
+transcription factor/s.     We provide two sources for those
11
+relationships.  When you specify the "MotifDb" source, we return
12
+the motif/TF relationships provided by each of the constituent
13
+public MotifDb sources.  When you specify the "TFClass" source,
14
+transcription factor family memberships (described in
15
+\url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4383905/}) are -
16
+sometimes expansively - provided for each motif you supply.
17
+
18
+This method uses, and therefore expects, different columns of the incoming data.frame
19
+to be used with each source.  The MotifDb source uses the "motifName" column of
20
+the incoming data.frame.  The TFClass source expects a "shortMotif"
21
+column in the incoming data.frame.
22
+
23
+A new column, "geneSymbol", is added to the incoming data.frame.  This
24
+new column identifies the transcription factor associated with the motif
25
+for each row in the data.frame.
26
+
27
+}
28
+\usage{
29
+\S4method{associateTranscriptionFactors}{MotifList}(object, tbl.withMotifs, source, expand.rows)
30
+}
31
+\arguments{
32
+  \item{object}{a \code{MotifList} object.}
33
+  \item{tbl.withMotifs}{a \code{data.frame}}
34
+  \item{source}{a \code{character} string, either "MotifDb" or "TFClass" (case insensitive)}
35
+    \item{expand.rows}{a \code{logical} value, recommended especially for the TFClass source,
36
+      in which sometimes many TFs are mapped to the same motif}
37
+}
38
+
39
+\value{
40
+  A data.frame with one column ("geneSymbol") and possibly multiple rows added
41
+}
42
+\author{Paul Shannon}
43
+
44
+\examples{
45
+  tbl.tfClassExample <- data.frame(motifName=c("MA0006.1", "MA0042.2", "MA0043.2"),
46
+                                   chrom=c("chr1", "chr1", "chr1"),
47
+                                   start=c(1000005, 1000085, 1000105),
48
+                                   end=c(1000013, 1000092, 1000123),
49
+                                   score=c(0.85, 0.92, 0.98),
50
+                                   stringsAsFactors=FALSE)
51
+      # here we illustrate how to add a column with the required name:
52
+  tbl.tfClassExample$shortMotif <- tbl.tfClassExample$motifName
53
+  tbl.out <- associateTranscriptionFactors(MotifDb, tbl.tfClassExample, source="TFClass",
54
+                                           expand.rows=TRUE)
55
+  dim(tbl.out)   #    MANY tfs mapped, mostly FOX family genes
56
+  tbl.motifDbExample <- data.frame(motifName=c("Mmusculus-jaspar2016-Ahr::Arnt-MA0006.1",
57
+                                               "Hsapiens-jaspar2016-FOXI1-MA0042.2",
58
+                                               "Hsapiens-jaspar2016-HLF-MA0043.2"),
59
+                                    chrom=c("chr1", "chr1", "chr1"),
60
+                                    start=c(1000005, 1000085, 1000105),
61
+                                    end=c(1000013, 1000092, 1000123),
62
+                                    score=c(0.85, 0.92, 0.98),
63
+                                    stringsAsFactors=FALSE)
64
+
65
+   tbl.out <- associateTranscriptionFactors(MotifDb, tbl.motifDbExample, source="MotifDb",
66
+                                            expand.rows=TRUE)
67
+   dim(tbl.out)   # one new column ("geneSymbol"), no new rows
68
+}
69
+\seealso{
70
+  MotifDb,
71
+  geneToMotif,
72
+  motifToGene,
73
+  subset,
74
+  query
75
+}
76
+
77
+\keyword{utilities}
0 78
new file mode 100644
... ...
@@ -0,0 +1,44 @@
1
+\name{geneToMotif}
2
+\alias{geneToMotif,MotifList-method}
3
+\alias{geneToMotif}
4
+\title{geneToMotif}
5
+\description{
6
+Using either of our two sources ("MotifDb" or "TFClass") retrieve the
7
+names of the transcription factor binding motifs associated with the
8
+gene symbol for each transcription factor.  Slightly different
9
+information is returned in each case but the columns "geneSymbol",
10
+"motif", "pubmedID", "source" are returned by both sources.
11
+The TFClass source is described here:
12
+\url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4383905/}.
13
+The MotifDb source is in fact the usually 1:1 gene/motif mapping
14
+provided by each of the data sources upon which MotifDb is built.
15
+}
16
+\usage{
17
+\S4method{geneToMotif}{MotifList}(object, geneSymbols, source)
18
+}
19
+\arguments{
20
+  \item{object}{a \code{MotifList} object.}
21
+  \item{geneSymbols}{a \code{character} vector of one or more gene symbols}
22
+  \item{source}{a \code{character} string, either "MotifDb" or "TFClass" (case insensitive)}
23
+}
24
+
25
+\value{
26
+  A data.frame with these columns: geneSymbol, motif, pubmedID, source.
27
+  The MotifDb source also includes dataSource and organism.
28
+}
29
+\author{Paul Shannon}
30
+
31
+\examples{
32
+  genes <- c("ATF5", "FOS")
33
+  geneToMotif(MotifDb, genes, source="TFClass")
34
+  geneToMotif(MotifDb, genes, source="MotifDb")
35
+}
36
+\seealso{
37
+  MotifDb,
38
+  motifToGene,
39
+  associateTranscriptionFactors,
40
+  subset,
41
+  query
42
+}
43
+
44
+\keyword{utilities}
0 45
new file mode 100644
... ...
@@ -0,0 +1,44 @@
1
+\name{motifToGene}
2
+\alias{motifToGene,MotifList-method}
3
+\alias{motifToGene}
4
+\title{motifToGene}
5
+\description{
6
+Using either of our two sources ("MotifDb" or "TFClass") this method retrieves the
7
+transcription factor (its gene symbol) for each of the supplied motifs.
8
+ Slightly different
9
+information is returned in each case but the columns "geneSymbol",
10
+"motif", "pubmedID", "source" are returned by both.
11
+The TFClass source is described here:
12
+\url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4383905/}.
13
+The MotifDb source is in fact the (typically) 1:1 gene/motif mapping
14
+provided by each of the data sources upon which MotifDb is built.
15
+}
16
+\usage{
17
+\S4method{motifToGene}{MotifList}(object, motifs, source)
18
+}
19
+\arguments{
20
+  \item{object}{a \code{MotifList} object.}
21
+  \item{motifs}{a \code{character} vector of one or more motif names}
22
+  \item{source}{a \code{character} string, either "MotifDb" or "TFClass" (case insensitive)}
23
+}
24
+
25
+\value{
26
+  A data.frame with these columns: geneSymbol, motif, pubmedID, source.
27
+  The MotifDb source also includes dataSource and organism.
28
+}
29
+\author{Paul Shannon}
30
+
31
+\examples{
32
+  motifs <- c("MA0592.2", "ELF1.SwissRegulon", "UP00022")
33
+  motifToGene(MotifDb, motifs, source="TFClass")
34
+  motifToGene(MotifDb, motifs, source="MotifDb")
35
+}
36
+\seealso{
37
+  MotifDb,
38
+  geneToMotif,
39
+  associateTranscriptionFactors,
40
+  subset,
41
+  query
42
+}
43
+
44
+\keyword{utilities}
... ...
@@ -4,7 +4,7 @@
4 4
 \title{query}
5 5
 \description{
6 6
 A very general search tool, returning all matrices whose metadata, in ANY
7
-column, is matched by the query string.  
7
+column, is matched by the query string.
8 8
 }
9 9
 \usage{
10 10
 \S4method{query}{MotifList}(object, queryString, ignore.case=TRUE)
... ...
@@ -13,7 +13,7 @@ column, is matched by the query string.
13 13
   \item{object}{a \code{MotifList} object.}
14 14
   \item{queryString}{a \code{character} string}
15 15
   \item{ignore.case}{a \code{logical} value, default TRUE}
16
-  
16
+
17 17
 }
18 18
 
19 19
 \value{
... ...
@@ -23,9 +23,9 @@ A list of the matrices
23 23
 
24 24
 \examples{
25 25
   mdb <- MotifDb
26
-  matrices.human = query (mdb, 'hsapiens')
27
-  matrices.sox4 = query (mdb, 'sox4')
28
-  uniprobe.sox.matrices = query (query (mdb, 'uniprobe'), '^sox')
26
+  matrices.human <- query(mdb, 'hsapiens')
27
+  matrices.sox4 <- query(mdb, 'sox4')
28
+  uniprobe.sox.matrices <- query(query(mdb, 'uniprobe'), '^sox')
29 29
 }
30 30
 \seealso{
31 31
   MotifDb,