Browse code

big speedup of associateTranscriptionFactors method, exploiting duplicate motifs

paul-shannon authored on 18/10/2018 14:18:37
Showing 1 changed files
... ...
@@ -26,14 +26,17 @@ for each row in the data.frame.
26 26
 
27 27
 }
28 28
 \usage{
29
-\S4method{associateTranscriptionFactors}{MotifList}(object, tbl.withMotifs, source, expand.rows)
29
+\S4method{associateTranscriptionFactors}{MotifList}(object, tbl.withMotifs, source, expand.rows, motifColumnName="motifName")
30 30
 }
31 31
 \arguments{
32 32
   \item{object}{a \code{MotifList} object.}
33 33
   \item{tbl.withMotifs}{a \code{data.frame}}
34
-  \item{source}{a \code{character} string, either 'MotifDb' or "TFClass' (case insensitive)}
35
-    \item{expand.rows}{a \code{logical} value, recommended especially for the TFClass source,
36
-      in which sometimes many TFs are mapped to the same motif}
34
+  \item{source}{a \code{character} string, either "MotifDb" or "TFClass" (case insensitive)}
35
+  \item{expand.rows}{a \code{logical} value, recommended especially for the TFClass source,
36
+    in which sometimes many TFs are mapped to the same motif}
37
+  \item{motifColumnName}{a \code{character} string identifying the
38
+    column in tbl.withMotifs which contains the motifs to be associated
39
+    with transcription factors}
37 40
 }
38 41
 
39 42
 \value{
Browse code

documentation for geneToMotif, motifToGene, associateT...

paul-shannon authored on 04/10/2017 19:49:25
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,77 @@
1
+\name{associateTranscriptionFactors}
2
+\alias{associateTranscriptionFactors,MotifList-method}
3
+\alias{associateTranscriptionFactors}
4
+\title{associateTranscriptionFactors}
5
+\description{
6
+In the analysis or exploration of gene regulatory networks, one
7
+often creates a data.frame of possible genomic regulatory sites, genomic
8
+locations where a TF binding motif matches some DNA sequence.  A common
9
+next step is to associate each of these motifs with its related
10
+transcription factor(s).  We provide two sources for those
11
+relationships.  When you specify the "MotifDb" source, we return
12
+the motif/TF relationships provided by each of the constituent
13
+public MotifDb sources.  When you specify the "TFClass" source,
14
+transcription factor family memberships (described in
15
+\url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4383905/}) are -
16
+sometimes expansively - provided for each motif you supply.
17
+
18
+This method expects a different column of the incoming data.frame
19
+for each source.  The MotifDb source uses the "motifName" column of
20
+the incoming data.frame.  The TFClass source expects a "shortMotif"
21
+column in the incoming data.frame.
22
+
23
+A new column, "geneSymbol", is added to the incoming data.frame.  This
24
+new column identifies the transcription factor associated with the motif
25
+for each row in the data.frame.
26
+
27
+}
28
+\usage{
29
+\S4method{associateTranscriptionFactors}{MotifList}(object, tbl.withMotifs, source, expand.rows)
30
+}
31
+\arguments{
32
+  \item{object}{a \code{MotifList} object.}
33
+  \item{tbl.withMotifs}{a \code{data.frame}}
34
+  \item{source}{a \code{character} string, either "MotifDb" or "TFClass" (case insensitive)}
35
+    \item{expand.rows}{a \code{logical} value, recommended especially for the TFClass source,
36
+      in which sometimes many TFs are mapped to the same motif}
37
+}
38
+
39
+\value{
40
+  A data.frame with one column ("geneSymbol") and possibly multiple rows added
41
+}
42
+\author{Paul Shannon}
43
+
44
+\examples{
45
+  tbl.tfClassExample <- data.frame(motifName=c("MA0006.1", "MA0042.2", "MA0043.2"),
46
+                                   chrom=c("chr1", "chr1", "chr1"),
47
+                                   start=c(1000005, 1000085, 1000105),
48
+                                   start=c(1000013, 1000092, 1000123),
49
+                                   score=c(0.85, 0.92, 0.98),
50
+                                   stringsAsFactors=FALSE)
51
+      # here we illustrate how to add a column with the required name:
52
+  tbl.tfClassExample$shortMotif <- tbl.tfClassExample$motifName
53
+  tbl.out <- associateTranscriptionFactors(MotifDb, tbl.tfClassExample, source="TFClass",
54
+                                           expand.rows=TRUE)
55
+  dim(tbl.out)   #    MANY tfs mapped, mostly FOX family genes
56
+  tbl.motifDbExample <- data.frame(motifName=c("Mmusculus-jaspar2016-Ahr::Arnt-MA0006.1",
57
+                                               "Hsapiens-jaspar2016-FOXI1-MA0042.2",
58
+                                               "Hsapiens-jaspar2016-HLF-MA0043.2"),
59
+                                    chrom=c("chr1", "chr1", "chr1"),
60
+                                    start=c(1000005, 1000085, 1000105),
61
+                                    start=c(1000013, 1000092, 1000123),
62
+                                    score=c(0.85, 0.92, 0.98),
63
+                                    stringsAsFactors=FALSE)
64
+
65
+   tbl.out <- associateTranscriptionFactors(MotifDb, tbl.motifDbExample, source="MotifDb",
66
+                                            expand.rows=TRUE)
67
+   dim(tbl.out)   # one new column ("geneSymbol"), no new rows
68
+}
69
+\seealso{
70
+  MotifDb,
71
+  geneToMotif,
72
+  motifToGene,
73
+  subset,
74
+  query
75
+}
76
+
77
+\keyword{utilities}