... | ... |
@@ -3,8 +3,7 @@ Version: 2.7.0 |
3 | 3 |
Date: 2014-08-23 |
4 | 4 |
Title: Coordinated Gene Activity in Pattern Sets |
5 | 5 |
Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey, |
6 |
- Genevieve Stein-O'Brien, Michael Considine, John Stansfield, Shawn Sivy, Carlo |
|
7 |
- Colantuoni, Alexander Favorov, Mike Ochs, Elana Fertig |
|
6 |
+ Genevieve Stein-O'Brien, Michael Considine, Maggie Wodicka, John Stansfield, Shawn Sivy, Carlo Colantuoni, Alexander Favorov, Mike Ochs, Elana Fertig |
|
8 | 7 |
Description: Coordinated Gene Activity in Pattern Sets (CoGAPS) |
9 | 8 |
implements a Bayesian MCMC matrix factorization algorithm, |
10 | 9 |
GAPS, and links it to gene set statistic methods to infer biological |
... | ... |
@@ -24,6 +24,8 @@ |
24 | 24 |
#' @param fixedPatterns matrix of fixed values in either A or P matrix |
25 | 25 |
#' @param checkpointInterval time (in seconds) between creating a checkpoint |
26 | 26 |
#' @param checkpointFile name of the checkpoint file |
27 |
+#' @param pumpThreshold type of threshold for pump statistic |
|
28 |
+#' @param nPumpSamples number of samples used in pump statistic |
|
27 | 29 |
#' @param ... keeps backwards compatibility with arguments from older versions |
28 | 30 |
#' @return list with A and P matrix estimates |
29 | 31 |
#' @importFrom methods new |
... | ... |
@@ -36,7 +38,7 @@ nSnapshots=0, alphaA=0.01, alphaP=0.01, maxGibbmassA=100, maxGibbmassP=100, |
36 | 38 |
seed=-1, messages=TRUE, singleCellRNASeq=FALSE, whichMatrixFixed='N', |
37 | 39 |
fixedPatterns=matrix(0), checkpointInterval=0, |
38 | 40 |
checkpointFile="gaps_checkpoint.out", pumpThreshold="unique", |
39 |
-nPumpSamples=100, ...) |
|
41 |
+nPumpSamples=0, ...) |
|
40 | 42 |
{ |
41 | 43 |
# get v2 arguments |
42 | 44 |
oldArgs <- list(...) |
... | ... |
@@ -94,6 +96,7 @@ nPumpSamples=100, ...) |
94 | 96 |
#' @param D data matrix |
95 | 97 |
#' @param S uncertainty matrix |
96 | 98 |
#' @param path path to checkpoint file |
99 |
+#' @param checkpointFile name for future checkpoints made |
|
97 | 100 |
#' @return list with A and P matrix estimates |
98 | 101 |
#' @export |
99 | 102 |
CoGapsFromCheckpoint <- function(D, S, path, checkpointFile=NA) |
... | ... |
@@ -166,7 +169,7 @@ alphaP=0.01, nMaxP=100000, max_gibbmass_paraP=100.0, seed=-1, messages=TRUE) |
166 | 169 |
#' @examples |
167 | 170 |
#' data(SimpSim) |
168 | 171 |
#' nC <- ncol(SimpSim.D) |
169 |
-#' patterns <- matrix(runif(nC, 0, 1), nrow=1, ncol=nC) |
|
172 |
+#' patterns <- matrix(1:nC/nC, nrow=1, ncol=nC) |
|
170 | 173 |
#' result <- gapsMapRun(SimpSim.D, SimpSim.S, FP=patterns, nFactor=3) |
171 | 174 |
#' @export |
172 | 175 |
gapsMapRun <- function(D, S, FP, ABins=data.frame(), PBins=data.frame(), |
... | ... |
@@ -3,6 +3,7 @@ |
3 | 3 |
#' @details calls the C++ MCMC code and performs Bayesian |
4 | 4 |
#' matrix factorization returning the two matrices that reconstruct |
5 | 5 |
#' the data matrix for whole genome data; |
6 |
+#' @param simulationName name of this simulation |
|
6 | 7 |
#' @param nFactor number of patterns (basis vectors, metagenes), which must be |
7 | 8 |
#' greater than or equal to the number of rows of FP |
8 | 9 |
#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
... | ... |
@@ -12,10 +13,10 @@ |
12 | 13 |
#' @return list of A and P estimates |
13 | 14 |
#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
14 | 15 |
#' @examples |
15 |
-#' # Load the sample data from CoGAPS |
|
16 | 16 |
#' data(SimpSim) |
17 |
-#' # Run GWCoGAPS |
|
18 |
-#' GWCoGAPS(SimpSim.D, SimpSim.S, nFactor=3, nSets=2) |
|
17 |
+#' sim_name <- "example" |
|
18 |
+#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
|
19 |
+#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000) |
|
19 | 20 |
#' @export |
20 | 21 |
GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, ...) |
21 | 22 |
{ |
... | ... |
@@ -31,6 +32,9 @@ GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, ...) |
31 | 32 |
|
32 | 33 |
#' Restart a GWCoGaps Run from Checkpoint |
33 | 34 |
#' |
35 |
+#' @inheritParams GWCoGAPS |
|
36 |
+#' @return list of A and P estimates |
|
37 |
+#' @importFrom utils file_test |
|
34 | 38 |
#' @export |
35 | 39 |
GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA, ...) |
36 | 40 |
{ |
... | ... |
@@ -9,9 +9,7 @@ |
9 | 9 |
#' that an element of Amean must be to get a value of 1 |
10 | 10 |
#' @return plots a heatmap of the A Matrix |
11 | 11 |
#' @examples |
12 |
-#' # Load the sample data from CoGAPS |
|
13 | 12 |
#' data(SimpSim) |
14 |
-#' # Run binaryA with the correct arguments from 'results' |
|
15 | 13 |
#' binaryA(SimpSim.result$Amean, SimpSim.result$Asd, threshold=3) |
16 | 14 |
#' @export |
17 | 15 |
binaryA <-function(Amean, Asd, threshold=3) |
... | ... |
@@ -6,9 +6,7 @@ |
6 | 6 |
#' @param sdMat matrix of standard deviation values |
7 | 7 |
#' @return matrix of z-scores |
8 | 8 |
#' @examples |
9 |
-#' # Load the sample data from CoGAPS |
|
10 | 9 |
#' data(SimpSim) |
11 |
-#' # Run calcZ |
|
12 | 10 |
#' calcZ(SimpSim.result$Amean, SimpSim.result$Asd) |
13 | 11 |
#' @export |
14 | 12 |
calcZ <- function(meanMat, sdMat) |
... | ... |
@@ -5,10 +5,10 @@ |
5 | 5 |
#' @param D data matrix |
6 | 6 |
#' @param S uncertainty matrix |
7 | 7 |
#' @param nSets number of sets to partition the data into |
8 |
-#' @param simulatioName name used to identify files created by this simulation |
|
8 |
+#' @param simulationName name used to identify files created by this simulation |
|
9 | 9 |
#' @return simulationName used to identify saved files |
10 | 10 |
#' @examples |
11 |
-#' data(SimpSim) # Load the sample data from CoGAPS |
|
11 |
+#' data(SimpSim) |
|
12 | 12 |
#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example") |
13 | 13 |
#' @export |
14 | 14 |
createGWCoGAPSSets <- function(D, S, nSets, simulationName) |
... | ... |
@@ -8,9 +8,7 @@ |
8 | 8 |
#' not specified, output goes to screen |
9 | 9 |
#' @return plot |
10 | 10 |
#' @examples |
11 |
-#' # Load the sample data from CoGAPS |
|
12 | 11 |
#' data(SimpSim) |
13 |
-#' # Run plotGAPS with arguments from CoGAPS results list |
|
14 | 12 |
#' plotGAPS(SimpSim.result$Amean, SimpSim.result$Pmean) |
15 | 13 |
#' @export |
16 | 14 |
plotGAPS <- function(A, P, outputPDF="") |
... | ... |
@@ -5,9 +5,7 @@ |
5 | 5 |
#' @param Psd matrix of standard deviation values of P |
6 | 6 |
#' @return plot |
7 | 7 |
#' @examples |
8 |
-#' # Load the sample data from CoGAPS |
|
9 | 8 |
#' data(SimpSim) |
10 |
-#' # Run plotP with arguments from CoGAPS results list |
|
11 | 9 |
#' plotP(SimpSim.result$Pmean, SimpSim.result$Psd) |
12 | 10 |
#' @export |
13 | 11 |
plotP <- function(Pmean, Psd) |
... | ... |
@@ -5,9 +5,7 @@ |
5 | 5 |
#' @param genes an index of the gene or genes of interest |
6 | 6 |
#' @return the D' estimate of a gene or set of genes |
7 | 7 |
#' @examples |
8 |
-#' # Load the sample data from CoGAPS |
|
9 | 8 |
#' data(SimpSim) |
10 |
-#' # Run reconstructGene |
|
11 | 9 |
#' reconstructGene(SimpSim.result$Amean, SimpSim.result$Pmean) |
12 | 10 |
#' @export |
13 | 11 |
reconstructGene<-function(A, P, genes=NA) |
... | ... |
@@ -7,9 +7,7 @@ |
7 | 7 |
#' @param S original standard deviation matrix run through GAPS |
8 | 8 |
#' @return creates a residual plot |
9 | 9 |
#' @examples |
10 |
-#' # Load the sample data from CoGAPS |
|
11 | 10 |
#' data(SimpSim) |
12 |
-#' # Run residuals with the correct arguments |
|
13 | 11 |
#' residuals(SimpSim.result$Amean, SimpSim.result$Pmean, SimpSim.D, SimpSim.S) |
14 | 12 |
#' @export |
15 | 13 |
residuals <- function(AMean_Mat, PMean_Mat, D, S) |
... | ... |
@@ -10,7 +10,7 @@ CoGAPS(D, S, nFactor = 7, nEquil = 1000, nSample = 1000, |
10 | 10 |
singleCellRNASeq = FALSE, whichMatrixFixed = "N", |
11 | 11 |
fixedPatterns = matrix(0), checkpointInterval = 0, |
12 | 12 |
checkpointFile = "gaps_checkpoint.out", pumpThreshold = "unique", |
13 |
- nPumpSamples = 100, ...) |
|
13 |
+ nPumpSamples = 0, ...) |
|
14 | 14 |
} |
15 | 15 |
\arguments{ |
16 | 16 |
\item{D}{data matrix} |
... | ... |
@@ -52,6 +52,10 @@ the fixed patterns} |
52 | 52 |
|
53 | 53 |
\item{checkpointFile}{name of the checkpoint file} |
54 | 54 |
|
55 |
+\item{pumpThreshold}{type of threshold for pump statistic} |
|
56 |
+ |
|
57 |
+\item{nPumpSamples}{number of samples used in pump statistic} |
|
58 |
+ |
|
55 | 59 |
\item{...}{keeps backwards compatibility with arguments from older versions} |
56 | 60 |
} |
57 | 61 |
\value{ |
... | ... |
@@ -7,6 +7,8 @@ |
7 | 7 |
GWCoGAPS(simulationName, nFactor, nCores = NA, cut = NA, minNS = NA, ...) |
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 |
+\item{simulationName}{name of this simulation} |
|
11 |
+ |
|
10 | 12 |
\item{nFactor}{number of patterns (basis vectors, metagenes), which must be |
11 | 13 |
greater than or equal to the number of rows of FP} |
12 | 14 |
|
... | ... |
@@ -30,10 +32,10 @@ matrix factorization returning the two matrices that reconstruct |
30 | 32 |
the data matrix for whole genome data; |
31 | 33 |
} |
32 | 34 |
\examples{ |
33 |
-# Load the sample data from CoGAPS |
|
34 | 35 |
data(SimpSim) |
35 |
-# Run GWCoGAPS |
|
36 |
-GWCoGAPS(SimpSim.D, SimpSim.S, nFactor=3, nSets=2) |
|
36 |
+sim_name <- "example" |
|
37 |
+createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
|
38 |
+result <- GWCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000) |
|
37 | 39 |
} |
38 | 40 |
\seealso{ |
39 | 41 |
\code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
... | ... |
@@ -7,6 +7,20 @@ |
7 | 7 |
GWCoGapsFromCheckpoint(simulationName, nCores = NA, cut = NA, minNS = NA, |
8 | 8 |
...) |
9 | 9 |
} |
10 |
+\arguments{ |
|
11 |
+\item{simulationName}{name of this simulation} |
|
12 |
+ |
|
13 |
+\item{nCores}{number of cores for parallelization. If left to the default NA, nCores = nSets.} |
|
14 |
+ |
|
15 |
+\item{cut}{number of branches at which to cut dendrogram used in patternMatch4Parallel} |
|
16 |
+ |
|
17 |
+\item{minNS}{minimum of individual set contributions a cluster must contain} |
|
18 |
+ |
|
19 |
+\item{...}{additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}} |
|
20 |
+} |
|
21 |
+\value{ |
|
22 |
+list of A and P estimates |
|
23 |
+} |
|
10 | 24 |
\description{ |
11 | 25 |
Restart a GWCoGaps Run from Checkpoint |
12 | 26 |
} |
... | ... |
@@ -26,9 +26,7 @@ in which the value is 1 if the value in Amean is greater than |
26 | 26 |
threshold * Asd and 0 otherwise |
27 | 27 |
} |
28 | 28 |
\examples{ |
29 |
-# Load the sample data from CoGAPS |
|
30 | 29 |
data(SimpSim) |
31 |
-# Run binaryA with the correct arguments from 'results' |
|
32 | 30 |
binaryA(SimpSim.result$Amean, SimpSim.result$Asd, threshold=3) |
33 | 31 |
} |
34 | 32 |
|
... | ... |
@@ -13,7 +13,7 @@ createGWCoGAPSSets(D, S, nSets, simulationName) |
13 | 13 |
|
14 | 14 |
\item{nSets}{number of sets to partition the data into} |
15 | 15 |
|
16 |
-\item{simulatioName}{name used to identify files created by this simulation} |
|
16 |
+\item{simulationName}{name used to identify files created by this simulation} |
|
17 | 17 |
} |
18 | 18 |
\value{ |
19 | 19 |
simulationName used to identify saved files |
... | ... |
@@ -25,7 +25,7 @@ Create Gene Sets for GWCoGAPS |
25 | 25 |
factors whole genome data into randomly generated sets for indexing |
26 | 26 |
} |
27 | 27 |
\examples{ |
28 |
-data(SimpSim) # Load the sample data from CoGAPS |
|
28 |
+data(SimpSim) |
|
29 | 29 |
createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example") |
30 | 30 |
} |
31 | 31 |
|
... | ... |
@@ -74,7 +74,7 @@ Backwards Compatibility with v2 |
74 | 74 |
\examples{ |
75 | 75 |
data(SimpSim) |
76 | 76 |
nC <- ncol(SimpSim.D) |
77 |
-patterns <- matrix(runif(nC, 0, 1), nrow=1, ncol=nC) |
|
77 |
+patterns <- matrix(1:nC/nC, nrow=1, ncol=nC) |
|
78 | 78 |
result <- gapsMapRun(SimpSim.D, SimpSim.S, FP=patterns, nFactor=3) |
79 | 79 |
} |
80 | 80 |
|
... | ... |
@@ -25,9 +25,7 @@ plots the output A and P matrices as a |
25 | 25 |
heatmap and line plot respectively |
26 | 26 |
} |
27 | 27 |
\examples{ |
28 |
-# Load the sample data from CoGAPS |
|
29 | 28 |
data(SimpSim) |
30 |
-# Run plotGAPS with arguments from CoGAPS results list |
|
31 | 29 |
plotGAPS(SimpSim.result$Amean, SimpSim.result$Pmean) |
32 | 30 |
} |
33 | 31 |
|
... | ... |
@@ -21,9 +21,7 @@ Plot the P Matrix |
21 | 21 |
plots the P matrix in a line plot with error bars |
22 | 22 |
} |
23 | 23 |
\examples{ |
24 |
-# Load the sample data from CoGAPS |
|
25 | 24 |
data(SimpSim) |
26 |
-# Run plotP with arguments from CoGAPS results list |
|
27 | 25 |
plotP(SimpSim.result$Pmean, SimpSim.result$Psd) |
28 | 26 |
} |
29 | 27 |
|
... | ... |
@@ -25,9 +25,7 @@ Plot of Residuals |
25 | 25 |
calculate residuals and produce heatmap |
26 | 26 |
} |
27 | 27 |
\examples{ |
28 |
-# Load the sample data from CoGAPS |
|
29 | 28 |
data(SimpSim) |
30 |
-# Run residuals with the correct arguments |
|
31 | 29 |
residuals(SimpSim.result$Amean, SimpSim.result$Pmean, SimpSim.D, SimpSim.S) |
32 | 30 |
} |
33 | 31 |
|
... | ... |
@@ -151,12 +151,12 @@ TEST_CASE("Test Archive.h") |
151 | 151 |
|
152 | 152 |
SECTION("GibbsSampler Serialization") |
153 | 153 |
{ |
154 |
- |
|
154 |
+ //TODO |
|
155 | 155 |
} |
156 | 156 |
|
157 | 157 |
SECTION("GapsInternalState Serialization") |
158 | 158 |
{ |
159 |
- |
|
159 |
+ //TODO |
|
160 | 160 |
} |
161 | 161 |
|
162 | 162 |
SECTION("Random Generator Serialization") |