... | ... |
@@ -23,7 +23,7 @@ exportClasses(CogapsResult) |
23 | 23 |
importClassesFrom(S4Vectors,Annotated) |
24 | 24 |
importClassesFrom(S4Vectors,character_OR_NULL) |
25 | 25 |
importClassesFrom(SingleCellExperiment,LinearEmbeddingMatrix) |
26 |
-importFrom(BiocParallel,SnowParam) |
|
26 |
+importFrom(BiocParallel,MulticoreParam) |
|
27 | 27 |
importFrom(BiocParallel,bplapply) |
28 | 28 |
importFrom(Rcpp,evalCpp) |
29 | 29 |
importFrom(SummarizedExperiment,assay) |
... | ... |
@@ -85,6 +85,11 @@ checkpointInFile=NULL, transposeData=FALSE, ...) |
85 | 85 |
if (!is(data, "character")) |
86 | 86 |
checkDataMatrix(data, uncertainty, params) |
87 | 87 |
|
88 |
+ # check single cell parameter |
|
89 |
+ if (!is.null(allParams$gaps@distributed)) |
|
90 |
+ if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
|
91 |
+ warning("running single-cell CoGAPS with singleCell=FALSE") |
|
92 |
+ |
|
88 | 93 |
# convert data to matrix |
89 | 94 |
if (is(data, "data.frame")) |
90 | 95 |
data <- data.matrix(data) |
... | ... |
@@ -132,7 +137,7 @@ checkpointInFile=NULL, transposeData=FALSE, ...) |
132 | 137 |
#' @inheritParams CoGAPS |
133 | 138 |
#' @return CogapsResult object |
134 | 139 |
#' @importFrom methods new |
135 |
-scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=NULL, |
|
140 |
+scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
136 | 141 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
137 | 142 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
138 | 143 |
checkpointInFile=NULL, transposeData=FALSE, ...) |
... | ... |
@@ -151,7 +156,7 @@ checkpointInFile=NULL, transposeData=FALSE, ...) |
151 | 156 |
#' @inheritParams CoGAPS |
152 | 157 |
#' @return CogapsResult object |
153 | 158 |
#' @importFrom methods new |
154 |
-GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=NULL, |
|
159 |
+GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
155 | 160 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
156 | 161 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
157 | 162 |
checkpointInFile=NULL, transposeData=FALSE, ...) |
... | ... |
@@ -7,8 +7,8 @@ |
7 | 7 |
#' @param allParams list of all parameters used in computation |
8 | 8 |
#' @param uncertainty uncertainty matrix (same supported types as data) |
9 | 9 |
#' @return list |
10 |
-#' @importFrom BiocParallel bplapply SnowParam |
|
11 |
-distributedCogaps <- function(data, allParams, uncertainty) |
|
10 |
+#' @importFrom BiocParallel bplapply MulticoreParam |
|
11 |
+distributedCogaps <- function(data, allParams, uncertainty, BPPARAM=NULL) |
|
12 | 12 |
{ |
13 | 13 |
FUN <- function(set, data, allParams, uncertainty, fixedMatrix=NULL) |
14 | 14 |
{ |
... | ... |
@@ -21,10 +21,11 @@ distributedCogaps <- function(data, allParams, uncertainty) |
21 | 21 |
# randomly sample either rows or columns into subsets to break the data up |
22 | 22 |
set.seed(allParams$gaps@seed) |
23 | 23 |
sets <- createSets(data, allParams) |
24 |
- snow <- SnowParam(workers=length(sets), type="SOCK") |
|
25 |
- |
|
24 |
+ if (is.null(BPPARAM)) |
|
25 |
+ BPPARAM <- BiocParallel::MulticoreParam(workers=length(sets)) |
|
26 |
+ |
|
26 | 27 |
# run Cogaps normally on each subset of the data |
27 |
- initialResult <- bplapply(sets, FUN, BPPARAM=snow, data=data, |
|
28 |
+ initialResult <- bplapply(sets, FUN, BPPARAM=BPPARAM, data=data, |
|
28 | 29 |
allParams=allParams, uncertainty=uncertainty) |
29 | 30 |
|
30 | 31 |
# match patterns in either A or P matrix |
... | ... |
@@ -36,7 +37,7 @@ distributedCogaps <- function(data, allParams, uncertainty) |
36 | 37 |
== "genome-wide", "P", "A") |
37 | 38 |
|
38 | 39 |
# ru final phase with fixed matrix |
39 |
- finalResult <- bplapply(sets, FUN, data=data, BPPARAM=snow, |
|
40 |
+ finalResult <- bplapply(sets, FUN, BPPARAM=BPPARAM, data=data, |
|
40 | 41 |
allParams=allParams, uncertainty=uncertainty, fixedMatrix=consensusMatrix) |
41 | 42 |
|
42 | 43 |
# get result |
... | ... |
@@ -27,26 +27,8 @@ NULL |
27 | 27 |
|
28 | 28 |
#' Simulated data |
29 | 29 |
#' @docType data |
30 |
-#' @name SimpSim.D |
|
31 |
-#' @usage SimpSim.D |
|
32 |
-NULL |
|
33 |
- |
|
34 |
-#' Simulated data |
|
35 |
-#' @docType data |
|
36 |
-#' @name SimpSim.S |
|
37 |
-#' @usage SimpSim.S |
|
38 |
-NULL |
|
39 |
- |
|
40 |
-#' Simulated data |
|
41 |
-#' @docType data |
|
42 |
-#' @name SimpSim.A |
|
43 |
-#' @usage SimpSim.A |
|
44 |
-NULL |
|
45 |
- |
|
46 |
-#' Simulated data |
|
47 |
-#' @docType data |
|
48 |
-#' @name SimpSim.P |
|
49 |
-#' @usage SimpSim.P |
|
30 |
+#' @name SimpSim.data |
|
31 |
+#' @usage SimpSim.data |
|
50 | 32 |
NULL |
51 | 33 |
|
52 | 34 |
#' CoGAPS result from running on simulated data |
... | ... |
@@ -57,20 +39,14 @@ NULL |
57 | 39 |
|
58 | 40 |
#' Sample GIST gene expression data from Ochs et al. (2009) |
59 | 41 |
#' @docType data |
60 |
-#' @name GIST.D |
|
61 |
-#' @usage GIST.D |
|
42 |
+#' @name GIST.data_frame |
|
43 |
+#' @usage GIST.data_frame |
|
62 | 44 |
NULL |
63 | 45 |
|
64 | 46 |
#' Sample GIST gene expression data from Ochs et al. (2009) |
65 | 47 |
#' @docType data |
66 |
-#' @name GIST.S |
|
67 |
-#' @usage GIST.S |
|
68 |
-NULL |
|
69 |
- |
|
70 |
-#' Simulated dataset to quantify gene set membership in the GIST dataset |
|
71 |
-#' @docType data |
|
72 |
-#' @name GIST.GeneSets |
|
73 |
-#' @usage GIST.GeneSets |
|
48 |
+#' @name GIST.matrix |
|
49 |
+#' @usage GIST.matrix |
|
74 | 50 |
NULL |
75 | 51 |
|
76 | 52 |
#' CoGAPS result from running on GIST dataset |
... | ... |
@@ -79,12 +55,6 @@ NULL |
79 | 55 |
#' @usage GIST.result |
80 | 56 |
NULL |
81 | 57 |
|
82 |
-#' Simulated dataset to quantify gene set membership. |
|
83 |
-#' @docType data |
|
84 |
-#' @name GSets |
|
85 |
-#' @usage GSets |
|
86 |
-NULL |
|
87 |
- |
|
88 | 58 |
#' Gene sets defined by transcription factors defined from TRANSFAC. |
89 | 59 |
#' @docType data |
90 | 60 |
#' @name tf2ugFC |
... | ... |
@@ -79,6 +79,8 @@ setValidity("CogapsParams", |
79 | 79 |
"random seed must be an integer greater than zero" |
80 | 80 |
if (object@minNS <= 1 | object@minNS %% 1 != 0) |
81 | 81 |
"minNS must be an integer greater than one" |
82 |
+ if (object@nSets <= 1 | object@nSets %% 1 != 0) |
|
83 |
+ "minNS must be an integer greater than one" |
|
82 | 84 |
} |
83 | 85 |
) |
84 | 86 |
|
... | ... |
@@ -104,14 +106,16 @@ setGeneric("setParam", function(object, whichParam, value) |
104 | 106 |
#' |
105 | 107 |
#' @description these parameters are interrelated so they must be set together |
106 | 108 |
#' @param object an object of type CogapsParams |
107 |
-#' @param cut a distributed CoGAPS parameter |
|
108 |
-#' @param minNS a distributed CoGAPS parameter |
|
109 |
-#' @param maxNS a distributed CoGAPS parameter |
|
109 |
+#' @param nSets number of sets to break data into |
|
110 |
+#' @param cut number of branches at which to cut dendrogram used in |
|
111 |
+#' pattern matching |
|
112 |
+#' @param minNS minimum of individual set contributions a cluster must contain |
|
113 |
+#' @param maxNS maximum of individual set contributions a cluster can contain |
|
110 | 114 |
#' @return the modified params object |
111 | 115 |
#' @examples |
112 | 116 |
#' params <- new("CogapsParams") |
113 | 117 |
#' params <- setDistributedParams(3, 2, 4) |
114 |
-setGeneric("setDistributedParams", function(object, cut=NULL, |
|
118 |
+setGeneric("setDistributedParams", function(object, nSets, cut=NULL, |
|
115 | 119 |
minNS=NULL, maxNS=NULL) |
116 | 120 |
{standardGeneric("setDistributedParams")}) |
117 | 121 |
|
... | ... |
@@ -57,7 +57,7 @@ function(object, whichParam, value) |
57 | 57 |
object@maxGibbsMassA <- value |
58 | 58 |
object@maxGibbsMassP <- value |
59 | 59 |
} |
60 |
- else if (whichParam %in% c("cut", "minNS", "maxNS")) |
|
60 |
+ else if (whichParam %in% c("nSets", "cut", "minNS", "maxNS")) |
|
61 | 61 |
{ |
62 | 62 |
stop("please set this parameter with setDistributedParams") |
63 | 63 |
} |
... | ... |
@@ -73,19 +73,15 @@ function(object, whichParam, value) |
73 | 73 |
#' @aliases setDistributedParams |
74 | 74 |
#' @importFrom methods slot |
75 | 75 |
setMethod("setDistributedParams", signature(object="CogapsParams"), |
76 |
-function(object, cut, minNS, maxNS) |
|
76 |
+function(object, nSets, cut, minNS, maxNS) |
|
77 | 77 |
{ |
78 |
- object@cut <- cut |
|
79 |
- object@minNS <- minNS |
|
80 |
- object@maxNS <- maxNS |
|
78 |
+ object@nSets <- nSets |
|
81 | 79 |
|
82 |
- if (is.null(object@cut)) |
|
83 |
- object@cut <- object@nPatterns |
|
84 |
- if (is.null(object@minNS)) |
|
85 |
- object@minNS <- ceiling(object@nSets / 2) |
|
86 |
- if (is.null(object@maxNS)) |
|
87 |
- object@maxNS <- object@minNS + object@nSets |
|
80 |
+ object@cut <- ifelse(is.null(cut), object@nPatterns, cut) |
|
81 |
+ object@minNS <- ifelse(is.null(minNS), ceiling(object@nSets / 2), minNS) |
|
82 |
+ object@maxNS <- ifelse(is.null(maxNS), object@minNS + object@nSets, maxNS) |
|
88 | 83 |
|
84 |
+ validObject(object) |
|
89 | 85 |
return(object) |
90 | 86 |
}) |
91 | 87 |
|
... | ... |
@@ -15,18 +15,18 @@ function(object) |
15 | 15 |
#' @importFrom grDevices rainbow |
16 | 16 |
plot.CogapsResult <- function(x, ...) |
17 | 17 |
{ |
18 |
- nSamples <- nrow(object@sampleFactors) |
|
19 |
- nFactors <- ncol(object@sampleFactors) |
|
18 |
+ nSamples <- nrow(x@sampleFactors) |
|
19 |
+ nFactors <- ncol(x@sampleFactors) |
|
20 | 20 |
colors <- rainbow(nFactors) |
21 | 21 |
xlimits <- c(0, nSamples + 1) |
22 |
- ylimits <- c(0, max(object@sampleFactors) * 1.1) |
|
22 |
+ ylimits <- c(0, max(x@sampleFactors) * 1.1) |
|
23 | 23 |
|
24 | 24 |
plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude") |
25 | 25 |
|
26 | 26 |
for (i in 1:nFactors) |
27 | 27 |
{ |
28 |
- lines(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i]) |
|
29 |
- points(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i], pch=i) |
|
28 |
+ lines(x=1:nSamples, y=x@sampleFactors[,i], col=colors[i]) |
|
29 |
+ points(x=1:nSamples, y=x@sampleFactors[,i], col=colors[i], pch=i) |
|
30 | 30 |
} |
31 | 31 |
|
32 | 32 |
legend("top", paste("Pattern", 1:nFactors, sep = ""), pch = 1:nFactors, |
35 | 35 |
deleted file mode 100644 |
... | ... |
@@ -1,12 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/Package.R |
|
3 |
-\docType{data} |
|
4 |
-\name{GIST.GeneSets} |
|
5 |
-\alias{GIST.GeneSets} |
|
6 |
-\title{Simulated dataset to quantify gene set membership in the GIST dataset} |
|
7 |
-\usage{ |
|
8 |
-GIST.GeneSets |
|
9 |
-} |
|
10 |
-\description{ |
|
11 |
-Simulated dataset to quantify gene set membership in the GIST dataset |
|
12 |
-} |
13 | 0 |
similarity index 79% |
14 | 1 |
rename from man/GIST.S.Rd |
15 | 2 |
rename to man/GIST.data_frame.Rd |
... | ... |
@@ -1,11 +1,11 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/Package.R |
3 | 3 |
\docType{data} |
4 |
-\name{GIST.S} |
|
5 |
-\alias{GIST.S} |
|
4 |
+\name{GIST.data_frame} |
|
5 |
+\alias{GIST.data_frame} |
|
6 | 6 |
\title{Sample GIST gene expression data from Ochs et al. (2009)} |
7 | 7 |
\usage{ |
8 |
-GIST.S |
|
8 |
+GIST.data_frame |
|
9 | 9 |
} |
10 | 10 |
\description{ |
11 | 11 |
Sample GIST gene expression data from Ochs et al. (2009) |
12 | 12 |
similarity index 83% |
13 | 13 |
rename from man/GIST.D.Rd |
14 | 14 |
rename to man/GIST.matrix.Rd |
... | ... |
@@ -1,11 +1,11 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/Package.R |
3 | 3 |
\docType{data} |
4 |
-\name{GIST.D} |
|
5 |
-\alias{GIST.D} |
|
4 |
+\name{GIST.matrix} |
|
5 |
+\alias{GIST.matrix} |
|
6 | 6 |
\title{Sample GIST gene expression data from Ochs et al. (2009)} |
7 | 7 |
\usage{ |
8 |
-GIST.D |
|
8 |
+GIST.matrix |
|
9 | 9 |
} |
10 | 10 |
\description{ |
11 | 11 |
Sample GIST gene expression data from Ochs et al. (2009) |
12 | 12 |
deleted file mode 100644 |
... | ... |
@@ -1,12 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/Package.R |
|
3 |
-\docType{data} |
|
4 |
-\name{GSets} |
|
5 |
-\alias{GSets} |
|
6 |
-\title{Simulated dataset to quantify gene set membership.} |
|
7 |
-\usage{ |
|
8 |
-GSets |
|
9 |
-} |
|
10 |
-\description{ |
|
11 |
-Simulated dataset to quantify gene set membership. |
|
12 |
-} |
13 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,12 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/Package.R |
|
3 |
-\docType{data} |
|
4 |
-\name{SimpSim.A} |
|
5 |
-\alias{SimpSim.A} |
|
6 |
-\title{Simulated data} |
|
7 |
-\usage{ |
|
8 |
-SimpSim.A |
|
9 |
-} |
|
10 |
-\description{ |
|
11 |
-Simulated data |
|
12 |
-} |
13 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,12 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/Package.R |
|
3 |
-\docType{data} |
|
4 |
-\name{SimpSim.P} |
|
5 |
-\alias{SimpSim.P} |
|
6 |
-\title{Simulated data} |
|
7 |
-\usage{ |
|
8 |
-SimpSim.P |
|
9 |
-} |
|
10 |
-\description{ |
|
11 |
-Simulated data |
|
12 |
-} |
13 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,12 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/Package.R |
|
3 |
-\docType{data} |
|
4 |
-\name{SimpSim.S} |
|
5 |
-\alias{SimpSim.S} |
|
6 |
-\title{Simulated data} |
|
7 |
-\usage{ |
|
8 |
-SimpSim.S |
|
9 |
-} |
|
10 |
-\description{ |
|
11 |
-Simulated data |
|
12 |
-} |
13 | 0 |
similarity index 75% |
14 | 1 |
rename from man/SimpSim.D.Rd |
15 | 2 |
rename to man/SimpSim.data.Rd |
... | ... |
@@ -1,11 +1,11 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/Package.R |
3 | 3 |
\docType{data} |
4 |
-\name{SimpSim.D} |
|
5 |
-\alias{SimpSim.D} |
|
4 |
+\name{SimpSim.data} |
|
5 |
+\alias{SimpSim.data} |
|
6 | 6 |
\title{Simulated data} |
7 | 7 |
\usage{ |
8 |
-SimpSim.D |
|
8 |
+SimpSim.data |
|
9 | 9 |
} |
10 | 10 |
\description{ |
11 | 11 |
Simulated data |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
\alias{distributedCogaps} |
5 | 5 |
\title{CoGAPS Distributed Matrix Factorization Algorithm} |
6 | 6 |
\usage{ |
7 |
-distributedCogaps(data, allParams, uncertainty) |
|
7 |
+distributedCogaps(data, allParams, uncertainty, BPPARAM = NULL) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{data}{File name or R object (see details for supported types)} |
... | ... |
@@ -6,19 +6,23 @@ |
6 | 6 |
\alias{setDistributedParams,CogapsParams-method} |
7 | 7 |
\title{set the value of parameters for distributed CoGAPS} |
8 | 8 |
\usage{ |
9 |
-setDistributedParams(object, cut = NULL, minNS = NULL, maxNS = NULL) |
|
9 |
+setDistributedParams(object, nSets, cut = NULL, minNS = NULL, |
|
10 |
+ maxNS = NULL) |
|
10 | 11 |
|
11 |
-\S4method{setDistributedParams}{CogapsParams}(object, cut = NULL, |
|
12 |
+\S4method{setDistributedParams}{CogapsParams}(object, nSets, cut = NULL, |
|
12 | 13 |
minNS = NULL, maxNS = NULL) |
13 | 14 |
} |
14 | 15 |
\arguments{ |
15 | 16 |
\item{object}{an object of type CogapsParams} |
16 | 17 |
|
17 |
-\item{cut}{a distributed CoGAPS parameter} |
|
18 |
+\item{nSets}{number of sets to break data into} |
|
18 | 19 |
|
19 |
-\item{minNS}{a distributed CoGAPS parameter} |
|
20 |
+\item{cut}{number of branches at which to cut dendrogram used in |
|
21 |
+pattern matching} |
|
20 | 22 |
|
21 |
-\item{maxNS}{a distributed CoGAPS parameter} |
|
23 |
+\item{minNS}{minimum of individual set contributions a cluster must contain} |
|
24 |
+ |
|
25 |
+\item{maxNS}{maximum of individual set contributions a cluster can contain} |
|
22 | 26 |
} |
23 | 27 |
\value{ |
24 | 28 |
the modified params object |
... | ... |
@@ -127,7 +127,6 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix) |
127 | 127 |
{ |
128 | 128 |
GAPS_ASSERT(!Rf_isNull(allParams["whichMatrixFixed"])); |
129 | 129 |
std::string which = Rcpp::as<std::string>(allParams["whichMatrixFixed"]); |
130 |
- gaps_printf("%s %c\n", which.c_str(), which[0]); |
|
131 | 130 |
runner.setFixedMatrix(which[0], convertRMatrix(Rcpp::NumericMatrix(fixedMatrix), which[0]=='P')); |
132 | 131 |
} |
133 | 132 |
|
... | ... |
@@ -174,22 +174,63 @@ void GapsRunner::updateSampler(unsigned nA, unsigned nP) |
174 | 174 |
} |
175 | 175 |
} |
176 | 176 |
|
177 |
+// sum coef * log(i) for i = 1 to total, fit coef from number of atoms |
|
178 |
+// approximates sum of number of atoms (stirling approx to factorial) |
|
179 |
+// this should be proportional to total number of updates |
|
180 |
+static double estimatedNumUpdates(double current, double total, float nAtoms) |
|
181 |
+{ |
|
182 |
+ double coef = nAtoms / std::log(current); |
|
183 |
+ return coef * std::log(std::sqrt(2 * total * gaps::algo::pi)) + |
|
184 |
+ total * coef * std::log(total) - total * coef; |
|
185 |
+} |
|
186 |
+ |
|
187 |
+double GapsRunner::estimatedPercentComplete() const |
|
188 |
+{ |
|
189 |
+ double nIter = static_cast<double>(mCurrentIteration); |
|
190 |
+ double nAtomsA = static_cast<double>(mASampler.nAtoms()); |
|
191 |
+ double nAtomsP = static_cast<double>(mPSampler.nAtoms()); |
|
192 |
+ |
|
193 |
+ if (mPhase == 'S') |
|
194 |
+ { |
|
195 |
+ nIter += mMaxIterations; |
|
196 |
+ } |
|
197 |
+ |
|
198 |
+ double totalIter = 2.0 * static_cast<double>(mMaxIterations); |
|
199 |
+ |
|
200 |
+ double estimatedCompleted = estimatedNumUpdates(nIter, nIter, nAtomsA) + |
|
201 |
+ estimatedNumUpdates(nIter, nIter, nAtomsP); |
|
202 |
+ |
|
203 |
+ double estimatedTotal = estimatedNumUpdates(nIter, totalIter, nAtomsA) + |
|
204 |
+ estimatedNumUpdates(nIter, totalIter, nAtomsP); |
|
205 |
+ |
|
206 |
+ return estimatedCompleted / estimatedTotal; |
|
207 |
+} |
|
208 |
+ |
|
177 | 209 |
void GapsRunner::displayStatus() |
178 | 210 |
{ |
179 | 211 |
if (mPrintMessages && mOutputFrequency > 0 && ((mCurrentIteration + 1) % mOutputFrequency) == 0) |
180 | 212 |
{ |
181 | 213 |
bpt::time_duration diff = bpt_now() - mStartTime; |
182 |
- unsigned elapsedSeconds = static_cast<unsigned>(diff.total_seconds()); |
|
214 |
+ double nSecondsCurrent = diff.total_seconds(); |
|
215 |
+ double nSecondsTotal = nSecondsCurrent / estimatedPercentComplete(); |
|
216 |
+ |
|
217 |
+ unsigned elapsedSeconds = static_cast<unsigned>(nSecondsCurrent); |
|
218 |
+ unsigned totalSeconds = static_cast<unsigned>(nSecondsTotal); |
|
219 |
+ |
|
220 |
+ unsigned elapsedHours = elapsedSeconds / 3600; |
|
221 |
+ elapsedSeconds -= elapsedHours * 3600; |
|
222 |
+ unsigned elapsedMinutes = elapsedSeconds / 60; |
|
223 |
+ elapsedSeconds -= elapsedMinutes * 60; |
|
183 | 224 |
|
184 |
- unsigned hours = elapsedSeconds / 3600; |
|
185 |
- elapsedSeconds -= hours * 3600; |
|
186 |
- unsigned minutes = elapsedSeconds / 60; |
|
187 |
- elapsedSeconds -= minutes * 60; |
|
188 |
- unsigned seconds = elapsedSeconds; |
|
225 |
+ unsigned totalHours = totalSeconds / 3600; |
|
226 |
+ totalSeconds -= totalHours * 3600; |
|
227 |
+ unsigned totalMinutes = totalSeconds / 60; |
|
228 |
+ totalSeconds -= totalMinutes * 60; |
|
189 | 229 |
|
190 |
- gaps_printf("%d of %d, Atoms: %lu(%lu), ChiSq: %.0f, elapsed time: %02d:%02d:%02d\n", |
|
230 |
+ gaps_printf("%d of %d, Atoms: %lu(%lu), ChiSq: %.0f, Time: %02d:%02d:%02d / %02d:%02d:%02d\n", |
|
191 | 231 |
mCurrentIteration + 1, mMaxIterations, mASampler.nAtoms(), |
192 |
- mPSampler.nAtoms(), mASampler.chi2(), hours, minutes, seconds); |
|
232 |
+ mPSampler.nAtoms(), mASampler.chi2(), elapsedHours, elapsedMinutes, |
|
233 |
+ elapsedSeconds, totalHours, totalMinutes, totalSeconds); |
|
193 | 234 |
gaps_flush(); |
194 | 235 |
} |
195 | 236 |
} |
... | ... |
@@ -156,9 +156,9 @@ inline std::string buildReport() |
156 | 156 |
#endif |
157 | 157 |
|
158 | 158 |
#if defined( __GAPS_AVX__ ) |
159 |
- std::string simd = "AVX enabled\n"; |
|
159 |
+ std::string simd = "SIMD: AVX instructions enabled\n"; |
|
160 | 160 |
#elif defined( __GAPS_SSE__ ) |
161 |
- std::string simd = "SSE enabled\n"; |
|
161 |
+ std::string simd = "SIMD: SSE instructions enabled\n"; |
|
162 | 162 |
#else |
163 | 163 |
std::string simd = "SIMD not enabled\n"; |
164 | 164 |
#endif |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
--- |
2 | 2 |
title: "CoGAPS - Coordinated Gene Association in Pattern Sets" |
3 |
-author: "Genevieve Stein-O'Brien, Thomas Sherman, Hyejune Limb, Elana Fertig" |
|
3 |
+author: "Thomas Sherman, Genevieve Stein-O'Brien, Hyejune Limb, Elana Fertig" |
|
4 | 4 |
date: "`r doc_date()`" |
5 | 5 |
bibliography: References.bib |
6 | 6 |
vignette: > |
... | ... |
@@ -17,15 +17,23 @@ library(CoGAPS) |
17 | 17 |
|
18 | 18 |
# Introduction |
19 | 19 |
|
20 |
-Coordinated Gene Association in Pattern Sets (CoGAPS) infers underlying patterns |
|
21 |
-in a matrix of measurements that can be interpreted as arising from the |
|
22 |
-multiplication of two lower dimensional matrices. |
|
23 |
- |
|
24 |
-The Markov chain Monte Carlo (MCMC) matrix factorization that infers patterns |
|
25 |
-also infers the extent to which individual genes belong to these patterns. The |
|
26 |
-CoGAPS algorithm extends GAPS to infer the coordinated activity in sets of genes |
|
27 |
-for each of the inferred patterns based upon (5) and to refine gene set |
|
28 |
-membership based upon (2). |
|
20 |
+Coordinated Gene Association in Pattern Sets (CoGAPS) is a technique for latent |
|
21 |
+space learning in gene expression data. CoGAPS is a member of the |
|
22 |
+Nonnegative Matrix Factorization (NMF) class of algorithms. NMFs factorize a |
|
23 |
+data matrix into two related matrices containing gene weights, the |
|
24 |
+Amplitude (A) matrix, and sample weights, the Pattern (P) Matrix. Each column |
|
25 |
+of A or row of P defines a feature and together this set of features defines |
|
26 |
+the latent space among genes and samples, respectively. In NMF, the values of |
|
27 |
+the elements in the A and P matrices are constrained to be greater than or |
|
28 |
+equal to zero. This constraint simultaneously reflects the non-negative nature |
|
29 |
+of gene expression data and enforces the additive nature of the resulting |
|
30 |
+feature dimensions, generating solutions that are biologically intuitive to |
|
31 |
+interpret @SEUNG_1999. |
|
32 |
+ |
|
33 |
+CoGAPS has two extensions that allow it to scale up to large data sets, |
|
34 |
+Genome-Wide CoGAPS (GWCoGAPS) and Single-Cell CoGAPS (scCOGAPS). This package |
|
35 |
+presents a unified R interface for all three methods, with a parallel, |
|
36 |
+efficient underlying implementation in C++. |
|
29 | 37 |
|
30 | 38 |
# Installing CoGAPS |
31 | 39 |
|
... | ... |
@@ -37,7 +45,7 @@ source("https://bioconductor.org/biocLite.R") |
37 | 45 |
biocLite("CoGAPS") |
38 | 46 |
``` |
39 | 47 |
|
40 |
-The most up to date version of *CoGAPS* can be installed directly from the |
|
48 |
+The most up-to-date version of *CoGAPS* can be installed directly from the |
|
41 | 49 |
*FertigLab* Github Repository: |
42 | 50 |
|
43 | 51 |
```{r eval=FALSE} |
... | ... |
@@ -49,8 +57,8 @@ devtools::install_github("FertigLab/CoGAPS") |
49 | 57 |
``` |
50 | 58 |
|
51 | 59 |
There is also an option to install the development version of *CoGAPS*, |
52 |
-although this is not guaranteed to be stable and should not be used for |
|
53 |
-for publications. |
|
60 |
+while this version has the latest experimental features, it is not guaranteed |
|
61 |
+to be stable. |
|
54 | 62 |
|
55 | 63 |
```{r eval=FALSE} |
56 | 64 |
## Method 1 using biocLite |
... | ... |
@@ -60,36 +68,48 @@ biocLite("FertigLab/CoGAPS", ref="develop", dependencies = TRUE, build_vignettes |
60 | 68 |
devtools::install_github("FertigLab/CoGAPS", ref="develop") |
61 | 69 |
``` |
62 | 70 |
|
63 |
-# Running CoGAPS |
|
71 |
+# Package Overview |
|
64 | 72 |
|
65 |
-Here we walk through running *CoGAPS* on the *GIST* data set from Ochs et |
|
66 |
-al. (2009), which contains gene expression data from gastrointestinal |
|
67 |
-stromal tumor cell lines treated with Gleevec. The matrix contains bulk gene |
|
68 |
-expression data with 1363 genes and 9 samples. |
|
73 |
+We first give a walkthrough of the package features using a simple, simulated |
|
74 |
+data set. In later sections we provide two example workflows on real data |
|
75 |
+sets. |
|
69 | 76 |
|
70 |
-## Running with Default Parameters |
|
77 |
+## Running CoGAPS with Default Parameters |
|
71 | 78 |
|
72 |
-The only required argument to *CoGAPS* is the data set. The simplest call to |
|
73 |
-*CoGAPS* is the following: |
|
79 |
+The only required argument to `CoGAPS` is the data set. This can be a `matrix`, |
|
80 |
+`data.frame`, `SummarizedExperiment`, `SingleCellExperiment` or the path of a |
|
81 |
+file (`tsv`, `csv`, `mtx`) containing the data. |
|
74 | 82 |
|
75 |
-```{r eval=FALSE} |
|
83 |
+```{r} |
|
76 | 84 |
# load data |
77 |
-data(GIST) |
|
85 |
+data(SimpSim) |
|
78 | 86 |
|
79 | 87 |
# run CoGAPS |
80 |
-result <- CoGAPS(GIST.D) |
|
88 |
+CoGAPS(SimpSim.data) |
|
81 | 89 |
``` |
82 | 90 |
|
91 |
+While CoGAPS is running it periodically prints status messages. For example, |
|
92 |
+`20000 of 25000, Atoms: 2932(80), ChiSq: 9728, time: 00:00:29 / 00:01:19`. This |
|
93 |
+message tells us that CoGAPS is at iteration 20000 out of 25000 for this phase, |
|
94 |
+and that 29 seconds out of an estimated 1 minute 19 seconds have passed. It |
|
95 |
+also tells us the size of the atomic domain which is a core component of the |
|
96 |
+algorithm but can be ignored for now. Finally, the ChiSq value tells us how |
|
97 |
+closely the A and P matrices reconstruct the original data. In general, we want |
|
98 |
+this value to go down - but it is not a perfect measurment of how well CoGAPS |
|
99 |
+is finding the biological processes contained in the data. CoGAPS also prints |
|
100 |
+a message indicating which phase is currently happening. There are two phases |
|
101 |
+to the algorithm - *Calibration* and *Sampling*. |
|
102 |
+ |
|
83 | 103 |
## Setting Parameters |
84 | 104 |
|
85 | 105 |
### Model Parameters |
86 | 106 |
|
87 |
-Most of the time we'll want to set some parameters before running *CoGAPS*. |
|
88 |
-Parameters are managed with a *CogapsParams* object. This object will |
|
107 |
+Most of the time we'll want to set some parameters before running CoGAPS. |
|
108 |
+Parameters are managed with a `CogapsParams` object. This object will |
|
89 | 109 |
store all parameters needed to run CoGAPS and provides a simple interface for |
90 | 110 |
viewing and setting the parameter values. |
91 | 111 |
|
92 |
-```{r eval=FALSE} |
|
112 |
+```{r} |
|
93 | 113 |
# create new parameters object |
94 | 114 |
params <- new("CogapsParams") |
95 | 115 |
|
... | ... |
@@ -105,219 +125,178 @@ getParam(params, "nPatterns") |
105 | 125 |
``` |
106 | 126 |
|
107 | 127 |
Once we've created the parameters object we can pass it along with our data to |
108 |
-*CoGAPS*. |
|
128 |
+`CoGAPS`. |
|
109 | 129 |
|
110 |
-```{r eval=FALSE} |
|
130 |
+```{r} |
|
111 | 131 |
# run CoGAPS with specified model parameters |
112 |
-result <- CoGAPS(GIST.D, params) |
|
132 |
+CoGAPS(SimpSim.data, params) |
|
113 | 133 |
``` |
114 | 134 |
|
115 | 135 |
### Run Configuration Options |
116 | 136 |
|
117 |
-The *CogapsParams* class manages the model parameters - i.e. the parameters |
|
118 |
-that affect the result. There are also a few parameters that are passed directly |
|
119 |
-to *CoGAPS* that control things like displaying the status of the run. |
|
137 |
+The `CogapsParams` class manages the model parameters - i.e. the parameters |
|
138 |
+that affect the result. There are also a few parameters that are passed |
|
139 |
+directly to `CoGAPS` that control things like displaying the status of the run. |
|
120 | 140 |
|
121 |
-```{r eval=FALSE} |
|
141 |
+```{r} |
|
122 | 142 |
# run CoGAPS with specified output frequency |
123 |
-result <- CoGAPS(GIST.D, params, outputFrequency=200) |
|
143 |
+CoGAPS(SimpSim.data, params, outputFrequency=250) |
|
124 | 144 |
``` |
125 | 145 |
|
126 |
-There are several other arguments that are passed directly to *CoGAPS* which |
|
146 |
+There are several other arguments that are passed directly to `CoGAPS` which |
|
127 | 147 |
are covered in later sections. |
128 | 148 |
|
129 |
-# Visualizing Output |
|
149 |
+## Visualizing Output |
|
130 | 150 |
|
131 |
-*CoGAPS* returns a *CogapsResult* object that can be passed on to the analysis |
|
132 |
-and plotting functions provided in the package. By default, the *plot* function |
|
133 |
-displays the patterns across samples. |
|
151 |
+`CoGAPS` returns a `CogapsResult` object that can be passed on to the analysis |
|
152 |
+and plotting functions provided in the package. By default, the `plot` function |
|
153 |
+displays how the patterns vary across the samples. (Note that we pass the |
|
154 |
+`nIterations` parameter here directly, this is allowed for any parameters in |
|
155 |
+the `CogapsParams` class and will always take precedent over the values given |
|
156 |
+in `params`). |
|
134 | 157 |
|
135 |
-```{r eval=FALSE} |
|
136 |
-# show result summary |
|
137 |
-result |
|
158 |
+```{r} |
|
159 |
+# store result |
|
160 |
+result <- CoGAPS(SimpSim.data, params, nIterations=5000, outputFrequency=2500) |
|
138 | 161 |
|
139 | 162 |
# plot CogapsResult object returned from CoGAPS |
140 | 163 |
plot(result) |
141 | 164 |
``` |
142 | 165 |
|
143 |
-## Pattern Markers |
|
144 |
- |
|
145 |
-# Selecting Appropiate Number of Patterns |
|
166 |
+In the example workflows we'll explore some more analysis functions provided in |
|
167 |
+the package. |
|
146 | 168 |
|
147 |
-Selecting the best value for *nPatterns* is the most difficult part of the |
|
148 |
-analysis. For starters, there is not one "best" value for the number of |
|
149 |
-patterns - various numbers of patterns can capture various levels of |
|
150 |
-granularity in the data. To further complicate the problem, there's not a |
|
151 |
-clear way to compare runs for different numbers of patterns. |
|
169 |
+## Running CoGAPS in Parallel |
|
152 | 170 |
|
153 |
-Here we show the simplest approach of selecting dimensionality by plotting the |
|
154 |
-error and selecting the least number of patterns that sufficiently reduce the |
|
155 |
-error. We also introduce another way to pass parameters - any parameter in the |
|
156 |
-*CogapsParams* class can be passed by name directly to the *CoGAPS* function, |
|
157 |
-overwriting the value contained in `params`. |
|
171 |
+Non-Negative Matrix Factorization algorithms typically require long computation |
|
172 |
+times and CoGAPS is no exception. In order to scale CoGAPS up to the size of |
|
173 |
+data sets seen in practice we need to take advantage of modern hardware |
|
174 |
+and parallelize the algorithm. |
|
158 | 175 |
|
159 |
-```{r eval=FALSE} |
|
160 |
-# define the range of patterns we are searching over |
|
161 |
-pattern_range <- c(3,5,8) |
|
176 |
+### Multi-Threaded Parallelization |
|
162 | 177 |
|
163 |
-# run CoGAPS with each value in range |
|
164 |
-resultList <- lapply(pattern_range, function(p) CoGAPS(GIST.D, params, nPatterns=p, nIterations=3000, outputFrequency=2500)) |
|
178 |
+The simplest way to run CoGAPS in parallel is to provide the `nThreads` |
|
179 |
+argument to `CoGAPS`. This allows the underlying algorithm to run on multiple |
|
180 |
+threads and has no effect on the mathematics of the algorithm i.e. this is |
|
181 |
+still standard CoGAPS. The precise number of threads to use depends on many |
|
182 |
+things like hardware and data size. The best approach is to play around with |
|
183 |
+different values and see how it effects the estimated time. |
|
165 | 184 |
|
166 |
-# plot chi-sq values for each run |
|
167 |
-chisq <- sapply(resultList, function(result) getMeanChiSq(result)) |
|
168 |
-plot(pattern_range, chisq) |
|
185 |
+```{r} |
|
186 |
+CoGAPS(SimpSim.data, nIterations=10000, outputFrequency=5000, nThreads=1) |
|
187 |
+CoGAPS(SimpSim.data, nIterations=10000, outputFrequency=5000, nThreads=4) |
|
169 | 188 |
``` |
170 | 189 |
|
171 |
-# Running CoGAPS in Parallel |
|
172 |
- |
|
173 |
-## Single Process Parallelization |
|
190 |
+Note this method relies on CoGAPS being compiled with OpenMP support, use |
|
191 |
+`buildReport` to check. |
|
174 | 192 |
|
175 |
-*CoGAPS* can be run across multiple cores by setting the *nCores* parameter. |
|
176 |
-This is different from (Running Distributed CoGAPS)[running-distributed-cogaps]. |
|
177 |
-The algorithm will run in the normal fashion, without splitting up the data. |
|
178 |
- |
|
179 |
-```{r eval=FALSE} |
|
180 |
-params <- setParam(params, "nCores", 4) |
|
193 |
+```{r} |
|
194 |
+cat(CoGAPS::buildReport()) |
|
181 | 195 |
``` |
182 | 196 |
|
183 |
-## Running Distributed CoGAPS |
|
184 |
- |
|
185 |
-For extremely large datasets (greater than a few thousand rows or columns), it |
|
186 |
-is much more efficient to break up the data into random subsets and perform |
|
187 |
-*CoGAPS* on each subset in parallel - stitching the results back together |
|
188 |
-at the end. The full explanation of this method can be seen here |
|
189 |
- (CITE GWCOGAPS). |
|
190 |
- |
|
191 |
-To run *CoGAPS* in this distributed way there are two distinct functions. |
|
192 |
-Genome-Wide CoGAPS - *GWCoGAPS* and Single Cell CoGAPS - *scCoGAPS*. |
|
193 |
-*GWCoGAPS* is used to break the genes (rows) into subsets and *scCoGAPS* is |
|
194 |
-used to break the samples (columns - cells in single cell data) into subsets. |
|
195 |
-Both methods follow a similiar workflow and the underlying method is |
|
196 |
-essentially the same. |
|
197 |
- |
|
198 |
-The first step is breaking the data into subsets. This is done as a separate |
|
199 |
-step where all subsets are then saved back to a file. In the case where the |
|
200 |
-data set is so large it requires a machine with large memory, this allows the |
|
201 |
-user to only pay for the machine for enough time to split the data. The actual |
|
202 |
-algorithm should use a reasonable amount of memory on each machine during |
|
203 |
-the distributed computation. |
|
204 |
- |
|
205 |
-```{r eval=FALSE} |
|
206 |
-# splitting data into subsets for GWCoGAPS |
|
207 |
-gw_sim_name <- createGWCoGAPSSets(as.matrix(GIST.D), as.matrix(GIST.S), nSets=4, "gw_example") |
|
197 |
+### Distributed CoGAPS (GWCoGAPS/scCoGAPS) |
|
198 |
+ |
|
199 |
+For large datasets (greater than a few thousand genes or samples) the |
|
200 |
+multi-threaded parallelization isn't enough. It is more efficient to break up |
|
201 |
+the data into subsets and perform CoGAPS on each subset in parallel, stitching |
|
202 |
+the results back together at the end. The CoGAPS extensions, GWCOGAPS and |
|
203 |
+scCoGAPS, each implement a version of this method (CITE). |
|
204 |
+ |
|
205 |
+In order to use these extensions, some additional parameters are required. |
|
206 |
+`nSets` specifies the number of subsets to break the data set into. `cut`, |
|
207 |
+`minNS`, and `maxNS` control the process of matching patterns across subsets |
|
208 |
+and in general should not be changed from defaults. More information about |
|
209 |
+these parameters are available in the original papers (CITE). These parameters |
|
210 |
+need to be set with a different function than `setParam` since they depend |
|
211 |
+on each other. Here we only set `nSets` (always required), but we have the |
|
212 |
+option to pass the other parameters as well. |
|
213 |
+ |
|
214 |
+```{r} |
|
215 |
+params <- setDistributedParams(params, nSets=3) |
|
216 |
+``` |
|
208 | 217 |
|
209 |
-# splitting data into subsets for scCoGAPS |
|
210 |
-sc_sim_name <- createscCoGAPSSets(t(as.matrix(GIST.D)), nSets=4, "sc_example") |
|
218 |
+Setting `nSets` requires balancing available hardware and run time against the |
|
219 |
+size of your data. In general, `nSets` should be less than or equal to the |
|
220 |
+number of nodes/cores that are available. If that is true, then the more subsets |
|
221 |
+you create, the faster CoGAPS will run - however, some robustness can be lost |
|
222 |
+when the subsets get too small. The general rule of thumb is to set `nSets` |
|
223 |
+so that each subset has between 1000 and 5000 genes or cells. We will see an |
|
224 |
+example of this on real data in the next two sections. |
|
225 |
+ |
|
226 |
+Once the distributed parameters have been set we can call CoGAPS either by |
|
227 |
+setting the `distributed` parameter or by using the provided wrapper functions. |
|
228 |
+The following calls are equivalent: |
|
229 |
+ |
|
230 |
+```{r} |
|
231 |
+# genome-wide CoGAPS |
|
232 |
+CoGAPS(SimpSim.data, params, distributed="genome-wide") |
|
233 |
+GWCoGAPS(SimpSim.data, params) |
|
234 |
+ |
|
235 |
+# single-cell CoGAPS |
|
236 |
+CoGAPS(SimpSim.data, params, distributed="single-cell", singleCell=TRUE) |
|
237 |
+scCoGAPS(SimpSim.data, params) |
|
211 | 238 |
``` |
212 | 239 |
|
213 |
-Notice that we pass the transpose of the *GIST* data to *scCoGAPS* since |
|
214 |
-the normal dataset is 1363 x 9 and we want a data set with a large number of |
|
215 |
-samples. |
|
240 |
+Notice that we also set the parameter `singleCell=TRUE`. This makes some |
|
241 |
+adjustments to the algorithm to account for the sparsity in single-cell data. |
|
242 |
+The `scCoGAPS` wrapper automatically sets this parameter for us. |
|
216 | 243 |
|
217 |
-It is neccesary to name the simulation so that the computation portion knows |
|
218 |
-what the subset files are called. For convenience the name provided to the |
|
219 |
-subset function is returned back so that it can be saved. |
|
244 |
+The parallel backend for this computation is managed by the package `BiocParallel` |
|
245 |
+and there is an option for the user to specifiy which backend they want. See the |
|
246 |
+[Additional Features](#setting-parallel-backend-for-gwcogapssccogaps) |
|
247 |
+section for more information. |
|
220 | 248 |
|
221 |
-Running the computation is just as easy as running normal *CoGAPS*. The results |
|
222 |
-will be in the exact same format as *CoGAPS*, only the intermediate computation |
|
223 |
-is different. Notice here that no *params* object is accepted - all parameters |
|
224 |
-must be passed by name. |
|
249 |
+In general it is preferred to pass a file name to `GWCoGAPS`/`scCoGAPS` since |
|
250 |
+otherwise the entire data set must be copied across multiple processes which |
|
251 |
+will slow things down and potentially cause an out-of-memory error. We will |
|
252 |
+see examples of this in the next two sections. |
|
225 | 253 |
|
226 |
-```{r eval=FALSE} |
|
227 |
-GWCoGAPS(gw_sim_name, nPatterns=3, nIterations=1000) |
|
228 |
-scCoGAPS(sc_sim_name, nPatterns=3, nIterations=1000) |
|
229 |
-``` |
|
254 |
+# Workflow Example - Bulk Data |
|
230 | 255 |
|
231 |
-# Supported Data Formats |
|
256 |
+THIS SECTION UNDER CONSTRUCTION |
|
232 | 257 |
|
233 |
-CoGAPS supports both R objects and file names as input. Any of these data types |
|
234 |
-can be passed directly to *CoGAPS* as the *data* parameter. |
|
258 |
+Here we walk through running CoGAPS on the *GIST* data set from @OCHS_2009, |
|
259 |
+which contains gene expression data from gastrointestinal |
|
260 |
+stromal tumor cell lines treated with Gleevec. The matrix contains bulk gene |
|
261 |
+expression data with 1363 genes and 9 samples. |
|
235 | 262 |
|
236 |
-R object |
|
263 |
+# Workflow Example - Single Cell Data |
|
237 | 264 |
|
238 |
-* matrix |
|
239 |
-* data.frame |
|
240 |
-* SummarizedExperiment |
|
241 |
-* SingleCellExperiment |
|
265 |
+THIS SECTION UNDER CONSTRUCTION |
|
242 | 266 |
|
243 |
-File |
|
267 |
+# Additional Features of CoGAPS |
|
244 | 268 |
|
245 |
-* .mtx |
|
246 |
-* .tsv |
|
247 |
-* .csv |
|
269 |
+## Checkpoint System - Saving/Loading CoGAPS Runs |
|
248 | 270 |
|
249 |
-# Transposing Data |
|
271 |
+## Manual Pipeline for GWCoGAPS/scCoGAPS |
|
250 | 272 |
|
251 |
-If your data is stored as samples x genes, *CoGAPS* allows you to pass |
|
252 |
-*transposeData* and will automatically and efficiently read the transpose |
|
253 |
-of your data to get the required gense x samples configuration. |
|
273 |
+THIS SECTION UNDER CONSTRUCTION |
|
254 | 274 |
|
255 |
-# Obscure Options |
|
275 |
+## Setting Parallel Backend for GWCoGAPS/scCoGAPS |
|
256 | 276 |
|
257 |
-## Running CoGAPS on Single Cell Data |
|
277 |
+## Transposing Data |
|
258 | 278 |
|
259 |
-The size and distribution of single cell data creates additional considerations |
|
260 |
-when running *CoGAPS*. The section |
|
261 |
-[Running Distributed CoGAPS](#running-distributed-cogaps) gives an overview |
|
262 |
-for running CoGAPS on large single cell datasets. With regards to the |
|
263 |
-distribution of single cell data, it is neccesary to set the parameter |
|
264 |
-**singleCell**, i.e. `params <- setParam(params, "singleCell" , TRUE)`. |
|
279 |
+If your data is stored as samples x genes, `CoGAPS` allows you to pass |
|
280 |
+`transposeData=TRUE` and will automatically read the transpose of your data |
|
281 |
+to get the required genes x samples configuration. |
|
265 | 282 |
|
266 | 283 |
## Passing Uncertainty Matrix |
267 | 284 |
|
268 | 285 |
In addition to providing the data, the user can also specify an uncertainty |
269 | 286 |
measurement - the standard deviation of each entry in the data matrix. By |
270 |
-default, *CoGAPS* assumes that the standard deviation matrix is 10% of the |
|
271 |
-data. |
|
287 |
+default, `CoGAPS` assumes that the standard deviation matrix is 10% of the |
|
288 |
+data matrix. This is a reasonable heuristic to use, but for specific types |
|
289 |
+of data you may be able to provide better information. |
|
272 | 290 |
|
273 | 291 |
```{r eval=FALSE} |
274 | 292 |
# run CoGAPS with custom uncertainty |
275 |
-result <- CoGAPS(GIST.D, params, uncertainty=as.matrix(GIST.S)) |
|
293 |
+result <- CoGAPS(GIST.matrix, params, uncertainty=GIST.uncertainty) |
|
276 | 294 |
``` |
277 | 295 |
|
278 |
-# Using CoGAPS-based statistics to infer gene membership in annotated gene sets |
|
279 |
- |
|
280 |
-## CoGAPSStat |
|
281 |
- |
|
282 |
-The function *calcCoGAPSStat* is used to infer gene set activity in each |
|
283 |
-pattern from the CoGAPS matrix factorization. *calcCoGAPSStat* calculates the |
|
284 |
-gene set statistics for each column of the feature matrix using a Z-score, the |
|
285 |
-input gene set and permutation tests. The function outputs a list containing: |
|
286 |
- |
|
287 |
-* *GSUpreg* lists p-values for upregulation of each gene set in each pattern |
|
288 |
-* *GSDownreg* lists p-values for downregulation of each gene set in each pattern |
|
289 |
-* *GSActEst* provides gene set activity through conversion of p-values to |
|
290 |
-activity estimates of each gene set in each pattern |
|
291 |
- |
|
292 |
-## computeGeneGSProb |
|
293 |
- |
|
294 |
-Now using the *computeGeneGSProb* function, we can use the gene set statistic |
|
295 |
-(returned from calcCoGAPSStat) to compute a statistic to quantify the |
|
296 |
-likelihood of membership for each gene annotated to a set based on its |
|
297 |
-inferred activity. The statistic used to infer membership compares the |
|
298 |
-expression pattern of a gene annotated as a member of a gene set to the common |
|
299 |
-expression pattern of all annotated members of that gene set. The function |
|
300 |
-outputs the p-value of a set membership for each gene specified in GStoGenes. |
|
301 |
- |
|
302 | 296 |
# Citing CoGAPS |
303 | 297 |
|
304 |
-If you use the CoGAPS package for your analysis please cite: (1) EJ Fertig, |
|
305 |
-J Ding, AV Favorov, G Parmigiani, and MF Ochs (2010) CoGAPS: an R/C++ package to |
|
306 |
-identify patterns and biological process activity in transcriptomic data. |
|
307 |
-Bioinformatics 26: 2792-2793. |
|
308 |
- |
|
309 |
-To cite the CoGAPS algorithm use: (3) MF Ochs (2003) Bayesian Decomposition in |
|
310 |
-The Analysis of Gene Expression Data: Methods and Software G Parmigiani, |
|
311 |
-E Garrett, R Irizarry, and S Zeger, ed. New York: Springer Verlag. |
|
312 |
- |
|
313 |
-To cite the gene set statistic use: (5) MF Ochs, L Rink, C Tarn, S Mburu, |
|
314 |
-T Taguchi, B Eisenberg, and AK Godwin (2009) Detection of treatment-induced |
|
315 |
-changes in signaling pathways in gastrointestinal stromal tumors using |
|
316 |
-transcriptomic data. Cancer Research 69: 9125-9132. |
|
298 |
+If you use the CoGAPS package for your analysis, please cite @FERTIG_2010 |
|
317 | 299 |
|
318 |
-To site the set-membership refinement statistic use: (2) EJ Fertig, AV Favorov, |
|
319 |
-and MF Ochs (2012) Identifying context-specific transcription factor targets |
|
320 |
-from prior knowledge and gene expression data. 2012 IEEE International |
|
321 |
-Conference on Bioinformatics and Biomedicine, B310, in press. |
|
300 |
+If you use the gene set statistic, please cite @OCHS_2009 |
|
322 | 301 |
|
323 | 302 |
# References |
... | ... |
@@ -1,570 +1,46 @@ |
1 |
-%% This BibTeX bibliography file was created using BibDesk. |
|
2 |
-%% http://bibdesk.sourceforge.net/ |
|
3 |
- |
|
4 |
- |
|
5 |
-%% Created for Elana Fertig at 2012-08-02 15:30:36 -0400 |
|
6 |
- |
|
7 |
- |
|
8 |
-%% Saved with string encoding Unicode (UTF-8) |
|
9 |
- |
|
10 |
- |
|
11 |
- |
|
12 |
-@conference{Fertig2012, |
|
13 |
- Address = {Philadelphia, PA, USA}, |
|
14 |
- Author = {EJ Fertig and AV Favorov and MF Ochs}, |
|
15 |
- Booktitle = {IEEE International Conference on Bioinformatics and Biomedicine}, |
|
16 |
- Date-Added = {2012-08-02 15:29:22 -0400}, |
|
17 |
- Date-Modified = {2012-08-02 15:30:29 -0400}, |
|
18 |
- Number = {B310}, |
|
19 |
- Title = {Identifying context-specific transcription factor targets from prior knowledge and gene expression data.}, |
|
20 |
- Year = {2012}} |
|
21 |
- |
|
22 |
-@incollection{Ochs2003, |
|
23 |
- Address = {New York}, |
|
24 |
- Author = {MF Ochs}, |
|
25 |
- Booktitle = {The Analysis of Gene Expression Data: Methods and Software}, |
|
26 |
- Date-Added = {2012-08-02 15:27:27 -0400}, |
|
27 |
- Date-Modified = {2012-08-02 15:29:07 -0400}, |
|
28 |
- Editor = {G Parmigiani and E Garrett and R Irizarry and S Zeger}, |
|
29 |
- Publisher = {Springer-Verlag}, |
|
30 |
- Title = {Bayesian Decomposition}, |
|
31 |
- Year = {2003}} |
|
32 |
- |
|
33 |
-@article{Fertig2010, |
|
34 |
- Abstract = {SUMMARY: Coordinated Gene Activity in Pattern Sets (CoGAPS) provides an integrated package for isolating gene expression driven by a biological process, enhancing inference of biological processes from transcriptomic data. CoGAPS improves on other enrichment measurement methods by combining a Markov chain Monte Carlo (MCMC) matrix factorization algorithm (GAPS) with a threshold-independent statistic inferring activity on gene sets. The software is provided as open source C++ code built on top of JAGS software with an R interface. AVAILABILITY: The R package CoGAPS and the C++ package GAPS-JAGS are provided open source under the GNU Lesser Public License (GLPL) with a users manual containing installation and operating instructions. CoGAPS is available through Bioconductor and depends on the rjags package available through CRAN to interface CoGAPS with GAPS-JAGS. URL: http://www.cancerbiostats.onc.jhmi.edu/cogaps.cfm .}, |
|
35 |
- Author = {Fertig, EJ and Ding, J and Favorov, AV and Parmigiani, G and Ochs, MF}, |
|
36 |
- Date-Added = {2012-08-01 12:56:23 -0400}, |
|
37 |
- Date-Modified = {2012-08-01 12:57:24 -0400}, |
|
38 |
- Doi = {10.1093/bioinformatics/btq503}, |
|
1 |
+@article{FERTIG_2010, |
|
2 |
+ Author = {Fertig, Elana J. and Ding, Jie and Favorov, Alexander V. and Parmigiani, Giovanni and Ochs, Michael F.}, |
|
3 |
+ Year = {2010}, |
|
4 |
+ Title = {CoGAPS: an R/C++ package to identify patterns and biological process activity in transcriptomic data}, |
|
39 | 5 |
Journal = {Bioinformatics}, |
40 |
- Journal-Full = {Bioinformatics (Oxford, England)}, |
|
41 |
- Mesh = {Computational Biology; Gene Expression; Gene Expression Profiling; Genomics; Markov Chains; Oligonucleotide Array Sequence Analysis; Software}, |
|
42 |
- Month = {Nov}, |
|
43 |
- Number = {21}, |
|
44 |
- Pages = {2792-3}, |
|
45 |
- Pmc = {PMC3025742}, |
|
46 |
- Pmid = {20810601}, |
|
47 |
- Pst = {ppublish}, |
|
48 |
- Title = {{CoGAPS: an R/C++ package to identify patterns and biological process activity in transcriptomic data}}, |
|
49 | 6 |
Volume = {26}, |
50 |
- Year = {2010}, |
|
51 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btq503}} |
|
7 |
+ Number = {21}, |
|
8 |
+ Pages = {2792-2793}, |
|
9 |
+ Doi = {10.1093/bioinformatics/btq503}} |
|
52 | 10 |
|
53 |
-@article{Tavazoie1999, |
|
54 |
- Abstract = {Technologies to measure whole-genome mRNA abundances and methods to organize and display such data are emerging as valuable tools for systems-level exploration of transcriptional regulatory networks. For instance, it has been shown that mRNA data from 118 genes, measured at several time points in the developing hindbrain of mice, can be hierarchically clustered into various patterns (or 'waves') whose members tend to participate in common processes. We have previously shown that hierarchical clustering can group together genes whose cis-regulatory elements are bound by the same proteins in vivo. Hierarchical clustering has also been used to organize genes into hierarchical dendograms on the basis of their expression across multiple growth conditions. The application of Fourier analysis to synchronized yeast mRNA expression data has identified cell-cycle periodic genes, many of which have expected cis-regulatory elements. Here we apply a systematic set of statistical algorithms, based on whole-genome mRNA data, partitional clustering and motif discovery, to identify transcriptional regulatory sub-networks in yeast-without any a priori knowledge of their structure or any assumptions about their dynamics. This approach uncovered new regulons (sets of co-regulated genes) and their putative cis-regulatory elements. We used statistical characterization of known regulons and motifs to derive criteria by which we infer the biological significance of newly discovered regulons and motifs. Our approach holds promise for the rapid elucidation of genetic network architecture in sequenced organisms in which little biology is known.}, |
|
55 |
- Address = {Department of Genetics, Harvard Medical School, Boston, Massachusetts 02115, USA.}, |
|
56 |
- Au = {Tavazoie, S and Hughes, JD and Campbell, MJ and Cho, RJ and Church, GM}, |
|
57 |
- Author = {Tavazoie, S. and Hughes, J.D. and Campbell, M.J. and Cho, R.J. and Church, G.M.}, |
|
58 |
- Cin = {Nat Genet. 1999 Jul;22(3):213-5. PMID: 10391202}, |
|
59 |
- Crdt = {1999/07/03 10:00}, |
|
60 |
- Da = {19990719}, |
|
61 |
- Date-Added = {2010-02-04 11:43:20 -0500}, |
|
62 |
- Date-Modified = {2010-02-04 11:48:43 -0500}, |
|
63 |
- Dcom = {19990719}, |
|
64 |
- Doi = {10.1038/10343}, |
|
65 |
- Edat = {1999/07/03 10:00}, |
|
66 |
- Issn = {1061-4036 (Print); 1061-4036 (Linking)}, |
|
67 |
- Jid = {9216904}, |
|
68 |
- Journal = {Nat Genet}, |
|
69 |
- Jt = {Nature genetics}, |
|
70 |
- Language = {eng}, |
|
71 |
- Lr = {20061115}, |
|
72 |
- Mh = {Animals; Cell Cycle/genetics; DNA/genetics; Gene Expression; *Genetic Techniques; Mice; Multigene Family; Open Reading Frames; RNA, Messenger/genetics/metabolism; Rhombencephalon/growth \& development/metabolism; Saccharomyces cerevisiae/cytology/genetics/metabolism}, |
|
73 |
- Mhda = {2001/03/23 10:01}, |
|
74 |
- Number = {3}, |
|
75 |
- Own = {NLM}, |
|
76 |
- Pages = {281--285}, |
|
77 |
- Pl = {UNITED STATES}, |
|
78 |
- Pmid = {10391217}, |
|
79 |
- Pst = {ppublish}, |
|
80 |
- Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.}, |
|
81 |
- Rn = {0 (RNA, Messenger); 9007-49-2 (DNA)}, |
|
82 |
- Sb = {IM}, |
|
83 |
- So = {Nat Genet. 1999 Jul;22(3):281-5.}, |
|
84 |
- Stat = {MEDLINE}, |
|
85 |
- Title = {Systematic determination of genetic network architecture.}, |
|
86 |
- Volume = {22}, |
|
11 |
+@article{SEUNG_1999, |
|
12 |
+ Author = {Seung, Sebastian and Lee, Daniel D.}, |
|
87 | 13 |
Year = {1999}, |
88 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1038/10343}} |
|
89 |
- |
|
90 |
-@article{Goeman2007, |
|
91 |
- Abstract = {MOTIVATION: Many statistical tests have been proposed in recent years for analyzing gene expression data in terms of gene sets, usually from Gene Ontology. These methods are based on widely different methodological assumptions. Some approaches test differential expression of each gene set against differential expression of the rest of the genes, whereas others test each gene set on its own. Also, some methods are based on a model in which the genes are the sampling units, whereas others treat the subjects as the sampling units. This article aims to clarify the assumptions behind different approaches and to indicate a preferential methodology of gene set testing. RESULTS: We identify some crucial assumptions which are needed by the majority of methods. P-values derived from methods that use a model which takes the genes as the sampling unit are easily misinterpreted, as they are based on a statistical model that does not resemble the biological experiment actually performed. Furthermore, because these models are based on a crucial and unrealistic independence assumption between genes, the P-values derived from such methods can be wildly anti-conservative, as a simulation experiment shows. We also argue that methods that competitively test each gene set against the rest of the genes create an unnecessary rift between single gene testing and gene set testing.}, |
|
92 |
- Address = {Department of Medical Statistics and Bioinformatics, Leiden University Medical Center, Leiden, The Netherlands. j.j.goeman@lumc.nl}, |
|
93 |
- Au = {Goeman, JJ and Buhlmann, P}, |
|
94 |
- Author = {Goeman, J.J. and Buhlmann, P.}, |
|
95 |
- Crdt = {2007/02/17 09:00}, |
|
96 |
- Da = {20070501}, |
|
97 |
- Date-Added = {2010-02-04 11:42:46 -0500}, |
|
98 |
- Date-Modified = {2010-02-04 11:48:51 -0500}, |
|
99 |
- Dcom = {20070522}, |
|
100 |
- Dep = {20070215}, |
|
101 |
- Doi = {10.1093/bioinformatics/btm051}, |
|
102 |
- Edat = {2007/02/17 09:00}, |
|
103 |
- Issn = {1367-4811 (Electronic); 1367-4803 (Linking)}, |
|
104 |
- Jid = {9808944}, |
|
105 |
- Journal = {Bioinformatics}, |
|
106 |
- Jt = {Bioinformatics (Oxford, England)}, |
|
107 |
- Keywords = {gene sets}, |
|
108 |
- Language = {eng}, |
|
109 |
- Lr = {20091104}, |
|
110 |
- Mh = {*Algorithms; *Artifacts; *Data Interpretation, Statistical; *Databases, Genetic; Gene Expression Profiling/*methods; Information Storage and Retrieval/*methods; Reproducibility of Results; Sensitivity and Specificity}, |
|
111 |
- Mhda = {2007/05/23 09:00}, |
|
112 |
- Number = {8}, |
|
113 |
- Own = {NLM}, |
|
114 |
- Pages = {980--987}, |
|
115 |
- Phst = {2007/02/15 {$[$}aheadofprint{$]$}}, |
|
116 |
- Pii = {btm051}, |
|
117 |
- Pl = {England}, |
|
118 |
- Pmid = {17303618}, |
|
119 |
- Pst = {ppublish}, |
|
120 |
- Pt = {Comparative Study; Evaluation Studies; Journal Article}, |
|
121 |
- Sb = {IM}, |
|
122 |
- So = {Bioinformatics. 2007 Apr 15;23(8):980-7. Epub 2007 Feb 15.}, |
|
123 |
- Stat = {MEDLINE}, |
|
124 |
- Title = {Analyzing gene expression data in terms of gene sets: methodological issues.}, |
|
125 |
- Volume = {23}, |
|
126 |
- Year = {2007}, |
|
127 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btm051}} |
|
128 |
- |
|
129 |
-@article{Bidaut2004, |
|
130 |
- Abstract = {ClutrFree facilitates the visualization and interpretation of clusters or patterns computed from microarray data through a graphical user interface that displays patterns, membership information of the genes and annotation statistics simultaneously. ClutrFree creates a tree linking the patterns based on similarity, permitting the navigation among patterns identified by different algorithms or by the same algorithm with different parameters, and aids the inferring of conclusions from a microarray experiment. AVAILABILITY: The ClutrFree Java source code and compiled bytecode are available as a package under the GNU General Public License at http://bioinformatics.fccc.edu}, |
|
131 |
- Address = {Division of Population Science, Fox Chase Cancer Center, 333 Cottman Avenue, Philadelphia, PA 19111, USA.}, |
|
132 |
- Au = {Bidaut, G and Ochs, MF}, |
|
133 |
- Author = {Bidaut, G. and Ochs, M.F.}, |
|
134 |
- Crdt = {2004/05/18 05:00}, |
|
135 |
- Da = {20041101}, |
|
136 |
- Date-Added = {2010-02-02 12:36:53 -0500}, |
|
137 |
- Date-Modified = {2010-02-02 16:00:15 -0500}, |
|
138 |
- Dcom = {20050210}, |
|
139 |
- Dep = {20040514}, |
|
140 |
- Doi = {10.1093/bioinformatics/bth307}, |
|
141 |
- Edat = {2004/05/18 05:00}, |
|
142 |
- Gr = {CA06927/CA/NCI NIH HHS/United States}, |
|
143 |
- Issn = {1367-4803 (Print); 1367-4803 (Linking)}, |
|
144 |
- Jid = {9808944}, |
|
145 |
- Journal = {Bioinformatics}, |
|
146 |
- Jt = {Bioinformatics (Oxford, England)}, |
|
147 |
- Language = {eng}, |
|
148 |
- Lr = {20071114}, |
|
149 |
- Mh = {*Algorithms; *Cluster Analysis; Computer Graphics; Oligonucleotide Array Sequence Analysis/*methods; Sequence Alignment/*methods; Sequence Analysis, DNA/*methods; *Software; *User-Computer Interface}, |
|
150 |
- Mhda = {2005/02/11 09:00}, |
|
151 |
- Number = {16}, |
|
152 |
- Own = {NLM}, |
|
153 |
- Pages = {2869--2871}, |
|
154 |
- Phst = {2004/05/14 {$[$}aheadofprint{$]$}}, |
|
155 |
- Pii = {bth307}, |
|
156 |
- Pl = {England}, |
|
157 |
- Pmid = {15145813}, |
|
158 |
- Pst = {ppublish}, |
|
159 |
- Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.}, |
|
160 |
- Sb = {IM}, |
|
161 |
- So = {Bioinformatics. 2004 Nov 1;20(16):2869-71. Epub 2004 May 14.}, |
|
162 |
- Stat = {MEDLINE}, |
|
163 |
- Title = {ClutrFree: cluster tree visualization and interpretation.}, |
|
164 |
- Volume = {20}, |
|
165 |
- Year = {2004}, |
|
166 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/bth307}} |
|
167 |
- |
|
168 |
-@article{Carvalho2008, |
|
169 |
- Author = {Carvalho, C.M. and Chang, J. and Lucas, J. and Nevins, J.R. and Wang, Q. and West, M.}, |
|
170 |
- Date-Added = {2010-02-02 12:36:36 -0500}, |
|
171 |
- Date-Modified = {2010-02-02 12:36:36 -0500}, |
|
172 |
- Journal = {J. Am. Stat. Assoc.}, |
|
173 |
- Pages = {1438 - 1456}, |
|
174 |
- Title = {High-dimensional sparse factor modelling: Applications in gene expression genomics}, |
|
175 |
- Volume = {103}, |
|
176 |
- Year = {2008}} |
|
177 |
- |
|
178 |
-@article{Lee1999, |
|
179 |
- Abstract = {Is perception of the whole based on perception of its parts? There is psychological and physiological evidence for parts-based representations in the brain, and certain computational theories of object recognition rely on such representations. But little is known about how brains or computers might learn the parts of objects. Here we demonstrate an algorithm for non-negative matrix factorization that is able to learn parts of faces and semantic features of text. This is in contrast to other methods, such as principal components analysis and vector quantization, that learn holistic, not parts-based, representations. Non-negative matrix factorization is distinguished from the other methods by its use of non-negativity constraints. These constraints lead to a parts-based representation because they allow only additive, not subtractive, combinations. When non-negative matrix factorization is implemented as a neural network, parts-based representations emerge by virtue of two properties: the firing rates of neurons are never negative and synaptic strengths do not change sign.}, |
|
180 |
- Address = {Bell Laboratories, Lucent Technologies, Murray Hill, New Jersey 07974, USA.}, |
|
181 |
- Author = {Lee, D.D. and Seung, H.S.}, |
|
182 |
- Cin = {Nature. 1999 Oct 21;401(6755):759. PMID: 10548097; Nature. 1999 Oct 21;401(6755):759-60. PMID: 10548098}, |
|
183 |
- Crdt = {1999/11/05 08:00}, |
|
184 |
- Da = {19991116}, |
|
185 |
- Date = {1999 Oct 21}, |
|
186 |
- Date-Added = {2010-02-02 12:35:49 -0500}, |
|
187 |
- Date-Modified = {2010-02-02 15:57:17 -0500}, |
|
188 |
- Dcom = {19991116}, |
|
189 |
- Doi = {10.1038/44565}, |
|
190 |
- Edat = {1999/11/05 08:00}, |
|
191 |
- Issn = {0028-0836 (Print)}, |
|
192 |
- Jid = {0410462}, |
|
14 |
+ Title = {Learning the Parts of Objects by Non-Negative Matrix Factorization}, |
|
193 | 15 |
Journal = {Nature}, |
194 |
- Jt = {Nature}, |
|
195 |
- Language = {eng}, |
|
196 |
- Lr = {20061115}, |
|
197 |
- Mh = {*Algorithms; Face; Humans; *Learning; Models, Neurological; Perception/physiology; Semantics}, |
|
198 |
- Mhda = {2001/03/23 10:01}, |
|
199 |
- Number = {6755}, |
|
200 |
- Own = {NLM}, |
|
201 |
- Pages = {788--791}, |
|
202 |
- Pl = {ENGLAND}, |
|
203 |
- Pmid = {10548103}, |
|
204 |
- Pst = {ppublish}, |
|
205 |
- Pt = {Journal Article; Research Support, Non-U.S. Gov't}, |
|
206 |
- Sb = {IM}, |
|
207 |
- Status = {MEDLINE}, |
|
208 |
- Title = {Learning the parts of objects by non-negative matrix factorization.}, |
|
209 | 16 |
Volume = {401}, |
210 |
- Year = {1999}, |
|
211 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1038/44565}} |
|
212 |
- |
|
213 |
-@article{Kossenkov2009, |
|
214 |
- Abstract = {We explore a number of matrix factorization methods in terms of their ability to identify signatures of biological processes in a large gene expression study. We focus on the ability of these methods to find signatures in terms of gene ontology enhancement and on the interpretation of these signatures in the samples. Two Bayesian approaches, Bayesian Decomposition (BD) and Bayesian Factor Regression Modeling (BFRM), perform best. Differences in the strength of the signatures between the samples suggest that BD will be most useful for systems modeling and BFRM for biomarker discovery.}, |
|
215 |
- Address = {The Wistar Institute, Philadelphia, Pennsylvania, USA.}, |
|
216 |
- Au = {Kossenkov, AV and Ochs, MF}, |
|
217 |
- Author = {Kossenkov, A.V. and Ochs, M.F.}, |
|
218 |
- Crdt = {2009/11/10 06:00}, |
|
219 |
- Da = {20091109}, |
|
220 |
- Date-Added = {2010-02-02 12:33:48 -0500}, |
|
221 |
- Date-Modified = {2010-02-02 15:55:21 -0500}, |
|
222 |
- Dcom = {20100111}, |
|
223 |
- Doi = {10.1016/S0076-6879(09)67003-8}, |
|
224 |
- Edat = {2009/11/10 06:00}, |
|
225 |
- Issn = {1557-7988 (Electronic); 1557-7988 (Linking)}, |
|
226 |
- Jid = {0212271}, |
|
227 |
- Journal = {Methods Enzymol}, |
|
228 |
- Jt = {Methods in enzymology}, |
|
229 |
- Keywords = {Markov Chain Monte Carlo, matrix factorization}, |
|
230 |
- Language = {eng}, |
|
231 |
- Mh = {Algorithms; Bayes Theorem; Biological Processes/physiology; *Cluster Analysis; *Data Interpretation, Statistical; Gene Expression Profiling/*methods; Gene Regulatory Networks; Microarray Analysis/*methods; Pattern Recognition, Automated/methods; Saccharomyces cerevisiae/genetics/physiology}, |
|
232 |
- Mhda = {2010/01/12 06:00}, |
|
233 |
- Own = {NLM}, |
|
234 |
- Pages = {59--77}, |
|
235 |
- Pii = {S0076-6879(09)67003-8}, |
|
236 |
- Pl = {United States}, |
|
237 |
- Pmid = {19897089}, |
|
238 |
- Pst = {ppublish}, |
|
239 |
- Pt = {Journal Article}, |
|
240 |
- Sb = {IM}, |
|
241 |
- So = {Methods Enzymol. 2009;467:59-77.}, |
|
242 |
- Stat = {MEDLINE}, |
|
243 |
- Title = {Matrix factorization for recovery of biological processes from microarray data.}, |
|
244 |
- Volume = {467}, |
|
245 |
- Year = {2009}, |
|
246 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1016/S0076-6879(09)67003-8}} |
|
247 |
- |
|
248 |
-@article{Subramanian2005, |
|
249 |
- Abstract = {Although genomewide RNA expression analysis has become a routine tool in biomedical research, extracting biological insight from such information remains a major challenge. Here, we describe a powerful analytical method called Gene Set Enrichment Analysis (GSEA) for interpreting gene expression data. The method derives its power by focusing on gene sets, that is, groups of genes that share common biological function, chromosomal location, or regulation. We demonstrate how GSEA yields insights into several cancer-related data sets, including leukemia and lung cancer. Notably, where single-gene analysis finds little similarity between two independent studies of patient survival in lung cancer, GSEA reveals many biological pathways in common. The GSEA method is embodied in a freely available software package, together with an initial database of 1,325 biologically defined gene sets.}, |
|
250 |
- Address = {Broad Institute of Massachusetts Institute of Technology and Harvard, 320 Charles Street, Cambridge, MA 02141, USA.}, |
|
251 |
- Au = {Subramanian, A and Tamayo, P and Mootha, VK and Mukherjee, S and Ebert, BL and Gillette, MA and Paulovich, A and Pomeroy, SL and Golub, TR and Lander, ES and Mesirov, JP}, |
|
252 |
- Author = {Subramanian, A. and Tamayo, P. and Mootha, V.K. and Mukherjee, S. and Ebert, B.L. and Gillette, M.A. and Paulovich, A. and Pomeroy, S.L. and Golub, T.R. and Lander, E.S. and Mesirov, J.P.}, |
|
253 |
- Cin = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15278-9. PMID: 16230612}, |
|
254 |
- Crdt = {2005/10/04 09:00}, |
|
255 |
- Da = {20051026}, |
|
256 |
- Date-Added = {2010-02-01 16:29:02 -0500}, |
|
257 |
- Date-Modified = {2010-02-02 15:59:33 -0500}, |
|
258 |
- Dcom = {20051212}, |
|
259 |
- Dep = {20050930}, |
|
260 |
- Doi = {10.1073/pnas.0506580102}, |
|
261 |
- Edat = {2005/10/04 09:00}, |
|
262 |
- Issn = {0027-8424 (Print); 0027-8424 (Linking)}, |
|
263 |
- Jid = {7505876}, |
|
264 |
- Journal = {Proc Natl Acad Sci}, |
|
265 |
- Jt = {Proceedings of the National Academy of Sciences of the United States of America}, |
|
266 |
- Keywords = {gene set}, |
|
267 |
- Language = {eng}, |
|
268 |
- Lr = {20091118}, |
|
269 |
- Mh = {Cell Line, Tumor; Female; Gene Expression Profiling/*methods; Genes, p53/physiology; Genome; Humans; Leukemia, Myeloid, Acute/genetics; Lung Neoplasms/genetics/mortality; Male; *Oligonucleotide Array Sequence Analysis; Precursor Cell Lymphoblastic Leukemia-Lymphoma/genetics}, |
|
270 |
- Mhda = {2005/12/15 09:00}, |
|
271 |
- Number = {43}, |
|
272 |
- Oid = {NLM: PMC1239896}, |
|
273 |
- Own = {NLM}, |
|
274 |
- Pages = {15545--15550}, |
|
275 |
- Phst = {2005/09/30 {$[$}aheadofprint{$]$}}, |
|
276 |
- Pii = {0506580102}, |
|
277 |
- Pl = {United States}, |
|
278 |
- Pmc = {PMC1239896}, |
|
279 |
- Pmid = {16199517}, |
|
280 |
- Pst = {ppublish}, |
|
281 |
- Pt = {Journal Article}, |
|
282 |
- Sb = {IM}, |
|
283 |
- So = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15545-50. Epub 2005 Sep 30.}, |
|
284 |
- Stat = {MEDLINE}, |
|
285 |
- Title = {Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles.}, |
|
286 |
- Volume = {102}, |
|
287 |
- Year = {2005}, |
|
288 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0506580102}} |
|
289 |
- |
|
290 |
-@article{Draghici2003, |
|
291 |
- Abstract = {Onto-Tools is a set of four seamlessly integrated databases: Onto-Express, Onto-Compare, Onto-Design and Onto-Translate. Onto-Express is able to automatically translate lists of genes found to be differentially regulated in a given condition into functional profiles characterizing the impact of the condition studied upon various biological processes and pathways. OE constructs functional profiles (using Gene Ontology terms) for the following categories: biochemical function, biological process, cellular role, cellular component, molecular function and chromosome location. Statistical significance values are calculated for each category. Once the initial exploratory analysis identified a number of relevant biological processes, specific mechanisms of interactions can be hypothesized for the conditions studied. Currently, many commercial arrays are available for the investigation of specific mechanisms. Each such array is characterized by a biological bias determined by the extent to which the genes present on the array represent specific pathways. Onto-Compare is a tool that allows efficient comparisons of any sets of commercial or custom arrays. Using Onto-Compare, a researcher can determine quickly which array, or set of arrays, covers best the hypotheses studied. In many situations, no commercial arrays are available for specific biological mechanisms. Onto-Design is a tool that allows the user to select genes that represent given functional categories. Onto-Translate allows the user to translate easily lists of accession numbers, UniGene clusters and Affymetrix probes into one another. All tools above are seamlessly integrated. The Onto-Tools are available online at http://vortex.cs.wayne.edu/Projects.html.}, |
|
292 |
- Address = {Department of Computer Science, Wayne State University, 431 State Hall, Detroit, MI 48202, USA. sod@cs.wayne.edu}, |
|
293 |
- Au = {Draghici, S and Khatri, P and Bhavsar, P and Shah, A and Krawetz, SA and Tainsky, MA}, |
|
294 |
- Author = {Draghici, S. and Khatri, P. and Bhavsar, P. and Shah, A. and Krawetz, S.A. and Tainsky, M.A.}, |
|
295 |
- Crdt = {2003/06/26 05:00}, |
|
296 |
- Da = {20030625}, |
|
297 |
- Date-Added = {2010-02-01 16:28:22 -0500}, |
|
298 |
- Date-Modified = {2010-02-02 15:56:55 -0500}, |
|
299 |
- Dcom = {20030818}, |
|
300 |
- Edat = {2003/06/26 05:00}, |
|
301 |
- Gr = {R01-NS045207-01/NS/NINDS NIH HHS/United States; R21-EB000990-01/EB/NIBIB NIH HHS/United States}, |
|
302 |
- Issn = {1362-4962 (Electronic); 1362-4962 (Linking)}, |
|
303 |
- Jid = {0411011}, |
|
304 |
- Journal = {Nucleic Acids Res}, |
|
305 |
- Jt = {Nucleic acids research}, |
|
306 |
- Keywords = {gene set}, |
|
307 |
- Language = {eng}, |
|
308 |
- Lr = {20091118}, |
|
309 |
- Mh = {Databases, Nucleic Acid; Gene Expression Profiling/*methods; Internet; Oligonucleotide Array Sequence Analysis/*methods; Proteins/genetics/physiology; *Software; Systems Integration}, |
|
310 |
- Mhda = {2003/08/19 05:00}, |
|
311 |
- Number = {13}, |
|
312 |
- Oid = {NLM: PMC169030}, |
|
313 |
- Own = {NLM}, |
|
314 |
- Pages = {3775--3781}, |
|
315 |
- Pl = {England}, |
|
316 |
- Pmc = {PMC169030}, |
|
317 |
- Pmid = {12824416}, |
|
318 |
- Pst = {ppublish}, |
|
319 |
- Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.}, |
|
320 |
- Rn = {0 (Proteins)}, |
|
321 |
- Sb = {IM}, |
|
322 |
- So = {Nucleic Acids Res. 2003 Jul 1;31(13):3775-81.}, |
|
323 |
- Stat = {MEDLINE}, |
|
324 |
- Title = {{Onto-Tools, the toolkit of the modern biologist: Onto-Express, Onto-Compare, Onto-Design and Onto-Translate.}}, |
|
325 |
- Volume = {31}, |
|
326 |
- Year = {2003}} |
|
327 |
- |
|
328 |
-@inbook{Ochs2006, |
|
329 |
- Address = {London}, |
|
330 |
- Author = {M.F. Ochs}, |
|
331 |
- Date-Added = {2010-01-08 14:45:36 -0500}, |
|
332 |
- Date-Modified = {2010-01-08 14:48:01 -0500}, |
|
333 |
- Editor = {G. Parmigiani and E. S. Garrett and R. A. Irizarry and S. L. Zeger}, |
|
334 |
- Pages = {388-408}, |
|
335 |
- Publisher = {Springer-Verlag}, |
|
336 |
- Series = {Statistics for Biology and Health}, |
|
337 |
- Title = {The Analysis of Gene Expression Data The Analysis of Gene Expression Data: Methods and Software}, |
|
338 |
- Year = {2006}} |
|
339 |
- |
|
340 |
-@article{Bidaut2006, |
|
341 |
- Abstract = {BACKGROUND: As numerous diseases involve errors in signal transduction, modern therapeutics often target proteins involved in cellular signaling. Interpretation of the activity of signaling pathways during disease development or therapeutic intervention would assist in drug development, design of therapy, and target identification. Microarrays provide a global measure of cellular response, however linking these responses to signaling pathways requires an analytic approach tuned to the underlying biology. An ongoing issue in pattern recognition in microarrays has been how to determine the number of patterns (or clusters) to use for data interpretation, and this is a critical issue as measures of statistical significance in gene ontology or pathways rely on proper separation of genes into groups. RESULTS: Here we introduce a method relying on gene annotation coupled to decompositional analysis of global gene expression data that allows us to estimate specific activity on strongly coupled signaling pathways and, in some cases, activity of specific signaling proteins. We demonstrate the technique using the Rosetta yeast deletion mutant data set, decompositional analysis by Bayesian Decomposition, and annotation analysis using ClutrFree. We determined from measurements of gene persistence in patterns across multiple potential dimensionalities that 15 basis vectors provides the correct dimensionality for interpreting the data. Using gene ontology and data on gene regulation in the Saccharomyces Genome Database, we identified the transcriptional signatures of several cellular processes in yeast, including cell wall creation, ribosomal disruption, chemical blocking of protein synthesis, and, critically, individual signatures of the strongly coupled mating and filamentation pathways. CONCLUSION: This works demonstrates that microarray data can provide downstream indicators of pathway activity either through use of gene ontology or transcription factor databases. This can be used to investigate the specificity and success of targeted therapeutics as well as to elucidate signaling activity in normal and disease processes.}, |
|
342 |
- Address = {Fox Chase Cancer Center, 333 Cottman Avenue, Philadelphia, PA 19111, USA. ghbidaut@pcbi.upenn.edu}, |
|
343 |
- Au = {Bidaut, G and Suhre, K and Claverie, JM and Ochs, MF}, |
|
344 |
- Author = {Bidaut, G. and Suhre, K. and Claverie, J.-M. and Ochs, M.F.}, |
|
345 |
- Da = {20060327}, |
|
346 |
- Date-Added = {2010-01-08 14:39:10 -0500}, |
|
347 |
- Date-Modified = {2010-01-08 14:39:10 -0500}, |
|
348 |
- Dcom = {20060426}, |
|
349 |
- Dep = {20060228}, |
|
350 |
- Doi = {10.1186/1471-2105-7-99}, |
|
351 |
- Edat = {2006/03/02 09:00}, |
|
352 |
- Gr = {CA06927/CA/United States NCI; LM008309/LM/United States NLM}, |
|
353 |
- Issn = {1471-2105 (Electronic)}, |
|
354 |
- Jid = {100965194}, |
|
355 |
- Journal = {BMC Bioinformatics}, |
|
356 |
- Jt = {BMC bioinformatics}, |
|
357 |
- Keywords = {Bayesian, Markov Chain Monte Carlo, signaling network, microarray, k25}, |
|
358 |
- Language = {eng}, |
|
359 |
- Lr = {20071114}, |
|
360 |
- Mh = {Algorithms; Computer Simulation; Gene Expression Profiling/*methods; *Models, Biological; Oligonucleotide Array Sequence Analysis/*methods; Pattern Recognition, Automated/*methods; Saccharomyces cerevisiae Proteins/genetics/*metabolism; Signal Transduction/*physiology; Transcription Factors/genetics/*metabolism}, |
|
361 |
- Mhda = {2006/04/28 09:00}, |
|
362 |
- Own = {NLM}, |
|
363 |
- Pages = {99}, |
|
364 |
- Phst = {2005/09/22 {$[$}received{$]$}; 2006/02/28 {$[$}accepted{$]$}; 2006/02/28 {$[$}aheadofprint{$]$}}, |
|
365 |
- Pii = {1471-2105-7-99}, |
|
366 |
- Pl = {England}, |
|
367 |
- Pmc = {PMC1413561}, |
|
368 |
- Pmid = {16507110}, |
|
369 |
- Pst = {epublish}, |
|
370 |
- Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't}, |
|
371 |
- Pubm = {Electronic}, |
|
372 |
- Rn = {0 (Saccharomyces cerevisiae Proteins); 0 (Transcription Factors)}, |
|
373 |
- Sb = {IM}, |
|
374 |
- So = {BMC Bioinformatics. 2006 Feb 28;7:99.}, |
|
375 |
- Stat = {MEDLINE}, |
|
376 |
- Title = {Determination of strongly overlapping signaling activity from microarray data.}, |
|
377 |
- Volume = {7}, |
|
378 |
- Year = {2006}, |
|
379 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2105-7-99}} |
|
380 |
- |
|
381 |
-@article{Kossenkov2007a, |
|
382 |
- Abstract = {Many biological processes rely on remodeling of the transcriptional response of cells through activation of transcription factors. Although determination of the activity level of transcription factors from microarray data can provide insight into developmental and disease processes, it requires careful analysis because of the multiple regulation of genes. We present a novel approach that handles both the assignment of genes to multiple patterns, as required by multiple regulation, and the linking of genes in prior probability distributions according to their known transcriptional regulators. We demonstrate the power of this approach in simulations and by application to yeast cell cycle and deletion mutant data. The results of simulations in the presence of increasing noise showed improved recovery of patterns in terms of chi2 fit. Analysis of the yeast data led to improved inference of biologically meaningful groups in comparison to other techniques, as demonstrated with ROC analysis. The new algorithm provides an approach for estimating the levels of transcription factor activity from microarray data, and therefore provides insights into biological response.}, |
|
383 |
- Address = {The Wistar Institute, Philadelphia, PA, USA.}, |
|
384 |
- Au = {Kossenkov, AV and Peterson, AJ and Ochs, MF}, |
|
385 |
- Author = {A.V. Kossenkov and A.J. Peterson and M.F. Ochs}, |
|
386 |
- Da = {20071003}, |
|
387 |
- Date-Added = {2010-01-08 14:39:10 -0500}, |
|
388 |
- Date-Modified = {2010-01-08 14:39:10 -0500}, |
|
389 |
- Dcom = {20071102}, |
|
390 |
- Edat = {2007/10/04 09:00}, |
|
391 |
- Gr = {CA06973/CA/United States NCI; LM008309/LM/United States NLM}, |
|
392 |
- Issn = {0926-9630 (Print)}, |
|
393 |
- Jid = {9214582}, |
|
394 |
- Journal = {Stud Health Technol Inform}, |
|
395 |
- Jt = {Studies in health technology and informatics}, |
|
396 |
- Keywords = {Bayesian, Markov Chain Monte Carlo, microarray}, |
|
397 |
- Language = {eng}, |
|
398 |
- Lr = {20080710}, |
|
399 |
- Mh = {*Algorithms; Bayes Theorem; Computational Biology; *Gene Expression Regulation; Markov Chains; Models, Genetic; Monte Carlo Method; *Oligonucleotide Array Sequence Analysis; ROC Curve; Transcription Factors/*metabolism; Transcription, Genetic; Yeasts/genetics}, |
|
400 |
- Mhda = {2007/11/06 09:00}, |
|
401 |
- Number = {Pt 2}, |
|
402 |
- Own = {NLM}, |
|
403 |
- Pages = {1250--1254}, |
|
404 |
- Pl = {Netherlands}, |
|
405 |
- Pmid = {17911915}, |
|
406 |
- Pst = {ppublish}, |
|
407 |
- Pt = {Journal Article; Research Support, N.I.H., Extramural}, |
|
408 |
- Pubm = {Print}, |
|
409 |
- Rn = {0 (Transcription Factors)}, |
|
410 |
- Sb = {T}, |
|
411 |
- So = {Stud Health Technol Inform. 2007;129(Pt 2):1250-4.}, |
|
412 |
- Stat = {MEDLINE}, |
|
413 |
- Title = {Determining transcription factor activity from microarray data using {Bayesian Markov chain Monte Carlo} sampling.}, |
|
414 |
- Volume = {129}, |
|
415 |
- Year = {2007}} |
|
416 |
- |
|
417 |
-@article{Moloshok2002, |
|
418 |
- Abstract = {MOTIVATION: Microarray and gene chip technology provide high throughput tools for measuring gene expression levels in a variety of circumstances, including cellular response to drug treatment, cellular growth and development, tumorigenesis, among many other processes. In order to interpret the large data sets generated in experiments, data analysis techniques that consider biological knowledge during analysis will be extremely useful. We present here results showing the application of such a tool to expression data from yeast cell cycle experiments. RESULTS: Originally developed for spectroscopic analysis, Bayesian Decomposition (BD) includes two features which make it useful for microarray data analysis: the ability to assign genes to multiple coexpression groups and the ability to encode biological knowledge into the system. Here we demonstrate the ability of the algorithm to provide insight into the yeast cell cycle, including identification of five temporal patterns tied to cell cycle phases as well as the identification of a pattern tied to an approximately 40 min cell cycle oscillator. The genes are simultaneously assigned to the patterns, including partial assignment to multiple patterns when this is required to explain the expression profile. AVAILABILITY: The application is available free to academic users under a material transfer agreement. Go to http://bioinformatics.fccc.edu/ for more details.}, |
|
419 |
- Address = {Bioinformatics Working Group, Fox Chase Cancer Center, Philadelphia, PA 19111, USA. td_moloshok@fccc.edu}, |
|
420 |
- Au = {Moloshok, TD and Klevecz, RR and Grant, JD and Manion, FJ and Speier WF, 4th and Ochs, MF}, |
|
421 |
- Author = {Moloshok, T.D. and Klevecz, R.R. and Grant, J.D. and Manion, F.J. and Speier IV, W.F. and Ochs, M.F.}, |
|
422 |
- Da = {20020517}, |
|
423 |
- Date-Added = {2010-01-08 14:39:10 -0500}, |
|
424 |
- Date-Modified = {2010-01-08 14:39:10 -0500}, |
|
425 |
- Dcom = {20021108}, |
|
426 |
- Edat = {2002/05/23 10:00}, |
|
427 |
- Gr = {CA06927/CA/United States NCI}, |
|
428 |
- Issn = {1367-4803 (Print)}, |
|
429 |
- Jid = {9808944}, |
|
430 |
- Journal = {Bioinformatics}, |
|
431 |
- Jt = {Bioinformatics (Oxford, England)}, |
|
432 |
- Keywords = {Bayesian, signaling network, microarray}, |
|
433 |
- Language = {eng}, |
|
434 |
- Lr = {20071114}, |
|
435 |
- Mh = {*Algorithms; *Bayes Theorem; Cell Cycle/genetics; Databases, Genetic; Gene Expression Regulation; Genome, Fungal; Markov Chains; *Models, Genetic; *Models, Statistical; Monte Carlo Method; Oligonucleotide Array Sequence Analysis/*methods; Pattern Recognition, Automated; Periodicity; Reproducibility of Results; Saccharomyces cerevisiae/genetics; Sensitivity and Specificity}, |
|
436 |
- Mhda = {2002/11/26 04:00}, |
|
437 |
- Number = {4}, |
|
438 |
- Own = {NLM}, |
|
439 |
- Pages = {566--575}, |
|
440 |
- Pl = {England}, |
|
441 |
- Pmid = {12016054}, |
|
442 |
- Pst = {ppublish}, |
|
443 |
- Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.}, |
|
444 |
- Pubm = {Print}, |
|
445 |
- Sb = {IM}, |
|
446 |
- So = {Bioinformatics. 2002 Apr;18(4):566-75.}, |
|
447 |
- Stat = {MEDLINE}, |
|
448 |
- Title = {Application of {B}ayesian decomposition for analysing microarray data.}, |
|
449 |
- Volume = {18}, |
|
450 |
- Year = {2002}, |
|
451 |
- Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAkYAAAAAAkYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMPjDF1IKwAABRay/hVNb2xvc2hva19CRF9ZZWFzdC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFFrL/xebg41BERiBDQVJPAAIACQAACSAAAAAAAAAAAAAAAAAAAAABMgAAEAAIAADD41KtAAAAEQAIAADF5xkjAAAAAQAoBRay/gUWsv0COfpEAjlFGgAUHYQAFBk5AA/qXAAKpBIACqQRAAB63AACAIVNYWNpbnRvc2ggSEQ6VXNlcnM6ZWpmZXJ0aWc6TGlicmFyeTpNYWlsOklNQVAtZWpmZXJ0aWdAbWFpbC5sIzE0MTkzOS5vcmc6SU5CT1g6T2Nocy5pbWFwbWJveDpBdHRhY2htZW50czozMDg3OjI6TW9sb3Nob2tfQkRfWWVhc3QucGRmAAAOACwAFQBNAG8AbABvAHMAaABvAGsAXwBCAEQAXwBZAGUAYQBzAHQALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAHpVc2Vycy9lamZlcnRpZy9MaWJyYXJ5L01haWwvSU1BUC1lamZlcnRpZ0BtYWlsLmxpcXVpZGRpcnQub3JnL0lOQk9YL09jaHMuaW1hcG1ib3gvQXR0YWNobWVudHMvMzA4Ny8yL01vbG9zaG9rX0JEX1llYXN0LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBxLi4vLi4vTGlicmFyeS9NYWlsL0lNQVAtZWpmZXJ0aWdAbWFpbC5saXF1aWRkaXJ0Lm9yZy9JTkJPWC9PY2hzLmltYXBtYm94L0F0dGFjaG1lbnRzLzMwODcvMi9Nb2xvc2hva19CRF9ZZWFzdC5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKAC6gLsAvEC+gMFAwkDFwMeAycDmwOgA6MDsAO1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA8c=}} |
|
17 |
+ Number = {6755}, |
|
18 |
+ Pages = {788-791}, |
|
19 |
+ Doi = {10.1038/44565}} |
|
452 | 20 |
|
453 |
-@article{Ochs1999, |
|
454 |
- Abstract = {A frequent problem in analysis is the need to find two matrices, closely related to the underlying measurement process, which when multiplied together reproduce the matrix of data points. Such problems arise throughout science, for example, in imaging where both the calibration of the sensor and the true scene may be unknown and in localized spectroscopy where multiple components may be present in varying amounts in any spectrum. Since both matrices are unknown, such a decomposition is a bilinear problem. We report here a solution to this problem for the case in which the decomposition results in matrices with elements drawn from positive additive distributions. We demonstrate the power of the methodology on chemical shift images (CSI). The new method, Bayesian spectral decomposition (BSD), reduces the CSI data to a small number of basis spectra together with their localized amplitudes. We apply this new algorithm to a 19F nonlocalized study of the catabolism of 5-fluorouracil in human liver, 31P CSI studies of a human head and calf muscle, and simulations which show its strengths and limitations. In all cases, the dataset, viewed as a matrix with rows containing the individual NMR spectra, results from the multiplication of a matrix of generally nonorthogonal basis spectra (the spectral matrix) by a matrix of the amplitudes of each basis spectrum in the the individual voxels (the amplitude matrix). The results show that BSD can simultaneously determine both the basis spectra and their distribution. In principle, BSD should solve this bilinear problem for any dataset which results from multiplication of matrices representing positive additive distributions if the data overdetermine the solutions.}, |
|
455 |
- Address = {NMR and Medical Spectroscopy, Fox Chase Cancer Center, Philadelphia, PA, USA.}, |
|
456 |
- Au = {Ochs, MF and Stoyanova, RS and Arias-Mendoza, F and Brown, TR}, |
|
457 |
- Author = {Ochs, M.F. and Stoyanova, R.S. and Arias-Mendoza, F. and Brown, T.R.}, |
|
458 |
- Ci = {Copyright 1999 Academic Press.}, |
|
459 |
- Da = {19990331}, |
|
460 |
- Date-Added = {2010-01-08 14:39:10 -0500}, |
|
461 |
- Date-Modified = {2010-01-08 14:39:10 -0500}, |
|
462 |
- Dcom = {19990331}, |
|
463 |
- Doi = {10.1006/jmre.1998.1639}, |
|
464 |
- Edat = {1999/03/04}, |
|
465 |
- Gr = {CA41078/CA/United States NCI; CA62556/CA/United States NCI}, |
|
466 |
- Issn = {1090-7807 (Print)}, |
|
467 |
- Jid = {9707935}, |
|
468 |
- Journal = {J Magn Reson}, |
|
469 |
- Jt = {Journal of magnetic resonance (San Diego, Calif. : 1997)}, |
|
470 |
- Keywords = {Markov Chain Monte Carlo, Bayesian, k25}, |
|
471 |
- Language = {eng}, |
|
472 |
- Lr = {20071114}, |
|
473 |
- Mh = {Adenosine Triphosphate/analysis; Bayes Theorem; Brain/metabolism; Fluorouracil/*metabolism; Humans; Image Processing, Computer-Assisted; Liver/*metabolism; Magnesium/analysis; Magnetic Resonance Spectroscopy/*methods; Muscle, Skeletal/metabolism}, |
|
474 |
- Mhda = {1999/03/04 00:01}, |
|
475 |
- Number = {1}, |
|
476 |
- Own = {NLM}, |
|
477 |
- Pages = {161--176}, |
|
478 |
- Pii = {S1090-7807(98)91639-1}, |
|
479 |
- Pl = {UNITED STATES}, |
|
480 |
- Pmid = {10053145}, |
|
481 |
- Pst = {ppublish}, |
|
482 |
- Pt = {Comparative Study; Journal Article; Research Support, U.S. Gov't, P.H.S.}, |
|
483 |
- Pubm = {Print}, |
|
484 |
- Rn = {51-21-8 (Fluorouracil); 56-65-5 (Adenosine Triphosphate); 7439-95-4 (Magnesium)}, |
|
485 |
- Sb = {IM}, |
|
486 |
- So = {J Magn Reson. 1999 Mar;137(1):161-76.}, |
|
487 |
- Stat = {MEDLINE}, |
|
488 |
- Title = {A new method for spectral decomposition using a bilinear Bayesian approach.}, |
|
489 |
- Volume = {137}, |
|
490 |
- Year = {1999}, |
|
491 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmre.1998.1639}} |
|
21 |
+@conference{FERTIG_2012, |
|
22 |
+ Author = {Fertig, Elana J. and Favorov, Alexander V. and Ochs, Michael F.}, |
|
23 |
+ Year = {2012}, |
|
24 |
+ Title = {Identifying Context-Specific Transcription Factor Targets from Prior Knowledge and Gene Expression Data}, |
|
25 |
+ Booktitle = {IEEE International Conference on Bioinformatics and Biomedicine}, |
|
26 |
+ Address = {Philadelphia, PA, USA}} |
|
492 | 27 |
|
493 |
-@conference{Plummer2003, |
|
494 |
- Address = {Vienna, Austria}, |
|
495 |
- Author = {M. Plummer}, |
|
496 |
- Booktitle = {Proceedings of the 3rd Internation Workshop on Distributed Statistical Computing}, |
|
497 |
- Date-Added = {2010-01-08 14:38:05 -0500}, |
|
498 |
- Date-Modified = {2010-02-02 15:47:49 -0500}, |
|
499 |
- Editor = {K. Hornik and F. Leisch and A. Zeileis}, |
|
500 |
- Keywords = {Markov Chain Monte Carlo}, |
|
501 |
- Month = {March 20-22}, |
|
502 |
- Title = {{JAGS}: A program for analysis of {Bayesian} graphical models using {Gibbs} sampling}, |
|
503 |
- Year = {2003}} |
|
28 |
+@incollection{OCHS_2003, |
|
29 |
+ Author = {Ochs, Michael F.}, |
|
30 |
+ Year = {2003}, |
|
31 |
+ Title = {Bayesian Decomposition}, |
|
32 |
+ Booktitle = {The Analysis of Gene Expression Data: Methods and Software}, |
|
33 |
+ Editor = {Parmigiani, Giovanni and Garett, Elizabeth S. and Irizarry, Rafael A. and Zeger, Scott L.}, |
|
34 |
+ Address = {New York}, |
|
35 |
+ Publisher = {Springer-Verlag}} |
|
504 | 36 |
|
505 |
-@article{Ochs2009, |
|
506 |
- Abstract = {Cell signaling plays a central role in the etiology of cancer. Numerous therapeutics in use or under development target signaling proteins; however, off-target effects often limit assignment of positive clinical response to the intended target. As direct measurements of signaling protein activity are not generally feasible during treatment, there is a need for more powerful methods to determine if therapeutics inhibit their targets and when off-target effects occur. We have used the Bayesian Decomposition algorithm and data on transcriptional regulation to create a novel methodology, Differential Expression for Signaling Determination (DESIDE), for inferring signaling activity from microarray measurements. We applied DESIDE to deduce signaling activity in gastrointestinal stromal tumor cell lines treated with the targeted therapeutic imatinib mesylate (Gleevec). We detected the expected reduced activity in the KIT pathway, as well as unexpected changes in the p53 pathway. Pursuing these findings, we have determined that imatinib-induced DNA damage is responsible for the increased activity of p53, identifying a novel off-target activity for this drug. We then used DESIDE on data from resected, post-imatinib treatment tumor samples and identified a pattern in these tumors similar to that at late time points in the cell lines, and this pattern correlated with initial clinical response. The pattern showed increased activity of ETS domain-containing protein Elk-1 and signal transducers and activators of transcription 3 transcription factors, which are associated with the growth of side population cells. DESIDE infers the global reprogramming of signaling networks during treatment, permitting treatment modification that leverages ongoing drug development efforts, which is crucial for personalized medicine.}, |
|
507 |
- Address = {Division of Oncology Biostatistics and Bioinformatics, Johns Hopkins University, Baltimore, Maryland 21205, USA. mfo@jhu.edu}, |
|
508 |
- Au = {Ochs, MF and Rink, L and Tarn, C and Mburu, S and Taguchi, T and Eisenberg, B and Godwin, AK}, |
|
509 |
- Author = {Ochs, M.F. and Rink, L. and Tarn, C. and Mburu, S. and Taguchi, T. and Eisenberg, B. and Godwin, A.K.}, |
|
510 |
- Crdt = {2009/11/12 06:00}, |
|
511 |
- Da = {20091204}, |
|
512 |
- Date-Added = {2010-01-08 14:35:07 -0500}, |
|
513 |
- Date-Modified = {2010-02-02 15:57:46 -0500}, |
|
514 |
- Dcom = {20091221}, |
|
515 |
- Dep = {20091110}, |
|
516 |
- Doi = {10.1158/0008-5472.CAN-09-1709}, |
|
517 |
- Edat = {2009/11/12 06:00}, |
|
518 |
- Gr = {CA009035/CA/NCI NIH HHS/United States; CA106588/CA/NCI NIH HHS/United States; CA21661/CA/NCI NIH HHS/United States; LM009382/LM/NLM NIH HHS/United States}, |
|
519 |
- Issn = {1538-7445 (Electronic); 1538-7445 (Linking)}, |
|
520 |
- Jid = {2984705R}, |
|
521 |
- Journal = {Cancer Res}, |
|
522 |
- Jt = {Cancer research}, |
|
523 |
- Language = {eng}, |
|
524 |
- Mh = {Antineoplastic Agents/*pharmacology; Cell Line, Tumor; DNA Damage; Gastrointestinal Stromal Tumors/*drug therapy/*genetics/metabolism; Gene Expression Profiling; Humans; Piperazines/*pharmacology; Pyrimidines/*pharmacology; RNA, Messenger/biosynthesis/genetics; STAT3 Transcription Factor/metabolism; Signal Transduction; Tumor Suppressor Protein p53/genetics/metabolism; ets-Domain Protein Elk-1/metabolism}, |
|
525 |
- Mhda = {2009/12/22 06:00}, |
|
526 |
- Mid = {NIHMS149886}, |
|
527 |
- Number = {23}, |
|
528 |
- Oid = {NLM: NIHMS149886 {$[$}Available on 12/01/10{$]$}; NLM: PMC2789202 {$[$}Available on 12/01/10{$]$}}, |
|
529 |
- Own = {NLM}, |
|
530 |
- Pages = {9125--9132}, |
|
531 |
- Phst = {2009/11/10 {$[$}aheadofprint{$]$}}, |
|
532 |
- Pii = {0008-5472.CAN-09-1709}, |
|
533 |
- Pl = {United States}, |
|
534 |
- Pmc = {PMC2789202}, |
|
535 |
- Pmcr = {2010/12/01}, |
|
536 |
- Pmid = {19903850}, |
|
537 |
- Pst = {ppublish}, |
|
538 |
- Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't}, |
|
539 |
- Rn = {0 (Antineoplastic Agents); 0 (ELK1 protein, human); 0 (Piperazines); 0 (Pyrimidines); 0 (RNA, Messenger); 0 (STAT3 Transcription Factor); 0 (STAT3 protein, human); 0 (TP53 protein, human); 0 (Tumor Suppressor Protein p53); 0 (ets-Domain Protein Elk-1); 152459-95-5 (imatinib)}, |
|
540 |
- Sb = {IM}, |
|
541 |
- So = {Cancer Res. 2009 Dec 1;69(23):9125-32. Epub 2009 Nov 10.}, |
|
542 |
- Stat = {MEDLINE}, |
|
543 |
- Title = {Detection of treatment-induced changes in signaling pathways in gastrointestinal stromal tumors using transcriptomic data.}, |
|
544 |
- Volume = {69}, |
|
37 |
+@article{OCHS_2009, |
|
38 |
+ Author = {Ochs, Michael F. and Rink, Lori and Tarn, Chi and Mburu, Sarah and Taguchi, Takahiro and Eisenberg, Burton and Godwin, Andrew K.}, |
|
545 | 39 |
Year = {2009}, |
546 |
- Bdsk-Url-1 = {http://dx.doi.org/10.1158/0008-5472.CAN-09-1709}} |
|
547 |
- |
|
548 |
-@conference{Skilling1998, |
|
549 |
- Address = {Dordrecht/Boston/London}, |
|
550 |
- Author = {J. Skilling}, |
|
551 |
- Booktitle = {Maximum Entropy and Bayesian Methods, Proceedings of the 17th International Workshop on Maxiumum Entropy and Bayesian Methods of Statistical Analysis}, |
|
552 |
- Date-Added = {2010-01-08 14:27:23 -0500}, |
|
553 |
- Date-Modified = {2010-01-08 14:29:16 -0500}, |
|
554 |
- Editor = {G. J. Erickson and J. T. Rychert and C. R. Smith}, |
|
555 |
- Publisher = {Kluwer Academic Publishers}, |
|
556 |
- Title = {Massive inference and maximum entropy}, |
|
557 |
- Year = {1998}} |
|
40 |
+ Title = {Detection of Treatment-Induced Changes in Signaling Pathways in Gastrointestinal Stromal Tumors Using Transcriptomic Data}, |
|
41 |
+ Journal = {Cancer Research}, |
|
42 |
+ Volume = {69}, |
|
43 |
+ Number = {23}, |
|
44 |
+ Pages = {9125-9132}, |
|
45 |
+ Doi = {10.1158/0008-5472.CAN-09-1709}} |
|
558 | 46 |
|
559 |
-@article{Sibisi1997, |
|
560 |
- Author = {S. Sibisi and J. Skilling}, |
|
561 |
- Date-Added = {2010-01-08 13:50:30 -0500}, |
|
562 |
- Date-Modified = {2010-01-08 13:50:30 -0500}, |
|
563 |
- Journal = {Journal of the Royal Statistical Society, B}, |
|
564 |
- Keywords = {Bayesian, probability}, |
|
565 |
- Number = {1}, |
|
566 |
- Pages = {217-235}, |
|
567 |
- Title = {Prior distributions on measure space}, |
|
568 |
- Volume = {59}, |
|
569 |
- Year = {1997}, |
|
570 |
- Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAfIAAAAAAfIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMkCCRlIKwAAAAnLqh9TaWJpc2lTa2lsbGluZ19KUm95YWwjOUNDRTEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACczhxKuwAwAAAAAAAAAA/////wAACSAAAAAAAAAAAAAAAAAAAAAKUmVmZXJlbmNlcwAQAAgAAMkCT2kAAAARAAgAAMSr6EMAAAABABAACcuqAAhtRgAITPkAAJMnAAIAUE1hY2ludG9zaCBIRDpVc2VyczplamZlcnRpZzpEb2N1bWVudHM6UmVmZXJlbmNlczpTaWJpc2lTa2lsbGluZ19KUm95YWwjOUNDRTEucGRmAA4ATgAmAFMAaQBiAGkAcwBpAFMAawBpAGwAbABpAG4AZwBfAEoAUgBvAHkAYQBsAFMAdABhAHQAUwBvAGMAQgBfADEAOQA5ADcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEpVc2Vycy9lamZlcnRpZy9Eb2N1bWVudHMvUmVmZXJlbmNlcy9TaWJpc2lTa2lsbGluZ19KUm95YWxTdGF0U29jQl8xOTk3LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBiLi4vLi4vLi4vLi4vLi4vLi4vLi4vLi4vVXNlcnMvZWpmZXJ0aWcvRG9jdW1lbnRzL1JlZmVyZW5jZXMvU2liaXNpU2tpbGxpbmdfSlJveWFsU3RhdFNvY0JfMTk5Ny5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKAClgKYAp0CpgKxArUCwwLKAtMDOAM9A0ADTQNSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA2Q=}} |
571 | 47 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,71 @@ |
1 |
+## Pattern Markers |
|
2 |
+ |
|
3 |
+## Selecting Appropiate Number of Patterns |
|
4 |
+ |
|
5 |
+Selecting the best value for *nPatterns* is the most difficult part of the |
|
6 |
+analysis. For starters, there is not one "best" value for the number of |
|
7 |
+patterns - various numbers of patterns can capture various levels of |
|
8 |
+granularity in the data. To further complicate the problem, there's not a |
|
9 |
+clear way to compare runs for different numbers of patterns. |
|
10 |
+ |
|
11 |
+Here we show the simplest approach of selecting dimensionality by plotting the |
|
12 |
+error and selecting the least number of patterns that sufficiently reduce the |
|
13 |
+error. We also introduce another way to pass parameters - any parameter in the |
|
14 |
+*CogapsParams* class can be passed by name directly to the *CoGAPS* function, |
|
15 |
+overwriting the value contained in `params`. |
|
16 |
+ |
|
17 |
+```{r eval=FALSE} |
|
18 |
+# define the range of patterns we are searching over |
|
19 |
+pattern_range <- c(3,5,8) |
|
20 |
+ |
|
21 |
+# run CoGAPS with each value in range |
|
22 |
+resultList <- lapply(pattern_range, function(p) CoGAPS(GIST.D, params, nPatterns=p, nIterations=3000, outputFrequency=2500)) |
|
23 |
+ |
|
24 |
+# plot chi-sq values for each run |
|
25 |
+chisq <- sapply(resultList, function(result) getMeanChiSq(result)) |
|
26 |
+plot(pattern_range, chisq) |
|
27 |
+``` |
|
28 |
+ |
|
29 |
+## CoGAPS-based statistics |
|
30 |
+ |
|
31 |
+### CoGAPSStat |
|
32 |
+ |
|
33 |
+The function *calcCoGAPSStat* is used to infer gene set activity in each |
|
34 |
+pattern from the CoGAPS matrix factorization. *calcCoGAPSStat* calculates the |
|
35 |
+gene set statistics for each column of the feature matrix using a Z-score, the |
|
36 |
+input gene set and permutation tests. The function outputs a list containing: |
|
37 |
+ |
|
38 |
+* *GSUpreg* lists p-values for upregulation of each gene set in each pattern |
|
39 |
+* *GSDownreg* lists p-values for downregulation of each gene set in each pattern |
|
40 |
+* *GSActEst* provides gene set activity through conversion of p-values to |
|
41 |
+activity estimates of each gene set in each pattern |
|
42 |
+ |
|
43 |
+### computeGeneGSProb |
|
44 |
+ |
|
45 |
+Now using the *computeGeneGSProb* function, we can use the gene set statistic |
|
46 |
+(returned from calcCoGAPSStat) to compute a statistic to quantify the |
|
47 |
+likelihood of membership for each gene annotated to a set based on its |
|
48 |
+inferred activity. The statistic used to infer membership compares the |
|
49 |
+expression pattern of a gene annotated as a member of a gene set to the common |
|
50 |
+expression pattern of all annotated members of that gene set. The function |
|
51 |
+outputs the p-value of a set membership for each gene specified in GStoGenes. |
|
52 |
+ |
|
53 |
+ |
|
54 |
+ |
|
55 |
+Notice that we pass the transpose of the *GIST* data to *scCoGAPS* since |
|
56 |
+the normal dataset is 1363 x 9 and we want a data set with a large number of |
|
57 |
+samples. |
|
58 |
+ |
|
59 |
+It is neccesary to name the simulation so that the computation portion knows |
|
60 |
+what the subset files are called. For convenience the name provided to the |
|
61 |
+subset function is returned back so that it can be saved. |
|
62 |
+ |
|
63 |
+Running the computation is just as easy as running normal *CoGAPS*. The results |
|
64 |
+will be in the exact same format as *CoGAPS*, only the intermediate computation |
|
65 |
+is different. Notice here that no *params* object is accepted - all parameters |
|
66 |
+must be passed by name. |
|
67 |
+ |
|
68 |
+```{r eval=FALSE} |
|
69 |
+GWCoGAPS(gw_sim_name, nPatterns=3, nIterations=1000) |
|
70 |
+scCoGAPS(sc_sim_name, nPatterns=3, nIterations=1000) |
|
71 |
+``` |