... | ... |
@@ -1,16 +1,18 @@ |
1 | 1 |
Package: CoGAPS |
2 |
-Version: 2.99.0 |
|
3 |
-Date: 2014-08-23 |
|
2 |
+Version: 2.99.2 |
|
3 |
+Date: 2018-04-24 |
|
4 | 4 |
Title: Coordinated Gene Activity in Pattern Sets |
5 | 5 |
Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey, |
6 |
- Genevieve Stein-O'Brien, Michael Considine, Maggie Wodicka, John Stansfield, Shawn Sivy, Carlo Colantuoni, Alexander Favorov, Mike Ochs, Elana Fertig |
|
6 |
+ Genevieve Stein-O'Brien, Michael Considine, Maggie Wodicka, John Stansfield, |
|
7 |
+ Shawn Sivy, Carlo Colantuoni, Alexander Favorov, Mike Ochs, Elana Fertig |
|
7 | 8 |
Description: Coordinated Gene Activity in Pattern Sets (CoGAPS) |
8 | 9 |
implements a Bayesian MCMC matrix factorization algorithm, |
9 | 10 |
GAPS, and links it to gene set statistic methods to infer biological |
10 | 11 |
process activity. It can be used to perform sparse matrix factorization on |
11 | 12 |
any data, and when this data represents biomolecules, to do gene set |
12 | 13 |
analysis. |
13 |
-Maintainer: Elana J. Fertig <ejfertig@jhmi.edu> |
|
14 |
+Maintainer: Elana J. Fertig <ejfertig@jhmi.edu>, |
|
15 |
+ Thomas D. Sherman <tomsherman159@gmail.com> |
|
14 | 16 |
Depends: |
15 | 17 |
R (>= 3.4.0), |
16 | 18 |
Rcpp (>= 0.11.0) |
... | ... |
@@ -1,7 +1,6 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 | 3 |
export(CoGAPS) |
4 |
-export(CoGapsFromCheckpoint) |
|
5 | 4 |
export(GWCoGAPS) |
6 | 5 |
export(GWCoGapsFromCheckpoint) |
7 | 6 |
export(binaryA) |
... | ... |
@@ -9,16 +8,21 @@ export(buildReport) |
9 | 8 |
export(calcCoGAPSStat) |
10 | 9 |
export(calcGeneGSStat) |
11 | 10 |
export(calcZ) |
11 |
+export(cellMatchR) |
|
12 | 12 |
export(computeGeneGSProb) |
13 | 13 |
export(createGWCoGAPSSets) |
14 |
+export(createscCoGAPSSets) |
|
14 | 15 |
export(gapsMapRun) |
15 | 16 |
export(gapsRun) |
17 |
+export(patternMarkers) |
|
18 |
+export(patternMatch4Parallel) |
|
16 | 19 |
export(plotAtoms) |
17 | 20 |
export(plotDiag) |
18 | 21 |
export(plotGAPS) |
19 | 22 |
export(plotP) |
20 | 23 |
export(reconstructGene) |
21 | 24 |
export(residuals) |
25 |
+export(scCoGAPS) |
|
22 | 26 |
import(doParallel) |
23 | 27 |
import(foreach) |
24 | 28 |
import(ggplot2) |
... | ... |
@@ -34,8 +34,6 @@ |
34 | 34 |
#' @param fixedPatterns matrix of fixed values in either A or P matrix |
35 | 35 |
#' @param checkpointInterval time (in seconds) between creating a checkpoint |
36 | 36 |
#' @param checkpointFile name of the checkpoint file |
37 |
-#' @param pumpThreshold type of threshold for pump statistic |
|
38 |
-#' @param nPumpSamples number of samples used in pump statistic |
|
39 | 37 |
#' @param ... keeps backwards compatibility with arguments from older versions |
40 | 38 |
#' @return list with A and P matrix estimates |
41 | 39 |
#' @importFrom methods new |
... | ... |
@@ -47,8 +45,7 @@ CoGAPS <- function(D, S, nFactor=7, nEquil=1000, nSample=1000, nOutputs=1000, |
47 | 45 |
nSnapshots=0, alphaA=0.01, alphaP=0.01, maxGibbmassA=100, maxGibbmassP=100, |
48 | 46 |
seed=-1, messages=TRUE, singleCellRNASeq=FALSE, whichMatrixFixed='N', |
49 | 47 |
fixedPatterns=matrix(0), checkpointInterval=0, |
50 |
-checkpointFile="gaps_checkpoint.out", pumpThreshold="unique", |
|
51 |
-nPumpSamples=0, ...) |
|
48 |
+checkpointFile="gaps_checkpoint.out", ...) |
|
52 | 49 |
{ |
53 | 50 |
# get v2 arguments |
54 | 51 |
oldArgs <- list(...) |
... | ... |
@@ -67,6 +64,14 @@ nPumpSamples=0, ...) |
67 | 64 |
if (missing(S) & !is.null(oldArgs$unc)) |
68 | 65 |
S <- oldArgs$unc |
69 | 66 |
|
67 |
+ # get pump arguments - hidden for now from user |
|
68 |
+ pumpThreshold <- "unique" |
|
69 |
+ nPumpSamples <- 0 |
|
70 |
+ if (!is.null(list(...)$pumpThreshold)) |
|
71 |
+ pumpThreshold <- list(...)$pumpThreshold |
|
72 |
+ if (!is.null(list(...)$nPumpSamples)) |
|
73 |
+ pumpThreshold <- list(...)$nPumpSamples |
|
74 |
+ |
|
70 | 75 |
# check arguments |
71 | 76 |
if (class(D) != "matrix" | class(S) != "matrix") |
72 | 77 |
stop('D and S must be matrices') |
... | ... |
@@ -115,7 +120,6 @@ nPumpSamples=0, ...) |
115 | 120 |
#' @param path path to checkpoint file |
116 | 121 |
#' @param checkpointFile name for future checkpooints made |
117 | 122 |
#' @return list with A and P matrix estimates |
118 |
-#' @export |
|
119 | 123 |
CoGapsFromCheckpoint <- function(D, S, path, checkpointFile=NA) |
120 | 124 |
{ |
121 | 125 |
if (is.na(checkpointFile)) |
... | ... |
@@ -129,7 +133,7 @@ CoGapsFromCheckpoint <- function(D, S, path, checkpointFile=NA) |
129 | 133 |
#' compiler/version was used, which compile time options were enabled, etc... |
130 | 134 |
#' @return display builds information |
131 | 135 |
#' @examples |
132 |
-#' CoGAPS::displayBuildReport() |
|
136 |
+#' CoGAPS::buildReport() |
|
133 | 137 |
#' @export |
134 | 138 |
buildReport <- function() |
135 | 139 |
{ |
... | ... |
@@ -166,7 +170,7 @@ output_atomic=FALSE, fixedBinProbs=FALSE, fixedDomain="N", sampleSnapshots=TRUE, |
166 | 170 |
numSnapshots=100, alphaA=0.01, nMaxA=100000, max_gibbmass_paraA=100.0, |
167 | 171 |
alphaP=0.01, nMaxP=100000, max_gibbmass_paraP=100.0, seed=-1, messages=TRUE) |
168 | 172 |
{ |
169 |
- #warning('gapsRun is deprecated with v3.0, use CoGAPS') |
|
173 |
+ warning('gapsRun is deprecated with v3.0, use CoGAPS') |
|
170 | 174 |
CoGAPS(D, S, nFactor=nFactor, nEquil=nEquil, nSample=nSample, |
171 | 175 |
nOutputs=nOutR, nSnapshots=ifelse(sampleSnapshots,numSnapshots,0), |
172 | 176 |
alphaA=alphaA, alphaP=alphaP, maxGibbmassA=max_gibbmass_paraA, |
... | ... |
@@ -196,7 +200,7 @@ sampleSnapshots=TRUE, numSnapshots=100, alphaA=0.01, nMaxA=100000, |
196 | 200 |
max_gibbmass_paraA=100.0, alphaP=0.01, nMaxP=100000, max_gibbmass_paraP=100.0, |
197 | 201 |
seed=-1, messages=TRUE) |
198 | 202 |
{ |
199 |
- #warning('gapsMapRun is deprecated with v3.0, use CoGaps') |
|
203 |
+ warning('gapsMapRun is deprecated with v3.0, use CoGaps') |
|
200 | 204 |
CoGAPS(D, S, nFactor=nFactor, nEquil=nEquil, nSample=nSample, |
201 | 205 |
nOutputs=nOutR, nSnapshots=ifelse(sampleSnapshots,numSnapshots,0), |
202 | 206 |
alphaA=alphaA, alphaP=alphaP, maxGibbmassA=max_gibbmass_paraA, |
... | ... |
@@ -209,7 +213,7 @@ v2CoGAPS <- function(result, ...) |
209 | 213 |
{ |
210 | 214 |
if (!is.null(list(...)$GStoGenes)) |
211 | 215 |
{ |
212 |
- #warning('GStoGenes is deprecated with v3.0, see CoGAPS documentation') |
|
216 |
+ warning('GStoGenes is deprecated with v3.0, see CoGAPS documentation') |
|
213 | 217 |
if (is.null(list(...)$plot) | list(...)$plot) |
214 | 218 |
{ |
215 | 219 |
plotGAPS(result$Amean, result$Pmean) |
... | ... |
@@ -9,6 +9,8 @@ |
9 | 9 |
#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
10 | 10 |
#' @param cut number of branches at which to cut dendrogram used in patternMatch4Parallel |
11 | 11 |
#' @param minNS minimum of individual set contributions a cluster must contain |
12 |
+#' @param manualMatch logical indicating whether or not to stop after initial phase for manual pattern matching |
|
13 |
+#' @param consensusPatterns fixed pattern matrix to be used to ensure reciprocity of A weights accross sets |
|
12 | 14 |
#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun} |
13 | 15 |
#' @return list of A and P estimates |
14 | 16 |
#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
... | ... |
@@ -16,18 +18,30 @@ |
16 | 18 |
#' data(SimpSim) |
17 | 19 |
#' sim_name <- "example" |
18 | 20 |
#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
19 |
-#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000) |
|
21 |
+#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200) |
|
20 | 22 |
#' @export |
21 |
-GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, ...) |
|
23 |
+GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, manualMatch=FALSE, consensusPatterns=NULL, ...) |
|
22 | 24 |
{ |
23 | 25 |
if (!is.null(list(...)$checkpointFile)) |
26 |
+ { |
|
24 | 27 |
stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter") |
25 |
- allDataSets <- preInitialPhase(simulationName, nCores) |
|
26 |
- initialResult <- runInitialPhase(simulationName, allDataSets, nFactor, ...) |
|
27 |
- matchedPatternSets <- postInitialPhase(initialResult, length(allDataSets), cut, minNS) |
|
28 |
- save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", sep="")) |
|
29 |
- finalResult <- runFinalPhase(simulationName, allDataSets, matchedPatternSets, ...) |
|
30 |
- return(postFinalPhase(finalResult, matchedPatternSets)) |
|
28 |
+ } |
|
29 |
+ |
|
30 |
+ if (is.null(consensusPatterns)) |
|
31 |
+ { |
|
32 |
+ allDataSets <- preInitialPhase(simulationName, nCores) |
|
33 |
+ initialResult <- runInitialPhase(simulationName, allDataSets, nFactor, ...) |
|
34 |
+ if (manualMatch) |
|
35 |
+ { |
|
36 |
+ saveRDS(initialResult,file=paste(simulationName,"_initial.rds", sep="")) |
|
37 |
+ stop("Please provide consensus patterns upon restarting.") |
|
38 |
+ } |
|
39 |
+ matchedPatternSets <- postInitialPhase(initialResult, length(allDataSets), cut, minNS) |
|
40 |
+ save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", sep="")) |
|
41 |
+ consensusPatterns <- matchedPatternSets[[1]] |
|
42 |
+ } |
|
43 |
+ finalResult <- runFinalPhase(simulationName, allDataSets, consensusPatterns, nCores, ...) |
|
44 |
+ return(postFinalPhase(finalResult, consensusPatterns)) |
|
31 | 45 |
} |
32 | 46 |
|
33 | 47 |
#' Restart a GWCoGaps Run from Checkpoint |
... | ... |
@@ -35,15 +49,23 @@ GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, ...) |
35 | 49 |
#' @inheritParams GWCoGAPS |
36 | 50 |
#' @return list of A and P estimates |
37 | 51 |
#' @importFrom utils file_test |
52 |
+#' @examples |
|
53 |
+#' data(SimpSim) |
|
54 |
+#' sim_name <- "example" |
|
55 |
+#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
|
56 |
+#' trash <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200) |
|
57 |
+#' result <- GWCoGapsFromCheckpoint(sim_name, 2) |
|
38 | 58 |
#' @export |
39 |
-GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA, ...) |
|
59 |
+GWCoGapsFromCheckpoint <- function(simulationName, nCores, cut=NA, minNS=NA, ...) |
|
40 | 60 |
{ |
41 | 61 |
# find data files |
42 | 62 |
allDataSets <- preInitialPhase(simulationName, nCores) |
43 | 63 |
|
44 | 64 |
# figure out phase from file signature |
45 |
- initialCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, "_initial_cpt_[0-9]+.out", sep="")) |
|
46 |
- finalCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, "_final_cpt_[0-9]+.out", sep="")) |
|
65 |
+ initialCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, |
|
66 |
+ "_initial_cpt_[0-9]+.out", sep="")) |
|
67 |
+ finalCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, |
|
68 |
+ "_final_cpt_[0-9]+.out", sep="")) |
|
47 | 69 |
|
48 | 70 |
if (length(finalCpts)) |
49 | 71 |
{ |
... | ... |
@@ -51,10 +73,12 @@ GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA, |
51 | 73 |
{ |
52 | 74 |
# load data set and shift values so gene minimum is zero |
53 | 75 |
load(allDataSets[[i]]) |
54 |
- sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x)))) |
|
76 |
+ sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) |
|
77 |
+ pmin(0,min(x)))) |
|
55 | 78 |
|
56 | 79 |
# run CoGAPS with fixed patterns |
57 |
- cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", sep="") |
|
80 |
+ cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", |
|
81 |
+ sep="") |
|
58 | 82 |
CoGapsFromCheckpoint(sampleD, sampleS, cptFileName) |
59 | 83 |
} |
60 | 84 |
load(paste(simulationName, "_matched_ps.RData", sep="")) |
... | ... |
@@ -62,7 +86,9 @@ GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA, |
62 | 86 |
else if (file_test("-f", paste(simulationName, "_matched_ps.RData", sep=""))) |
63 | 87 |
{ |
64 | 88 |
load(paste(simulationName, "_matched_ps.RData", sep="")) |
65 |
- finalResult <- runFinalPhase(simulationName, allDataSets, matchedPatternSets, ...) |
|
89 |
+ consensusPatterns<-matchedPatternSets[[1]] |
|
90 |
+ finalResult <- runFinalPhase(simulationName, allDataSets, |
|
91 |
+ consensusPatterns, nCores, ...) |
|
66 | 92 |
} |
67 | 93 |
else if (length(initialCpts)) |
68 | 94 |
{ |
... | ... |
@@ -71,21 +97,27 @@ GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA, |
71 | 97 |
{ |
72 | 98 |
# load data set and shift values so gene minimum is zero |
73 | 99 |
load(allDataSets[[i]]) |
74 |
- sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x)))) |
|
100 |
+ sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) |
|
101 |
+ pmin(0,min(x)))) |
|
75 | 102 |
|
76 | 103 |
# run CoGAPS from checkpoint |
77 |
- cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", sep="") |
|
104 |
+ cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", |
|
105 |
+ sep="") |
|
78 | 106 |
CoGapsFromCheckpoint(sampleD, sampleS, cptFileName) |
79 | 107 |
} |
80 |
- matchedPatternSets <- postInitialPhase(initialResult, length(allDataSets), cut, minNS) |
|
81 |
- save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", sep="")) |
|
82 |
- finalResult <- runFinalPhase(simulationName, allDataSets, matchedPatternSets, ...) |
|
108 |
+ matchedPatternSets <- postInitialPhase(initialResult, |
|
109 |
+ length(allDataSets), cut, minNS) |
|
110 |
+ save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", |
|
111 |
+ sep="")) |
|
112 |
+ consensusPatterns<-matchedPatternSets[[1]] |
|
113 |
+ finalResult <- runFinalPhase(simulationName, allDataSets, |
|
114 |
+ consensusPatterns, nCores, ...) |
|
83 | 115 |
} |
84 | 116 |
else |
85 | 117 |
{ |
86 | 118 |
stop("no checkpoint files found") |
87 | 119 |
} |
88 |
- return(postFinalPhase(finalResult, matchedPatternSets)) |
|
120 |
+ return(postFinalPhase(finalResult, consensusPatterns)) |
|
89 | 121 |
} |
90 | 122 |
|
91 | 123 |
preInitialPhase <- function(simulationName, nCores) |
... | ... |
@@ -96,7 +128,9 @@ preInitialPhase <- function(simulationName, nCores) |
96 | 128 |
|
97 | 129 |
# establish the number of cores that we are able to use |
98 | 130 |
if (is.na(nCores)) |
131 |
+ { |
|
99 | 132 |
nCores <- length(allDataSets) |
133 |
+ } |
|
100 | 134 |
registerDoParallel(cores=nCores) |
101 | 135 |
return(allDataSets) |
102 | 136 |
} |
... | ... |
@@ -128,18 +162,36 @@ postInitialPhase <- function(initialResult, nSets, cut, minNS) |
128 | 162 |
|
129 | 163 |
#run postpattern match function |
130 | 164 |
if (is.na(cut)) |
165 |
+ { |
|
131 | 166 |
cut <- nFactor |
167 |
+ } |
|
168 |
+ |
|
132 | 169 |
return(patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets, cnt=cut, |
133 | 170 |
minNS=minNS, bySet=TRUE)) |
134 | 171 |
} |
135 | 172 |
|
136 |
-runFinalPhase <- function(simulationName, allDataSets, matchedPatternSets, ...) |
|
137 |
-{ |
|
173 |
+runFinalPhase <- function(simulationName, allDataSets, consensusPatterns, nCores, ...) |
|
174 |
+{ |
|
175 |
+ if (length(dim(consensusPatterns)) != 2) |
|
176 |
+ { |
|
177 |
+ stop("consensusPatterns must be a matrix") |
|
178 |
+ } |
|
179 |
+ |
|
180 |
+ # find data files if providing consensus patterns |
|
181 |
+ fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="") |
|
182 |
+ allDataSets <- list.files(full.names=TRUE, pattern=fileSig) |
|
183 |
+ |
|
184 |
+ if (is.na(nCores)) |
|
185 |
+ { |
|
186 |
+ nCores <- length(allDataSets) |
|
187 |
+ } |
|
188 |
+ registerDoParallel(cores=nCores) |
|
189 |
+ |
|
138 | 190 |
# generate seeds for parallelization |
139 | 191 |
nut <- generateSeeds(chains=length(allDataSets), seed=-1) |
140 | 192 |
|
141 | 193 |
# final number of factors |
142 |
- nFactorFinal <- nrow(matchedPatternSets[[1]]) |
|
194 |
+ nFactorFinal <- nrow(consensusPatterns) |
|
143 | 195 |
|
144 | 196 |
# run fixed CoGAPS |
145 | 197 |
finalResult <- foreach(i=1:length(allDataSets)) %dopar% |
... | ... |
@@ -150,7 +202,7 @@ runFinalPhase <- function(simulationName, allDataSets, matchedPatternSets, ...) |
150 | 202 |
|
151 | 203 |
# run CoGAPS with fixed patterns |
152 | 204 |
cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", sep="") |
153 |
- CoGAPS(sampleD, sampleS, fixedPatterns=matchedPatternSets[[1]], |
|
205 |
+ CoGAPS(sampleD, sampleS, fixedPatterns=consensusPatterns, |
|
154 | 206 |
nFactor=nFactorFinal, seed=nut[i], checkpointFile=cptFileName, |
155 | 207 |
whichMatrixFixed='P', ...) |
156 | 208 |
|
... | ... |
@@ -158,11 +210,10 @@ runFinalPhase <- function(simulationName, allDataSets, matchedPatternSets, ...) |
158 | 210 |
return(finalResult) |
159 | 211 |
} |
160 | 212 |
|
161 |
-postFinalPhase <- function(finalResult, matchedPatternSets) |
|
213 |
+postFinalPhase <- function(finalResult, consensusPatterns) |
|
162 | 214 |
{ |
163 |
- Aresult <- postFixed4Parallel(finalResult, matchedPatternSets[[1]]) |
|
164 |
- finalResult <- list("Amean"=Aresult$A, "Asd"=Aresult$Asd, |
|
165 |
- "Pmean"=matchedPatternSets, "PbySet"=matchedPatternSets[["PBySet"]]) |
|
215 |
+ Aresult <- postFixed4Parallel(finalResult, consensusPatterns) |
|
216 |
+ finalResult <- list("Amean"=Aresult$A, "Asd"=Aresult$Asd,"Pmean"=consensusPatterns) |
|
166 | 217 |
class(finalResult) <- append(class(finalResult), "CoGAPS") |
167 | 218 |
return(finalResult) |
168 | 219 |
} |
... | ... |
@@ -11,8 +11,8 @@ |
11 | 11 |
#' @param nullGenes logical indicating gene adjustment |
12 | 12 |
#' @return gene similiarity statistic |
13 | 13 |
#' @examples |
14 |
-#' data('SimpSim') |
|
15 |
-#' calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
14 |
+#' data("SimpSim") |
|
15 |
+#' calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]], |
|
16 | 16 |
#' numPerm=500) |
17 | 17 |
#' @export |
18 | 18 |
calcGeneGSStat <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), |
... | ... |
@@ -67,8 +67,8 @@ nullGenes=FALSE) |
67 | 67 |
#' @return A vector of length GSGenes containing the p-values of set membership |
68 | 68 |
#' for each gene containined in the set specified in GSGenes. |
69 | 69 |
#' @examples |
70 |
-#' data('SimpSim') |
|
71 |
-#' computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
70 |
+#' data("SimpSim") |
|
71 |
+#' computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]], |
|
72 | 72 |
#' numPerm=500) |
73 | 73 |
#' @export |
74 | 74 |
computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)), |
75 | 75 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,121 @@ |
1 |
+#' cellMatchR |
|
2 |
+#' @export |
|
3 |
+#' |
|
4 |
+#' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet} |
|
5 |
+#' @param nSets number of parallel sets used to generate \code{Atot} |
|
6 |
+#' @param cnt number of branches at which to cut dendrogram |
|
7 |
+#' @param minNS minimum of individual set contributions a cluster must contain |
|
8 |
+#' @param maxNS maximum of individual set contributions a cluster must contain |
|
9 |
+#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE. |
|
10 |
+#' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot} |
|
11 |
+#' @param plotDen plot |
|
12 |
+#' @param ... additional parameters for \code{agnes} |
|
13 |
+#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each |
|
14 |
+#' concensus pattern is also returned. |
|
15 |
+cellMatchR <- function(Atot, nSets, cnt, minNS=NA, maxNS=NA, |
|
16 |
+ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE, ...) |
|
17 |
+{ |
|
18 |
+ if (is.na(minNS)) |
|
19 |
+ { |
|
20 |
+ minNS <- nSets / 2 |
|
21 |
+ } |
|
22 |
+ if (is.na(maxNS)) |
|
23 |
+ { |
|
24 |
+ maxNS <- nSets + minNS |
|
25 |
+ } |
|
26 |
+ if (!ignore.NA) |
|
27 |
+ { |
|
28 |
+ if (anyNA(Atot)) |
|
29 |
+ { |
|
30 |
+ warning(paste("Non-sparse matrixes produced. Reducing the number", |
|
31 |
+ "of patterns asked for and rerun.")) |
|
32 |
+ } |
|
33 |
+ } |
|
34 |
+ else |
|
35 |
+ { |
|
36 |
+ Atot <- Atot[complete.cases(Atot),] |
|
37 |
+ } |
|
38 |
+ |
|
39 |
+ corcut <- function(Atot, minNS, cnt, cluster.method) |
|
40 |
+ { |
|
41 |
+ corr.dist <- cor(Atot) |
|
42 |
+ corr.dist <- 1-corr.dist |
|
43 |
+ |
|
44 |
+ clust <- agnes(x=corr.dist, diss=TRUE, cluster.method) |
|
45 |
+ #clust=fastcluster::hclust(dist(corr.dist)) |
|
46 |
+ cut <- cutree(as.hclust(clust),k=cnt) |
|
47 |
+ |
|
48 |
+ cls <- sort(unique(cut)) |
|
49 |
+ cMNs <- matrix(ncol=cnt,nrow=dim(Atot)[1]) |
|
50 |
+ colnames(cMNs) <- cls |
|
51 |
+ rownames(cMNs) <- rownames(Atot) |
|
52 |
+ |
|
53 |
+ RtoMeanPattern <- list() |
|
54 |
+ AByClust <- list() |
|
55 |
+ for (i in cls) |
|
56 |
+ { |
|
57 |
+ if (!is.null(dim(Atot[,cut == i]))) |
|
58 |
+ { |
|
59 |
+ if (dim(Atot[,cut == i])[2] >= minNS) |
|
60 |
+ { |
|
61 |
+ cMNs[,i] <- rowMeans(Atot[,cut==i]) |
|
62 |
+ AByClust[[i]] <- Atot[,cut==i] |
|
63 |
+ nIN <- sum(cut==i) |
|
64 |
+ RtoMeanPattern[[i]] <- sapply(1:nIN, function(j) |
|
65 |
+ round(cor(x=Atot[,cut==i][,j], y=cMNs[,i]),3)) |
|
66 |
+ } |
|
67 |
+ } |
|
68 |
+ } |
|
69 |
+ |
|
70 |
+ AByClust[sapply(AByClust,is.null)] <- NULL |
|
71 |
+ RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL |
|
72 |
+ return(list("RtoMeanPattern"=RtoMeanPattern, "AByClust"=AByClust)) |
|
73 |
+ } |
|
74 |
+ |
|
75 |
+ cc <- corcut(Atot, minNS, cnt, cluster.method) |
|
76 |
+ |
|
77 |
+ ### split by maxNS |
|
78 |
+ indx <- which(unlist(lapply(cc$AByClust, function(x) dim(x)[1] > maxNS))) |
|
79 |
+ i <- 1 |
|
80 |
+ while (length(indx) > 0) |
|
81 |
+ { |
|
82 |
+ icc <- corcut(cc$AByClust[[indx[1]]], minNS, 2, cluster.method) |
|
83 |
+ if (length(icc[[2]]) == 0) |
|
84 |
+ { |
|
85 |
+ indx <- indx[-1] |
|
86 |
+ next |
|
87 |
+ } |
|
88 |
+ else |
|
89 |
+ { |
|
90 |
+ cc$AByClust[[indx[1]]] <- icc[[2]][[1]] |
|
91 |
+ cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]] |
|
92 |
+ if (length(icc[[2]]) > 1) |
|
93 |
+ { |
|
94 |
+ cc$AByClust<-append(cc$AByClust,icc[[2]][2]) |
|
95 |
+ cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2]) |
|
96 |
+ } |
|
97 |
+ indx <- which(unlist(lapply(cc$AByClust,function(x) dim(x)[1]>maxNS))) |
|
98 |
+ } |
|
99 |
+ } |
|
100 |
+ |
|
101 |
+ #weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]]) |
|
102 |
+ AByCDSWavg <- t(sapply(1:length(cc$AByClust), function(z) |
|
103 |
+ apply(cc$AByClust[[z]], 1, function(x) |
|
104 |
+ weighted.mean(x, (cc$RtoMeanPattern[[z]])^3)))) |
|
105 |
+ rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust), function(x) |
|
106 |
+ paste("Pattern",x)) |
|
107 |
+ #scale As |
|
108 |
+ Amax <- apply(AByCDSWavg, 1, max) |
|
109 |
+ AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1], function(x) |
|
110 |
+ AByCDSWavg[x,] / Amax[x])) |
|
111 |
+ rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg) |
|
112 |
+ |
|
113 |
+ if (bySet) |
|
114 |
+ { |
|
115 |
+ return(list("consenusAs"=t(AByCDSWavgScaled),"ABySet"=cc)) |
|
116 |
+ } |
|
117 |
+ else |
|
118 |
+ { |
|
119 |
+ return(AByCDSWavgScaled) |
|
120 |
+ } |
|
121 |
+} |
41 | 41 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,57 @@ |
1 |
+#' Create Gene Sets for scCoGAPS |
|
2 |
+#' @export |
|
3 |
+#' |
|
4 |
+#' @description factors whole genome data into randomly generated sets for indexing |
|
5 |
+#' @param D data matrix |
|
6 |
+#' @param nSets number of sets to partition the data into |
|
7 |
+#' @param simulationName name used to identify files created by this simulation |
|
8 |
+#' @param samplingRatio vector of relative quantities to use for sampling celltypes |
|
9 |
+#' @param anotionObj vector of same length as number of columns of D |
|
10 |
+#' @param path character string indicating were to save resulting data objects. default is current working dir |
|
11 |
+#' @return simulationName used to identify saved files |
|
12 |
+#' @examples |
|
13 |
+#' data(SimpSim) |
|
14 |
+#' createscCoGAPSSets(SimpSim.D, nSets=2, simulationName="example") |
|
15 |
+createscCoGAPSSets <- function(D, nSets, simulationName, samplingRatio=NULL, |
|
16 |
+path="", anotionObj=NULL) |
|
17 |
+{ |
|
18 |
+ # check gene names |
|
19 |
+ if (length(unique(colnames(D))) != length(colnames(D))) |
|
20 |
+ { |
|
21 |
+ warning("Cell identifiers not unique!") |
|
22 |
+ } |
|
23 |
+ |
|
24 |
+ # partition data by sampling random sets of cells |
|
25 |
+ cells <- 1:ncol(D) |
|
26 |
+ setSize <- floor(length(cells) / nSets) |
|
27 |
+ for (set in 1:nSets) |
|
28 |
+ { |
|
29 |
+ if (is.null(samplingRatio)) |
|
30 |
+ { |
|
31 |
+ # sample cell names |
|
32 |
+ sampleSize <- ifelse(set == nSets, length(cells), setSize) |
|
33 |
+ cellset <- sample(cells, sampleSize, replace=FALSE) |
|
34 |
+ cells <- cells[!(cells %in% cellset)] |
|
35 |
+ } |
|
36 |
+ else |
|
37 |
+ { |
|
38 |
+ if (length(unique(anotionObj)) != length(samplingRatio)) |
|
39 |
+ { |
|
40 |
+ warning("Not all celltypes will be sampled from.") |
|
41 |
+ } |
|
42 |
+ ct.indx <- lapply(unique(anotionObj), function(x) which(anotionObj == x)) |
|
43 |
+ cellset <- lapply(unique(anotionObj), function(x) |
|
44 |
+ sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE)) |
|
45 |
+ } |
|
46 |
+ |
|
47 |
+ # partition data |
|
48 |
+ sampleD <- D[,cellset] |
|
49 |
+ #log transform |
|
50 |
+ sampleD <- log2(sampleD+1) |
|
51 |
+ # generate S |
|
52 |
+ sampleS <- pmax(.1*sampleD, .1) |
|
53 |
+ save(sampleD, sampleS, file=paste0(path,simulationName, "_partition_", |
|
54 |
+ set,".RData")); |
|
55 |
+ } |
|
56 |
+ return(simulationName) |
|
57 |
+} |
... | ... |
@@ -8,70 +8,79 @@ |
8 | 8 |
#' @param full logical indicating whether to return the ranks of each gene for each pattern |
9 | 9 |
#' @return By default a non-overlapping list of genes associated with each \code{lp}. If \code{full=TRUE} a data.frame of |
10 | 10 |
#' genes rankings with a column for each \code{lp} will also be returned. |
11 |
+#' @export |
|
11 | 12 |
patternMarkers <- function(Amatrix=NA, scaledPmatrix=FALSE, Pmatrix=NA, |
12 | 13 |
threshold="all", lp=NA, full=FALSE) |
13 | 14 |
{ |
14 |
- if(scaledPmatrix==FALSE) |
|
15 |
+ if (scaledPmatrix==FALSE) |
|
15 | 16 |
{ |
16 |
- if(!is.na(Pmatrix)){ |
|
17 |
- pscale <- apply(Pmatrix,1,max) # rescale p's to have max 1 |
|
18 |
- Amatrix <- sweep(Amatrix, 2, pscale, FUN="*") # rescale A in accordance with p's having max 1 |
|
17 |
+ if(!is.na(Pmatrix)) |
|
18 |
+ { |
|
19 |
+ pscale <- apply(Pmatrix,1,max) # rescale p's to have max 1 |
|
20 |
+ Amatrix <- sweep(Amatrix, 2, pscale, FUN="*") # rescale A in accordance with p's having max 1 |
|
21 |
+ } |
|
22 |
+ else |
|
23 |
+ { |
|
24 |
+ warning("P values must be provided if not already scaled") |
|
19 | 25 |
} |
20 |
- else(warning("P values must be provided if not already scaled")) |
|
21 | 26 |
} |
22 | 27 |
|
23 | 28 |
# find the A with the highest magnitude |
24 | 29 |
Arowmax <- t(apply(Amatrix, 1, function(x) x/max(x))) |
25 |
- pmax<-apply(Amatrix, 1, max) |
|
26 |
- # determine which genes are most associated with each pattern |
|
27 |
- ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))#list() |
|
28 |
- ssgenes<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=NULL) |
|
29 |
- nP=dim(Amatrix)[2] |
|
30 |
- if(!is.na(lp)) |
|
30 |
+ |
|
31 |
+ if (!is.na(lp)) |
|
31 | 32 |
{ |
32 |
- if(length(lp)!=dim(Amatrix)[2]) |
|
33 |
+ if (length(lp) != dim(Amatrix)[2]) |
|
33 | 34 |
{ |
34 | 35 |
warning("lp length must equal the number of columns of the Amatrix") |
35 | 36 |
} |
36 |
- for (i in 1:nP) |
|
37 |
- { |
|
38 |
- sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
39 |
- ssranks[order(sstat),i] <- 1:length(sstat) |
|
40 |
- ssgenes[,i]<-names(sort(sstat,decreasing=FALSE)) |
|
41 |
- } |
|
37 |
+ sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
38 |
+ ssranks <-rank(sstat) |
|
39 |
+ ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE)) |
|
42 | 40 |
} |
43 | 41 |
else |
44 | 42 |
{ |
45 |
- for(i in 1:nP) |
|
43 |
+ # determine which genes are most associated with each pattern |
|
44 |
+ |
|
45 |
+ sstat<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix)) |
|
46 |
+ ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))#list() |
|
47 |
+ ssgenes<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=NULL) |
|
48 |
+ for (i in 1:nP) |
|
46 | 49 |
{ |
47 | 50 |
lp <- rep(0,dim(Amatrix)[2]) |
48 | 51 |
lp[i] <- 1 |
49 |
- sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
50 |
- ssranks[order(sstat),i] <- 1:length(sstat) |
|
51 |
- ssgenes[,i]<-names(sort(sstat,decreasing=FALSE)) |
|
52 |
+ sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))) |
|
53 |
+ ssranks[,i]<-rank(sstat[,i]) |
|
54 |
+ ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE)) |
|
55 |
+ } |
|
56 |
+ if (threshold=="cut") |
|
57 |
+ { |
|
58 |
+ geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min)))) |
|
59 |
+ ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x]) |
|
60 |
+ } |
|
61 |
+ else if (threshold=="all") |
|
62 |
+ { |
|
63 |
+ pIndx<-unlist(apply(sstat,1,which.min)) |
|
64 |
+ gBYp <- list() |
|
65 |
+ for(i in sort(unique(pIndx))) |
|
66 |
+ { |
|
67 |
+ gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1]) |
|
68 |
+ } |
|
69 |
+ ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x) |
|
70 |
+ ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x]) |
|
71 |
+ } |
|
72 |
+ else |
|
73 |
+ { |
|
74 |
+ stop("Threshold arguement not viable option") |
|
52 | 75 |
} |
53 | 76 |
} |
54 |
- |
|
55 |
- if(threshold=="cut") |
|
56 |
- { |
|
57 |
- geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min)))) |
|
58 |
- ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x]) |
|
59 |
- #geneThresh <- apply(sweep(ssranks,1,t(apply(ssranks, 1, min)),"-"),2,function(x) which(x==0)) |
|
60 |
- #ssgenes.th <- lapply(geneThresh,names) |
|
61 |
- } |
|
62 |
- else if (threshold=="all") |
|
77 |
+ if (full) |
|
63 | 78 |
{ |
64 |
- pIndx<-apply(ssranks,1,which.min) |
|
65 |
- gBYp <- lapply(sort(unique(pIndx)),function(x) names(pIndx[pIndx==x])) |
|
66 |
- ssgenes.th <- lapply(1:nP, function(x) ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x]) |
|
79 |
+ return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks, |
|
80 |
+ "PatternMarkerScores"=sstat)) |
|
67 | 81 |
} |
68 | 82 |
else |
69 | 83 |
{ |
70 |
- stop("Threshold arguement not viable option") |
|
71 |
- } |
|
72 |
- |
|
73 |
- if (full) |
|
74 |
- return(list("PatternMarkers"=ssgenes.th,"PatternRanks"=ssranks)) |
|
75 |
- else |
|
76 | 84 |
return("PatternMarkers"=ssgenes.th) |
85 |
+ } |
|
77 | 86 |
} |
... | ... |
@@ -1,129 +1,129 @@ |
1 | 1 |
#' patternMatch4Parallel |
2 | 2 |
#' |
3 |
-#' @param Ptot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet} |
|
3 |
+#' @param Ptot a matrix containing the total by set estimates of Pmean output |
|
4 |
+#' from \code{reOrderBySet} |
|
4 | 5 |
#' @param nSets number of parallel sets used to generate \code{Ptot} |
5 | 6 |
#' @param cnt number of branches at which to cut dendrogram |
6 | 7 |
#' @param minNS minimum of individual set contributions a cluster must contain |
8 |
+#' @param maxNS max of individual set contributions a cluster must contain. |
|
9 |
+#' default is nSets+minNS |
|
7 | 10 |
#' @param cluster.method the agglomeration method to be used for clustering |
8 |
-#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE. |
|
9 |
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Ptot} |
|
11 |
+#' @param ignore.NA logical indicating whether or not to ignore NAs from |
|
12 |
+#' potential over dimensionalization. Default is FALSE. |
|
13 |
+#' @param bySet logical indicating whether to return list of matched set |
|
14 |
+#' solutions from \code{Ptot} |
|
10 | 15 |
#' @param ... additional parameters for \code{agnes} |
11 |
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each |
|
16 |
+#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then |
|
17 |
+#' a list of the set contributions to each |
|
12 | 18 |
#' concensus pattern is also returned. |
13 | 19 |
#' @seealso \code{\link{agnes}} |
14 |
-patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS, |
|
20 |
+#' @export |
|
21 |
+patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NA, maxNS=NULL, |
|
15 | 22 |
cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...) |
16 | 23 |
{ |
17 |
- if (!is.null(minNS)) |
|
18 |
- minNS=nSets/2 |
|
19 |
- |
|
24 |
+ if (is.na(minNS)) |
|
25 |
+ { |
|
26 |
+ minNS <- ceiling(nSets / 2) |
|
27 |
+ } |
|
28 |
+ if (is.null(maxNS)) |
|
29 |
+ { |
|
30 |
+ maxNS=nSets+minNS |
|
31 |
+ } |
|
20 | 32 |
if (ignore.NA==FALSE & anyNA(Ptot)) |
21 |
- warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.') |
|
33 |
+ { |
|
34 |
+ warning(paste("Non-sparse matrixes produced. Reducing the number of", |
|
35 |
+ "patterns asked for and rerun.")) |
|
36 |
+ } |
|
22 | 37 |
if (ignore.NA==TRUE) |
38 |
+ { |
|
23 | 39 |
Ptot <- Ptot[complete.cases(Ptot),] |
40 |
+ } |
|
24 | 41 |
|
25 |
- # corr dist |
|
26 |
- corr.dist=cor(t(Ptot)) |
|
27 |
- corr.dist=1-corr.dist |
|
28 |
- # cluster |
|
29 |
- #library(cluster) |
|
30 |
- clust=agnes(x=corr.dist,diss=TRUE,method=cluster.method) |
|
31 |
- cut=cutree(as.hclust(clust),k=cnt) |
|
32 |
- #save.image(file=paste("CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData")) |
|
42 |
+ corcut <-function(Ptot,minNS,cnt,cluster.method) |
|
43 |
+ { |
|
44 |
+ # corr dist |
|
45 |
+ corr.dist <- cor(t(Ptot)) |
|
46 |
+ corr.dist <- 1-corr.dist |
|
47 |
+ # cluster |
|
48 |
+ #library(cluster) |
|
49 |
+ clust <- agnes(x=corr.dist, diss=TRUE, method=cluster.method) |
|
50 |
+ cut <- cutree(as.hclust(clust), k=cnt) |
|
51 |
+ #save.image(file=paste("CoGAPS.", nP, "P.", nS, "Set.CorrClustCut", |
|
52 |
+ #cnt,".RData")) |
|
33 | 53 |
|
34 |
- #drop n<4 and get weighted Avg |
|
35 |
- cls=sort(unique(cut)) |
|
36 |
- cMNs=matrix(nrow=cnt,ncol=dim(Ptot)[2]) |
|
37 |
- rownames(cMNs)=cls |
|
38 |
- colnames(cMNs)=colnames(Ptot) |
|
54 |
+ cls <- sort(unique(cut)) |
|
55 |
+ cMNs <- matrix(nrow=cnt, ncol=dim(Ptot)[2]) |
|
56 |
+ rownames(cMNs) <- cls |
|
57 |
+ colnames(cMNs) <- colnames(Ptot) |
|
39 | 58 |
|
40 |
- RtoMeanPattern <- list() |
|
41 |
- PByClust <- list() |
|
42 |
- for(i in cls) |
|
43 |
- { |
|
44 |
- if (is.null(dim(Ptot[cut == i, ]))==TRUE) |
|
59 |
+ RtoMeanPattern <- list() |
|
60 |
+ PByClust <- list() |
|
61 |
+ for (i in cls) |
|
45 | 62 |
{ |
46 |
- cMNs[i,] <- Ptot[cut == i, ] |
|
47 |
- RtoMeanPattern[[i]] <- rep(1,length(Ptot[cut == i, ])) |
|
48 |
- PByClust[[i]] <- t(as.matrix(Ptot[cut == i, ])) |
|
49 |
- } |
|
50 |
- else |
|
51 |
- { |
|
52 |
- cMNs[i,]=colMeans(Ptot[cut==i,]) |
|
53 |
- PByClust[[i]] <- Ptot[cut==i,] |
|
54 |
- nIN=sum(cut==i) |
|
55 |
- RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)}) |
|
63 |
+ if (!is.null(dim(Ptot[cut == i,]))) |
|
64 |
+ { |
|
65 |
+ if (dim(Ptot[cut == i,])[1] >= minNS) |
|
66 |
+ { |
|
67 |
+ cMNs[i,] <- colMeans(Ptot[cut==i,]) |
|
68 |
+ PByClust[[i]] <- Ptot[cut==i,] |
|
69 |
+ nIN <- sum(cut==i) |
|
70 |
+ RtoMeanPattern[[i]] <- sapply(1:nIN, function(j) |
|
71 |
+ round(cor(x=Ptot[cut==i,][j,], y=cMNs[i,]),3) |
|
72 |
+ ) |
|
73 |
+ } |
|
74 |
+ } |
|
56 | 75 |
} |
57 |
- } |
|
58 | 76 |
|
59 |
- #drop n<minNS |
|
60 |
- PByClustDrop <- list() |
|
61 |
- RtoMPDrop <- list() |
|
62 |
- for(i in cls) |
|
63 |
- { |
|
64 |
- if (is.null(dim(PByClust[[i]])) == TRUE) |
|
65 |
- next |
|
66 |
- if (dim(PByClust[[i]])[1] < minNS) |
|
67 |
- { |
|
68 |
- next |
|
69 |
- } |
|
70 |
- else |
|
71 |
- { |
|
72 |
- #indx <- which(RtoMeanPattern[[i]]>.7,arr.ind = TRUE) |
|
73 |
- PByClustDrop <- append(PByClustDrop,list(PByClust[[i]])) |
|
74 |
- RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]])) |
|
75 |
- } |
|
76 |
- } |
|
77 |
+ PByClust[sapply(PByClust,is.null)] <- NULL |
|
78 |
+ RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL |
|
79 |
+ return(list("RtoMeanPattern"=RtoMeanPattern, "PByClust"=PByClust)) |
|
80 |
+ } |
|
77 | 81 |
|
78 |
- ### split by corr (build in drop if below minNS) |
|
79 |
- PByCDS <- list() |
|
80 |
- RtoMPDS <- list() |
|
81 |
- for (j in 1:length(PByClustDrop)) |
|
82 |
+ cc <- corcut(Ptot,minNS,cnt,cluster.method) |
|
83 |
+ |
|
84 |
+ ### split by maxNS |
|
85 |
+ indx <- which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS))) |
|
86 |
+ while(length(indx) > 0) |
|
82 | 87 |
{ |
83 |
- if (is.null(dim(PByClustDrop[[j]]))==TRUE) |
|
88 |
+ icc <- corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method) |
|
89 |
+ if (length(icc[[2]])==0) |
|
84 | 90 |
{ |
85 |
- next |
|
91 |
+ indx <- indx[-1] |
|
92 |
+ next |
|
86 | 93 |
} |
87 |
- if (dim(PByClustDrop[[j]])[1]<minNS+nSets) |
|
88 |
- { |
|
89 |
- PByCDS <- append(PByCDS,PByClustDrop[j]) |
|
90 |
- RtoMPDS <- append(RtoMPDS,RtoMPDrop[j]) |
|
91 |
- } |
|
92 |
- if (dim(PByClustDrop[[j]])[1]>=minNS+nSets) |
|
94 |
+ else |
|
93 | 95 |
{ |
94 |
- corr.distPBCD=cor(t(PByClustDrop[[j]])) |
|
95 |
- corr.distPBCD=1-corr.distPBCD |
|
96 |
- clustPBCD=agnes(x=corr.distPBCD,diss=TRUE,method="complete") |
|
97 |
- cutPBCD=cutree(as.hclust(clustPBCD),k=2) |
|
98 |
- g1 <- PByClustDrop[[j]][cutPBCD==1,] |
|
99 |
- PByCDS <- append(PByCDS,list(g1)) |
|
100 |
- RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g1)[1],function(z) round(cor(x=g1[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==1,])),3)))) |
|
101 |
- g2 <- PByClustDrop[[j]][cutPBCD==2,] |
|
102 |
- if (is.null(dim(g2)[1])==FALSE) |
|
96 |
+ cc$PByClust[[indx[1]]] <- icc[[2]][[1]] |
|
97 |
+ cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]] |
|
98 |
+ if (length(icc[[2]]) > 1) |
|
103 | 99 |
{ |
104 |
- PByCDS <- append(PByCDS,list(g2)) |
|
105 |
- RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g2)[1],function(z) round(cor(x=g2[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==2,])),3)))) |
|
106 |
- } |
|
100 |
+ cc$PByClust <- append(cc$PByClust,icc[[2]][2]) |
|
101 |
+ cc$RtoMeanPattern <- append(cc$RtoMeanPattern,icc[[1]][2]) |
|
102 |
+ } |
|
103 |
+ indx <- which(unlist(lapply(cc$PByClust, function(x) |
|
104 |
+ dim(x)[1] > maxNS))) |
|
107 | 105 |
} |
108 | 106 |
} |
109 | 107 |
|
110 |
- #weighted.mean(PByClustDrop[[1]],RtoMPDrop[[1]]) |
|
111 |
- PByCDSWavg<- t(sapply(1:length(PByCDS),function(z) apply(PByCDS[[z]],2,function(x) weighted.mean(x,(RtoMPDS[[z]])^3)))) |
|
112 |
- rownames(PByCDSWavg) <- lapply(1:length(PByCDS),function(x) paste("Pattern",x)) |
|
108 |
+ #weighted.mean |
|
109 |
+ PByCDSWavg <- t(sapply(1:length(cc$PByClust), function(z) |
|
110 |
+ apply(cc$PByClust[[z]],2, function(x) |
|
111 |
+ weighted.mean(x, (cc$RtoMeanPattern[[z]])^3)))) |
|
112 |
+ rownames(PByCDSWavg) <- lapply(1:length(cc$PByClust), function(x) |
|
113 |
+ paste("Pattern", x)) |
|
113 | 114 |
|
114 | 115 |
#scale ps |
115 | 116 |
Pmax <- apply(PByCDSWavg,1,max) |
116 |
- PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1],function(x) PByCDSWavg[x,]/Pmax[x])) |
|
117 |
+ PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1], function(x) |
|
118 |
+ PByCDSWavg[x,] / Pmax[x])) |
|
117 | 119 |
rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg) |
118 | 120 |
|
119 |
- if(bySet) |
|
121 |
+ if (bySet) |
|
120 | 122 |
{ |
121 |
- # return by set and final |
|
122 |
- PBySet<-PByCDS |
|
123 |
- return(list("consenusPatterns"=PByCDSWavgScaled,"PBySet"=PBySet,"RtoMPDS"=RtoMPDS)) |
|
123 |
+ return(list("consenusPatterns"=PByCDSWavgScaled, "PBySet"=cc)) |
|
124 | 124 |
} |
125 | 125 |
else |
126 | 126 |
{ |
127 |
- return(PByCDSWavgScaled) |
|
127 |
+ return("consenusPatterns"=PByCDSWavgScaled) |
|
128 | 128 |
} |
129 | 129 |
} |
... | ... |
@@ -13,7 +13,7 @@ |
13 | 13 |
#' @return heatmap of the \code{data} values for the \code{patternMarkers} |
14 | 14 |
#' @seealso \code{\link{heatmap.2}} |
15 | 15 |
plotPatternMarkers <- function(data=NA, patternMarkers=NA, patternPalette=NA, |
16 |
-sampleNames=NA, samplePalette=NULL, colDenogram=TRUE, heatmapCol="bluered", |
|
16 |
+sampleNames=NA, samplePalette=NULL, colDenogram=TRUE, heatmapCol, |
|
17 | 17 |
scale='row', ...) |
18 | 18 |
{ |
19 | 19 |
if (is.null(samplePalette)) |
... | ... |
@@ -1,27 +1,61 @@ |
1 | 1 |
#' Post Processing of Parallel Output |
2 | 2 |
#' |
3 | 3 |
#' @param AP.fixed output of parallel gapsMapRun calls with same FP |
4 |
-#' @param setPs data.frame with rows giving fixed patterns for P used as input |
|
4 |
+#' @param setValues data.frame with rows giving fixed patterns for P used as input |
|
5 | 5 |
#' for gapsMapRun |
6 |
+#' @param setMatrix which matrix, A or P |
|
6 | 7 |
#' @return list of two data.frames containing the A matrix estimates or their |
7 | 8 |
#' corresponding standard deviations from output of parallel CoGAPS |
8 |
-postFixed4Parallel <- function(AP.fixed, setPs) |
|
9 |
+postFixed4Parallel <- function(AP.fixed, setValues, setMatrix="P") |
|
9 | 10 |
{ |
10 |
- ASummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Amean)) |
|
11 |
- Asd <- do.call(rbind,lapply(AP.fixed, function(x) x$Asd)) |
|
12 |
- #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean)) |
|
13 |
- PSummary <- AP.fixed[[1]]$Pmean |
|
11 |
+ if (setMatrix=="P") |
|
12 |
+ { |
|
13 |
+ ASummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Amean)) |
|
14 |
+ Asd <- do.call(rbind,lapply(AP.fixed, function(x) x$Asd)) |
|
15 |
+ #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean)) |
|
16 |
+ PSummary <- AP.fixed[[1]]$Pmean |
|
14 | 17 |
|
15 |
- Pmax <- apply(PSummary,1,max) |
|
16 |
- Pneu <- sweep(PSummary,1,Pmax,FUN="/") |
|
17 |
- Aneu <- sweep(ASummary,2,Pmax,FUN="*") |
|
18 |
+ Pmax <- apply(PSummary,1,max) |
|
19 |
+ Pneu <- sweep(PSummary,1,Pmax,FUN="/") |
|
20 |
+ Aneu <- sweep(ASummary,2,Pmax,FUN="*") |
|
18 | 21 |
|
19 |
- X <- apply(Pneu,1,range) |
|
20 |
- Y <- apply(setPs,1,range) |
|
21 |
- colnames(X) <- colnames(Y) |
|
22 |
- if (all.equal(X,Y,tolerance=0.01) != TRUE) |
|
23 |
- warning("Patterns do not match fixed values.") |
|
22 |
+ X <- apply(Pneu,1,range) |
|
23 |
+ Y <- apply(setValues,1,range) |
|
24 |
+ colnames(X) <- colnames(Y) |
|
25 |
+ if (!all.equal(X,Y,tolerance=0.01)) |
|
26 |
+ { |
|
27 |
+ warning("Patterns do not match fixed values.") |
|
28 |
+ } |
|
29 |
+ |
|
30 |
+ As4fixPs<-list("A"=Aneu,"Asd"=Asd) |
|
31 |
+ return(As4fixPs) |
|
32 |
+ } |
|
33 |
+ else if (setMatrix=="A") |
|
34 |
+ { |
|
35 |
+ PSummary <- do.call(cbind, lapply(AP.fixed, function(x) x$Pmean)) |
|
36 |
+ Psd <- do.call(cbind, lapply(AP.fixed, function(x) x$Psd)) |
|
37 |
+ #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean)) |
|
38 |
+ ASummary <- AP.fixed[[1]]$Amean |
|
39 |
+ |
|
40 |
+ Amax <- apply(ASummary,2,max) |
|
41 |
+ Aneu <- sweep(ASummary,2,Amax,FUN="/") |
|
42 |
+ Pneu <- sweep(PSummary,1,Amax,FUN="*") |
|
43 |
+ |
|
44 |
+ X <- apply(Aneu, 2, range) |
|
45 |
+ Y <- apply(setValues, 2, range) |
|
46 |
+ rownames(X) <- rownames(Y) |
|
47 |
+ colnames(X) <- colnames(Y) |
|
48 |
+ if (!all.equal(X, Y, tolerance=0.01)) |
|
49 |
+ { |
|
50 |
+ warning("As do not match fixed values.") |
|
51 |
+ } |
|
52 |
+ |
|
53 |
+ Ps4fixAs <- list("P"=Pneu,"Psd"=Psd) |
|
54 |
+ return(Ps4fixAs) |
|
55 |
+ } |
|
56 |
+ else |
|
57 |
+ { |
|
58 |
+ warning("setMatrix can only take values of 'A' or 'P'") |
|
59 |
+ } |
|
24 | 60 |
|
25 |
- As4fixPs<-list("A"=Aneu,"Asd"=Asd) |
|
26 |
- return(As4fixPs) |
|
27 | 61 |
} |
28 | 62 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+#' Post Processing of Parallel Output |
|
2 |
+#' |
|
3 |
+#' @param AP.fixed output of parallel gapsMapRun calls with same FP |
|
4 |
+#' @param setAs data.frame with rows giving fixed patterns for A used as input |
|
5 |
+#' for gapsMapRun |
|
6 |
+#' @return list of two data.frames containing the A matrix estimates or their |
|
7 |
+#' corresponding standard deviations from output of parallel CoGAPS |
|
8 |
+postFixed4SC <- function(AP.fixed, setAs) |
|
9 |
+{ |
|
10 |
+ ASummary <- AP.fixed[[1]]$Amean |
|
11 |
+ PSummary <- do.call(cbind,lapply(AP.fixed, function(x) x$Pmean)) |
|
12 |
+ Psd <- do.call(cbind,lapply(AP.fixed, function(x) x$Psd)) |
|
13 |
+ |
|
14 |
+ Amax <- apply(ASummary,2,max) |
|
15 |
+ Aneu <- sweep(ASummary,2,Amax,FUN="/") |
|
16 |
+ Pneu <- sweep(PSummary,1,Amax,FUN="*") |
|
17 |
+ |
|
18 |
+ X <- apply(Aneu,2,range) |
|
19 |
+ Y <- apply(setAs,2,range) |
|
20 |
+ colnames(X) <- colnames(Y) |
|
21 |
+ if (all.equal(X,Y,tolerance=0.01) != TRUE) |
|
22 |
+ warning("Patterns do not match fixed values.") |
|
23 |
+ |
|
24 |
+ Ps4fixAs<-list("P"=Pneu,"Psd"=Psd) |
|
25 |
+ return(Ps4fixAs) |
|
26 |
+} |
0 | 27 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,206 @@ |
1 |
+#' scCoGAPS |
|
2 |
+#' @export |
|
3 |
+#' |
|
4 |
+#' @details calls the C++ MCMC code and performs Bayesian |
|
5 |
+#' matrix factorization returning the two matrices that reconstruct |
|
6 |
+#' the data matrix for whole genome data; |
|
7 |
+#' @param simulationName name of this simulation |
|
8 |
+#' @param nFactor number of patterns (basis vectors, metagenes), which must be |
|
9 |
+#' greater than or equal to the number of rows of FP |
|
10 |
+#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
|
11 |
+#' @param cut number of branches at which to cut dendrogram used in patternMatch4singleCell |
|
12 |
+#' @param minNS minimum of individual set contributions a cluster must contain |
|
13 |
+#' @param manualMatch logical indicating whether or not to stop after initial phase for manual pattern matching |
|
14 |
+#' @param consensusAs fixed pattern matrix to be used to ensure reciprocity of A weights accross sets |
|
15 |
+#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun} |
|
16 |
+#' @return list of A and P estimates |
|
17 |
+scCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, manualMatch=FALSE, consensusAs=NULL, ...) |
|
18 |
+{ |
|
19 |
+ if (!is.null(list(...)$checkpointFile)) |
|
20 |
+ { |
|
21 |
+ stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter") |
|
22 |
+ } |
|
23 |
+ |
|
24 |
+ if (is.null(consensusAs)) |
|
25 |
+ { |
|
26 |
+ allDataSets <- sc_preInitialPhase(simulationName, nCores) |
|
27 |
+ initialResult <- sc_runInitialPhase(simulationName, allDataSets, nFactor, ...) |
|
28 |
+ if (manualMatch) |
|
29 |
+ { |
|
30 |
+ saveRDS(initialResult,file=paste(simulationName,"_initial.rds", sep="")) |
|
31 |
+ stop("Please provide concensus gene weights upon restarting.") |
|
32 |
+ } |
|
33 |
+ matchedAmplitudes <- sc_postInitialPhase(initialResult, length(allDataSets), cut, minNS) |
|
34 |
+ consensusAs <- matchedAmplitudes[[1]] |
|
35 |
+ save(consensusAs, file=paste(simulationName, "_matched_As.RData", sep="")) |
|
36 |
+ } |
|
37 |
+ finalResult <- sc_runFinalPhase(simulationName, allDataSets, consensusAs, nCores, ...) |
|
38 |
+ return(sc_postFinalPhase(finalResult, consensusAs)) |
|
39 |
+} |
|
40 |
+ |
|
41 |
+#' Restart a scCoGAPS run from a Checkpoint |
|
42 |
+#' |
|
43 |
+#' @inheritParams GWCoGAPS |
|
44 |
+#' @return list of A and P estimates |
|
45 |
+#' @importFrom utils file_test |
|
46 |
+scCoGapsFromCheckpoint <- function(simulationName, nCores, cut=NA, minNS=NA, ...) |
|
47 |
+{ |
|
48 |
+ # find data files |
|
49 |
+ allDataSets <- sc_preInitialPhase(simulationName, nCores) |
|
50 |
+ |
|
51 |
+ # figure out phase from file signature |
|
52 |
+ initialCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, |
|
53 |
+ "_initial_cpt_[0-9]+.out", sep="")) |
|
54 |
+ finalCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, |
|
55 |
+ "_final_cpt_[0-9]+.out", sep="")) |
|
56 |
+ |
|
57 |
+ if (length(finalCpts)) |
|
58 |
+ { |
|
59 |
+ finalResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
60 |
+ { |
|
61 |
+ # load data set and shift values so gene minimum is zero |
|
62 |
+ load(allDataSets[[i]]) |
|
63 |
+ sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) |
|
64 |
+ pmin(0,min(x)))) |
|
65 |
+ |
|
66 |
+ # run CoGAPS with fixed patterns |
|
67 |
+ cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", |
|
68 |
+ sep="") |
|
69 |
+ CoGapsFromCheckpoint(sampleD, sampleS, cptFileName) |
|
70 |
+ } |
|
71 |
+ load(paste(simulationName, "_matched_As.RData", sep="")) |
|
72 |
+ } |
|
73 |
+ else if (file_test("-f", paste(simulationName, "_matched_As.RData", sep=""))) |
|
74 |
+ { |
|
75 |
+ load(paste(simulationName, "_matched_As.RData", sep="")) |
|
76 |
+ finalResult <- sc_runFinalPhase(simulationName, allDataSets, |
|
77 |
+ consensusAs, nCores, ...) |
|
78 |
+ } |
|
79 |
+ else if (length(initialCpts)) |
|
80 |
+ { |
|
81 |
+ # initial phase - always needs to be run to get matchedAmplitudes |
|
82 |
+ initialResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
83 |
+ { |
|
84 |
+ # load data set and shift values so gene minimum is zero |
|
85 |
+ load(allDataSets[[i]]) |
|
86 |
+ sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) |
|
87 |
+ pmin(0,min(x)))) |
|
88 |
+ |
|
89 |
+ # run CoGAPS from checkpoint |
|
90 |
+ cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", |
|
91 |
+ sep="") |
|
92 |
+ CoGapsFromCheckpoint(sampleD, sampleS, cptFileName) |
|
93 |
+ } |
|
94 |
+ matchedAmplitudes <- sc_postInitialPhase(initialResult, |
|
95 |
+ length(allDataSets), cut, minNS) |
|
96 |
+ consensusAs <- matchedAmplitudes[[1]] |
|
97 |
+ save(consensusAs, file=paste(simulationName, "_matched_As.RData", |
|
98 |
+ sep="")) |
|
99 |
+ finalResult <- sc_runFinalPhase(simulationName, allDataSets, |
|
100 |
+ consensusAs, nCores, ...) |
|
101 |
+ } |
|
102 |
+ else |
|
103 |
+ { |
|
104 |
+ stop("no checkpoint files found") |
|
105 |
+ } |
|
106 |
+ return(sc_postFinalPhase(finalResult, consensusAs)) |
|
107 |
+} |
|
108 |
+ |
|
109 |
+sc_preInitialPhase <- function(simulationName, nCores) |
|
110 |
+{ |
|
111 |
+ # find data files |
|
112 |
+ fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="") |
|
113 |
+ allDataSets <- list.files(full.names=TRUE, pattern=fileSig) |
|
114 |
+ |
|
115 |
+ # establish the number of cores that we are able to use |
|
116 |
+ if (is.na(nCores)) |
|
117 |
+ { |
|
118 |
+ nCores <- length(allDataSets) |
|
119 |
+ } |
|
120 |
+ registerDoParallel(cores=nCores) |
|
121 |
+ return(allDataSets) |
|
122 |
+} |
|
123 |
+ |
|
124 |
+sc_runInitialPhase <- function(simulationName, allDataSets, nFactor, ...) |
|
125 |
+{ |
|
126 |
+ #generate seeds for parallelization |
|
127 |
+ nut <- generateSeeds(chains=length(allDataSets), seed=-1) |
|
128 |
+ |
|
129 |
+ # run CoGAPS for each set |
|
130 |
+ initialResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
131 |
+ { |
|
132 |
+ # load data set and shift values so gene minimum is zero |
|
133 |
+ load(allDataSets[[i]]) |
|
134 |
+ sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x)))) |
|
135 |
+ |
|
136 |
+ # run CoGAPS without any fixed patterns |
|
137 |
+ cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", sep="") |
|
138 |
+ CoGAPS(sampleD, sampleS, nFactor=nFactor, seed=nut[i], |
|
139 |
+ checkpointFile=cptFileName, singleCellRNASeq=TRUE, ...) |
|
140 |
+ } |
|
141 |
+ return(initialResult) |
|
142 |
+} |
|
143 |
+ |
|
144 |
+sc_postInitialPhase <- function(initialResult, nSets, cut, minNS) |
|
145 |
+{ |
|
146 |
+ nFactor <- ncol(initialResult[[1]]$Amean) |
|
147 |
+ BySet <- reOrderBySet(AP=initialResult, nFactor=nFactor, nSets=nSets,match="A") |
|
148 |
+ |
|
149 |
+ #run postpattern match function |
|
150 |
+ if (is.na(cut)) |
|
151 |
+ { |
|
152 |
+ cut <- nFactor |
|
153 |
+ } |
|
154 |
+ |
|
155 |
+ return(cellMatchR(Atot=BySet$A, nSets=nSets, cnt=cut, minNS=minNS, |
|
156 |
+ bySet=TRUE)) |
|
157 |
+} |
|
158 |
+ |
|
159 |
+sc_runFinalPhase <- function(simulationName, allDataSets, consensusAs, nCores, ...) |
|
160 |
+{ |
|
161 |
+ if (length(dim(consensusAs)) != 2) |
|
162 |
+ { |
|
163 |
+ stop("consensusAs must be a matrix") |
|
164 |
+ } |
|
165 |
+ |
|
166 |
+ # find data files if providing consensus patterns |
|
167 |
+ fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="") |
|
168 |
+ allDataSets <- list.files(full.names=TRUE, pattern=fileSig) |
|
169 |
+ |
|
170 |
+ if (is.na(nCores)) |
|
171 |
+ { |
|
172 |
+ nCores <- length(allDataSets) |
|
173 |
+ } |
|
174 |
+ registerDoParallel(cores=nCores) |
|
175 |
+ |
|
176 |
+ # generate seeds for parallelization |
|
177 |
+ nut <- generateSeeds(chains=length(allDataSets), seed=-1) |
|
178 |
+ |
|
179 |
+ # final number of factors |
|
180 |
+ nFactorFinal <- ncol(consensusAs) |
|
181 |
+ |
|
182 |
+ # run fixed CoGAPS |
|
183 |
+ finalResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
184 |
+ { |
|
185 |
+ # load data set and shift values so gene minimum is zero |
|
186 |
+ load(allDataSets[[i]]) |
|
187 |
+ sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x)))) |
|
188 |
+ |
|
189 |
+ # run CoGAPS with fixed patterns |
|
190 |
+ cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", sep="") |
|
191 |
+ CoGAPS(sampleD, sampleS, fixedPatterns=consensusAs, |
|
192 |
+ nFactor=nFactorFinal, seed=nut[i], checkpointFile=cptFileName, |
|
193 |
+ whichMatrixFixed='A', singleCellRNASeq=TRUE, ...) |
|
194 |
+ |
|
195 |
+ } |
|
196 |
+ return(finalResult) |
|
197 |
+} |
|
198 |
+ |
|
199 |
+sc_postFinalPhase <- function(finalResult, consensusAs) |
|
200 |
+{ |
|
201 |
+ Aresult <- postFixed4Parallel(finalResult, consensusAs, setMatrix="A") |
|
202 |
+ finalResult <- list("Pmean"=Aresult$P, "Psd"=Aresult$Psd,"Amean"=consensusAs) |
|
203 |
+ class(finalResult) <- append(class(finalResult), "CoGAPS") |
|
204 |
+ return(finalResult) |
|
205 |
+} |
|
206 |
+ |
... | ... |
@@ -1,5 +1,8 @@ |
1 | 1 |
# CoGAPS |
2 | 2 |
|
3 |
+[](https://bioconductor.org/packages/CoGAPS) |
|
4 |
+[](https://bioconductor.org/packages/CoGAPS) |
|
5 |
+ |
|
3 | 6 |
[](https://travis-ci.org/CoGAPS/CoGAPS) |
4 | 7 |
[](https://ci.appveyor.com/project/CoGAPS/CoGAPS) |
5 | 8 |
[](https://codecov.io/github/CoGAPS/CoGAPS?branch=master) |
... | ... |
@@ -42,6 +42,5 @@ artifacts: |
42 | 42 |
|
43 | 43 |
notifications: |
44 | 44 |
- provider: Slack |
45 |
- auth_token: |
|
46 |
- secure: kwWklRUn6l8enJHTEE1OtUmlx+qMLfr3WBWG/fjConXijD8BAAZb2I+lL7cdoEIACu3w== |
|
47 |
- channel: cogaps |
|
45 |
+ incoming_webhook: |
|
46 |
+ secure: https://hooks.slack.com/services/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 |
|
48 | 47 |
\ No newline at end of file |
... | ... |
@@ -136,8 +136,7 @@ AtomicProposal AtomicSupport::proposeExchange() const |
136 | 136 |
|
137 | 137 |
// calculate new mass |
138 | 138 |
float pupper = gaps::random::p_gamma(mass1 + mass2, 2.f, 1.f / mLambda); |
139 |
- float newMass = gaps::random::q_gamma(gaps::random::uniform(0.f, pupper), |
|
140 |
- 2.f, 1.f / mLambda); |
|
139 |
+ float newMass = gaps::random::inverseGammaSample(0.f, pupper, 2.f, 1.f / mLambda); |
|
141 | 140 |
|
142 | 141 |
// calculate mass changes |
143 | 142 |
float delta1 = mass1 > mass2 ? newMass - mass1 : mass2 - newMass; |
... | ... |
@@ -9,8 +9,7 @@ CoGAPS(D, S, nFactor = 7, nEquil = 1000, nSample = 1000, |
9 | 9 |
maxGibbmassA = 100, maxGibbmassP = 100, seed = -1, messages = TRUE, |
10 | 10 |
singleCellRNASeq = FALSE, whichMatrixFixed = "N", |
11 | 11 |
fixedPatterns = matrix(0), checkpointInterval = 0, |
12 |
- checkpointFile = "gaps_checkpoint.out", pumpThreshold = "unique", |
|
13 |
- nPumpSamples = 0, ...) |
|
12 |
+ checkpointFile = "gaps_checkpoint.out", ...) |
|
14 | 13 |
} |
15 | 14 |
\arguments{ |
16 | 15 |
\item{D}{data matrix} |
... | ... |
@@ -52,10 +51,6 @@ the fixed patterns} |
52 | 51 |
|
53 | 52 |
\item{checkpointFile}{name of the checkpoint file} |
54 | 53 |
|
55 |
-\item{pumpThreshold}{type of threshold for pump statistic} |
|
56 |
- |
|
57 |
-\item{nPumpSamples}{number of samples used in pump statistic} |
|
58 |
- |
|
59 | 54 |
\item{...}{keeps backwards compatibility with arguments from older versions} |
60 | 55 |
} |
61 | 56 |
\value{ |
... | ... |
@@ -4,7 +4,8 @@ |
4 | 4 |
\alias{GWCoGAPS} |
5 | 5 |
\title{GWCoGAPS} |
6 | 6 |
\usage{ |
7 |
-GWCoGAPS(simulationName, nFactor, nCores = NA, cut = NA, minNS = NA, ...) |
|
7 |
+GWCoGAPS(simulationName, nFactor, nCores = NA, cut = NA, minNS = NA, |
|
8 |
+ manualMatch = FALSE, consensusPatterns = NULL, ...) |
|
8 | 9 |
} |
9 | 10 |
\arguments{ |
10 | 11 |
\item{simulationName}{name of this simulation} |
... | ... |
@@ -18,6 +19,10 @@ greater than or equal to the number of rows of FP} |
18 | 19 |
|
19 | 20 |
\item{minNS}{minimum of individual set contributions a cluster must contain} |
20 | 21 |
|
22 |
+\item{manualMatch}{logical indicating whether or not to stop after initial phase for manual pattern matching} |
|
23 |
+ |
|
24 |
+\item{consensusPatterns}{fixed pattern matrix to be used to ensure reciprocity of A weights accross sets} |
|
25 |
+ |
|
21 | 26 |
\item{...}{additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}} |
22 | 27 |
} |
23 | 28 |
\value{ |
... | ... |
@@ -35,7 +40,7 @@ the data matrix for whole genome data; |
35 | 40 |
data(SimpSim) |
36 | 41 |
sim_name <- "example" |
37 | 42 |
createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
38 |
-result <- GWCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000) |
|
43 |
+result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200) |
|
39 | 44 |
} |
40 | 45 |
\seealso{ |
41 | 46 |
\code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
... | ... |
@@ -4,8 +4,7 @@ |
4 | 4 |
\alias{GWCoGapsFromCheckpoint} |
5 | 5 |
\title{Restart a GWCoGaps Run from Checkpoint} |
6 | 6 |
\usage{ |
7 |
-GWCoGapsFromCheckpoint(simulationName, nCores = NA, cut = NA, minNS = NA, |
|
8 |
- ...) |
|
7 |
+GWCoGapsFromCheckpoint(simulationName, nCores, cut = NA, minNS = NA, ...) |
|
9 | 8 |
} |
10 | 9 |
\arguments{ |
11 | 10 |
\item{simulationName}{name of this simulation} |
... | ... |
@@ -24,4 +23,11 @@ list of A and P estimates |
24 | 23 |
\description{ |
25 | 24 |
Restart a GWCoGaps Run from Checkpoint |
26 | 25 |
} |
26 |
+\examples{ |
|
27 |
+data(SimpSim) |
|
28 |
+sim_name <- "example" |
|
29 |
+createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
|
30 |
+trash <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200) |
|
31 |
+result <- GWCoGapsFromCheckpoint(sim_name, 2) |
|
32 |
+} |
|
27 | 33 |
|
... | ... |
@@ -32,8 +32,8 @@ listed in a gene set behaves like other genes in the set within |
32 | 32 |
the given data set |
33 | 33 |
} |
34 | 34 |
\examples{ |
35 |
-data('SimpSim') |
|
36 |
-calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
35 |
+data("SimpSim") |
|
36 |
+calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]], |
|
37 | 37 |
numPerm=500) |
38 | 38 |
} |
39 | 39 |
|
40 | 40 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,36 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/cellMatchR.R |
|
3 |
+\name{cellMatchR} |
|
4 |
+\alias{cellMatchR} |
|
5 |
+\title{cellMatchR} |
|
6 |
+\usage{ |
|
7 |
+cellMatchR(Atot, nSets, cnt, minNS = NA, maxNS = NA, ignore.NA = FALSE, |
|
8 |
+ bySet = FALSE, plotDen = FALSE, ...) |
|
9 |
+} |
|
10 |
+\arguments{ |
|
11 |
+\item{Atot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}} |
|
12 |
+ |
|
13 |
+\item{nSets}{number of parallel sets used to generate \code{Atot}} |
|
14 |
+ |
|
15 |
+\item{cnt}{number of branches at which to cut dendrogram} |
|
16 |
+ |
|
17 |
+\item{minNS}{minimum of individual set contributions a cluster must contain} |
|
18 |
+ |
|
19 |
+\item{maxNS}{maximum of individual set contributions a cluster must contain} |
|
20 |
+ |
|
21 |
+\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.} |
|
22 |
+ |
|
23 |
+\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}} |
|
24 |
+ |
|
25 |
+\item{plotDen}{plot} |
|
26 |
+ |
|
27 |
+\item{...}{additional parameters for \code{agnes}} |
|
28 |
+} |
|
29 |
+\value{ |
|
30 |
+a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each |
|
31 |
+concensus pattern is also returned. |
|
32 |
+} |
|
33 |
+\description{ |
|
34 |
+cellMatchR |
|
35 |
+} |
|
36 |
+ |
... | ... |
@@ -35,8 +35,8 @@ comparing the inferred activity of that gene to the average activity of the |
35 | 35 |
set. |
36 | 36 |
} |
37 | 37 |
\examples{ |
38 |
-data('SimpSim') |
|
39 |
-computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
38 |
+data("SimpSim") |
|
39 |
+computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]], |
|
40 | 40 |
numPerm=500) |
41 | 41 |
} |
42 | 42 |
|
43 | 43 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/createscCoGAPSSets.R |
|
3 |
+\name{createscCoGAPSSets} |
|
4 |
+\alias{createscCoGAPSSets} |
|
5 |
+\title{Create Gene Sets for scCoGAPS} |
|
6 |
+\usage{ |
|
7 |
+createscCoGAPSSets(D, nSets, simulationName, samplingRatio = NULL, |
|
8 |
+ path = "", anotionObj = NULL) |
|
9 |
+} |
|