... | ... |
@@ -17,24 +17,12 @@ Depends: |
17 | 17 |
R (>= 3.4.0), |
18 | 18 |
Rcpp (>= 0.11.0) |
19 | 19 |
Imports: |
20 |
- RColorBrewer (>= 1.0.5), |
|
21 |
- gplots (>= 2.8.0), |
|
22 |
- graphics, |
|
23 |
- grDevices, |
|
24 | 20 |
methods, |
25 |
- cluster, |
|
26 |
- shiny, |
|
27 |
- stats, |
|
28 |
- utils, |
|
29 |
- doParallel, |
|
30 |
- foreach, |
|
31 |
- ggplot2, |
|
32 |
- reshape2, |
|
33 |
- SingleCellExperiment, |
|
34 |
- SummarizedExperiment |
|
21 |
+ gplots, |
|
22 |
+ graphics, |
|
23 |
+ S4Vectors |
|
35 | 24 |
Suggests: |
36 | 25 |
testthat, |
37 |
- lintr, |
|
38 | 26 |
knitr, |
39 | 27 |
rmarkdown, |
40 | 28 |
BiocStyle |
... | ... |
@@ -49,21 +37,9 @@ RoxygenNote: 5.0.1 |
49 | 37 |
Collate: |
50 | 38 |
'class-CogapsParams.R' |
51 | 39 |
'CoGAPS.R' |
52 |
- 'GWCoGAPS.R' |
|
40 |
+ 'DistributedCogaps.R' |
|
41 |
+ 'Package.R' |
|
53 | 42 |
'RcppExports.R' |
54 |
- 'cellMatchR.R' |
|
55 | 43 |
'class-CogapsResult.R' |
56 |
- 'createGWCoGAPSSets.R' |
|
57 |
- 'createscCoGAPSSets.R' |
|
58 |
- 'distributedCogaps.R' |
|
59 |
- 'generateSeeds.R' |
|
60 |
- 'package.R' |
|
61 |
- 'patternMarkers.R' |
|
62 |
- 'patternMatch4Parallel.R' |
|
63 |
- 'patternMatcher.R' |
|
64 |
- 'plotPatternMarkers.R' |
|
65 |
- 'postFixed4Parallel.R' |
|
66 |
- 'postFixed4SC.R' |
|
67 |
- 'reOrderBySet.R' |
|
68 |
- 'reorderByPatternMatch.R' |
|
69 |
- 'scCoGAPS.R' |
|
44 |
+ 'methods-CogapsParams.R' |
|
45 |
+ 'methods-CogapsResult.R' |
... | ... |
@@ -1,79 +1,28 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 | 3 |
export(CoGAPS) |
4 |
-export(GWCoGAPS) |
|
5 |
-export(GWCoGapsFromCheckpoint) |
|
6 | 4 |
export(binaryA) |
7 | 5 |
export(buildReport) |
8 | 6 |
export(calcCoGAPSStat) |
9 | 7 |
export(calcGeneGSStat) |
10 | 8 |
export(calcZ) |
11 |
-export(cellMatchR) |
|
12 | 9 |
export(computeGeneGSProb) |
13 |
-export(createGWCoGAPSSets) |
|
14 |
-export(createscCoGAPSSets) |
|
15 | 10 |
export(getMeanChiSq) |
16 | 11 |
export(getParam) |
17 | 12 |
export(patternMarkers) |
18 | 13 |
export(patternMatch) |
19 |
-export(patternMatch4Parallel) |
|
20 | 14 |
export(plot.CogapsResult) |
15 |
+export(plotPatternMarkers) |
|
21 | 16 |
export(plotResiduals) |
22 | 17 |
export(reconstructGene) |
23 |
-export(scCoGAPS) |
|
24 | 18 |
export(setParam) |
25 | 19 |
exportClasses(CogapsParams) |
26 | 20 |
exportClasses(CogapsResult) |
27 |
-import(doParallel) |
|
28 |
-import(foreach) |
|
29 |
-import(ggplot2) |
|
30 |
-import(shiny) |
|
31 | 21 |
importClassesFrom(S4Vectors,Annotated) |
32 | 22 |
importClassesFrom(S4Vectors,DataFrame) |
33 | 23 |
importClassesFrom(S4Vectors,character_OR_NULL) |
34 |
-importFrom(RColorBrewer,brewer.pal) |
|
35 |
-importFrom(Rcpp,evalCpp) |
|
36 |
-importFrom(cluster,agnes) |
|
37 |
-importFrom(gplots,heatmap.2) |
|
38 |
-importFrom(gplots,plotCI) |
|
39 |
-importFrom(grDevices,colorRampPalette) |
|
40 |
-importFrom(grDevices,dev.new) |
|
41 |
-importFrom(grDevices,dev.off) |
|
42 |
-importFrom(grDevices,pdf) |
|
43 |
-importFrom(grDevices,rainbow) |
|
44 |
-importFrom(graphics,abline) |
|
45 |
-importFrom(graphics,close.screen) |
|
46 |
-importFrom(graphics,hist) |
|
47 |
-importFrom(graphics,legend) |
|
48 |
-importFrom(graphics,lines) |
|
49 |
-importFrom(graphics,matplot) |
|
50 |
-importFrom(graphics,mtext) |
|
51 |
-importFrom(graphics,par) |
|
24 |
+importFrom(gplots,bluered) |
|
52 | 25 |
importFrom(graphics,plot) |
53 |
-importFrom(graphics,points) |
|
54 |
-importFrom(graphics,screen) |
|
55 |
-importFrom(graphics,split.screen) |
|
56 |
-importFrom(graphics,title) |
|
57 | 26 |
importFrom(methods,callNextMethod) |
58 |
-importFrom(methods,is) |
|
59 | 27 |
importFrom(methods,new) |
60 |
-importFrom(reshape2,melt) |
|
61 |
-importFrom(stats,D) |
|
62 |
-importFrom(stats,as.dist) |
|
63 |
-importFrom(stats,as.hclust) |
|
64 |
-importFrom(stats,complete.cases) |
|
65 |
-importFrom(stats,cor) |
|
66 |
-importFrom(stats,cutree) |
|
67 |
-importFrom(stats,hclust) |
|
68 |
-importFrom(stats,heatmap) |
|
69 |
-importFrom(stats,loess) |
|
70 |
-importFrom(stats,runif) |
|
71 |
-importFrom(stats,sd) |
|
72 |
-importFrom(stats,update) |
|
73 |
-importFrom(stats,variable.names) |
|
74 |
-importFrom(stats,weighted.mean) |
|
75 |
-importFrom(utils,file_test) |
|
76 |
-importFrom(utils,read.table) |
|
77 |
-importFrom(utils,str) |
|
78 |
-importFrom(utils,write.table) |
|
79 | 28 |
useDynLib(CoGAPS) |
80 | 29 |
similarity index 93% |
81 | 30 |
rename from R/distributedCogaps.R |
82 | 31 |
rename to R/DistributedCogaps.R |
... | ... |
@@ -127,7 +127,7 @@ patternMatch <- function(allPatterns, allParams) |
127 | 127 |
indx <- which(sapply(cc$PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS)) |
128 | 128 |
} |
129 | 129 |
|
130 |
- # created matrix of mean patterns - weighted by R closeness of fit |
|
130 |
+ # create matrix of mean patterns - weighted by coefficient of determination |
|
131 | 131 |
PatsByCDSWavg <- t(sapply(1:length(cc$PatsByClust), function(z) |
132 | 132 |
apply(cc$PatsByClust[[z]], 1, function(x) weighted.mean(x, (cc$RtoMeanPattern[[z]])^3)))) |
133 | 133 |
rownames(PatsByCDSWavg) <- lapply(1:length(cc$PatsByClust), function(x) paste("Pattern", x)) |
... | ... |
@@ -137,13 +137,15 @@ patternMatch <- function(allPatterns, allParams) |
137 | 137 |
return(apply(PatsByCDSWavg, 1, function(row) row / max(row))) |
138 | 138 |
} |
139 | 139 |
|
140 |
+#' @importFrom cluster agnes |
|
141 |
+#' @importFrom stats cutree as.hclust |
|
140 | 142 |
corcut <- function(allPatterns, allParams) |
141 | 143 |
{ |
142 | 144 |
corr.dist <- cor(allPatterns) |
143 | 145 |
corr.dist <- 1 - corr.dist |
144 | 146 |
|
145 |
- clust <- agnes(x=corr.dist, diss=TRUE, "complete") |
|
146 |
- patternIds <- cutree(as.hclust(clust), k=allParams$gaps@cut) |
|
147 |
+ clust <- cluster::agnes(x=corr.dist, diss=TRUE, "complete") |
|
148 |
+ patternIds <- stats::cutree(stats::as.hclust(clust), k=allParams$gaps@cut) |
|
147 | 149 |
|
148 | 150 |
RtoMeanPattern <- list() |
149 | 151 |
PatsByClust <- list() |
150 | 152 |
deleted file mode 100644 |
... | ... |
@@ -1,238 +0,0 @@ |
1 |
-#' GWCoGAPS |
|
2 |
-#' |
|
3 |
-#' @details calls the C++ MCMC code and performs Bayesian |
|
4 |
-#' matrix factorization returning the two matrices that reconstruct |
|
5 |
-#' the data matrix for whole genome data; |
|
6 |
-#' @param simulationName name of this simulation |
|
7 |
-#' @param nFactor number of patterns (basis vectors, metagenes), which must be |
|
8 |
-#' greater than or equal to the number of rows of FP |
|
9 |
-#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
|
10 |
-#' @param cut number of branches at which to cut dendrogram used in patternMatch4Parallel |
|
11 |
-#' @param minNS minimum of individual set contributions a cluster must contain |
|
12 |
-#' @param manualMatch logical indicating whether or not to stop after initial phase for manual pattern matching |
|
13 |
-#' @param consensusPatterns fixed pattern matrix to be used to ensure reciprocity of A weights accross sets |
|
14 |
-#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun} |
|
15 |
-#' @return list of A and P estimates |
|
16 |
-#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
|
17 |
-#' @examples |
|
18 |
-#' data(SimpSim) |
|
19 |
-#' sim_name <- "example" |
|
20 |
-#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
|
21 |
-#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200) |
|
22 |
-#' @export |
|
23 |
-GWCoGAPS <- function(simulationName, nFactor=3, nCores=NA, cut=NA, minNS=NA, |
|
24 |
-manualMatch=FALSE, consensusPatterns=NULL, saveUnmatchedPatterns=FALSE, ...) |
|
25 |
-{ |
|
26 |
- if (!is.null(list(...)$nPatterns)) |
|
27 |
- { |
|
28 |
- nFactor <- list(...)$nPatterns |
|
29 |
- } |
|
30 |
- |
|
31 |
- if (!is.null(list(...)$checkpointFile)) |
|
32 |
- { |
|
33 |
- stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter") |
|
34 |
- } |
|
35 |
- |
|
36 |
- if (is.null(consensusPatterns)) |
|
37 |
- { |
|
38 |
- allDataSets <- preInitialPhase(simulationName, nCores) |
|
39 |
- unmatchedPatterns <- runInitialPhase(simulationName, allDataSets, nFactor, ...) |
|
40 |
- if (saveUnmatchedPatterns) |
|
41 |
- { |
|
42 |
- save(unmatchedPatterns, file=paste(simulationName, "_unmatched_patterns.RData")) |
|
43 |
- } |
|
44 |
- if (manualMatch) |
|
45 |
- { |
|
46 |
- saveRDS(unmatchedPatterns,file=paste(simulationName,"_initial.rds", sep="")) |
|
47 |
- stop("Please provide consensus patterns upon restarting.") |
|
48 |
- } |
|
49 |
- matchedPatternSets <- postInitialPhase(unmatchedPatterns, length(allDataSets), cut, minNS) |
|
50 |
- save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", sep="")) |
|
51 |
- consensusPatterns <- matchedPatternSets[[1]] |
|
52 |
- } |
|
53 |
- finalResult <- runFinalPhase(simulationName, allDataSets, consensusPatterns, nCores, ...) |
|
54 |
- return(postFinalPhase(finalResult, consensusPatterns)) |
|
55 |
-} |
|
56 |
- |
|
57 |
-#' Restart a GWCoGaps Run from Checkpoint |
|
58 |
-#' |
|
59 |
-#' @inheritParams GWCoGAPS |
|
60 |
-#' @return list of A and P estimates |
|
61 |
-#' @importFrom utils file_test |
|
62 |
-#' @examples |
|
63 |
-#' data(SimpSim) |
|
64 |
-#' sim_name <- "example" |
|
65 |
-#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name) |
|
66 |
-#' trash <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200) |
|
67 |
-#' result <- GWCoGapsFromCheckpoint(sim_name, 2) |
|
68 |
-#' @export |
|
69 |
-GWCoGapsFromCheckpoint <- function(simulationName, nCores, cut=NA, minNS=NA, ...) |
|
70 |
-{ |
|
71 |
- # find data files |
|
72 |
- allDataSets <- preInitialPhase(simulationName, nCores) |
|
73 |
- |
|
74 |
- # figure out phase from file signature |
|
75 |
- initialCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, |
|
76 |
- "_initial_cpt_[0-9]+.out", sep="")) |
|
77 |
- finalCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, |
|
78 |
- "_final_cpt_[0-9]+.out", sep="")) |
|
79 |
- |
|
80 |
- if (length(finalCpts)) |
|
81 |
- { |
|
82 |
- finalResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
83 |
- { |
|
84 |
- # load data set and shift values so gene minimum is zero |
|
85 |
- load(allDataSets[[i]]) |
|
86 |
- sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) |
|
87 |
- pmin(0,min(x)))) |
|
88 |
- |
|
89 |
- # run CoGAPS with fixed patterns |
|
90 |
- cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", |
|
91 |
- sep="") |
|
92 |
- CoGapsFromCheckpoint(sampleD, sampleS, cptFileName) |
|
93 |
- } |
|
94 |
- load(paste(simulationName, "_matched_ps.RData", sep="")) |
|
95 |
- } |
|
96 |
- else if (file_test("-f", paste(simulationName, "_matched_ps.RData", sep=""))) |
|
97 |
- { |
|
98 |
- load(paste(simulationName, "_matched_ps.RData", sep="")) |
|
99 |
- consensusPatterns<-matchedPatternSets[[1]] |
|
100 |
- finalResult <- runFinalPhase(simulationName, allDataSets, |
|
101 |
- consensusPatterns, nCores, ...) |
|
102 |
- } |
|
103 |
- else if (length(initialCpts)) |
|
104 |
- { |
|
105 |
- # initial phase - always needs to be run to get matchedPatternSets |
|
106 |
- initialResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
107 |
- { |
|
108 |
- # load data set and shift values so gene minimum is zero |
|
109 |
- load(allDataSets[[i]]) |
|
110 |
- sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) |
|
111 |
- pmin(0,min(x)))) |
|
112 |
- |
|
113 |
- # run CoGAPS from checkpoint |
|
114 |
- cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", |
|
115 |
- sep="") |
|
116 |
- CoGapsFromCheckpoint(sampleD, sampleS, cptFileName) |
|
117 |
- } |
|
118 |
- matchedPatternSets <- postInitialPhase(initialResult, |
|
119 |
- length(allDataSets), cut, minNS) |
|
120 |
- save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", |
|
121 |
- sep="")) |
|
122 |
- consensusPatterns<-matchedPatternSets[[1]] |
|
123 |
- finalResult <- runFinalPhase(simulationName, allDataSets, |
|
124 |
- consensusPatterns, nCores, ...) |
|
125 |
- } |
|
126 |
- else |
|
127 |
- { |
|
128 |
- stop("no checkpoint files found") |
|
129 |
- } |
|
130 |
- return(postFinalPhase(finalResult, consensusPatterns)) |
|
131 |
-} |
|
132 |
- |
|
133 |
-preInitialPhase <- function(simulationName, nCores) |
|
134 |
-{ |
|
135 |
- # find data files |
|
136 |
- fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="") |
|
137 |
- allDataSets <- list.files(full.names=TRUE, pattern=fileSig) |
|
138 |
- |
|
139 |
- # establish the number of cores that we are able to use |
|
140 |
- if (is.na(nCores)) |
|
141 |
- { |
|
142 |
- nCores <- length(allDataSets) |
|
143 |
- } |
|
144 |
- registerDoParallel(cores=nCores) |
|
145 |
- return(allDataSets) |
|
146 |
-} |
|
147 |
- |
|
148 |
-runInitialPhase <- function(simulationName, allDataSets, nFactor, ...) |
|
149 |
-{ |
|
150 |
- #generate seeds for parallelization |
|
151 |
- nut <- generateSeeds(chains=length(allDataSets), seed=-1) |
|
152 |
- |
|
153 |
- # run CoGAPS for each set |
|
154 |
- initialResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
155 |
- { |
|
156 |
- # load data set and shift values so gene minimum is zero |
|
157 |
- load(allDataSets[[i]]) |
|
158 |
- sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x)))) |
|
159 |
- |
|
160 |
- # run CoGAPS without any fixed patterns |
|
161 |
- cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", sep="") |
|
162 |
- CoGAPS(sampleD, uncertainty=sampleS, nFactor=nFactor, seed=nut[i], |
|
163 |
- checkpointFile=cptFileName) |
|
164 |
- } |
|
165 |
- return(initialResult) |
|
166 |
-} |
|
167 |
- |
|
168 |
-postInitialPhase <- function(initialResult, nSets, cut, minNS) |
|
169 |
-{ |
|
170 |
- nFactor <- ncol(initialResult[[1]]@featureLoadings) |
|
171 |
- BySet <- reOrderBySet(AP=initialResult, nFactor=nFactor, nSets=nSets) |
|
172 |
- |
|
173 |
- #run postpattern match function |
|
174 |
- if (is.na(cut)) |
|
175 |
- { |
|
176 |
- cut <- nFactor |
|
177 |
- } |
|
178 |
- |
|
179 |
- return(patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets, cnt=cut, |
|
180 |
- minNS=minNS, bySet=TRUE)) |
|
181 |
-} |
|
182 |
- |
|
183 |
-runFinalPhase <- function(simulationName, allDataSets, consensusPatterns, nCores, ...) |
|
184 |
-{ |
|
185 |
- if (class(consensusPatterns) != "matrix") |
|
186 |
- { |
|
187 |
- stop("consensusPatterns must be a matrix") |
|
188 |
- } |
|
189 |
- |
|
190 |
- # find data files if providing consensus patterns |
|
191 |
- fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="") |
|
192 |
- allDataSets <- list.files(full.names=TRUE, pattern=fileSig) |
|
193 |
- |
|
194 |
- if (is.na(nCores)) |
|
195 |
- { |
|
196 |
- nCores <- length(allDataSets) |
|
197 |
- } |
|
198 |
- registerDoParallel(cores=nCores) |
|
199 |
- |
|
200 |
- # generate seeds for parallelization |
|
201 |
- nut <- generateSeeds(chains=length(allDataSets), seed=-1) |
|
202 |
- |
|
203 |
- # final number of factors |
|
204 |
- nFactorFinal <- nrow(consensusPatterns) |
|
205 |
- message(paste("final number of patterns:", nFactorFinal)) |
|
206 |
- |
|
207 |
- # run fixed CoGAPS |
|
208 |
- finalResult <- foreach(i=1:length(allDataSets)) %dopar% |
|
209 |
- { |
|
210 |
- # load data set and shift values so gene minimum is zero |
|
211 |
- load(allDataSets[[i]]) |
|
212 |
- sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x)))) |
|
213 |
- |
|
214 |
- # run CoGAPS with fixed patterns |
|
215 |
- cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", sep="") |
|
216 |
- CoGAPS(sampleD, uncertainty=sampleS, fixedMatrix=consensusPatterns, |
|
217 |
- nFactor=nFactorFinal, seed=nut[i], checkpointFile=cptFileName, |
|
218 |
- whichMatrixFixed='P') |
|
219 |
- |
|
220 |
- } |
|
221 |
- return(finalResult) |
|
222 |
-} |
|
223 |
- |
|
224 |
-postFinalPhase <- function(finalResult, consensusPatterns) |
|
225 |
-{ |
|
226 |
- Aresult <- postFixed4Parallel(finalResult, consensusPatterns) |
|
227 |
- finalResult <- list("Amean"=Aresult$A, "Asd"=Aresult$Asd,"Pmean"=consensusPatterns) |
|
228 |
- |
|
229 |
- return(new("CogapsResult", |
|
230 |
- Amean = Aresult$A, |
|
231 |
- Asd = Aresult$Asd, |
|
232 |
- Pmean = consensusPatterns, |
|
233 |
- Psd = NULL, |
|
234 |
- seed = 0, |
|
235 |
- meanChiSq = 0, |
|
236 |
- diagnostics = list() |
|
237 |
- )) |
|
238 |
-} |
239 | 0 |
similarity index 76% |
240 | 1 |
rename from R/package.R |
241 | 2 |
rename to R/Package.R |
... | ... |
@@ -21,21 +21,6 @@ |
21 | 21 |
#' Bioinformatics. 2010 Nov 1;26(21):2792-3 |
22 | 22 |
#' @docType package |
23 | 23 |
#' @name CoGAPS-package |
24 |
-#' @importFrom Rcpp evalCpp |
|
25 |
-#' @importFrom gplots heatmap.2 plotCI |
|
26 |
-#' @importFrom stats variable.names sd update heatmap runif |
|
27 |
-#' @importFrom graphics matplot title abline close.screen hist legend lines mtext par plot points screen split.screen |
|
28 |
-#' @importFrom grDevices dev.new dev.off pdf colorRampPalette rainbow |
|
29 |
-#' @importFrom methods is |
|
30 |
-#' @importFrom utils read.table write.table str |
|
31 |
-#' @importFrom RColorBrewer brewer.pal |
|
32 |
-#' @importFrom stats cor loess as.dist cutree as.hclust complete.cases D hclust weighted.mean |
|
33 |
-#' @import foreach |
|
34 |
-#' @importFrom cluster agnes |
|
35 |
-#' @import doParallel |
|
36 |
-#' @import shiny |
|
37 |
-#' @importFrom reshape2 melt |
|
38 |
-#' @import ggplot2 |
|
39 | 24 |
#' @useDynLib CoGAPS |
40 | 25 |
NULL |
41 | 26 |
|
42 | 27 |
deleted file mode 100644 |
... | ... |
@@ -1,125 +0,0 @@ |
1 |
-#' cellMatchR |
|
2 |
-#' @export |
|
3 |
-#' |
|
4 |
-#' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet} |
|
5 |
-#' @param nSets number of parallel sets used to generate \code{Atot} |
|
6 |
-#' @param cnt number of branches at which to cut dendrogram |
|
7 |
-#' @param minNS minimum of individual set contributions a cluster must contain |
|
8 |
-#' @param maxNS maximum of individual set contributions a cluster must contain |
|
9 |
-#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE. |
|
10 |
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot} |
|
11 |
-#' @param plotDen plot |
|
12 |
-#' @param ... additional parameters for \code{agnes} |
|
13 |
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each |
|
14 |
-#' concensus pattern is also returned. |
|
15 |
-cellMatchR <- function(Atot, nSets, cnt, minNS=NA, maxNS=NA, |
|
16 |
-ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE, ...) |
|
17 |
-{ |
|
18 |
- if (is.na(minNS)) |
|
19 |
- { |
|
20 |
- minNS <- nSets / 2 |
|
21 |
- } |
|
22 |
- if (is.na(maxNS)) |
|
23 |
- { |
|
24 |
- maxNS <- nSets + minNS |
|
25 |
- } |
|
26 |
- if (!ignore.NA) |
|
27 |
- { |
|
28 |
- if (anyNA(Atot)) |
|
29 |
- { |
|
30 |
- warning(paste("Non-sparse matrixes produced. Reducing the number", |
|
31 |
- "of patterns asked for and rerun.")) |
|
32 |
- } |
|
33 |
- } |
|
34 |
- else |
|
35 |
- { |
|
36 |
- Atot <- Atot[complete.cases(Atot),] |
|
37 |
- } |
|
38 |
- |
|
39 |
- corcut <- function(Atot, minNS, cnt, cluster.method) |
|
40 |
- { |
|
41 |
- corr.dist <- cor(Atot) |
|
42 |
- corr.dist <- 1-corr.dist |
|
43 |
- |
|
44 |
- clust <- agnes(x=corr.dist, diss=TRUE, cluster.method) |
|
45 |
- #clust=fastcluster::hclust(dist(corr.dist)) |
|
46 |
- cut <- cutree(as.hclust(clust),k=cnt) |
|
47 |
- |
|
48 |
- cls <- sort(unique(cut)) |
|
49 |
- cMNs <- matrix(ncol=cnt,nrow=dim(Atot)[1]) |
|
50 |
- colnames(cMNs) <- cls |
|
51 |
- rownames(cMNs) <- rownames(Atot) |
|
52 |
- |
|
53 |
- RtoMeanPattern <- list() |
|
54 |
- AByClust <- list() |
|
55 |
- for (i in cls) |
|
56 |
- { |
|
57 |
- if (!is.null(dim(Atot[,cut == i]))) |
|
58 |
- { |
|
59 |
- if (dim(Atot[,cut == i])[2] >= minNS) |
|
60 |
- { |
|
61 |
- cMNs[,i] <- rowMeans(Atot[,cut==i]) |
|
62 |
- AByClust[[i]] <- Atot[,cut==i] |
|
63 |
- nIN <- sum(cut==i) |
|
64 |
- RtoMeanPattern[[i]] <- sapply(1:nIN, function(j) |
|
65 |
- round(cor(x=Atot[,cut==i][,j], y=cMNs[,i]),3)) |
|
66 |
- } |
|
67 |
- } |
|
68 |
- } |
|
69 |
- |
|
70 |
- #AByClust[sapply(AByClust,is.null)] <- NULL |
|
71 |
- #RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL |
|
72 |
- return(list("RtoMeanPattern"=RtoMeanPattern, "AByClust"=AByClust)) |
|
73 |
- } |
|
74 |
- |
|
75 |
- cc <- corcut(Atot, minNS, cnt, cluster.method) |
|
76 |
- |
|
77 |
- ### split by maxNS |
|
78 |
- indx <- which(unlist(lapply(cc$AByClust, function(x) dim(x)[1] > maxNS))) |
|
79 |
- i <- 1 |
|
80 |
- while (length(indx) > 0) |
|
81 |
- { |
|
82 |
- icc <- corcut(cc$AByClust[[indx[1]]], minNS, 2, cluster.method) |
|
83 |
- if (length(icc[[2]]) == 0) |
|
84 |
- { |
|
85 |
- indx <- indx[-1] |
|
86 |
- next |
|
87 |
- } |
|
88 |
- else |
|
89 |
- { |
|
90 |
- cc$AByClust[[indx[1]]] <- icc[[2]][[1]] |
|
91 |
- cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]] |
|
92 |
- if (length(icc[[2]]) > 1) |
|
93 |
- { |
|
94 |
- cc$AByClust<-append(cc$AByClust,icc[[2]][2]) |
|
95 |
- cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2]) |
|
96 |
- } |
|
97 |
- indx <- which(unlist(lapply(cc$AByClust,function(x) dim(x)[1]>maxNS))) |
|
98 |
- } |
|
99 |
- } |
|
100 |
- |
|
101 |
- #weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]]) |
|
102 |
- AByCDSWavg <- t(sapply(1:length(cc$AByClust), function(z) |
|
103 |
- apply(cc$AByClust[[z]], 1, function(x) |
|
104 |
- weighted.mean(x, (cc$RtoMeanPattern[[z]])^3)))) |
|
105 |
- rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust), function(x) |
|
106 |
- paste("Pattern",x)) |
|
107 |
- #scale As |
|
108 |
- Amax <- apply(AByCDSWavg, 1, max) |
|
109 |
- AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1], function(x) |
|
110 |
- AByCDSWavg[x,] / Amax[x])) |
|
111 |
- rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg) |
|
112 |
- |
|
113 |
- if (bySet) |
|
114 |
- { |
|
115 |
- return(list("consenusAs"=t(AByCDSWavgScaled),"ABySet"=cc)) |
|
116 |
- } |
|
117 |
- else |
|
118 |
- { |
|
119 |
- return(AByCDSWavgScaled) |
|
120 |
- } |
|
121 |
-} |
|
122 |
- |
|
123 |
-# pattern first then file name |
|
124 |
-# dont overwrite |
|
125 |
-# some elements are negative |
|
126 | 0 |
\ No newline at end of file |
... | ... |
@@ -120,95 +120,3 @@ setGeneric("parseOldParams", function(object, oldArgs) |
120 | 120 |
setGeneric("parseDirectParams", function(object, args) |
121 | 121 |
{standardGeneric("parseDirectParams")}) |
122 | 122 |
|
123 |
-#' @rdname setParam-methods |
|
124 |
-#' @aliases setParam |
|
125 |
-setMethod("setParam", signature(object="CogapsParams"), |
|
126 |
-function(object, whichParam, value) |
|
127 |
-{ |
|
128 |
- slot(object, whichParam) <- value |
|
129 |
- validObject(object) |
|
130 |
- return(object) |
|
131 |
-}) |
|
132 |
- |
|
133 |
-#' @rdname getParam-methods |
|
134 |
-#' @aliases getParam |
|
135 |
-setMethod("getParam", signature(object="CogapsParams"), |
|
136 |
-function(object, whichParam) |
|
137 |
-{ |
|
138 |
- slot(object, whichParam) |
|
139 |
-}) |
|
140 |
- |
|
141 |
-#' @rdname parseOldParams-methods |
|
142 |
-#' @aliases parseOldParams |
|
143 |
-setMethod("parseOldParams", signature(object="CogapsParams"), |
|
144 |
-function(object, oldArgs) |
|
145 |
-{ |
|
146 |
- helper <- function(arg, params, newArg) |
|
147 |
- { |
|
148 |
- if (!is.null(oldArgs[[arg]])) |
|
149 |
- { |
|
150 |
- warning(paste("parameter", arg, "is deprecated, it will still", |
|
151 |
- "work, but setting", newArg, "in the params object is the", |
|
152 |
- "preferred method")) |
|
153 |
- params <- setParam(params, newArg, oldArgs[[arg]]) |
|
154 |
- oldArgs[[arg]] <- NULL |
|
155 |
- } |
|
156 |
- return(params) |
|
157 |
- } |
|
158 |
- |
|
159 |
- object <- helper("nFactor", object, "nPatterns") |
|
160 |
- object <- helper("nIter", object, "nIterations") |
|
161 |
- object <- helper("nEquil", object, "nIterations") |
|
162 |
- object <- helper("nSample", object, "nIterations") |
|
163 |
- object <- helper("nOutR", object, "outputFrequency") |
|
164 |
- object <- helper("nOutput", object, "outputFrequency") |
|
165 |
- object <- helper("maxGibbmassA", object, "maxGibbsMassA") |
|
166 |
- object <- helper("max_gibbmass_paraA", object, "maxGibbsMassA") |
|
167 |
- object <- helper("maxGibbmassP", object, "maxGibbsMassP") |
|
168 |
- object <- helper("max_gibbmass_paraP", object, "maxGibbsMassP") |
|
169 |
- object <- helper("singleCellRNASeq", object, "singleCell") |
|
170 |
- |
|
171 |
- if (!is.null(oldArgs$nSnapshots) | !is.null(oldArgs$sampleSnapshots) | !is.null(oldArgs$numSnapshots)) |
|
172 |
- { |
|
173 |
- warning("snapshots not currently supported in release build") |
|
174 |
- oldArgs$nSnapshots <- NULL |
|
175 |
- oldArgs$sampleSnapshots <- NULL |
|
176 |
- oldArgs$numSnapshots <- NULL |
|
177 |
- } |
|
178 |
- if (!is.null(oldArgs$fixedPatterns)) |
|
179 |
- stop("pass fixed matrix in with 'fixedMatrix' argument") |
|
180 |
- if (!is.null(oldArgs$S)) |
|
181 |
- stop("pass uncertainty matrix in with 'uncertainty', not 'S'") |
|
182 |
- |
|
183 |
- return(object) |
|
184 |
-}) |
|
185 |
- |
|
186 |
-#' @rdname parseDirectParams-methods |
|
187 |
-#' @aliases parseDirectParams |
|
188 |
-setMethod("parseDirectParams", signature(object="CogapsParams"), |
|
189 |
-function(object, args) |
|
190 |
-{ |
|
191 |
- for (s in slotNames(object)) |
|
192 |
- { |
|
193 |
- if (!is.null(args[[s]])) |
|
194 |
- { |
|
195 |
- object <- setParam(object, s, args[[s]]) |
|
196 |
- } |
|
197 |
- } |
|
198 |
- return(object) |
|
199 |
-}) |
|
200 |
- |
|
201 |
-setMethod("show", signature("CogapsParams"), |
|
202 |
-function(object) |
|
203 |
-{ |
|
204 |
- cat("An Object of class \"CogapsParams\"\n") |
|
205 |
- cat("nPatterns ", object@nPatterns, "\n") |
|
206 |
- cat("nIterations ", object@nIterations, "\n") |
|
207 |
- cat("outputFrequency ", object@outputFrequency, "\n") |
|
208 |
- cat("nCores ", object@nCores, "\n") |
|
209 |
- cat("singleCell ", object@singleCell, "\n") |
|
210 |
- cat("seed ", object@seed, "\n") |
|
211 |
- cat("messages ", object@messages, "\n") |
|
212 |
- cat("checkpointInterval ", object@checkpointInterval, "\n") |
|
213 |
- cat("checkpointOutFile ", object@checkpointOutFile, "\n") |
|
214 |
-}) |
... | ... |
@@ -3,14 +3,10 @@ |
3 | 3 |
#' |
4 | 4 |
#' @description Contains all output from Cogaps run |
5 | 5 |
#' @importClassesFrom S4Vectors DataFrame Annotated character_OR_NULL |
6 |
-setClass("CogapsResult", contains="Annotated", slots=c( |
|
7 |
- sampleFactors = "ANY", # Pmean transpose |
|
8 |
- featureLoadings = "ANY", # Amean |
|
9 |
- featureStdDev = "ANY", # Asd |
|
10 |
- sampleStdDev = "ANY", # Psd |
|
11 |
- NAMES = "character_OR_NULL", |
|
12 |
- factorData = "DataFrame"), |
|
13 |
-) |
|
6 |
+setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c( |
|
7 |
+ sampleStdDev = "ANY", # Psd transpose |
|
8 |
+ featureStdDev = "ANY", # Asd |
|
9 |
+)) |
|
14 | 10 |
|
15 | 11 |
#' Constructor for CogapsResult |
16 | 12 |
#' @return initialized CogapsResult object |
... | ... |
@@ -194,369 +190,21 @@ setGeneric("computeGeneGSProb", function(object, GStoGenes, numPerm=500, |
194 | 190 |
Pw=rep(1,ncol(Amean)), PwNull=FALSE) |
195 | 191 |
{standardGeneric("computeGeneGSProb")}) |
196 | 192 |
|
197 |
-################################## METHODS ##################################### |
|
198 |
- |
|
199 |
-setMethod("show", signature("CogapsResult"), |
|
200 |
-function(object) |
|
201 |
-{ |
|
202 |
- nFeatures <- nrow(object@featureLoadings) |
|
203 |
- nSamples <- nrow(object@sampleFactors) |
|
204 |
- nPatterns <- ncol(object@featureLoadings) |
|
205 |
- |
|
206 |
- print(paste("CogapsResult object with", nFeatures, "features and", nSamples, |
|
207 |
- "samples")) |
|
208 |
- print(paste(nPatterns, "patterns were learned")) |
|
209 |
-}) |
|
210 |
- |
|
193 |
+#' compute pattern markers statistic |
|
211 | 194 |
#' @export |
212 |
-#' @importFrom graphics plot |
|
213 |
-plot.CogapsResult <- function(object, ...) |
|
214 |
-{ |
|
215 |
- nSamples <- nrow(object@sampleFactors) |
|
216 |
- nFactors <- ncol(object@sampleFactors) |
|
217 |
- colors <- rainbow(nFactors) |
|
218 |
- xlimits <- c(0, nSamples + 1) |
|
219 |
- ylimits <- c(0, max(object@sampleFactors) * 1.1) |
|
220 |
- |
|
221 |
- plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude") |
|
222 |
- |
|
223 |
- for (i in 1:nFactors) |
|
224 |
- { |
|
225 |
- lines(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i]) |
|
226 |
- points(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i], pch=i) |
|
227 |
- } |
|
228 |
- |
|
229 |
- legend("top", paste("Pattern", 1:nFactors, sep = ""), pch = 1:nFactors, |
|
230 |
- lty=1, cex=0.8, col=colors, bty="y", ncol=5) |
|
231 |
-} |
|
232 |
- |
|
233 |
-#' @rdname getMeanChiSq-methods |
|
234 |
-#' @aliases getMeanChiSq |
|
235 |
-setMethod("getMeanChiSq", signature(object="CogapsResult"), |
|
236 |
-function(object) |
|
237 |
-{ |
|
238 |
- object@metadata$meanChiSq |
|
239 |
-}) |
|
240 |
- |
|
241 |
-#' @rdname calcZ-methods |
|
242 |
-#' @aliases calcZ |
|
243 |
-setMethod("calcZ", signature(object="CogapsResult"), |
|
244 |
-function(object, which) |
|
245 |
-{ |
|
246 |
- if (which == "feature") |
|
247 |
- { |
|
248 |
- object@featureLoadings / object@featureStdDev |
|
249 |
- } |
|
250 |
- else if (which == "sample") |
|
251 |
- { |
|
252 |
- object@sampleFactors / object@sampleStdDev |
|
253 |
- } |
|
254 |
- else |
|
255 |
- { |
|
256 |
- stop("\"which\" must be either \"feature\" or \"sample\"") |
|
257 |
- } |
|
258 |
-}) |
|
259 |
- |
|
260 |
-#' @rdname reconstructGene-methods |
|
261 |
-#' @aliases reconstructGene |
|
262 |
-setMethod("reconstructGene", signature(object="CogapsResult"), |
|
263 |
-function(object, genes) |
|
264 |
-{ |
|
265 |
- D <- object@featureLoadings %*% t(object@sampleFactors) |
|
266 |
- if (!is.null(genes)) |
|
267 |
- { |
|
268 |
- D <- D[genes,] |
|
269 |
- } |
|
270 |
- return(D) |
|
271 |
-}) |
|
272 |
- |
|
273 |
-#' @rdname binaryA-methods |
|
274 |
-#' @aliases binaryA |
|
275 |
-setMethod("binaryA", signature(object="CogapsResult"), |
|
276 |
-function(object, threshold) |
|
277 |
-{ |
|
278 |
- binA <- ifelse(calcZ(object) > threshold, 1, 0) |
|
279 |
- |
|
280 |
- heatmap.2(binA, Rowv = FALSE, Colv = FALSE, dendrogram="none", |
|
281 |
- scale="none", col = brewer.pal(3,"Blues"), trace="none", |
|
282 |
- density.info="none", cexCol=1.3, srtCol=45, |
|
283 |
- lmat=rbind(c(0, 3), c(2,1), c(0,4) ), |
|
284 |
- lwid=c(1,10), lhei=c(1, 4, 1.2 ), |
|
285 |
- main="Heatmap of Standardized Feature Matrix") |
|
286 |
- mtext(paste("(Threshold = ", threshold, ")")) |
|
287 |
-}) |
|
288 |
- |
|
289 |
-#' @rdname plotResiduals-methods |
|
290 |
-#' @aliases plotResiduals |
|
291 |
-setMethod("plotResiduals", signature(object="CogapsResult"), |
|
292 |
-function(object, data, uncertainty) |
|
293 |
-{ |
|
294 |
- data <- as.matrix(data) |
|
295 |
- if (is.null(uncertainty)) |
|
296 |
- uncertainty <- pmax(0.1 * data, 0.1) |
|
297 |
- uncertainty <- as.matrix(uncertainty) |
|
298 |
- |
|
299 |
- M <- reconstructGene(object) |
|
300 |
- resid <- (data - M) / uncertainty |
|
301 |
- |
|
302 |
- scaledRdYlBu <- colorRampPalette(brewer.pal(9,"RdYlBu"))(100) |
|
303 |
- heatmap.2(resid, Rowv = FALSE, Colv = FALSE, dendrogram="none", |
|
304 |
- scale="none", col = scaledRdYlBu, trace="none", density.info="none", |
|
305 |
- cexCol=1.33, srtCol=45, lmat=rbind(c(0, 3),c(2,1),c(0,4) ), |
|
306 |
- lwid=c(1,10), lhei=c(1, 4, 1.2 ), main="Heatmap of Residuals") |
|
307 |
-}) |
|
308 |
- |
|
309 |
-setMethod("patternMarkers", signature(object="CogapsResult"), |
|
310 |
-function(object, threshold="all", lp=NA, full=FALSE) |
|
311 |
-{ |
|
312 |
- nGenes <- nrow(object@featureLoadings) |
|
313 |
- nPatterns <- ncol(object@featureLoadings) |
|
314 |
- |
|
315 |
- # find the A with the highest magnitude |
|
316 |
- Arowmax <- t(apply(object@featureLoadings, 1, function(x) x/max(x))) |
|
317 |
- |
|
318 |
- if (!is.na(lp)) |
|
319 |
- { |
|
320 |
- if (length(lp) != nPatterns) |
|
321 |
- { |
|
322 |
- warning("lp length must equal the number of columns of the Amatrix") |
|
323 |
- } |
|
324 |
- sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
325 |
- ssranks <-rank(sstat) |
|
326 |
- ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE)) |
|
327 |
- } |
|
328 |
- else |
|
329 |
- { |
|
330 |
- # determine which genes are most associated with each pattern |
|
331 |
- |
|
332 |
- sstat <- matrix(NA, nrow=nGenes, ncol=nPatterns, |
|
333 |
- dimnames=dimnames(object@featureLoadings)) |
|
334 |
- ssranks <- matrix(NA, nrow=nGenes, ncol=nPatterns, |
|
335 |
- dimnames=dimnames(object@featureLoadings)) |
|
336 |
- ssgenes <- matrix(NA, nrow=nGenes, ncol=nPatterns, dimnames=NULL) |
|
337 |
- for (i in 1:nP) |
|
338 |
- { |
|
339 |
- lp <- rep(0,dim(Amatrix)[2]) |
|
340 |
- lp[i] <- 1 |
|
341 |
- sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))) |
|
342 |
- ssranks[,i]<-rank(sstat[,i]) |
|
343 |
- ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE)) |
|
344 |
- } |
|
345 |
- if (threshold=="cut") |
|
346 |
- { |
|
347 |
- geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min)))) |
|
348 |
- ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x]) |
|
349 |
- } |
|
350 |
- else if (threshold=="all") |
|
351 |
- { |
|
352 |
- pIndx<-unlist(apply(sstat,1,which.min)) |
|
353 |
- gBYp <- list() |
|
354 |
- for(i in sort(unique(pIndx))) |
|
355 |
- { |
|
356 |
- gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1]) |
|
357 |
- } |
|
358 |
- ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x) |
|
359 |
- ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x]) |
|
360 |
- } |
|
361 |
- else |
|
362 |
- { |
|
363 |
- stop("Threshold arguement not viable option") |
|
364 |
- } |
|
365 |
- } |
|
366 |
- if (full) |
|
367 |
- { |
|
368 |
- return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks, |
|
369 |
- "PatternMarkerScores"=sstat)) |
|
370 |
- } |
|
371 |
- else |
|
372 |
- { |
|
373 |
- return("PatternMarkers"=ssgenes.th) |
|
374 |
- } |
|
375 |
-}) |
|
376 |
- |
|
377 |
-plotPatternMarkers |
|
378 |
- |
|
379 |
-#' @rdname calcCoGAPSStat-methods |
|
380 |
-#' @aliases calcCoGAPSStat |
|
381 |
-setMethod("calcCoGAPSStat", signature(object="CogapsResult"), |
|
382 |
-function(object, GStoGenes, numPerm) |
|
383 |
-{ |
|
384 |
- # test for std dev of zero, possible mostly in simple simulations |
|
385 |
- if (sum(object@featureStdDev==0) > 0) |
|
386 |
- { |
|
387 |
- object@featureStdDev[object@featureStdDev==0] <- 1e-6 |
|
388 |
- } |
|
389 |
- |
|
390 |
- # calculate Z scores |
|
391 |
- zMatrix <- calcZ(object) |
|
392 |
- |
|
393 |
- # check input arguments |
|
394 |
- if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets")) |
|
395 |
- { |
|
396 |
- stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.") |
|
397 |
- } |
|
398 |
- |
|
399 |
- if (is(GStoGenes, "GSA.genesets")) |
|
400 |
- { |
|
401 |
- names(GStoGenes$genesets) <- GStoGenes$geneset.names |
|
402 |
- GStoGenes <- GStoGenes$genesets |
|
403 |
- } |
|
404 |
- |
|
405 |
- if (is(GStoGenes, "list")) |
|
406 |
- { |
|
407 |
- GStoGenesList <- GStoGenes |
|
408 |
- } |
|
409 |
- else |
|
410 |
- { |
|
411 |
- GStoGenesList <- list() |
|
412 |
- for (i in 1:dim(GStoGenes)[2]) |
|
413 |
- { |
|
414 |
- GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i])) |
|
415 |
- } |
|
416 |
- } |
|
417 |
- |
|
418 |
- # get dimensions |
|
419 |
- numGS <- length(names(GStoGenesList)) |
|
420 |
- numPatt <- dim(zMatrix)[2] |
|
421 |
- numG <- dim(zMatrix)[1]+0.9999 |
|
422 |
- |
|
423 |
- # initialize matrices |
|
424 |
- statsUp <- matrix(ncol = numGS, nrow = numPatt) |
|
425 |
- statsDown <- matrix(ncol = numGS, nrow = numPatt) |
|
426 |
- actEst <- matrix(ncol = numGS, nrow = numPatt) |
|
427 |
- results <- list() |
|
428 |
- zPerm <- matrix(ncol=numPerm,nrow=numPatt) |
|
429 |
- |
|
430 |
- # do permutation test |
|
431 |
- for (gs in 1:numGS) |
|
432 |
- { |
|
433 |
- genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
434 |
- index <- rownames(zMatrix) %in% genes |
|
435 |
- zValues <- zMatrix[index,1] |
|
436 |
- numGenes <- length(zValues) |
|
437 |
- label <- as.character(numGenes) |
|
438 |
- |
|
439 |
- if (!any(names(results)==label)) |
|
440 |
- { |
|
441 |
- for (p in 1:numPatt) |
|
442 |
- { |
|
443 |
- for (j in 1:numPerm) |
|
444 |
- { |
|
445 |
- temp <- floor(runif(numGenes,1,numG)) |
|
446 |
- temp2 <- zMatrix[temp,p] |
|
447 |
- zPerm[p,j] <- mean(temp2) |
|
448 |
- } |
|
449 |
- } |
|
450 |
- results[[label]] <- zPerm |
|
451 |
- } |
|
452 |
- } |
|
453 |
- |
|
454 |
- # get p-value |
|
455 |
- for (p in 1:numPatt) |
|
456 |
- { |
|
457 |
- for (gs in 1:numGS) |
|
458 |
- { |
|
459 |
- genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
460 |
- index <- rownames(zMatrix) %in% genes |
|
461 |
- zValues <- zMatrix[index,p] |
|
462 |
- zScore <- mean(zValues) |
|
463 |
- |
|
464 |
- numGenes <- length(zValues) |
|
465 |
- label <- as.character(numGenes) |
|
466 |
- |
|
467 |
- permzValues <- results[[label]][p,] |
|
468 |
- ordering <- order(permzValues) |
|
469 |
- permzValues <- permzValues[ordering] |
|
470 |
- |
|
471 |
- statistic <- sum(zScore > permzValues) |
|
472 |
- statUpReg <- 1 - statistic/length(permzValues) |
|
473 |
- statsUp[p,gs] <- max(statUpReg, 1/numPerm) |
|
474 |
- |
|
475 |
- statistic <- sum(zScore < permzValues) |
|
476 |
- statDownReg <- 1 - statistic/length(permzValues) |
|
477 |
- statsDown[p,gs] <- max(statDownReg,1/numPerm) |
|
478 |
- |
|
479 |
- activity <- 1 - 2*max(statUpReg, 1/numPerm) |
|
480 |
- actEst[p,gs] <- activity |
|
481 |
- } |
|
482 |
- } |
|
483 |
- |
|
484 |
- # format output |
|
485 |
- colnames(statsUp) <- names(GStoGenesList) |
|
486 |
- colnames(statsDown) <- names(GStoGenesList) |
|
487 |
- colnames(actEst) <- names(GStoGenesList) |
|
488 |
- |
|
489 |
- rownames(statsUp) <- colnames(zMatrix) |
|
490 |
- rownames(statsDown) <- colnames(zMatrix) |
|
491 |
- rownames(actEst) <- colnames(zMatrix) |
|
492 |
- |
|
493 |
- results[['GSUpreg']] <- statsUp |
|
494 |
- results[['GSDownreg']] <- statsDown |
|
495 |
- results[['GSActEst']] <- actEst |
|
496 |
- |
|
497 |
- return(results) |
|
498 |
-}) |
|
499 |
- |
|
500 |
-#' @rdname calcGeneGSStat-methods |
|
501 |
-#' @aliases calcGeneGSStat |
|
502 |
-setMethod("calcGeneGSStat", signature(object="CogapsResult"), |
|
503 |
-function(object, GStoGenes, numPerm, Pw, nullGenes) |
|
504 |
-{ |
|
505 |
- gsStat <- calcCoGAPSStat(object, data.frame(GSGenes), numPerm=numPerm) |
|
506 |
- gsStat <- gsStat$GSUpreg |
|
507 |
- gsStat <- -log(gsStat) |
|
508 |
- |
|
509 |
- if (!all(is.na(Pw))) |
|
510 |
- { |
|
511 |
- if (length(Pw) != length(gsStat)) |
|
512 |
- { |
|
513 |
- stop('Invalid weighting') |
|
514 |
- } |
|
515 |
- gsStat <- gsStat*Pw |
|
516 |
- } |
|
517 |
- |
|
518 |
- if (nullGenes) |
|
519 |
- { |
|
520 |
- ZD <- object@featureLoadings[setdiff(row.names(object@featureLoadings), GSGenes),] / |
|
521 |
- object@featureStdDev[setdiff(row.names(object@featureLoadings), GSGenes),] |
|
522 |
- } |
|
523 |
- else |
|
524 |
- { |
|
525 |
- ZD <- object@featureLoadings[GSGenes,]/object@featureStdDev[GSGenes,] |
|
526 |
- } |
|
527 |
- outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat)) |
|
528 |
- outStats <- outStats / apply(ZD,1,sum) |
|
529 |
- outStats[which(apply(ZD,1,sum) < 1e-6)] <- 0 |
|
530 |
- |
|
531 |
- if (sum(gsStat) < 1e-6) |
|
532 |
- { |
|
533 |
- return(0) |
|
534 |
- } |
|
535 |
- return(outStats) |
|
536 |
-}) |
|
537 |
- |
|
538 |
-#' @rdname computeGeneGSProb-methods |
|
539 |
-#' @aliases computeGeneGSProb |
|
540 |
-setMethod("computeGeneGSProb", signature(object="CogapsResult"), |
|
541 |
-function(object, GStoGenes, numPerm, Pw, PwNull) |
|
542 |
-{ |
|
543 |
- geneGSStat <- calcGeneGSStat(object, Pw=Pw, GSGenes=GSGenes, |
|
544 |
- numPerm=numPerm) |
|
545 |
- |
|
546 |
- if (PwNull) |
|
547 |
- { |
|
548 |
- permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm, |
|
549 |
- Pw=Pw, nullGenes=TRUE) |
|
550 |
- } |
|
551 |
- else |
|
552 |
- { |
|
553 |
- permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm, |
|
554 |
- nullGenes=TRUE) |
|
555 |
- } |
|
556 |
- |
|
557 |
- finalStats <- sapply(GSGenes, function(x) |
|
558 |
- length(which(permGSStat > geneGSStat[x])) / length(permGSStat)) |
|
559 |
- |
|
560 |
- return(finalStats) |
|
561 |
-}) |
|
195 |
+#' @docType methods |
|
196 |
+#' @rdname patternMarkers-methods |
|
197 |
+#' |
|
198 |
+#' @description calculate the most associated pattern for each gene |
|
199 |
+#' @param object an object of type CogapsResult |
|
200 |
+#' @param threshold the type of threshold to be used. The default "all" will |
|
201 |
+#' distribute genes into pattern with the lowest ranking. The "cut" thresholds |
|
202 |
+#' by the first gene to have a lower ranking, i.e. better fit to, a pattern. |
|
203 |
+#' @param lp a vector of weights for each pattern to be used for finding |
|
204 |
+#' markers. If NA markers for each pattern of the A matrix will be used. |
|
205 |
+#' @return By default a non-overlapping list of genes associated with each |
|
206 |
+#' \code{lp}. If \code{full=TRUE} a data.frame of genes rankings with a column |
|
207 |
+#' for each \code{lp} will also be returned. |
|
208 |
+setGeneric("patternMarkers", function(object, threshold="all", lp=NA, full=FALSE) |
|
209 |
+ {standardGeneric("patternMarkers")}) |
|
562 | 210 |
|
563 | 211 |
deleted file mode 100644 |
... | ... |
@@ -1,39 +0,0 @@ |
1 |
-#' Create Gene Sets for GWCoGAPS |
|
2 |
-#' |
|
3 |
-#' @details factors whole genome data into randomly generated sets for indexing |
|
4 |
-#' |
|
5 |
-#' @param D data matrix |
|
6 |
-#' @param S uncertainty matrix |
|
7 |
-#' @param nSets number of sets to partition the data into |
|
8 |
-#' @param simulationName name used to identify files created by this simulation |
|
9 |
-#' @return simulationName used to identify saved files |
|
10 |
-#' @examples |
|
11 |
-#' data(SimpSim) |
|
12 |
-#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example") |
|
13 |
-#' @export |
|
14 |
-createGWCoGAPSSets <- function(D, S, nSets, simulationName) |
|
15 |
-{ |
|
16 |
- # check gene names |
|
17 |
- if (length(unique(rownames(D))) != length(rownames(D))) |
|
18 |
- { |
|
19 |
- warning("Gene identifiers not unique!") |
|
20 |
- } |
|
21 |
- |
|
22 |
- # partition data by sampling random sets of genes |
|
23 |
- genes <- 1:nrow(D) |
|
24 |
- setSize <- floor(length(genes) / nSets) |
|
25 |
- for (set in 1:nSets) |
|
26 |
- { |
|
27 |
- # sample gene names |
|
28 |
- sampleSize <- ifelse(set == nSets, length(genes), setSize) |
|
29 |
- geneSet <- sample(genes, sampleSize, replace=FALSE) |
|
30 |
- genes <- genes[!(genes %in% geneSet)] |
|
31 |
- |
|
32 |
- # partition data |
|
33 |
- sampleD <- D[geneSet,] |
|
34 |
- sampleS <- S[geneSet,] |
|
35 |
- save(sampleD, sampleS, file=paste(simulationName, "_partition_", set, |
|
36 |
- ".RData", sep="")); |
|
37 |
- } |
|
38 |
- return(simulationName) |
|
39 |
-} |
40 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,59 +0,0 @@ |
1 |
-#' Create Gene Sets for scCoGAPS |
|
2 |
-#' @export |
|
3 |
-#' |
|
4 |
-#' @description factors whole genome data into randomly generated sets for indexing |
|
5 |
-#' @param D data matrix |
|
6 |
-#' @param nSets number of sets to partition the data into |
|
7 |
-#' @param simulationName name used to identify files created by this simulation |
|
8 |
-#' @param samplingRatio vector of relative quantities to use for sampling celltypes |
|
9 |
-#' @param anotionObj vector of same length as number of columns of D |
|
10 |
-#' @param path character string indicating were to save resulting data objects. default is current working dir |
|
11 |
-#' @return simulationName used to identify saved files |
|
12 |
-#' @examples |
|
13 |
-#' data(SimpSim) |
|
14 |
-#' createscCoGAPSSets(SimpSim.D, nSets=2, simulationName="example") |
|
15 |
-createscCoGAPSSets <- function(D, nSets, simulationName, samplingRatio=NULL, |
|
16 |
-path="", anotionObj=NULL) |
|
17 |
-{ |
|
18 |
- # check gene names |
|
19 |
- if (length(unique(colnames(D))) != length(colnames(D))) |
|
20 |
- { |
|
21 |
- warning("Cell identifiers not unique!") |
|
22 |
- } |
|
23 |
- |
|
24 |
- # partition data by sampling random sets of cells |
|
25 |
- cells <- 1:ncol(D) |
|
26 |
- setSize <- floor(length(cells) / nSets) |
|
27 |
- for (set in 1:nSets) |
|
28 |
- { |
|
29 |
- if (is.null(samplingRatio)) |
|
30 |
- { |
|
31 |
- # sample cell names |
|
32 |
- sampleSize <- ifelse(set == nSets, length(cells), setSize) |
|
33 |
- cellset <- sample(cells, sampleSize, replace=FALSE) |
|
34 |
- cells <- cells[!(cells %in% cellset)] |
|
35 |
- } |
|
36 |
- else |
|
37 |
- { |
|
38 |
- if (length(unique(anotionObj)) != length(samplingRatio)) |
|
39 |
- { |
|
40 |
- warning("Not all celltypes will be sampled from.") |
|
41 |
- } |
|
42 |
- ct.indx <- lapply(unique(anotionObj), function(x) which(anotionObj == x)) |
|
43 |
- names(ct.indx) <- unique(anotionObj) |
|
44 |
- cellset <- lapply(unique(anotionObj), function(x) |
|
45 |
- sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE)) |
|
46 |
- cellset <- unlist(cellset) |
|
47 |
- } |
|
48 |
- |
|
49 |
- # partition data |
|
50 |
- sampleD <- D[,cellset] |
|
51 |
- #log transform |
|
52 |
- sampleD <- log2(sampleD+1) |
|
53 |
- # generate S |
|
54 |
- sampleS <- pmax(.1*sampleD, .1) |
|
55 |
- save(sampleD, sampleS, file=paste0(path,simulationName, "_partition_", |
|
56 |
- set,".RData")); |
|
57 |
- } |
|
58 |
- return(simulationName) |
|
59 |
-} |
60 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,27 +0,0 @@ |
1 |
-#' Generate Seeds for Multiple Concurrent Runs |
|
2 |
-#' |
|
3 |
-#' @param chains number of seeds to generate (number of chains to run) |
|
4 |
-#' @param seed positive values are kept, negative values will be overwritten |
|
5 |
-#' by a seed generated from the current time |
|
6 |
-#' @return vector of randomly generated seeds |
|
7 |
-generateSeeds <- function(chains=2, seed=-1) |
|
8 |
-{ |
|
9 |
- if (chains < 2 || (as.integer(chains) != chains)) |
|
10 |
- { |
|
11 |
- stop("chains must be >= 2 and an integer") |
|
12 |
- } |
|
13 |
- |
|
14 |
- if (seed < 1) |
|
15 |
- { |
|
16 |
- secs <- as.numeric(difftime(Sys.time(), paste(Sys.Date(), "00:00"), |
|
17 |
- units="secs")) |
|
18 |
- secs <- round(secs) |
|
19 |
- seeds <- seq_len(chains) * secs |
|
20 |
- return(seeds) |
|
21 |
- } |
|
22 |
- else |
|
23 |
- { |
|
24 |
- seeds <- seq_len(chains) * seed |
|
25 |
- return(seeds) |
|
26 |
- } |
|
27 |
-} |
28 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,93 @@ |
1 |
+#' @rdname setParam-methods |
|
2 |
+#' @aliases setParam |
|
3 |
+setMethod("setParam", signature(object="CogapsParams"), |
|
4 |
+function(object, whichParam, value) |
|
5 |
+{ |
|
6 |
+ slot(object, whichParam) <- value |
|
7 |
+ validObject(object) |
|
8 |
+ return(object) |
|
9 |
+}) |
|
10 |
+ |
|
11 |
+#' @rdname getParam-methods |
|
12 |
+#' @aliases getParam |
|
13 |
+setMethod("getParam", signature(object="CogapsParams"), |
|
14 |
+function(object, whichParam) |
|
15 |
+{ |
|
16 |
+ slot(object, whichParam) |
|
17 |
+}) |
|
18 |
+ |
|
19 |
+#' @rdname parseOldParams-methods |
|
20 |
+#' @aliases parseOldParams |
|
21 |
+setMethod("parseOldParams", signature(object="CogapsParams"), |
|
22 |
+function(object, oldArgs) |
|
23 |
+{ |
|
24 |
+ helper <- function(arg, params, newArg) |
|
25 |
+ { |
|
26 |
+ if (!is.null(oldArgs[[arg]])) |
|
27 |
+ { |
|
28 |
+ warning(paste("parameter", arg, "is deprecated, it will still", |
|
29 |
+ "work, but setting", newArg, "in the params object is the", |
|
30 |
+ "preferred method")) |
|
31 |
+ params <- setParam(params, newArg, oldArgs[[arg]]) |
|
32 |
+ oldArgs[[arg]] <- NULL |
|
33 |
+ } |
|
34 |
+ return(params) |
|
35 |
+ } |
|
36 |
+ |
|
37 |
+ object <- helper("nFactor", object, "nPatterns") |
|
38 |
+ object <- helper("nIter", object, "nIterations") |
|
39 |
+ object <- helper("nEquil", object, "nIterations") |
|
40 |
+ object <- helper("nSample", object, "nIterations") |
|
41 |
+ object <- helper("nOutR", object, "outputFrequency") |
|
42 |
+ object <- helper("nOutput", object, "outputFrequency") |
|
43 |
+ object <- helper("maxGibbmassA", object, "maxGibbsMassA") |
|
44 |
+ object <- helper("max_gibbmass_paraA", object, "maxGibbsMassA") |
|
45 |
+ object <- helper("maxGibbmassP", object, "maxGibbsMassP") |
|
46 |
+ object <- helper("max_gibbmass_paraP", object, "maxGibbsMassP") |
|
47 |
+ object <- helper("singleCellRNASeq", object, "singleCell") |
|
48 |
+ |
|
49 |
+ if (!is.null(oldArgs$nSnapshots) | !is.null(oldArgs$sampleSnapshots) | !is.null(oldArgs$numSnapshots)) |
|
50 |
+ { |
|
51 |
+ warning("snapshots not currently supported in release build") |
|
52 |
+ oldArgs$nSnapshots <- NULL |
|
53 |
+ oldArgs$sampleSnapshots <- NULL |
|
54 |
+ oldArgs$numSnapshots <- NULL |
|
55 |
+ } |
|
56 |
+ if (!is.null(oldArgs$fixedPatterns)) |
|
57 |
+ stop("pass fixed matrix in with 'fixedMatrix' argument") |
|
58 |
+ if (!is.null(oldArgs$S)) |
|
59 |
+ stop("pass uncertainty matrix in with 'uncertainty', not 'S'") |
|
60 |
+ |
|
61 |
+ return(object) |
|
62 |
+}) |
|
63 |
+ |
|
64 |
+#' @rdname parseDirectParams-methods |
|
65 |
+#' @aliases parseDirectParams |
|
66 |
+setMethod("parseDirectParams", signature(object="CogapsParams"), |
|
67 |
+function(object, args) |
|
68 |
+{ |
|
69 |
+ for (s in slotNames(object)) |
|
70 |
+ { |
|
71 |
+ if (!is.null(args[[s]])) |
|
72 |
+ { |
|
73 |
+ object <- setParam(object, s, args[[s]]) |
|
74 |
+ } |
|
75 |
+ } |
|
76 |
+ return(object) |
|
77 |
+}) |
|
78 |
+ |
|
79 |
+setMethod("show", signature("CogapsParams"), |
|
80 |
+function(object) |
|
81 |
+{ |
|
82 |
+ cat("An Object of class \"CogapsParams\"\n") |
|
83 |
+ cat("nPatterns ", object@nPatterns, "\n") |
|
84 |
+ cat("nIterations ", object@nIterations, "\n") |
|
85 |
+ cat("outputFrequency ", object@outputFrequency, "\n") |
|
86 |
+ cat("nCores ", object@nCores, "\n") |
|
87 |
+ cat("singleCell ", object@singleCell, "\n") |
|
88 |
+ cat("seed ", object@seed, "\n") |
|
89 |
+ cat("messages ", object@messages, "\n") |
|
90 |
+ cat("checkpointInterval ", object@checkpointInterval, "\n") |
|
91 |
+ cat("checkpointOutFile ", object@checkpointOutFile, "\n") |
|
92 |
+}) |
|
93 |
+ |
0 | 94 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,407 @@ |
1 |
+setMethod("show", signature("CogapsResult"), |
|
2 |
+function(object) |
|
3 |
+{ |
|
4 |
+ nFeatures <- nrow(object@featureLoadings) |
|
5 |
+ nSamples <- nrow(object@sampleFactors) |
|
6 |
+ nPatterns <- ncol(object@featureLoadings) |
|
7 |
+ |
|
8 |
+ print(paste("CogapsResult object with", nFeatures, "features and", nSamples, |
|
9 |
+ "samples")) |
|
10 |
+ print(paste(nPatterns, "patterns were learned")) |
|
11 |
+}) |
|
12 |
+ |
|
13 |
+#' @export |
|
14 |
+#' @importFrom graphics plot |
|
15 |
+plot.CogapsResult <- function(x, ...) |
|
16 |
+{ |
|
17 |
+ nSamples <- nrow(object@sampleFactors) |
|
18 |
+ nFactors <- ncol(object@sampleFactors) |
|
19 |
+ colors <- rainbow(nFactors) |
|
20 |
+ xlimits <- c(0, nSamples + 1) |
|
21 |
+ ylimits <- c(0, max(object@sampleFactors) * 1.1) |
|
22 |
+ |
|
23 |
+ plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude") |
|
24 |
+ |
|
25 |
+ for (i in 1:nFactors) |
|
26 |
+ { |
|
27 |
+ lines(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i]) |
|
28 |
+ points(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i], pch=i) |
|
29 |
+ } |
|
30 |
+ |
|
31 |
+ legend("top", paste("Pattern", 1:nFactors, sep = ""), pch = 1:nFactors, |
|
32 |
+ lty=1, cex=0.8, col=colors, bty="y", ncol=5) |
|
33 |
+} |
|
34 |
+ |
|
35 |
+#' @rdname getMeanChiSq-methods |
|
36 |
+#' @aliases getMeanChiSq |
|
37 |
+setMethod("getMeanChiSq", signature(object="CogapsResult"), |
|
38 |
+function(object) |
|
39 |
+{ |
|
40 |
+ object@metadata$meanChiSq |
|
41 |
+}) |
|
42 |
+ |
|
43 |
+#' @rdname calcZ-methods |
|
44 |
+#' @aliases calcZ |
|
45 |
+setMethod("calcZ", signature(object="CogapsResult"), |
|
46 |
+function(object, which) |
|
47 |
+{ |
|
48 |
+ if (which == "feature") |
|
49 |
+ { |
|
50 |
+ object@featureLoadings / object@featureStdDev |
|
51 |
+ } |
|
52 |
+ else if (which == "sample") |
|
53 |
+ { |
|
54 |
+ object@sampleFactors / object@sampleStdDev |
|
55 |
+ } |
|
56 |
+ else |
|
57 |
+ { |
|
58 |
+ stop("\"which\" must be either \"feature\" or \"sample\"") |
|
59 |
+ } |
|
60 |
+}) |
|
61 |
+ |
|
62 |
+#' @rdname reconstructGene-methods |
|
63 |
+#' @aliases reconstructGene |
|
64 |
+setMethod("reconstructGene", signature(object="CogapsResult"), |
|
65 |
+function(object, genes) |
|
66 |
+{ |
|
67 |
+ D <- object@featureLoadings %*% t(object@sampleFactors) |
|
68 |
+ if (!is.null(genes)) |
|
69 |
+ { |
|
70 |
+ D <- D[genes,] |
|
71 |
+ } |
|
72 |
+ return(D) |
|
73 |
+}) |
|
74 |
+ |
|
75 |
+#' @rdname binaryA-methods |
|
76 |
+#' @aliases binaryA |
|
77 |
+#' @importFrom gplots heatmap.2 |
|
78 |
+setMethod("binaryA", signature(object="CogapsResult"), |
|
79 |
+function(object, threshold) |
|
80 |
+{ |
|
81 |
+ binA <- ifelse(calcZ(object) > threshold, 1, 0) |
|
82 |
+ |
|
83 |
+ gplots::heatmap.2(binA, Rowv = FALSE, Colv = FALSE, dendrogram="none", |
|
84 |
+ scale="none", col = brewer.pal(3,"Blues"), trace="none", |
|
85 |
+ density.info="none", cexCol=1.3, srtCol=45, |
|
86 |
+ lmat=rbind(c(0, 3), c(2,1), c(0,4) ), |
|
87 |
+ lwid=c(1,10), lhei=c(1, 4, 1.2 ), |
|
88 |
+ main="Heatmap of Standardized Feature Matrix") |
|
89 |
+ mtext(paste("(Threshold = ", threshold, ")")) |
|
90 |
+}) |
|
91 |
+ |
|
92 |
+#' @rdname plotResiduals-methods |
|
93 |
+#' @aliases plotResiduals |
|
94 |
+#' @importFrom gplots heatmap.2 |
|
95 |
+setMethod("plotResiduals", signature(object="CogapsResult"), |
|
96 |
+function(object, data, uncertainty) |
|
97 |
+{ |
|
98 |
+ data <- as.matrix(data) |
|
99 |
+ if (is.null(uncertainty)) |
|
100 |
+ uncertainty <- pmax(0.1 * data, 0.1) |
|
101 |
+ uncertainty <- as.matrix(uncertainty) |
|
102 |
+ |
|
103 |
+ M <- reconstructGene(object) |
|
104 |
+ resid <- (data - M) / uncertainty |
|
105 |
+ |
|
106 |
+ scaledRdYlBu <- colorRampPalette(brewer.pal(9,"RdYlBu"))(100) |
|
107 |
+ gplots::heatmap.2(resid, Rowv = FALSE, Colv = FALSE, dendrogram="none", |
|
108 |
+ scale="none", col = scaledRdYlBu, trace="none", density.info="none", |
|
109 |
+ cexCol=1.33, srtCol=45, lmat=rbind(c(0, 3),c(2,1),c(0,4) ), |
|
110 |
+ lwid=c(1,10), lhei=c(1, 4, 1.2 ), main="Heatmap of Residuals") |
|
111 |
+}) |
|
112 |
+ |
|
113 |
+#' @rdname patternMarkers-methods |
|
114 |
+#' @aliases patternMarkers |
|
115 |
+setMethod("patternMarkers", signature(object="CogapsResult"), |
|
116 |
+function(object, threshold, lp) |
|
117 |
+{ |
|
118 |
+ nGenes <- nrow(object@featureLoadings) |
|
119 |
+ nPatterns <- ncol(object@featureLoadings) |
|
120 |
+ |
|
121 |
+ # find the A with the highest magnitude |
|
122 |
+ Arowmax <- t(apply(object@featureLoadings, 1, function(x) x/max(x))) |
|
123 |
+ |
|
124 |
+ if (!is.na(lp)) |
|
125 |
+ { |
|
126 |
+ if (length(lp) != nPatterns) |
|
127 |
+ { |
|
128 |
+ warning("lp length must equal the number of columns of the Amatrix") |
|
129 |
+ } |
|
130 |
+ sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
131 |
+ ssranks <-rank(sstat) |
|
132 |
+ ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE)) |
|
133 |
+ } |
|
134 |
+ else |
|
135 |
+ { |
|
136 |
+ # determine which genes are most associated with each pattern |
|
137 |
+ sstat <- matrix(NA, nrow=nGenes, ncol=nPatterns, |
|
138 |
+ dimnames=dimnames(object@featureLoadings)) |
|
139 |
+ ssranks <- matrix(NA, nrow=nGenes, ncol=nPatterns, |
|
140 |
+ dimnames=dimnames(object@featureLoadings)) |
|
141 |
+ ssgenes <- matrix(NA, nrow=nGenes, ncol=nPatterns, dimnames=NULL) |
|
142 |
+ for (i in 1:nP) |
|
143 |
+ { |
|
144 |
+ lp <- rep(0,dim(Amatrix)[2]) |
|
145 |
+ lp[i] <- 1 |
|
146 |
+ sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))) |
|
147 |
+ ssranks[,i]<-rank(sstat[,i]) |
|
148 |
+ ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE)) |
|
149 |
+ } |
|
150 |
+ if (threshold=="cut") |
|
151 |
+ { |
|
152 |
+ geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min)))) |
|
153 |
+ ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x]) |
|
154 |
+ } |
|
155 |
+ else if (threshold=="all") |
|
156 |
+ { |
|
157 |
+ pIndx<-unlist(apply(sstat,1,which.min)) |
|
158 |
+ gBYp <- list() |
|
159 |
+ for(i in sort(unique(pIndx))) |
|
160 |
+ { |
|
161 |
+ gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1]) |
|
162 |
+ } |
|
163 |
+ ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x) |
|
164 |
+ ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x]) |
|
165 |
+ } |
|
166 |
+ else |
|
167 |
+ { |
|
168 |
+ stop("Threshold arguement not viable option") |
|
169 |
+ } |
|
170 |
+ } |
|
171 |
+ return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks, |
|
172 |
+ "PatternMarkerScores"=sstat)) |
|
173 |
+}) |
|
174 |
+ |
|
175 |
+#' heatmap of original data clustered by pattern markers statistic |
|
176 |
+#' @export |
|
177 |
+#' @docType methods |
|
178 |
+#' @rdname plotPatternMarkers-methods |
|
179 |
+#' |
|
180 |
+#' @param object an object of type CogapsResult |
|
181 |
+#' @param data the original data as a matrix |
|
182 |
+#' @param patternPalette a vector indicating what color should be used |
|
183 |
+#' for each pattern |
|
184 |
+#' @param sampleNames names of the samples to use for labeling |
|
185 |
+#' @param samplePalette a vector indicating what color should be used |
|
186 |
+#' for each sample |
|
187 |
+#' @param colDenogram logical indicating whether to display sample denogram |
|
188 |
+#' @param heatmapCol pallelet giving color scheme for heatmap |
|
189 |
+#' @param scale character indicating if the values should be centered and scaled |
|
190 |
+#' in either the row direction or the column direction, or none. The |
|
191 |
+#' default is "row". |
|
192 |
+#' @param ... additional graphical parameters to be passed to \code{heatmap.2} |
|
193 |
+#' @return heatmap of the \code{data} values for the \code{patternMarkers} |
|
194 |
+#' @seealso \code{\link{heatmap.2}} |
|
195 |
+#' @importFrom gplots bluered |
|
196 |
+#' @importFrom gplots heatmap.2 |
|
197 |
+#' @importFrom stats hclust |
|
198 |
+plotPatternMarkers <- function(object, data, patternPalette, sampleNames, |
|
199 |
+samplePalette=NULL, heatmapCol=bluered, colDenogram=TRUE, scale="row", ...) |
|
200 |
+{ |
|
201 |
+ if (is.null(samplePalette)) |
|
202 |
+ samplePalette <- rep("black", ncol(data)) |
|
203 |
+ |
|
204 |
+ ### coloring of genes |
|
205 |
+ patternCols <- rep(NA, length(unlist(patternMarkers))) |
|
206 |
+ names(patternCols) <- unlist(patternMarkers) |
|
207 |
+ for (i in 1:length(patternMarkers)) |
|
208 |
+ { |
|
209 |
+ patternCols[patternMarkers[[i]]] <- patternPalette[i] |
|
210 |
+ } |
|
211 |
+ |
|
212 |
+ gplots::heatmap.2(as.matrix(data[unlist(patternMarkers),],...), |
|
213 |
+ symkey=FALSE, |
|
214 |
+ symm=FALSE, |
|
215 |
+ scale=scale, |
|
216 |
+ col=heatmapCol, |
|
217 |
+ distfun=function(x) as.dist((1-cor(t(x)))/2), |
|
218 |
+ hclustfun=function(x) stats::hclust(x,method="average"), |
|
219 |
+ Rowv=FALSE, |
|
220 |
+ Colv=colDenogram, |
|
221 |
+ trace='none', |
|
222 |
+ RowSideColors = as.character(patternCols[unlist(patternMarkers)]), |
|
223 |
+ labCol= sampleNames, |
|
224 |
+ cexCol=0.8, |
|
225 |
+ ColSideColors=as.character(samplePalette), |
|
226 |
+ rowsep=cumsum(sapply(patternMarkers,length)) |
|
227 |
+ ) |
|
228 |
+}) |
|
229 |
+ |
|
230 |
+#' @rdname calcCoGAPSStat-methods |
|
231 |
+#' @aliases calcCoGAPSStat |
|
232 |
+setMethod("calcCoGAPSStat", signature(object="CogapsResult"), |
|
233 |
+function(object, GStoGenes, numPerm) |
|
234 |
+{ |
|
235 |
+ # test for std dev of zero, possible mostly in simple simulations |
|
236 |
+ if (sum(object@featureStdDev==0) > 0) |
|
237 |
+ { |
|
238 |
+ object@featureStdDev[object@featureStdDev==0] <- 1e-6 |
|
239 |
+ } |
|
240 |
+ |
|
241 |
+ # calculate Z scores |
|
242 |
+ zMatrix <- calcZ(object) |
|
243 |
+ |
|
244 |
+ # check input arguments |
|
245 |
+ if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets")) |
|
246 |
+ { |
|
247 |
+ stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.") |
|
248 |
+ } |
|
249 |
+ |
|
250 |
+ if (is(GStoGenes, "GSA.genesets")) |
|
251 |
+ { |
|
252 |
+ names(GStoGenes$genesets) <- GStoGenes$geneset.names |
|
253 |
+ GStoGenes <- GStoGenes$genesets |
|
254 |
+ } |
|
255 |
+ |
|
256 |
+ if (is(GStoGenes, "list")) |
|
257 |
+ { |
|
258 |
+ GStoGenesList <- GStoGenes |
|
259 |
+ } |
|
260 |
+ else |
|
261 |
+ { |
|
262 |
+ GStoGenesList <- list() |
|
263 |
+ for (i in 1:dim(GStoGenes)[2]) |
|
264 |
+ { |
|
265 |
+ GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i])) |
|
266 |
+ } |
|
267 |
+ } |
|
268 |
+ |
|
269 |
+ # get dimensions |
|
270 |
+ numGS <- length(names(GStoGenesList)) |
|
271 |
+ numPatt <- dim(zMatrix)[2] |
|
272 |
+ numG <- dim(zMatrix)[1]+0.9999 |
|
273 |
+ |
|
274 |
+ # initialize matrices |
|
275 |
+ statsUp <- matrix(ncol = numGS, nrow = numPatt) |
|
276 |
+ statsDown <- matrix(ncol = numGS, nrow = numPatt) |
|
277 |
+ actEst <- matrix(ncol = numGS, nrow = numPatt) |
|
278 |
+ results <- list() |
|
279 |
+ zPerm <- matrix(ncol=numPerm,nrow=numPatt) |
|
280 |
+ |
|
281 |
+ # do permutation test |
|
282 |
+ for (gs in 1:numGS) |
|
283 |
+ { |
|
284 |
+ genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
285 |
+ index <- rownames(zMatrix) %in% genes |
|
286 |
+ zValues <- zMatrix[index,1] |
|
287 |
+ numGenes <- length(zValues) |
|
288 |
+ label <- as.character(numGenes) |
|
289 |
+ |
|
290 |
+ if (!any(names(results)==label)) |
|
291 |
+ { |
|
292 |
+ for (p in 1:numPatt) |
|
293 |
+ { |
|
294 |
+ for (j in 1:numPerm) |
|
295 |
+ { |
|
296 |
+ temp <- floor(runif(numGenes,1,numG)) |
|
297 |
+ temp2 <- zMatrix[temp,p] |
|
298 |
+ zPerm[p,j] <- mean(temp2) |
|
299 |
+ } |
|
300 |
+ } |
|
301 |
+ results[[label]] <- zPerm |
|
302 |
+ } |
|
303 |
+ } |
|
304 |
+ |
|
305 |
+ # get p-value |
|
306 |
+ for (p in 1:numPatt) |
|
307 |
+ { |
|
308 |
+ for (gs in 1:numGS) |
|
309 |
+ { |
|
310 |
+ genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
311 |
+ index <- rownames(zMatrix) %in% genes |
|
312 |
+ zValues <- zMatrix[index,p] |
|
313 |
+ zScore <- mean(zValues) |
|
314 |
+ |
|
315 |
+ numGenes <- length(zValues) |
|
316 |
+ label <- as.character(numGenes) |
|
317 |
+ |
|
318 |
+ permzValues <- results[[label]][p,] |
|
319 |
+ ordering <- order(permzValues) |
|
320 |
+ permzValues <- permzValues[ordering] |
|
321 |
+ |
|
322 |
+ statUpReg <- 1 - sum(zScore > permzValues) / length(permzValues) |
|
323 |
+ statsUp[p,gs] <- max(statUpReg, 1 / numPerm) |
|
324 |
+ |
|
325 |
+ statDownReg <- 1 - sum(zScore < permzValues) / length(permzValues) |
|
326 |
+ statsDown[p,gs] <- max(statDownReg, 1 / numPerm) |
|
327 |
+ |
|
328 |
+ activity <- 1 - 2*max(statUpReg, 1/numPerm) |
|
329 |
+ actEst[p,gs] <- activity |
|
330 |
+ } |
|
331 |
+ } |
|
332 |
+ |
|
333 |
+ # format output |
|
334 |
+ rownames(statsUp) <- rownames(statsDown) <- rownames(actEst) <- colnames(zMatrix) |
|
335 |
+ colnames(statsUp) <- colnames(statsDown) <- colnames(actEst) <- names(GStoGenesList) |
|
336 |
+ |
|
337 |
+ results[['GSUpreg']] <- statsUp |
|
338 |
+ results[['GSDownreg']] <- statsDown |
|
339 |
+ results[['GSActEst']] <- actEst |
|
340 |
+ |
|
341 |
+ return(results) |
|
342 |
+}) |
|
343 |
+ |
|
344 |
+#' @rdname calcGeneGSStat-methods |
|
345 |
+#' @aliases calcGeneGSStat |
|
346 |
+setMethod("calcGeneGSStat", signature(object="CogapsResult"), |
|
347 |
+function(object, GStoGenes, numPerm, Pw, nullGenes) |
|
348 |
+{ |
|
349 |
+ gsStat <- calcCoGAPSStat(object, data.frame(GSGenes), numPerm=numPerm) |
|
350 |
+ gsStat <- gsStat$GSUpreg |
|
351 |
+ gsStat <- -log(gsStat) |
|
352 |
+ |
|
353 |
+ if (!all(is.na(Pw))) |
|
354 |
+ { |
|
355 |
+ if (length(Pw) != length(gsStat)) |
|
356 |
+ { |
|
357 |
+ stop('Invalid weighting') |
|
358 |
+ } |
|
359 |
+ gsStat <- gsStat*Pw |
|
360 |
+ } |
|
361 |
+ |
|
362 |
+ if (nullGenes) |
|
363 |
+ { |
|
364 |
+ ZD <- object@featureLoadings[setdiff(row.names(object@featureLoadings), GSGenes),] / |
|
365 |
+ object@featureStdDev[setdiff(row.names(object@featureLoadings), GSGenes),] |
|
366 |
+ } |
|
367 |
+ else |
|
368 |
+ { |
|
369 |
+ ZD <- object@featureLoadings[GSGenes,]/object@featureStdDev[GSGenes,] |
|
370 |
+ } |
|
371 |
+ outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat)) |
|
372 |
+ outStats <- outStats / apply(ZD,1,sum) |
|
373 |
+ outStats[which(apply(ZD,1,sum) < 1e-6)] <- 0 |
|
374 |
+ |
|
375 |
+ if (sum(gsStat) < 1e-6) |
|
376 |
+ { |
|
377 |
+ return(0) |
|
378 |
+ } |
|
379 |
+ return(outStats) |
|
380 |
+}) |
|
381 |
+ |
|
382 |
+#' @rdname computeGeneGSProb-methods |
|
383 |
+#' @aliases computeGeneGSProb |
|
384 |
+setMethod("computeGeneGSProb", signature(object="CogapsResult"), |
|
385 |
+function(object, GStoGenes, numPerm, Pw, PwNull) |
|
386 |
+{ |
|
387 |
+ geneGSStat <- calcGeneGSStat(object, Pw=Pw, GSGenes=GSGenes, |
|
388 |
+ numPerm=numPerm) |
|
389 |
+ |
|
390 |
+ if (PwNull) |
|
391 |
+ { |
|
392 |
+ permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm, |
|
393 |
+ Pw=Pw, nullGenes=TRUE) |
|
394 |
+ } |
|
395 |
+ else |
|
396 |
+ { |
|
397 |
+ permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm, |
|
398 |
+ nullGenes=TRUE) |
|
399 |
+ } |
|
400 |
+ |
|
401 |
+ finalStats <- sapply(GSGenes, function(x) |
|
402 |
+ length(which(permGSStat > geneGSStat[x])) / length(permGSStat)) |
|
403 |
+ |
|
404 |
+ return(finalStats) |
|
405 |
+}) |
|
406 |
+ |
|
407 |
+ |
0 | 408 |
deleted file mode 100644 |
... | ... |
@@ -1,86 +0,0 @@ |
1 |
-#' patternMarkers |
|
2 |
-#' |
|
3 |
-#' @param Amatrix A matrix of genes by weights resulting from CoGAPS or other NMF decomposition |
|
4 |
-#' @param scaledPmatrix logical indicating whether the corresponding pattern matrix was fixed to have max 1 during decomposition |
|
5 |
-#' @param Pmatrix the corresponding Pmatrix (patterns X samples) for the provided Amatrix (genes x patterns). This must be supplied if scaledPmatrix is FALSE. |
|
6 |
-#' @param threshold # the type of threshold to be used. The default "all" will distribute genes into pattern with the lowest ranking. The "cut" thresholding by the first gene to have a lower ranking, i.e. better fit to, a pattern. |
|
7 |
-#' @param lp a vector of weights for each pattern to be used for finding markers. If NA markers for each pattern of the A matrix will be used. |
|
8 |
-#' @param full logical indicating whether to return the ranks of each gene for each pattern |
|
9 |
-#' @return By default a non-overlapping list of genes associated with each \code{lp}. If \code{full=TRUE} a data.frame of |
|
10 |
-#' genes rankings with a column for each \code{lp} will also be returned. |
|
11 |
-#' @export |
|
12 |
-patternMarkers <- function(Amatrix=NA, scaledPmatrix=FALSE, Pmatrix=NA, |
|
13 |
-threshold="all", lp=NA, full=FALSE) |
|
14 |
-{ |
|
15 |
- if (scaledPmatrix==FALSE) |
|
16 |
- { |
|
17 |
- if(!is.na(Pmatrix)) |
|
18 |
- { |
|
19 |
- pscale <- apply(Pmatrix,1,max) # rescale p's to have max 1 |
|
20 |
- Amatrix <- sweep(Amatrix, 2, pscale, FUN="*") # rescale A in accordance with p's having max 1 |
|
21 |
- } |
|
22 |
- else |
|
23 |
- { |
|
24 |
- warning("P values must be provided if not already scaled") |
|
25 |
- } |
|
26 |
- } |
|
27 |
- |
|
28 |
- # find the A with the highest magnitude |
|
29 |
- Arowmax <- t(apply(Amatrix, 1, function(x) x/max(x))) |
|
30 |
- |
|
31 |
- if (!is.na(lp)) |
|
32 |
- { |
|
33 |
- if (length(lp) != dim(Amatrix)[2]) |
|
34 |
- { |
|
35 |
- warning("lp length must equal the number of columns of the Amatrix") |
|
36 |
- } |
|
37 |
- sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
38 |
- ssranks <-rank(sstat) |
|
39 |
- ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE)) |
|
40 |
- } |
|
41 |
- else |
|
42 |
- { |
|
43 |
- # determine which genes are most associated with each pattern |
|
44 |
- |
|
45 |
- sstat<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix)) |
|
46 |
- ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))#list() |
|
47 |
- ssgenes<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=NULL) |
|
48 |
- for (i in 1:nP) |
|
49 |
- { |
|
50 |
- lp <- rep(0,dim(Amatrix)[2]) |
|
51 |
- lp[i] <- 1 |
|
52 |
- sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))) |
|
53 |
- ssranks[,i]<-rank(sstat[,i]) |
|
54 |
- ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE)) |
|
55 |
- } |
|
56 |
- if (threshold=="cut") |
|
57 |
- { |
|
58 |
- geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min)))) |
|
59 |
- ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x]) |
|
60 |
- } |
|
61 |
- else if (threshold=="all") |
|
62 |
- { |
|
63 |
- pIndx<-unlist(apply(sstat,1,which.min)) |
|
64 |
- gBYp <- list() |
|
65 |
- for(i in sort(unique(pIndx))) |
|
66 |
- { |
|
67 |
- gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1]) |
|
68 |
- } |
|
69 |
- ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x) |
|
70 |
- ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x]) |
|
71 |
- } |
|
72 |
- else |
|
73 |
- { |
|
74 |
- stop("Threshold arguement not viable option") |
|
75 |
- } |
|
76 |
- } |
|
77 |
- if (full) |
|
78 |
- { |
|
79 |
- return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks, |
|
80 |
- "PatternMarkerScores"=sstat)) |
|
81 |
- } |
|
82 |
- else |
|
83 |
- { |
|
84 |
- return("PatternMarkers"=ssgenes.th) |
|
85 |
- } |
|
86 |
-} |
87 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,129 +0,0 @@ |
1 |
-#' patternMatch4Parallel |
|
2 |
-#' |
|
3 |
-#' @param Ptot a matrix containing the total by set estimates of Pmean output |
|
4 |
-#' from \code{reOrderBySet} |
|
5 |
-#' @param nSets number of parallel sets used to generate \code{Ptot} |
|
6 |
-#' @param cnt number of branches at which to cut dendrogram |
|
7 |
-#' @param minNS minimum of individual set contributions a cluster must contain |
|
8 |
-#' @param maxNS max of individual set contributions a cluster must contain. |
|
9 |
-#' default is nSets+minNS |
|
10 |
-#' @param cluster.method the agglomeration method to be used for clustering |
|
11 |
-#' @param ignore.NA logical indicating whether or not to ignore NAs from |
|
12 |
-#' potential over dimensionalization. Default is FALSE. |
|
13 |
-#' @param bySet logical indicating whether to return list of matched set |
|
14 |
-#' solutions from \code{Ptot} |
|
15 |
-#' @param ... additional parameters for \code{agnes} |
|
16 |
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then |
|
17 |
-#' a list of the set contributions to each |
|
18 |
-#' concensus pattern is also returned. |
|
19 |
-#' @seealso \code{\link{agnes}} |
|
20 |
-#' @export |
|
21 |
-patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NA, maxNS=NULL, |
|
22 |
-cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...) |
|
23 |
-{ |
|
24 |
- if (is.na(minNS)) |
|
25 |
- { |
|
26 |
- minNS <- ceiling(nSets / 2) |
|
27 |
- } |
|
28 |
- if (is.null(maxNS)) |
|
29 |
- { |
|
30 |
- maxNS=nSets+minNS |
|
31 |
- } |
|
32 |
- if (ignore.NA==FALSE & anyNA(Ptot)) |
|
33 |
- { |
|
34 |
- warning(paste("Non-sparse matrixes produced. Reducing the number of", |
|
35 |
- "patterns asked for and rerun.")) |
|
36 |
- } |
|
37 |
- if (ignore.NA==TRUE) |
|
38 |
- { |
|
39 |
- Ptot <- Ptot[complete.cases(Ptot),] |
|
40 |
- } |
|
41 |
- |
|
42 |
- corcut <-function(Ptot,minNS,cnt,cluster.method) |
|
43 |
- { |
|
44 |
- # corr dist |
|
45 |
- corr.dist <- cor(t(Ptot)) |
|
46 |
- corr.dist <- 1-corr.dist |
|
47 |
- # cluster |
|
48 |
- #library(cluster) |
|
49 |
- clust <- agnes(x=corr.dist, diss=TRUE, method=cluster.method) |
|
50 |
- cut <- cutree(as.hclust(clust), k=cnt) |
|
51 |
- #save.image(file=paste("CoGAPS.", nP, "P.", nS, "Set.CorrClustCut", |
|
52 |
- #cnt,".RData")) |
|
53 |
- |
|
54 |
- cls <- sort(unique(cut)) |
|
55 |
- cMNs <- matrix(nrow=cnt, ncol=dim(Ptot)[2]) |
|
56 |
- rownames(cMNs) <- cls |
|
57 |
- colnames(cMNs) <- colnames(Ptot) |
|
58 |
- |
|
59 |
- RtoMeanPattern <- list() |
|
60 |
- PByClust <- list() |
|
61 |
- for (i in cls) |
|
62 |
- { |
|
63 |
- if (!is.null(dim(Ptot[cut == i,]))) |
|
64 |
- { |
|
65 |
- if (dim(Ptot[cut == i,])[1] >= minNS) |
|
66 |
- { |
|
67 |
- cMNs[i,] <- colMeans(Ptot[cut==i,]) |
|
68 |
- PByClust[[i]] <- Ptot[cut==i,] |
|
69 |
- nIN <- sum(cut==i) |
|
70 |
- RtoMeanPattern[[i]] <- sapply(1:nIN, function(j) |
|
71 |
- round(cor(x=Ptot[cut==i,][j,], y=cMNs[i,]),3) |
|
72 |
- ) |
|
73 |
- } |
|
74 |
- } |
|
75 |
- } |
|
76 |
- |
|
77 |
- #PByClust[sapply(PByClust,is.null)] <- NULL |
|
78 |
- #RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL |
|
79 |
- return(list("RtoMeanPattern"=RtoMeanPattern, "PByClust"=PByClust)) |
|
80 |
- } |
|
81 |
- |
|
82 |
- cc <- corcut(Ptot,minNS,cnt,cluster.method) |
|
83 |
- |
|
84 |
- ### split by maxNS |
|
85 |
- indx <- which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS))) |
|
86 |
- while(length(indx) > 0) |
|
87 |
- { |
|
88 |
- icc <- corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method) |
|
89 |
- if (length(icc[[2]])==0) |
|
90 |
- { |
|
91 |
- indx <- indx[-1] |
|
92 |
- next |
|
93 |
- } |
|
94 |
- else |
|
95 |
- { |
|
96 |
- cc$PByClust[[indx[1]]] <- icc[[2]][[1]] |
|
97 |
- cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]] |
|
98 |
- if (length(icc[[2]]) > 1) |
|
99 |
- { |
|
100 |
- cc$PByClust <- append(cc$PByClust,icc[[2]][2]) |
|
101 |
- cc$RtoMeanPattern <- append(cc$RtoMeanPattern,icc[[1]][2]) |
|
102 |
- } |
|
103 |
- indx <- which(unlist(lapply(cc$PByClust, function(x) |
|
104 |
- dim(x)[1] > maxNS))) |
|
105 |
- } |
|
106 |
- } |
|
107 |
- |
|
108 |
- #weighted.mean |
|
109 |
- PByCDSWavg <- t(sapply(1:length(cc$PByClust), function(z) |
|
110 |
- apply(cc$PByClust[[z]],2, function(x) |
|
111 |
- weighted.mean(x, (cc$RtoMeanPattern[[z]])^3)))) |
|
112 |
- rownames(PByCDSWavg) <- lapply(1:length(cc$PByClust), function(x) |
|
113 |
- paste("Pattern", x)) |
|
114 |
- |
|
115 |
- #scale ps |
|
116 |
- Pmax <- apply(PByCDSWavg,1,max) |
|
117 |
- PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1], function(x) |
|
118 |
- PByCDSWavg[x,] / Pmax[x])) |
|
119 |
- rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg) |
|
120 |
- |
|
121 |
- if (bySet) |
|
122 |
- { |
|
123 |
- return(list("consenusPatterns"=PByCDSWavgScaled, "PBySet"=cc)) |
|
124 |
- } |
|
125 |
- else |
|
126 |
- { |
|
127 |
- return("consenusPatterns"=PByCDSWavgScaled) |
|
128 |
- } |
|
129 |
-} |
130 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,151 +0,0 @@ |
1 |
-#' PatternMatcher Shiny Ap |
|
2 |
-#' |
|
3 |
-#' @param PBySet list of matched set solutions for the Pmatrix from an NMF algorithm |
|
4 |
-#' @param out optional name for saving output |
|
5 |
-#' @param order optional vector indicating order of samples for plotting. Default is NULL. |
|
6 |
-#' @param sample.color optional vector of colors of same length as colnames. Default is NULL. |
|
7 |
-#' @return either an index of selected sets' contributions or the editted \code{PBySet} object |
|
8 |
-patternMatcher<-function(PBySet=NULL,out=NULL,order=NULL, sample.color=NULL) |
|
9 |
-{ |
|
10 |
- runApp(list( |
|
11 |
- ui = pageWithSidebar( |
|
12 |
- # Application title |
|
13 |
- headerPanel('NMF Pattern Matching'), |
|
14 |
- # Side pannel with controls |
|
15 |
- sidebarPanel( |
|
16 |
- # to upload file |
|
17 |
- fileInput('file1', 'Choose .Rda File', |
|
18 |
- accept=c('.RData','.Rda','R data object','.rda')), |
|
19 |
- uiOutput("pickplot"), |
|
20 |
- uiOutput("checkbs"), |
|
21 |
- downloadButton('downloadData', 'Download'), |
|
22 |
- actionButton("end", "Return") |
|
23 |
- ), |
|
24 |
- # Main panel with plots |
|
25 |
- mainPanel(plotOutput('plot1')) |
|
26 |
- ), |
|
27 |
- |
|
28 |
- server = function(input, output, session) |
|
29 |
- { |
|
30 |
- #load in the data |
|
31 |
- df<-reactive({ |
|
32 |
- if(!is.null(PBySet)) |
|
33 |
- { |
|
34 |
- df<-PBySet |
|
35 |
- return(df) |
|
36 |
- } |
|
37 |
- inFile <- input$file1 # get the path to the input file on the server |
|
38 |
- if (is.null(inFile)){return(NULL)} |
|
39 |
- load(inFile$datapath) #load it |
|
40 |
- df <- get(load(inFile$datapath))# get the name of the object that was loaded |
|
41 |
- return(df) |