... | ... |
@@ -1,92 +1,112 @@ |
1 |
-# CoGAPS: function to call gapsRun and calculate gene set |
|
2 |
-# statistics on result, also to produce A and P plots |
|
3 |
-# History: v1.0 - EJF original CoGAPS, MFO modified to to match |
|
4 |
-# new variable names |
|
5 |
- |
|
6 |
-# Inputs: data - data matrix |
|
7 |
-# unc - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
8 |
-# GStoGenes - data.frame or list of gene sets |
|
9 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
10 |
-# nEquil - number of iterations for burn-in |
|
11 |
-# nSample - number of iterations for sampling |
|
12 |
-# nOutR - how often to print status into R by iterations |
|
13 |
-# output_atomic - whether to write atom files (large) |
|
14 |
-# simulation_id - name to attach to atoms files if created |
|
15 |
-# plot - logical to determine if plots produced |
|
16 |
-# nPerm - number of permutations for gene set test |
|
17 |
-# alphaA, alphaP - sparsity parameters for A and P domains |
|
18 |
-# max_gibbmass_paraA(P) - limit truncated normal to max size |
|
19 |
-# nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms |
|
20 |
- |
|
1 |
+#' CoGAPS Matrix Factorization Algorithm |
|
2 |
+#' |
|
3 |
+#' @details calls the C++ MCMC code and performs Bayesian |
|
4 |
+#'matrix factorization returning the two matrices that reconstruct |
|
5 |
+#'the data matrix |
|
6 |
+#' @param D data matrix |
|
7 |
+#' @param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
8 |
+#' @param params GapsParams object |
|
9 |
+#' @return list with A and P matrix estimates |
|
10 |
+#' @export |
|
11 |
+CoGaps <- function(D, S, params = new('GapsParams', 7, 1000, 1000), ...) |
|
12 |
+{ |
|
13 |
+ # process v2 style arguments for backwards compatibility |
|
14 |
+ params <- oldParams(params, list(...)) |
|
21 | 15 |
|
22 |
-# Output: list with matrices, mean chi-squared value, and gene set |
|
23 |
-# results |
|
16 |
+ # check data |
|
17 |
+ checkParamsWithData(params, nrow(D), ncol(D), nrow(S), ncol(S)) |
|
18 |
+ if (any(D) < 0 | any(S) < 0) # too slow for large matrices ? |
|
19 |
+ stop('D and S matrix must be non-negative') |
|
24 | 20 |
|
25 |
-#'\code{CoGAPS} calls the C++ MCMC code through gapsRun and performs Bayesian |
|
26 |
-#'matrix factorization returning the two matrices that reconstruct |
|
27 |
-#'the data matrix and then calls calcCoGAPSStat to estimate gene set |
|
28 |
-#'activity with nPerm set to 500 |
|
29 |
-#' |
|
30 |
-#'@param data data matrix |
|
31 |
-#'@param unc uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
32 |
-#'@param ABins a matrix of same size as A which gives relative |
|
33 |
-#' probability of that element being non-zero |
|
34 |
-#'@param PBins a matrix of same size as P which gives relative |
|
35 |
-#' probability of that element being non-zero |
|
36 |
-#'@param GStoGenes data.frame or list with gene sets |
|
37 |
-#'@param nFactor number of patterns (basis vectors, metagenes) |
|
38 |
-#'@param simulation_id name to attach to atoms files if created |
|
39 |
-#'@param nEquil number of iterations for burn-in |
|
40 |
-#'@param nSample number of iterations for sampling |
|
41 |
-#'@param nOutR how often to print status into R by iterations |
|
42 |
-#'@param output_atomic whether to write atom files (large) |
|
43 |
-#'@param fixedBinProbs Boolean for using relative probabilities |
|
44 |
-#' given in Abins and Pbins |
|
45 |
-#'@param fixedDomain character to indicate whether A or P is |
|
46 |
-#' domain for relative probabilities |
|
47 |
-#'@param sampleSnapshots Boolean to indicate whether to capture |
|
48 |
-#' individual samples from Markov chain during sampling |
|
49 |
-#'@param numSnapshots the number of individual samples to capture |
|
50 |
-#'@param plot Boolean to indicate whether to produce output graphics |
|
51 |
-#'@param nPerm number of permutations in gene set test |
|
52 |
-#'@param alphaA sparsity parameter for A domain |
|
53 |
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms |
|
54 |
-#'@param alphaP sparsity parameter for P domain |
|
55 |
-#'@param max_gibbmass_paraA limit truncated normal to max size |
|
56 |
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms |
|
57 |
-#'@param max_gibbmass_paraP limit truncated normal to max size |
|
58 |
-#'@export |
|
21 |
+ # run algorithm |
|
22 |
+ result <- cogaps_cpp(as.matrix(D), as.matrix(S), nFactor, alphaA, |
|
23 |
+ alphaP, nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA, |
|
24 |
+ max_gibbmass_paraP, fixedPatterns, whichMatrixFixed, seed, messages, |
|
25 |
+ singleCellRNASeq, nOutR, numSnapshots, checkpoint_interval) |
|
59 | 26 |
|
60 |
-CoGAPS <- function(data, unc, ABins = data.frame(), PBins = data.frame(), GStoGenes, nFactor = 7, simulation_id="simulation", nEquil=1000, |
|
61 |
- nSample=1000, nOutR=1000, output_atomic=FALSE, |
|
62 |
- fixedBinProbs = FALSE, fixedDomain = "N", |
|
63 |
- sampleSnapshots = TRUE, numSnapshots = 100, |
|
64 |
- plot=TRUE, nPerm=500, |
|
65 |
- alphaA = 0.01, nMaxA = 100000, |
|
66 |
- max_gibbmass_paraA = 100.0, |
|
67 |
- alphaP = 0.01, nMaxP = 100000, |
|
68 |
- max_gibbmass_paraP = 100.0) { |
|
27 |
+ # backwards compatible with v2 |
|
28 |
+ if (length(list(...)$GStoGenes)) |
|
29 |
+ { |
|
30 |
+ warning('the GStoGenes argument is depracted with v3.0,') |
|
31 |
+ if (list(...)$plot) |
|
32 |
+ plotGAPS(result$Amean, result$Pmean) |
|
33 |
+ GSP <- calcCoGAPSStat(result$Amean, result$Asd, list(...)$GStoGenes, |
|
34 |
+ list(...)$nPerm) |
|
35 |
+ result <- list(meanChi2=result$meanChi2, D=D, Sigma=S, |
|
36 |
+ Amean=result$Amean, Asd=result$Asd, Pmean=result$Pmean, |
|
37 |
+ Psd=result$Psd, GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, |
|
38 |
+ GSActEst=GSP$GSActEst) |
|
39 |
+ } |
|
69 | 40 |
|
41 |
+ return(result) |
|
42 |
+} |
|
70 | 43 |
|
71 |
- # decompose the data |
|
72 |
- matrixDecomp <- gapsRun(data, unc, ABins, PBins, nFactor, simulation_id, |
|
73 |
- nEquil, nSample, nOutR, output_atomic, fixedBinProbs, |
|
74 |
- fixedDomain, sampleSnapshots, numSnapshots, alphaA, nMaxA, max_gibbmass_paraA, |
|
75 |
- alphaP, nMaxP, max_gibbmass_paraP) |
|
44 |
+#' Restart CoGAPS from Checkpoint File |
|
45 |
+#' |
|
46 |
+#' @details loads the state of a previous CoGAPS run from a file and |
|
47 |
+#' continues the run from that point |
|
48 |
+#' @param D data matrix |
|
49 |
+#' @param S uncertainty matrix |
|
50 |
+#' @param path path to checkpoint file |
|
51 |
+#' @return list with A and P matrix estimates |
|
52 |
+#' @export |
|
53 |
+CoGapsFromCheckpoint <- function(D, S, path) |
|
54 |
+{ |
|
55 |
+ # TODO add checksum to make sure D,S are correct |
|
56 |
+ cogapsFromCheckpoint_cpp(D, S, path) |
|
57 |
+} |
|
76 | 58 |
|
77 |
- # plot patterns and show heatmap of Anorm |
|
78 |
- if (plot) { |
|
79 |
- plotGAPS(matrixDecomp$Amean, matrixDecomp$Pmean) |
|
80 |
- } |
|
59 |
+#' Backwards Compatibility with v2 |
|
60 |
+#' |
|
61 |
+#' @param D data matrix |
|
62 |
+#' @param S uncertainty matrix |
|
63 |
+#' @param ... v2 style parameters |
|
64 |
+#' @return list with A and P matrix estimates |
|
65 |
+#' @export |
|
66 |
+gapsRun <- function(D, S, ...) |
|
67 |
+{ |
|
68 |
+ warning('gapsRun is depracated with v3.0, use CoGAPS') |
|
69 |
+ params <- new('GapsParams', 7, 1000, 1000) |
|
70 |
+ params <- oldParams(params, list(...)) |
|
71 |
+ CoGAPS(D, S, params) |
|
72 |
+} |
|
81 | 73 |
|
82 |
- # compute the gene set scores |
|
83 |
- GSP <- calcCoGAPSStat(matrixDecomp$Amean, matrixDecomp$Asd, GStoGenes, nPerm) |
|
74 |
+#' Backwards Compatibility with v2 |
|
75 |
+#' |
|
76 |
+#' @param D data matrix |
|
77 |
+#' @param S uncertainty matrix |
|
78 |
+#' @param FP data.frame with rows giving fixed patterns for P |
|
79 |
+#' @param ... v2 style parameters |
|
80 |
+#' @return list with A and P matrix estimates |
|
81 |
+#' @export |
|
82 |
+gapsMapRun <- function(D, S, FP, ...) |
|
83 |
+{ |
|
84 |
+ warning('gapsMapRun is depracated with v3.0, use CoGaps') |
|
85 |
+ params <- new('GapsParams', 7, 1000, 1000) |
|
86 |
+ params <- oldParams(params, list(...)) |
|
87 |
+ CoGAPS(D, S, params) |
|
88 |
+} |
|
84 | 89 |
|
85 |
- return(list(meanChi2=matrixDecomp$calcChiSq, |
|
86 |
- D=data, Sigma=unc, |
|
87 |
- Amean=matrixDecomp$Amean, Asd=matrixDecomp$Asd, |
|
88 |
- Pmean=matrixDecomp$Pmean, Psd=matrixDecomp$Psd, |
|
89 |
- GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, GSActEst=GSP$GSActEst)) |
|
90 |
+# helper function to process v2 parameters |
|
91 |
+oldParams <- function(params, args) |
|
92 |
+{ |
|
93 |
+ # standard params |
|
94 |
+ if (length(args$nFactor)) params@nFactor <- args$nFactor |
|
95 |
+ if (length(args$nEquil)) params@nEquil <- args$nEquil |
|
96 |
+ if (length(args$nSample)) params@nSample <- args$nSample |
|
97 |
+ if (length(args$numSnapshots)) params@nSnapshots <- args$numSnapshots |
|
98 |
+ if (length(args$alphaA)) params@alphaA <- args$alphaA |
|
99 |
+ if (length(args$alphaP)) params@alphaP <- args$alphaP |
|
100 |
+ if (length(args$seed)) params@seed <- args$seed |
|
101 |
+ if (length(args$messages)) params@messages <- args$messages |
|
90 | 102 |
|
103 |
+ # gapsMap params |
|
104 |
+ if (length(args$FP)) |
|
105 |
+ { |
|
106 |
+ params@fixedPatterns <- as.matrix(args$FP) |
|
107 |
+ params@whichMatrixFixed <- 'P' |
|
108 |
+ } |
|
91 | 109 |
|
110 |
+ # return v3 style GapsParams |
|
111 |
+ return(params) |
|
92 | 112 |
} |
... | ... |
@@ -1,90 +1,214 @@ |
1 |
-#' GWCoGAPS |
|
1 |
+#' Genome Wide CoGAPS |
|
2 | 2 |
#' |
3 |
-#'\code{GWCoGAPS} calls the C++ MCMC code and performs Bayesian |
|
4 |
-#'matrix factorization returning the two matrices that reconstruct |
|
5 |
-#'the data matrix for whole genome data; |
|
6 |
-#' |
|
7 |
-#'@param D data matrix |
|
8 |
-#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
9 |
-#'@param nFactor number of patterns (basis vectors, metagenes), which must be |
|
10 |
-#'greater than or equal to the number of rows of FP |
|
11 |
-#'@param nSets number of sets for parallelization |
|
12 |
-#'@param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
|
13 |
-#'@param saveBySetResults logical indicating whether to save by intermediary by set results. Default is FALSE. |
|
14 |
-#'@param fname character string used to label file output. Default is "GWCoGAPS.AP.fixed" |
|
15 |
-#'@param PatternsMatchFN function to use for pattern matching across sets |
|
16 |
-#'@param Cut number of branches at which to cut dendrogram used in patternMatch4Parallel |
|
17 |
-#'@param minNS minimum of individual set contributions a cluster must contain |
|
18 |
-#'@param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun} |
|
19 |
-#'@export |
|
20 |
-#'@seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
|
3 |
+#' @details calls the C++ MCMC code and performs Bayesian |
|
4 |
+#' matrix factorization returning the two matrices that reconstruct |
|
5 |
+#' the data matrix for whole genome data; |
|
6 |
+#' @param D data matrix |
|
7 |
+#' @param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
8 |
+#' @param params GapsParams object |
|
9 |
+#' @param nSets number of sets for parallelization |
|
10 |
+#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
|
11 |
+#' @param saveBySetResults logical indicating whether to save by intermediary by set results. Default is FALSE. |
|
12 |
+#' @param fname character string used to label file output. Default is "GWCoGAPS.AP.fixed" |
|
13 |
+#' @param PatternsMatchFN function to use for pattern matching across sets |
|
14 |
+#' @param Cut number of branches at which to cut dendrogram used in patternMatch4Parallel |
|
15 |
+#' @param minNS minimum of individual set contributions a cluster must contain |
|
16 |
+#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun} |
|
17 |
+#' @export |
|
18 |
+#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
|
21 | 19 |
#' @examples \dontrun{ |
22 | 20 |
#' GWCoGAPS(nCores=NA, D, S, nFactor, nSets,saveBySetResults=TRUE, fname=fname, |
23 | 21 |
#' PatternsMatchFN = patternMatch4Parallel,numSnapshots=numSnapshots,minNS=minNS) |
24 | 22 |
#' } |
25 | 23 |
#' |
24 |
+GWCoGAPS <- function(D, S, params, nSets, nCores=NA, saveBySetResults=FALSE, |
|
25 |
+fname="GWCoGAPS.AP.fixed", PatternsMatchFN = patternMatch4Parallel, Cut=NA, |
|
26 |
+minNS=NA, ...) |
|
27 |
+{ |
|
28 |
+ # v2 style parameters compatibility |
|
29 |
+ params <- oldParams(params, ...) |
|
30 |
+ |
|
31 |
+ # establish the number of cores that you are able to use |
|
32 |
+ if(is.na(nCores)) |
|
33 |
+ nCores <- nSets |
|
34 |
+ registerDoParallel(cores=nCores) |
|
35 |
+ |
|
36 |
+ # break the data into sets |
|
37 |
+ genesInSets <- createGWCoGAPSSets(data=D, nSets=nSets, keep=FALSE) |
|
38 |
+ |
|
39 |
+ # set gene min to 0 |
|
40 |
+ D <- sweep(D, 1, apply(D,1,function(x) pmin(0,min(x)))) |
|
41 |
+ |
|
42 |
+ #generate seeds for parallelization |
|
43 |
+ nut <- generateSeeds(chains=nSets, seed=-1) |
|
44 |
+ |
|
45 |
+ # run CoGAPS for each set |
|
46 |
+ AP <- foreach(i=1:nSets) %dopar% |
|
47 |
+ { |
|
48 |
+ D <- as.matrix(D[genesInSets[[i]],]) |
|
49 |
+ S <- as.matrix(S[genesInSets[[i]],]) |
|
50 |
+ params <- setParam(params, 'seed', nut[i]) |
|
51 |
+ CoGAPS(D=D, S=S, params) |
|
52 |
+ } |
|
26 | 53 |
|
27 |
-GWCoGAPS <- function(D, S, nFactor, nSets, nCores=NA, |
|
28 |
- saveBySetResults=FALSE, fname="GWCoGAPS.AP.fixed", |
|
29 |
- PatternsMatchFN = patternMatch4Parallel, |
|
30 |
- Cut=NA, minNS=NA, ...) { |
|
54 |
+ BySet <- reOrderBySet(AP=AP, nFactor=nFactor, nSets=nSets) |
|
31 | 55 |
|
32 |
-# establish the number of cores that you are able to use |
|
33 |
-if(is.na(nCores)){nCores<-nSets} |
|
34 |
-registerDoParallel(cores=nCores) |
|
56 |
+ #run postpattern match function |
|
57 |
+ if (is.na(Cut)) |
|
58 |
+ Cut <- nFactor |
|
59 |
+ matchedPs <- patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets, |
|
60 |
+ cnt=Cut, minNS=minNS, bySet=TRUE) |
|
35 | 61 |
|
36 |
-# break the data into sets |
|
37 |
-genesInSets<-createGWCoGAPSSets(data=D, nSets=nSets, keep=FALSE) |
|
62 |
+ #save BySet outputs |
|
63 |
+ class(AP) <- append(class(AP),"CoGAPS") |
|
64 |
+ if (saveBySetResults==TRUE) |
|
65 |
+ { |
|
66 |
+ save(AP,BySet,matchedPs,file=sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets)) |
|
67 |
+ message(sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets)) |
|
68 |
+ } |
|
38 | 69 |
|
39 |
-# set gene min to 0 |
|
40 |
-D <- sweep(D, 1, apply(D,1,function(x){pmin(0,min(x))})) |
|
70 |
+ PbySet <- matchedPs[["PBySet"]] |
|
71 |
+ matchedPs <- matchedPs[[1]] |
|
72 |
+ |
|
73 |
+ # generate seeds for parallelization |
|
74 |
+ nut <- generateSeeds(chains=nSets, seed=-1) |
|
75 |
+ |
|
76 |
+ # final number of factors |
|
77 |
+ nFactorFinal <- dim(matchedPs)[1] |
|
78 |
+ |
|
79 |
+ # run fixed CoGAPS |
|
80 |
+ params <- setParam('fixedMatrix', as.matrix(matchedPs)) |
|
81 |
+ params <- setParam('whichMatrixFixed', 'P') |
|
82 |
+ params <- setParam('nFactor', nFactorFinal) |
|
83 |
+ Fixed <- foreach(i=1:nSets) %dopar% |
|
84 |
+ { |
|
85 |
+ D <- as.matrix(D[genesInSets[[i]],]) |
|
86 |
+ S <- as.matrix(S[genesInSets[[i]],]) |
|
87 |
+ params <- setParam(params, 'seed', nut[i]) |
|
88 |
+ CoGAPS(D=D, S=S, params) |
|
89 |
+ } |
|
41 | 90 |
|
42 |
-#generate seeds for parallelization |
|
43 |
-nut<-generateSeeds(chains=nSets, seed=-1) |
|
91 |
+ #extract A and Asds |
|
92 |
+ As4fixPs <- postFixed4Parallel(AP.fixed=Fixed, setPs=matchedPs) |
|
44 | 93 |
|
45 |
-# run CoGAPS for each set |
|
46 |
-AP <- foreach(i=1:nSets) %dopar% { |
|
47 |
- D <- as.matrix(D[genesInSets[[i]],]) |
|
48 |
- S <- as.matrix(S[genesInSets[[i]],]) |
|
49 |
- gapsRun(D=D, S=S, nFactor=nFactor,seed=nut[i],...) |
|
94 |
+ #save final |
|
95 |
+ AP.fixed <- list("Amean"=As4fixPs$A, "Asd"=As4fixPs$Asd, "Pmean"=matchedPs, |
|
96 |
+ "PbySet"=PbySet) |
|
97 |
+ class(AP.fixed) <- append(class(AP.fixed),"CoGAPS") |
|
98 |
+ save(AP.fixed, file=paste(fname, ".Rda", sep="")) |
|
99 |
+ message(paste(fname, ".Rda", sep="")) |
|
50 | 100 |
} |
51 | 101 |
|
52 |
-BySet<-reOrderBySet(AP=AP,nFactor=nFactor,nSets=nSets) |
|
53 |
- |
|
54 |
-#run postpattern match function |
|
55 |
-if(is.na(Cut)){Cut<-nFactor} |
|
56 |
-matchedPs<-patternMatch4Parallel(Ptot=BySet$P,nP=nFactor,nSets=nSets,cnt=Cut,minNS=minNS,bySet=TRUE) |
|
102 |
+#' Create Gene Sets for GWCoGAPS |
|
103 |
+#' |
|
104 |
+#'\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing; |
|
105 |
+#' |
|
106 |
+#'@param data data matrix with unique rownames |
|
107 |
+#'@param nSets number of sets for parallelization |
|
108 |
+#'@param outRDA name of output file |
|
109 |
+#'@param keep logical indicating whether or not to save gene set list. Default is TRUE. |
|
110 |
+#'@export |
|
111 |
+#'@return list with randomly generated sets of genes from whole genome data |
|
112 |
+#'@examples \dontrun{ |
|
113 |
+#'createGWCoGAPSSet(D,nSets=nSets) |
|
114 |
+#'} |
|
115 |
+createGWCoGAPSSets <- function(data, nSets, outRDA="GenesInCoGAPSSets.Rda", |
|
116 |
+keep=TRUE) |
|
117 |
+{ |
|
118 |
+ genes <- rownames(data) |
|
119 |
+ setSize <- floor(length(genes) / nSets) |
|
120 |
+ genesInSets <- list() |
|
121 |
+ for (set in 1:nSets) |
|
122 |
+ { |
|
123 |
+ if (set != nSets) |
|
124 |
+ genesInSets[[set]] <- sample(genes,setSize) |
|
125 |
+ if (set == nSets) |
|
126 |
+ genesInSets[[set]] <- genes |
|
127 |
+ genes <- genes[!genes %in% genesInSets[[set]]] |
|
128 |
+ } |
|
57 | 129 |
|
58 |
-#save BySet outputs |
|
59 |
-class(AP)<-append(class(AP),"CoGAPS") |
|
60 |
-if(saveBySetResults==TRUE){ |
|
61 |
- save(AP,BySet,matchedPs,file=sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets)) |
|
62 |
- print(sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets)) |
|
130 |
+ if (!identical(sort(unlist(genesInSets)),sort(rownames(data)))) |
|
131 |
+ warning("Gene identifiers not unique!") |
|
132 |
+ if (keep == TRUE) |
|
133 |
+ save(list=c('genesInSets'), file=outRDA) |
|
134 |
+ return(genesInSets) |
|
63 | 135 |
} |
64 | 136 |
|
65 |
-PbySet<-matchedPs[["PBySet"]] |
|
66 |
-matchedPs<-matchedPs[[1]] |
|
67 |
- |
|
68 |
-#generate seeds for parallelization |
|
69 |
-nut<-generateSeeds(chains=nSets, seed=-1) |
|
70 |
- |
|
71 |
-#final number of factors |
|
72 |
-nFactorFinal<-dim(matchedPs)[1] |
|
73 |
- |
|
74 |
-# run fixed CoGAPS |
|
75 |
-Fixed <- foreach(i=1:nSets) %dopar% { |
|
76 |
- D <- as.matrix(D[genesInSets[[i]],]) |
|
77 |
- S <- as.matrix(S[genesInSets[[i]],]) |
|
78 |
- AP <- gapsMapRun(D, S, FP=matchedPs, nFactor=nFactorFinal, fixedMatrix = "P",seed=nut[i],...) |
|
137 |
+#' Generate Seeds for Multiple Concurrent Runs |
|
138 |
+#' |
|
139 |
+#' @param chains number of seeds to generate (number of chains to run) |
|
140 |
+#' @param seed positive values are kept, negative values will be overwritten |
|
141 |
+#' by a seed generated from the current time |
|
142 |
+#' @return vector of randomly generated seeds |
|
143 |
+#' @export |
|
144 |
+#' @examples |
|
145 |
+#' generateSeeds(chains=2, seed=-1) |
|
146 |
+generateSeeds <- function(chains=2, seed=-1) |
|
147 |
+{ |
|
148 |
+ if (chains < 2 | (as.integer(chains) != chains)) |
|
149 |
+ stop("chains must be >= 2 and an integer") |
|
150 |
+ |
|
151 |
+ if (seed < 0) |
|
152 |
+ { |
|
153 |
+ secs <- as.numeric(difftime(Sys.time(), paste(Sys.Date(), "00:00"), |
|
154 |
+ units="secs")) |
|
155 |
+ return(seq_len(chains) * round(secs)) |
|
79 | 156 |
} |
157 |
+ else |
|
158 |
+ { |
|
159 |
+ return(seq_len(chains) * seed) |
|
160 |
+ } |
|
161 |
+} |
|
80 | 162 |
|
81 |
-#extract A and Asds |
|
82 |
-As4fixPs <- postFixed4Parallel(AP.fixed=Fixed,setPs=matchedPs) |
|
83 |
- |
|
163 |
+#' reOrderBySet |
|
164 |
+#' |
|
165 |
+#' @description <restructures output of gapsRun into a list containing each sets solution for Amean, Pmean, and Asd> |
|
166 |
+#' @param AP output of gapsRun in parallel |
|
167 |
+#' @param nFactor number of patterns |
|
168 |
+#' @param nSets number of sets |
|
169 |
+#' |
|
170 |
+#' @return a list containing the \code{nSets} sets solution for Amean under "A", Pmean under "P", and Asd under "Asd" |
|
171 |
+#' @export |
|
172 |
+#' |
|
173 |
+#' @examples \dontrun{ |
|
174 |
+#' reOrderBySet(AP,nFactor,nSets) |
|
175 |
+#' } |
|
176 |
+#' |
|
177 |
+reOrderBySet<-function(AP, nFactor, nSets) |
|
178 |
+{ |
|
179 |
+ P <- do.call(rbind,lapply(AP, function(x) x$Pmean)) |
|
180 |
+ rownames(P) <- paste(rep(1:nSets,each=nFactor),rep(1:nFactor,nSets),sep=".") |
|
181 |
+ A <- lapply(AP, function(x) x$Amean) |
|
182 |
+ Asd <- lapply(AP, function(x) x$Asd) |
|
183 |
+ names(A) <- names(Asd) <- paste(rep("Set", nSets), rep(1:nSets), sep="") |
|
184 |
+ return(list("A"=A, "Asd"=Asd, "P"=P)) |
|
185 |
+} |
|
84 | 186 |
|
85 |
-#save final |
|
86 |
-AP.fixed<-list("Amean"=As4fixPs$A, "Asd"=As4fixPs$Asd, "Pmean"=matchedPs,"PbySet"=PbySet) |
|
87 |
-class(AP.fixed)<-append(class(AP.fixed),"CoGAPS") |
|
88 |
-save(AP.fixed, file=paste(fname,".Rda",sep="")) |
|
89 |
-print(paste(fname,".Rda",sep="")) |
|
187 |
+#' postFixed4Parallel |
|
188 |
+#' |
|
189 |
+#' @param AP.fixed output of parallel gapsMapRun calls with same FP |
|
190 |
+#' @param setPs data.frame with rows giving fixed patterns for P used as input for gapsMapRun |
|
191 |
+#' |
|
192 |
+#' @return list of two data.frames containing the A matrix estimates or their corresponding standard deviations |
|
193 |
+#' from output of parallel gapsMapRun |
|
194 |
+#' @export |
|
195 |
+postFixed4Parallel <- function(AP.fixed=NA, setPs=NA) |
|
196 |
+{ |
|
197 |
+ ASummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Amean)) |
|
198 |
+ Asd <- do.call(rbind,lapply(AP.fixed, function(x) x$Asd)) |
|
199 |
+ #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean)) |
|
200 |
+ PSummary <- AP.fixed[[1]]$Pmean |
|
201 |
+ |
|
202 |
+ Pmax <- apply(PSummary,1,max) |
|
203 |
+ Pneu <- sweep(PSummary,1,Pmax,FUN="/") |
|
204 |
+ Aneu <- sweep(ASummary,2,Pmax,FUN="*") |
|
205 |
+ |
|
206 |
+ X <- apply(Pneu,1,range) |
|
207 |
+ Y <- apply(setPs,1,range) |
|
208 |
+ colnames(X) <- colnames(Y) |
|
209 |
+ if (!all.equal(X, Y, tolerance=1e-5)) |
|
210 |
+ warning("Patterns do not match fixed values.") |
|
211 |
+ |
|
212 |
+ As4fixPs <- list("A"=Aneu, "Asd"=Asd) |
|
213 |
+ return(As4fixPs) |
|
90 | 214 |
} |
91 | 215 |
deleted file mode 100644 |
... | ... |
@@ -1,28 +0,0 @@ |
1 |
-#Binary Heatmap for Standardized A Matrix |
|
2 |
- |
|
3 |
-#'\code{binaryA} creates a binarized heatmap of the A matrix |
|
4 |
-#'in which the value is 1 if the value in Amean is greater than |
|
5 |
-#'threshold * Asd and 0 otherwise |
|
6 |
-#' |
|
7 |
-#'@param Amean the mean estimate for the A matrix |
|
8 |
-#'@param Asd the standard deviations on Amean |
|
9 |
-#'@param threshold the number of standard deviations above zero |
|
10 |
-#'that an element of Amean must be to get a value of 1 |
|
11 |
-#'@export |
|
12 |
-binaryA <-function(Amean, Asd, threshold=3) { |
|
13 |
- |
|
14 |
- |
|
15 |
- BinA_Map <- ifelse(Amean/Asd > threshold,1,0) |
|
16 |
- colnames(BinA_Map) <-colnames(BinA_Map,do.NULL=F,prefix = "Pattern ") |
|
17 |
- rownames(BinA_Map) <- rep(" ",nrow(BinA_Map)) |
|
18 |
- |
|
19 |
- |
|
20 |
- heatmap.2(BinA_Map, Rowv = FALSE, Colv = FALSE,dendrogram="none", |
|
21 |
- scale="none",col = brewer.pal(3,"Blues"), trace="none", |
|
22 |
- density.info="none",cexCol=1.3,srtCol=45, |
|
23 |
- lmat=rbind(c(0, 3),c(2,1),c(0,4) ), |
|
24 |
- lwid=c(1,10),lhei=c(1, 4, 1.2 ), |
|
25 |
- main="Heatmap of Standardized A Matrix") |
|
26 |
- mtext(paste("(Threshold = ", threshold, ")")) |
|
27 |
- |
|
28 |
-} |
29 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,130 +0,0 @@ |
1 |
-# calcCoGAPSStat: calculate gene set statistics for A matrix columns |
|
2 |
-# History: v1.0 EJF original CoGAPS |
|
3 |
- |
|
4 |
-# Inputs: Amean - A matrix mean values |
|
5 |
-# Asd - A matrix standard deviations |
|
6 |
-# GStoGenes - data.frame, GSA.genesets class, or list with gene sets |
|
7 |
-# numPerm - number of permutations for null |
|
8 |
- |
|
9 |
-# Output: list with gene set statistics |
|
10 |
- |
|
11 |
-#'\code{calcCoGAPSStat} calculates the gene set statistics for each |
|
12 |
-#'column of A using a Z-score from the elements of the A matrix, |
|
13 |
-#'the input gene set, and permutation tests |
|
14 |
-#' |
|
15 |
-#'@param Amean A matrix mean values |
|
16 |
-#'@param Asd A matrix standard deviations |
|
17 |
-#'@param GStoGenes data.frame or list with gene sets |
|
18 |
-#'@param numPerm number of permutations for null |
|
19 |
-#'@export |
|
20 |
- |
|
21 |
-calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
|
22 |
- |
|
23 |
- # test for std dev of zero, possible mostly in simple simulations |
|
24 |
- if (sum(Asd==0) > 0) { |
|
25 |
- #temp <- min(Asd[Asd>0]) |
|
26 |
- Asd[Asd==0] <- .Machine$double.eps |
|
27 |
- } |
|
28 |
- |
|
29 |
- # calculate Z scores |
|
30 |
- zMatrix <- calcZ(Amean,Asd) |
|
31 |
- |
|
32 |
- # compute the p-value for each gene set belonging to each pattern |
|
33 |
- |
|
34 |
- # check input arguments |
|
35 |
- if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets")) { |
|
36 |
- stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.") |
|
37 |
- } |
|
38 |
- |
|
39 |
- if (is(GStoGenes, "GSA.genesets")) { |
|
40 |
- names(GStoGenes$genesets) <- GStoGenes$geneset.names |
|
41 |
- GStoGenes <- GStoGenes$genesets |
|
42 |
- } |
|
43 |
- |
|
44 |
- if (is(GStoGenes, "list")) { |
|
45 |
- GStoGenesList <- GStoGenes |
|
46 |
- } else { |
|
47 |
- GStoGenesList <- list() |
|
48 |
- for (i in 1:dim(GStoGenes)[2]) { |
|
49 |
- GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i])) |
|
50 |
- } |
|
51 |
- } |
|
52 |
- |
|
53 |
- # get dimensions |
|
54 |
- numGS <- length(names(GStoGenesList)) |
|
55 |
- numPatt <- dim(zMatrix)[2] |
|
56 |
- numG <- dim(zMatrix)[1]+0.9999 |
|
57 |
- |
|
58 |
- # initialize matrices |
|
59 |
- statsUp <- matrix(ncol = numGS, nrow = numPatt) |
|
60 |
- statsDown <- matrix(ncol = numGS, nrow = numPatt) |
|
61 |
- actEst <- matrix(ncol = numGS, nrow = numPatt) |
|
62 |
- results <- list() |
|
63 |
- zPerm <- matrix(ncol=numPerm,nrow=numPatt) |
|
64 |
- |
|
65 |
- # do permutation test |
|
66 |
- for (gs in 1:numGS) { |
|
67 |
- genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
68 |
- index <- rownames(zMatrix) %in% genes |
|
69 |
- zValues <- zMatrix[index,1] |
|
70 |
- numGenes <- length(zValues) |
|
71 |
- label <- as.character(numGenes) |
|
72 |
- |
|
73 |
- if (!any(names(results)==label)) { |
|
74 |
- for (p in 1:numPatt) { |
|
75 |
- for (j in 1:numPerm) { |
|
76 |
- temp <- floor(runif(numGenes,1,numG)) |
|
77 |
- temp2 <- zMatrix[temp,p] |
|
78 |
- zPerm[p,j] <- mean(temp2) |
|
79 |
- } |
|
80 |
- } |
|
81 |
- results[[label]] <- zPerm |
|
82 |
- } |
|
83 |
- } |
|
84 |
- |
|
85 |
-# get p-value |
|
86 |
- for (p in 1:numPatt) { |
|
87 |
- |
|
88 |
- for (gs in 1:numGS) { |
|
89 |
- |
|
90 |
- genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
91 |
- index <- rownames(zMatrix) %in% genes |
|
92 |
- zValues <- zMatrix[index,p] |
|
93 |
- zScore <- mean(zValues) |
|
94 |
- |
|
95 |
- numGenes <- length(zValues) |
|
96 |
- label <- as.character(numGenes) |
|
97 |
- |
|
98 |
- permzValues <- results[[label]][p,] |
|
99 |
- ordering <- order(permzValues) |
|
100 |
- permzValues <- permzValues[ordering] |
|
101 |
- |
|
102 |
- statistic <- sum(zScore > permzValues) |
|
103 |
- statUpReg <- 1 - statistic/length(permzValues) |
|
104 |
- statsUp[p,gs] <- max(statUpReg, 1/numPerm) |
|
105 |
- |
|
106 |
- statistic <- sum(zScore < permzValues) |
|
107 |
- statDownReg <- 1 - statistic/length(permzValues) |
|
108 |
- statsDown[p,gs] <- max(statDownReg,1/numPerm) |
|
109 |
- |
|
110 |
- activity <- 1 - 2*max(statUpReg, 1/numPerm) |
|
111 |
- actEst[p,gs] <- activity |
|
112 |
- } |
|
113 |
- |
|
114 |
- } |
|
115 |
- |
|
116 |
- # format output |
|
117 |
- colnames(statsUp) <- names(GStoGenesList) |
|
118 |
- colnames(statsDown) <- names(GStoGenesList) |
|
119 |
- colnames(actEst) <- names(GStoGenesList) |
|
120 |
- |
|
121 |
- rownames(statsUp) <- colnames(zMatrix) |
|
122 |
- rownames(statsDown) <- colnames(zMatrix) |
|
123 |
- rownames(actEst) <- colnames(zMatrix) |
|
124 |
- |
|
125 |
- results[['GSUpreg']] <- statsUp |
|
126 |
- results[['GSDownreg']] <- statsDown |
|
127 |
- results[['GSActEst']] <- actEst |
|
128 |
- |
|
129 |
- return(results) |
|
130 |
-} |
131 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,85 +0,0 @@ |
1 |
-# calcGeneGSStat: calculate probability gene belongs in gene set |
|
2 |
-# History: v1.0 EJF original CoGAPS |
|
3 |
- |
|
4 |
-# Inputs: Amean - A matrix mean values |
|
5 |
-# Asd - A matrix standard deviations |
|
6 |
-# GStoGenes - data.frame or list with gene sets |
|
7 |
-# numPerm - number of permutations for null |
|
8 |
-# Pw - weighting on genes |
|
9 |
-# nullGenes - adjust genes |
|
10 |
- |
|
11 |
-# Output: environment with gene set statistics |
|
12 |
-# NOTE: should make into list object, env historical |
|
13 |
- |
|
14 |
-#'\code{calcGeneGSStat} calculates the probability that a gene |
|
15 |
-#'listed in a gene set behaves like other genes in the set within |
|
16 |
-#'the given data set |
|
17 |
-#' |
|
18 |
-#'@param Amean A matrix mean values |
|
19 |
-#'@param Asd A matrix standard deviations |
|
20 |
-#'@param GSGenes data.frame or list with gene sets |
|
21 |
-#'@param numPerm number of permutations for null |
|
22 |
-#'@param Pw weight on genes |
|
23 |
-#'@param nullGenes - logical indicating gene adjustment |
|
24 |
-#'@export |
|
25 |
- |
|
26 |
-calcGeneGSStat <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), nullGenes=F) { |
|
27 |
- gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), |
|
28 |
- numPerm=numPerm) |
|
29 |
- gsStat <- gsStat$GSUpreg |
|
30 |
- |
|
31 |
- gsStat <- -log(gsStat) |
|
32 |
- |
|
33 |
- if (!all(is.na(Pw))) { |
|
34 |
- if (length(Pw) != length(gsStat)) { |
|
35 |
- stop('Invalid weighting') |
|
36 |
- } |
|
37 |
- gsStat <- gsStat*Pw |
|
38 |
- } |
|
39 |
- |
|
40 |
- if (nullGenes) { |
|
41 |
- ZD <- Amean[setdiff(row.names(Amean), GSGenes),] / |
|
42 |
- Asd[setdiff(row.names(Amean), GSGenes),] |
|
43 |
- } else { |
|
44 |
- ZD <- Amean[GSGenes,]/Asd[GSGenes,] |
|
45 |
- } |
|
46 |
- outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat)) |
|
47 |
- |
|
48 |
- outStats <- outStats / apply(ZD,1,sum) |
|
49 |
- |
|
50 |
- outStats[which(apply(ZD,1,sum) < .Machine$double.eps)] <- 0. |
|
51 |
- |
|
52 |
- |
|
53 |
- if (sum(gsStat) < .Machine$double.eps) { |
|
54 |
- return(0.) |
|
55 |
- } |
|
56 |
- |
|
57 |
- return(outStats) |
|
58 |
- |
|
59 |
-} |
|
60 |
- |
|
61 |
- |
|
62 |
-computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)), |
|
63 |
- numPerm=500, PwNull=F) { |
|
64 |
- geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw, |
|
65 |
- GSGenes=GSGenes, numPerm=numPerm) |
|
66 |
- |
|
67 |
- |
|
68 |
- |
|
69 |
- if (PwNull) { |
|
70 |
- permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, |
|
71 |
- GSGenes=GSGenes, numPerm=numPerm, Pw=Pw, |
|
72 |
- nullGenes=T) |
|
73 |
- } else { |
|
74 |
- permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, |
|
75 |
- GSGenes=GSGenes, numPerm=numPerm, |
|
76 |
- nullGenes=T) |
|
77 |
- } |
|
78 |
- |
|
79 |
- finalStats <- sapply(GSGenes, |
|
80 |
- function(x) length(which(permGSStat > geneGSStat[x])) / length(permGSStat)) |
|
81 |
- |
|
82 |
- return(finalStats) |
|
83 |
- |
|
84 |
- |
|
85 |
-} |
86 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,40 +0,0 @@ |
1 |
-# calcZ: function to generate Z-score matrix |
|
2 |
-# History: v1.0 EJF original CoGAPS |
|
3 |
- |
|
4 |
-# Inputs: meanMat - matrix of mean values |
|
5 |
-# sdMat - matrix of standard deviations |
|
6 |
- |
|
7 |
-# Output: matrix of Z-scores |
|
8 |
- |
|
9 |
-#'\code{calcZ} calculates the Z-score for each element based |
|
10 |
-#'on input mean and standard deviation matrices |
|
11 |
-#' |
|
12 |
-#'@param meanMat matrix of mean values |
|
13 |
-#'@param sdMat matrix of standard deviation values |
|
14 |
-#'@export |
|
15 |
- |
|
16 |
-calcZ <- function (meanMat, sdMat) { |
|
17 |
- |
|
18 |
- # compute the z score for each gene's association to each pattern |
|
19 |
- |
|
20 |
- # find matrix dimensions |
|
21 |
- nrows <- dim(meanMat)[1] |
|
22 |
- ncols <- dim(meanMat)[2] |
|
23 |
- |
|
24 |
- check <- dim(sdMat)[1] |
|
25 |
- if (nrows != check) { |
|
26 |
- stop("Number of rows in the mean and standard deviation of A do not agree.") |
|
27 |
- } |
|
28 |
- |
|
29 |
- check <- dim(sdMat)[2] |
|
30 |
- if (ncols != check) { |
|
31 |
- stop("Number of columns in the mean and standard deviation of A do not agree.") |
|
32 |
- } |
|
33 |
- |
|
34 |
- # compute the matrix of z scores |
|
35 |
- zMat <- meanMat/sdMat |
|
36 |
- rownames(zMat) <- rownames(meanMat) |
|
37 |
- colnames(zMat) <- colnames(meanMat) |
|
38 |
- |
|
39 |
- return(zMat) |
|
40 |
-} |
41 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,107 @@ |
1 |
+#' @title GapsParams |
|
2 |
+#' @description Parameters for running CoGAPS |
|
3 |
+#' |
|
4 |
+#' @slot nFactor number of patterns (basis vectors, metagenes) |
|
5 |
+#' @slot nEquil number of iterations for burn-in |
|
6 |
+#' @slot nSample number of iterations for sampling |
|
7 |
+#' @slot nOutput how often (number of iterations) to print status in R console |
|
8 |
+#' @slot numSnapshots the number of individual samples to capture |
|
9 |
+#' @slot alphaA sparsity parameter for A domain |
|
10 |
+#' @slot alphaP sparsity parameter for P domain |
|
11 |
+#' @slot maxGibbmassA limit truncated normal to max size |
|
12 |
+#' @slot maxGibbmassP limit truncated normal to max size |
|
13 |
+#' @slot seed positive values are kept, negative values will be overwritten |
|
14 |
+#' by a seed generated from the current time |
|
15 |
+#' @slot messages display messages during the run |
|
16 |
+#' @slot singleCelLRNASeq indicate if the data is single cell RNA-seq data |
|
17 |
+#' @slot fixedPatterns matrix of fixed values in either A or P matrix |
|
18 |
+#' @slot whichMatrixFixed characted indicating whether A or P contains |
|
19 |
+#' the fixed patterns |
|
20 |
+#' @slot checkpointInterval how often (number of seconds) to create a checkpoint file |
|
21 |
+#' @slot nCores max number of cores to run on |
|
22 |
+#' @export |
|
23 |
+setClass('GapsParams', slots = c( |
|
24 |
+ nFactor = 'integer', |
|
25 |
+ nEquil = 'integer', |
|
26 |
+ nSample = 'integer', |
|
27 |
+ nOutput = 'integer', |
|
28 |
+ nSnapshots = 'integer', |
|
29 |
+ alphaA = 'numeric', |
|
30 |
+ alphaP = 'numeric', |
|
31 |
+ maxGibbmassA = 'numeric', |
|
32 |
+ maxGibbmassP = 'numeric', |
|
33 |
+ seed = 'integer', |
|
34 |
+ messages = 'logical', |
|
35 |
+ singleCelLRNASeq = 'logical', |
|
36 |
+ fixedPatterns = 'matrix', |
|
37 |
+ whichMatrixFixed = 'character', |
|
38 |
+ checkpointInterval = 'integer', |
|
39 |
+ nCores = 'integer' |
|
40 |
+)) |
|
41 |
+ |
|
42 |
+setMethod('initialize', 'GapsParams', |
|
43 |
+ function(.Object, nFactor, nEquil, nSample, ...) |
|
44 |
+ { |
|
45 |
+ .Object@nFactor <- nFactor |
|
46 |
+ .Object@nEquil <- nEquil |
|
47 |
+ .Object@nSample <- nSample |
|
48 |
+ .Object@nOutput <- 1000 |
|
49 |
+ .Object@numSnapshots <- 0 |
|
50 |
+ .Object@alphaA <- 0.01 |
|
51 |
+ .Object@alphaP <- 0.01 |
|
52 |
+ .Object@maxGibbmassA <- 100.0 |
|
53 |
+ .Object@maxGibbmassP <- 100.0 |
|
54 |
+ .Object@seed <- -1 |
|
55 |
+ .Object@messages <- TRUE |
|
56 |
+ .Object@singleCelLRNASeq <- FALSE |
|
57 |
+ .Object@fixedPatterns <- matrix(0) |
|
58 |
+ .Object@whichMatrixFixed <- 'N' |
|
59 |
+ .Object@checkpointInterval <- 0 |
|
60 |
+ |
|
61 |
+ .Object <- callNextMethod(.Object, ...) |
|
62 |
+ .Object |
|
63 |
+ } |
|
64 |
+) |
|
65 |
+ |
|
66 |
+setValidity('GapsParams', |
|
67 |
+ function(object) |
|
68 |
+ { |
|
69 |
+ if (object@whichMatrixFixed == 'P' & nrow(object@fixedPatterns) > object@nFactor) |
|
70 |
+ 'number of fixed patterns greater than nFactor' |
|
71 |
+ if (object@whichMatrixFixed == 'A' & ncol(object@fixedPatterns) > object@nFactor) |
|
72 |
+ 'number of fixed patterns greater than nFactor' |
|
73 |
+ } |
|
74 |
+) |
|
75 |
+ |
|
76 |
+#' @rdname getParam |
|
77 |
+#' @importFrom methods slot |
|
78 |
+setMethod("getParam", "GapsParams", function(object, name) |
|
79 |
+{ |
|
80 |
+ slot(object, name) |
|
81 |
+}) |
|
82 |
+ |
|
83 |
+#' @rdname setParam |
|
84 |
+#' @importFrom methods slot<- validObject |
|
85 |
+setMethod("setParam", "GapsParams", function(object, name, value) |
|
86 |
+{ |
|
87 |
+ slot(object, name) <- value |
|
88 |
+ validObject(object) |
|
89 |
+ return(object) |
|
90 |
+}) |
|
91 |
+ |
|
92 |
+#' @importFrom methods slotNames |
|
93 |
+setMethod("show", "GapsParams", function(object) |
|
94 |
+{ |
|
95 |
+ cat("CoGAPS Parameters Object\n") |
|
96 |
+ cat("Number of patterns to estimate: ", object@nFactor) |
|
97 |
+}) |
|
98 |
+ |
|
99 |
+checkParamsWithData <- function(params, nRD, nCD, nRS, nCS) |
|
100 |
+{ |
|
101 |
+ if (nRD != nRS | nCD != nCS) |
|
102 |
+ stop('D and S matrices have mismached dimensions') |
|
103 |
+ if (params@whichMatrixFixed == 'A' & nrow(params@fixedPatterns) != nRD) |
|
104 |
+ stop('invalid fixed pattern length') |
|
105 |
+ if (params@whichMatrixFixed == 'P' & ncol(params@fixedPatterns) != nCD) |
|
106 |
+ stop('invalid fixed pattern length') |
|
107 |
+}) |
0 | 108 |
deleted file mode 100644 |
... | ... |
@@ -1,34 +0,0 @@ |
1 |
-#' createGWCoGAPSSets |
|
2 |
-#' |
|
3 |
-#'\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing; |
|
4 |
-#' |
|
5 |
-#'@param data data matrix with unique rownames |
|
6 |
-#'@param nSets number of sets for parallelization |
|
7 |
-#'@param outRDA name of output file |
|
8 |
-#'@param keep logical indicating whether or not to save gene set list. Default is TRUE. |
|
9 |
-#'@export |
|
10 |
-#'@return list with randomly generated sets of genes from whole genome data |
|
11 |
-#'@examples \dontrun{ |
|
12 |
-#'createGWCoGAPSSet(D,nSets=nSets) |
|
13 |
-#'} |
|
14 |
-#' |
|
15 |
- |
|
16 |
-createGWCoGAPSSets<-function(data=D, #data matrix with unique rownames |
|
17 |
- nSets=nSets, #number of sets for parallelization |
|
18 |
- outRDA="GenesInCoGAPSSets.Rda", #name of output file |
|
19 |
- keep=TRUE #logical indicating whether or not to save gene set list. Default is TRUE. |
|
20 |
- ){ |
|
21 |
-genes=rownames(data) |
|
22 |
-setSize=floor(length(genes)/nSets) |
|
23 |
-genesInSets <- list() |
|
24 |
-for (set in 1:nSets) { |
|
25 |
- if(set!=nSets){genesInSets[[set]] <- sample(genes,setSize)} |
|
26 |
- if(set==nSets){genesInSets[[set]] <- genes} |
|
27 |
- genes=genes[!genes%in%genesInSets[[set]]] |
|
28 |
-} |
|
29 |
-if(!identical(sort(unlist(genesInSets)),sort(rownames(data)))){print("Gene identifiers not unique!")} |
|
30 |
-if(keep==TRUE){save(list=c('genesInSets'),file=outRDA)} |
|
31 |
-return(genesInSets) |
|
32 |
-} |
|
33 |
- |
|
34 |
- |
35 | 0 |
deleted file mode 100755 |
... | ... |
@@ -1,144 +0,0 @@ |
1 |
-#Calculates significant genes in each pattern according to certain threshold |
|
2 |
-#Returns the significant gene names as well as well as the means of these matrices and number of genes in each |
|
3 |
- |
|
4 |
-gapsInterPattern <- function(Amean, Asd, sdThreshold = 3) |
|
5 |
-{ |
|
6 |
- #number of rows and cols of Asd |
|
7 |
- numGenes = length(Asd[,1]); |
|
8 |
- numCols = length(Asd[1,]); |
|
9 |
- |
|
10 |
- #Vector holding the number of each significant gene in each column |
|
11 |
- sigGeneNums = data.frame(); |
|
12 |
- |
|
13 |
- #Temp number of sig genes in the col |
|
14 |
- sigCount = 0; |
|
15 |
- |
|
16 |
- #Number to catch the amount of columns without significant genes |
|
17 |
- numEmptyCols = 0; |
|
18 |
- |
|
19 |
- #Keep an array of the significant gene counts |
|
20 |
- significantGeneNums = c(0); |
|
21 |
- |
|
22 |
- #Names of the genes from the data matrix |
|
23 |
- geneNames = names(Asd[,1]); |
|
24 |
- |
|
25 |
- #Names of the genes that are significant from the data matrix |
|
26 |
- sigGeneNames = "0"; |
|
27 |
- |
|
28 |
- #The numerator of our statistic |
|
29 |
- dimensionStatNumerator = 0; |
|
30 |
- |
|
31 |
- #The denominator of our statistic |
|
32 |
- dimensionStatDenominator = 0; |
|
33 |
- |
|
34 |
- #The value of our statistic |
|
35 |
- dimensionStatistic = 0; |
|
36 |
- |
|
37 |
- #A matrix holding the values of our statistics |
|
38 |
- dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols); |
|
39 |
- |
|
40 |
- #The mean of the statistic matrix |
|
41 |
- sbar = 0; |
|
42 |
- |
|
43 |
- #A list to return the significant genes in each col and the statistic matrix |
|
44 |
- results = list(list()); |
|
45 |
- |
|
46 |
- #Scan in the significant genes from each column of Asd |
|
47 |
- #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
48 |
- for(i in 1:numCols) |
|
49 |
- { |
|
50 |
- sigCount = 0; |
|
51 |
- for(j in 1:numGenes) |
|
52 |
- { |
|
53 |
- if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
54 |
- { |
|
55 |
- sigCount = sigCount + 1; |
|
56 |
- sigGeneNums[sigCount, i] = j; |
|
57 |
- } |
|
58 |
- } |
|
59 |
- |
|
60 |
- if(sigCount == 0) |
|
61 |
- { |
|
62 |
- sigGeneNums[1, i] = 0; |
|
63 |
- numEmptyCols = numEmptyCols + 1; |
|
64 |
- } |
|
65 |
- |
|
66 |
- #Save the number of sigGenes |
|
67 |
- significantGeneNums[i] = sigCount; |
|
68 |
- |
|
69 |
- #Get the names and store them |
|
70 |
- if(sigCount != 0) |
|
71 |
- { |
|
72 |
- for(k in 1:sigCount) |
|
73 |
- { |
|
74 |
- sigGeneNames[k] = geneNames[sigGeneNums[k, i]]; |
|
75 |
- } |
|
76 |
- results[[1]][[i]] = sigGeneNames; |
|
77 |
- sigGeneNames = "0"; |
|
78 |
- } |
|
79 |
- else |
|
80 |
- { |
|
81 |
- results[[1]][[i]] = "None"; |
|
82 |
- } |
|
83 |
- } |
|
84 |
- |
|
85 |
- if(any(significantGeneNums == 0)) |
|
86 |
- { |
|
87 |
- zeroSigCols = which(significantGeneNums == 0); |
|
88 |
- print("Warning: No Significant Genes in Pattern(s): "); |
|
89 |
- |
|
90 |
- for(z in 1:length(zeroSigCols)) |
|
91 |
- { |
|
92 |
- print(zeroSigCols[z]); |
|
93 |
- } |
|
94 |
- } |
|
95 |
- |
|
96 |
- #Now that we have the significant genes want to see if these genes are significant in other columns |
|
97 |
- #Fill across the row then down the column |
|
98 |
- |
|
99 |
- #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i' |
|
100 |
- for(i in 1:numCols) |
|
101 |
- { |
|
102 |
- for(j in 1:numCols) |
|
103 |
- { |
|
104 |
- #Grab the number of significant genes from the interested column |
|
105 |
- sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
106 |
- |
|
107 |
- if(sigCount != 0) |
|
108 |
- { |
|
109 |
- dimensionStatDenominator = sigCount; |
|
110 |
- dimensionStatNumerator = 0; |
|
111 |
- |
|
112 |
- #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd. |
|
113 |
- #Find the significance of these genes, Amean - threshold * Asd |
|
114 |
- for(k in 1:sigCount) |
|
115 |
- { |
|
116 |
- if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0) |
|
117 |
- { |
|
118 |
- dimensionStatNumerator = dimensionStatNumerator + 1; |
|
119 |
- } |
|
120 |
- } |
|
121 |
- |
|
122 |
- dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator; |
|
123 |
- |
|
124 |
- dimensionStatisticMatrix[i, j] = dimensionStatistic; |
|
125 |
- } |
|
126 |
- else |
|
127 |
- { |
|
128 |
- dimensionStatisticMatrix[i, j] = 0; |
|
129 |
- } |
|
130 |
- } |
|
131 |
- } |
|
132 |
- |
|
133 |
- #Find mean of the matrices (excluding the diagonal elements) |
|
134 |
- #we can subtract numCols as the diagonal elements are 1 |
|
135 |
- sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols))); |
|
136 |
- |
|
137 |
- results[[2]] = significantGeneNums; |
|
138 |
- results[[3]] = t(dimensionStatisticMatrix); |
|
139 |
- results[[4]] = sbar; |
|
140 |
- |
|
141 |
- names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue"); |
|
142 |
- |
|
143 |
- return(results); |
|
144 |
-} |
145 | 0 |
deleted file mode 100755 |
... | ... |
@@ -1,135 +0,0 @@ |
1 |
-#Calculates significant genes in each pattern according to certain threshold |
|
2 |
-#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices |
|
3 |
- |
|
4 |
-gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3) |
|
5 |
-{ |
|
6 |
- #number of rows and cols of Asd |
|
7 |
- numGenes = length(Asd[,1]); |
|
8 |
- numCols = length(Asd[1,]); |
|
9 |
- |
|
10 |
- #number of samples in DMatrix |
|
11 |
- numSamp = ncol(DMatrix); |
|
12 |
- |
|
13 |
- #Vector holding the number of each significant gene in each column |
|
14 |
- sigGeneNums = data.frame(); |
|
15 |
- |
|
16 |
- #Temp number of sig genes in the col |
|
17 |
- sigCount = 0; |
|
18 |
- |
|
19 |
- #Keep an array of the significant gene counts |
|
20 |
- significantGeneNums = c(0); |
|
21 |
- |
|
22 |
- #A matrix to hold the significant genes in D for the current pattern |
|
23 |
- #The matrix just acts as a subset of D, just eliminates non relevant rows |
|
24 |
- tempSubsetD = matrix(); |
|
25 |
- |
|
26 |
- #A matrix holding the values of our correlation coefficients between genes for the current column |
|
27 |
- tempGeneCorrMatrix = matrix(); |
|
28 |
- |
|
29 |
- #A list to hold all the correlation matrices |
|
30 |
- geneCorrMatrices = list(); |
|
31 |
- |
|
32 |
- #A list to hold all the means |
|
33 |
- geneCorrMatrMeans = list(); |
|
34 |
- |
|
35 |
- #The mean of all the correlation matrices |
|
36 |
- cbar = 0; |
|
37 |
- |
|
38 |
- #A list to return the means and the matrices |
|
39 |
- results = list(); |
|
40 |
- |
|
41 |
- #Scan in the significant genes from each column of Asd |
|
42 |
- #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
43 |
- for(i in 1:numCols) |
|
44 |
- { |
|
45 |
- sigCount = 0; |
|
46 |
- for(j in 1:numGenes) |
|
47 |
- { |
|
48 |
- if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
49 |
- { |
|
50 |
- sigCount = sigCount + 1; |
|
51 |
- sigGeneNums[sigCount, i] = j; |
|
52 |
- } |
|
53 |
- } |
|
54 |
- |
|
55 |
- if(sigCount == 0) |
|
56 |
- { |
|
57 |
- sigGeneNums[1, i] = 0; |
|
58 |
- } |
|
59 |
- |
|
60 |
- #Save the number of sigGenes |
|
61 |
- significantGeneNums[i] = sigCount; |
|
62 |
- } |
|
63 |
- |
|
64 |
- #If a pattern has no significant genes this is clearly an error so return such |
|
65 |
- if(any(significantGeneNums == 0)) |
|
66 |
- { |
|
67 |
- zeroSigCols = which(significantGeneNums == 0); |
|
68 |
- warning("Warning: No Significant Genes in Pattern(s): "); |
|
69 |
- |
|
70 |
- for(z in 1:length(zeroSigCols)) |
|
71 |
- { |
|
72 |
- message(zeroSigCols[z]); |
|
73 |
- } |
|
74 |
- } |
|
75 |
- |
|
76 |
- |
|
77 |
- #Now that we have the significant genes want to grab these from our original D matrix |
|
78 |
- #and find the sigGene x sigGene correlation matrix and find its mean |
|
79 |
- |
|
80 |
- for(j in 1:numCols) |
|
81 |
- { |
|
82 |
- #Grab the number of significant genes from the interested column |
|
83 |
- sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
84 |
- |
|
85 |
- if(sigCount != 0) |
|
86 |
- { |
|
87 |
- |
|
88 |
- #loop through the number of significant genes and pull out the rows of D that represent these genes. |
|
89 |
- #Then find the correlation between them with the built in R corr function |
|
90 |
- tempSubsetD = matrix(nrow = sigCount, ncol = numSamp); |
|
91 |
- for(k in 1:sigCount) |
|
92 |
- { |
|
93 |
- #Subset D based on significant Genes |
|
94 |
- #need to transpose as it reads this in as column vector otherwise |
|
95 |
- tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]); |
|
96 |
- } |
|
97 |
- |
|
98 |
- #Find the correlation between these genes in D |
|
99 |
- #Need to transpose as it calculates correlations between the columns |
|
100 |
- tempGeneCorrMatrix = cor(t(tempSubsetD)); |
|
101 |
- |
|
102 |
- #Find the mean of this matrix |
|
103 |
- tempGeneCorrMatrMean = mean(tempGeneCorrMatrix); |
|
104 |
- |
|
105 |
- } |
|
106 |
- else |
|
107 |
- { |
|
108 |
- tempGeneCorrMatrix = 0; |
|
109 |
- tempGeneCorrMatrMean = 0; |
|
110 |
- } |
|
111 |
- |
|
112 |
- #Save these in the overall list |
|
113 |
- geneCorrMatrices[[j]] = tempGeneCorrMatrix; |
|
114 |
- geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean; |
|
115 |
- |
|
116 |
- } |
|
117 |
- |
|
118 |
- #Return as an overall list of lists |
|
119 |
- # We return Corr Matrices themselves, their means, and the means of the means (cbar) |
|
120 |
- results[[1]] = geneCorrMatrices; |
|
121 |
- results[[2]] = geneCorrMatrMeans; |
|
122 |
- |
|
123 |
- #Return as an overall list of lists |
|
124 |
- for(i in 1:numCols) |
|
125 |
- { |
|
126 |
- cbar = cbar + results[[2]][[i]]; |
|
127 |
- } |
|
128 |
- |
|
129 |
- cbar = cbar/numCols; |
|
130 |
- results[[3]] = cbar; |
|
131 |
- |
|
132 |
- names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue"); |
|
133 |
- |
|
134 |
- return(results); |
|
135 |
-} |
136 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,69 +0,0 @@ |
1 |
- |
|
2 |
-# gapsMapRun: function to call C++ cogaps code |
|
3 |
-# History: v1.0 CK with MFO edits, August 2014 |
|
4 |
- |
|
5 |
-# Inputs: D - data matrix |
|
6 |
-# S - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
7 |
-# FP - fixed A columns or P rows |
|
8 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
9 |
-# simulation_id - name to attach to atoms files if created |
|
10 |
-# nEquil - number of iterations for burn-in |
|
11 |
-# nSample - number of iterations for sampling |
|
12 |
-# nOutR - how often to print status into R by iterations |
|
13 |
-# output_atomic - whether to write atom files (large) |
|
14 |
-# alphaA, alphaP - sparsity parameters for A and P domains |
|
15 |
-# max_gibbmass_paraA(P) - limit truncated normal to max size |
|
16 |
-# nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms |
|
17 |
- |
|
18 |
- |
|
19 |
-# Output: list with A and P matrix estimates, chi-squared and atom |
|
20 |
-# numbers of sample by iteration, and chi-squared of mean |
|
21 |
- |
|
22 |
-#'\code{gapsMapRun} calls the C++ MCMC code and performs Bayesian |
|
23 |
-#'matrix factorization returning the two matrices that reconstruct |
|
24 |
-#'the data matrix; as opposed to gapsRun, this method takes an |
|
25 |
-#'additional input specifying set patterns in the P matrix |
|
26 |
-#' |
|
27 |
-#'@param D data matrix |
|
28 |
-#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
29 |
-#'@param FP data.frame with rows giving fixed patterns for P |
|
30 |
-#'@param ABins a matrix of same size as A which gives relative |
|
31 |
-#' probability of that element being non-zero |
|
32 |
-#'@param PBins a matrix of same size as P which gives relative |
|
33 |
-#' probability of that element being non-zero |
|
34 |
-#'@param nFactor number of patterns (basis vectors, metagenes), which must be |
|
35 |
-#'greater than or equal to the number of rows of FP |
|
36 |
-#'@param simulation_id name to attach to atoms files if created |
|
37 |
-#'@param nEquil number of iterations for burn-in |
|
38 |
-#'@param nSample number of iterations for sampling |
|
39 |
-#'@param nOutR how often to print status into R by iterations |
|
40 |
-#'@param output_atomic whether to write atom files (large) |
|
41 |
-#'@param fixedMatrix character indicating whether A or P matrix |
|
42 |
-#' has fixed columns or rows respectively |
|
43 |
-#'@param fixedBinProbs Boolean for using relative probabilities |
|
44 |
-#' given in Abins and Pbins |
|
45 |
-#'@param fixedDomain character to indicate whether A or P is |
|
46 |
-#' domain for relative probabilities |
|
47 |
-#'@param sampleSnapshots Boolean to indicate whether to capture |
|
48 |
-#' individual samples from Markov chain during sampling |
|
49 |
-#'@param numSnapshots the number of individual samples to capture |
|
50 |
-#'@param alphaA sparsity parameter for A domain |
|
51 |
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms |
|
52 |
-#'@param max_gibbmass_paraA limit truncated normal to max size |
|
53 |
-#'@param alphaP sparsity parameter for P domain |
|
54 |
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms |
|
55 |
-#'@param max_gibbmass_paraP limit truncated normal to max size |
|
56 |
-#'@param seed Set seed for reproducibility. Positive values provide initial seed, negative values just use the time. |
|
57 |
-#'@param messages Display progress messages |
|
58 |
-#'@export |
|
59 |
- |
|
60 |
-gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 7, simulation_id = "simulation", |
|
61 |
- nEquil = 1000, nSample = 1000, nOutR = 1000, output_atomic = FALSE, fixedMatrix = "P", |
|
62 |
- fixedBinProbs = FALSE, fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01, |
|
63 |
- nMaxA = 100000, max_gibbmass_paraA = 100.0, alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0, |
|
64 |
- seed=-1, messages=TRUE) |
|
65 |
-{ |
|
66 |
- gapsRun(D, S, ABins, PBins, nFactor, simulation_id, nEquil, nSample, nOutR, output_atomic, fixedBinProbs, |
|
67 |
- fixedDomain, sampleSnapshots, numSnapshots, alphaA, nMaxA, max_gibbmass_paraA, alphaP, nMaxP, |
|
68 |
- max_gibbmass_paraP, seed, messages, FALSE, FP, 'P') |
|
69 |
-} |
70 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,202 +0,0 @@ |
1 |
- |
|
2 |
-# gapsRun: function to call C++ cogaps code |
|
3 |
-# History: v1.0 CK with MFO edits, August 2014 |
|
4 |
- |
|
5 |
-# Inputs: D - data matrix |
|
6 |
-# S - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
7 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
8 |
-# simulation_id - name to attach to atoms files if created |
|
9 |
-# nEquil - number of iterations for burn-in |
|
10 |
-# nSample - number of iterations for sampling |
|
11 |
-# nOutR - how often to print status into R by iterations |
|
12 |
-# output_atomic - whether to write atom files (large) |
|
13 |
-# alphaA, alphaP - sparsity parameters for A and P domains |
|
14 |
-# max_gibbmass_paraA(P) - limit truncated normal to max size |
|
15 |
-# nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms |
|
16 |
- |
|
17 |
- |
|
18 |
-# Output: list with A and P matrix estimates, chi-squared and atom |
|
19 |
-# numbers of sample by iteration, and chi-squared of mean |
|
20 |
-#'\code{gapsRun} calls the C++ MCMC code and performs Bayesian |
|
21 |
-#'matrix factorization returning the two matrices that reconstruct |
|
22 |
-#'the data matrix |
|
23 |
-#' |
|
24 |
-#'@param D data matrix |
|
25 |
-#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
26 |
-#'@param ABins a matrix of same size as A which gives relative |
|
27 |
-#' probability of that element being non-zero |
|
28 |
-#'@param PBins a matrix of same size as P which gives relative |
|
29 |
-#' probability of that element being non-zero |
|
30 |
-#'@param nFactor number of patterns (basis vectors, metagenes), which must be |
|
31 |
-#'greater than or equal to the number of rows of FP |
|
32 |
-#'@param simulation_id name to attach to atoms files if created |
|
33 |
-#'@param nEquil number of iterations for burn-in |
|
34 |
-#'@param nSample number of iterations for sampling |
|
35 |
-#'@param nOutR how often to print status into R by iterations |
|
36 |
-#'@param output_atomic whether to write atom files (large) |
|
37 |
-#'@param fixedBinProbs Boolean for using relative probabilities |
|
38 |
-#' given in Abins and Pbins |
|
39 |
-#'@param fixedDomain character to indicate whether A or P is |
|
40 |
-#' domain for relative probabilities |
|
41 |
-#'@param numSnapshots the number of individual samples to capture |
|
42 |
-#'@param alphaA sparsity parameter for A domain |
|
43 |
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms |
|
44 |
-#'@param max_gibbmass_paraA limit truncated normal to max size |
|
45 |
-#'@param alphaP sparsity parameter for P domain |
|
46 |
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms |
|
47 |
-#'@param max_gibbmass_paraP limit truncated normal to max size |
|
48 |
-#'@param seed Set seed for reproducibility. Positive values provide initial seed, negative values just use the time. |
|
49 |
-#'@param messages Display progress messages |
|
50 |
-#'@param singleCellRNASeq T/F indicating if the data is single cell RNA-seq data |
|
51 |
-#'@param fixedPatterns matrix of fixed values in either A or P matrix |
|
52 |
-#'@param whichMatrixFixed character to indicate whether A or P matrix |
|
53 |
-#' contains the fixed patterns |
|
54 |
-#'@param checkpoint_interval time (in seconds) between cogaps checkpoints |
|
55 |
-#'@export |
|
56 |
- |
|
57 |
-#--CHANGES 1/20/15-- |
|
58 |
-#Added FixedPatt frame to C++ version to match Ondrej's code for updating bin sizes based on an input matrix |
|
59 |
-gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(), |
|
60 |
- nFactor = 7, simulation_id = "simulation", |
|
61 |
- nEquil = 1000, nSample = 1000, nOutR = 1000, |
|
62 |
- output_atomic = FALSE, fixedBinProbs = FALSE, |
|
63 |
- fixedDomain = "N", numSnapshots = 0, alphaA = 0.01, |
|
64 |
- nMaxA = 100000, max_gibbmass_paraA = 100.0, |
|
65 |
- alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0, |
|
66 |
- seed=-1, messages=TRUE, singleCellRNASeq=FALSE, |
|
67 |
- fixedPatterns = matrix(0), whichMatrixFixed = 'N', |
|
68 |
- checkpoint_interval = 0) |
|
69 |
-{ |
|
70 |
- # Floor the parameters that are integers to prevent allowing doubles. |
|
71 |
- nFactor <- floor(nFactor) |
|
72 |
- nEquil <- floor(nEquil) |
|
73 |
- nSample <- floor(nSample) |
|
74 |
- nOutR <- floor(nOutR) |
|
75 |
- numSnapshots <- floor(numSnapshots) |
|
76 |
- nMaxA <- floor(nMaxA) |
|
77 |
- nMaxP <- floor(nMaxP) |
|
78 |
- |
|
79 |
- # check the fixed patterns |
|
80 |
- if ((whichMatrixFixed == 'A' & nrow(fixedPatterns) != nrow(D)) |
|
81 |
- | (whichMatrixFixed == 'P' & nrow(fixedPatterns) > nFactor)) |
|
82 |
- { |
|
83 |
- stop('invalid number of rows in fixed pattern matrix') |
|
84 |
- } |
|
85 |
- if ((whichMatrixFixed == 'A' & ncol(fixedPatterns) > nFactor) |
|
86 |
- | (whichMatrixFixed == 'P' & ncol(fixedPatterns) != ncol(D))) |
|
87 |
- { |
|
88 |
- stop('invalid number of cols in fixed pattern matrix') |
|
89 |
- } |
|
90 |
- |
|
91 |
- geneNames <- rownames(D); |
|
92 |
- sampleNames <- colnames(D); |
|
93 |
- |
|
94 |
- # label patterns as Patt N |
|
95 |
- patternNames <- c("0"); |
|
96 |
- for(i in 1:nFactor) |
|
97 |
- { |
|
98 |
- patternNames[i] <- paste('Patt', i); |
|
99 |
- } |
|
100 |
- |
|
101 |
- # call to C++ Rcpp code |
|
102 |
- cogapResult <- cogaps(as.matrix(D), as.matrix(S), nFactor, alphaA, alphaP, |
|
103 |
- nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA, max_gibbmass_paraP, |
|
104 |
- fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, |
|
105 |
- nOutR, numSnapshots, checkpoint_interval) |
|
106 |
- |
|
107 |
- # convert returned files to matrices to simplify visualization and processing |
|
108 |
- cogapResult$Amean <- as.matrix(cogapResult$Amean); |
|
109 |
- cogapResult$Asd <- as.matrix(cogapResult$Asd); |
|
110 |
- cogapResult$Pmean <- as.matrix(cogapResult$Pmean); |
|
111 |
- cogapResult$Psd <- as.matrix(cogapResult$Psd); |
|
112 |
- |
|
113 |
- ## label matrices |
|
114 |
- colnames(cogapResult$Amean) <- patternNames; |
|
115 |
- rownames(cogapResult$Amean) <- geneNames; |
|
116 |
- colnames(cogapResult$Asd) <- patternNames; |
|
117 |
- rownames(cogapResult$Asd) <- geneNames; |
|
118 |
- colnames(cogapResult$Pmean) <- sampleNames; |
|
119 |
- rownames(cogapResult$Pmean) <- patternNames; |
|
120 |