... | ... |
@@ -1,16 +1,18 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 | 3 |
export(CoGAPS) |
4 |
+export(CoGapsFromCheckpoint) |
|
4 | 5 |
export(GWCoGAPS) |
5 | 6 |
export(binaryA) |
6 | 7 |
export(calcCoGAPSStat) |
7 | 8 |
export(calcGeneGSStat) |
8 | 9 |
export(calcZ) |
9 | 10 |
export(createGWCoGAPSSets) |
11 |
+export(displayBuildReport) |
|
10 | 12 |
export(gapsMapRun) |
11 | 13 |
export(gapsRun) |
12 |
-export(gapsRunFromCheckpoint) |
|
13 | 14 |
export(generateSeeds) |
15 |
+export(getParam) |
|
14 | 16 |
export(patternMarkers) |
15 | 17 |
export(patternMatch4Parallel) |
16 | 18 |
export(patternMatcher) |
... | ... |
@@ -25,6 +27,8 @@ export(reOrderBySet) |
25 | 27 |
export(reconstructGene) |
26 | 28 |
export(reorderByPatternMatch) |
27 | 29 |
export(residuals) |
30 |
+export(setParam) |
|
31 |
+exportClasses(GapsParams) |
|
28 | 32 |
import(doParallel) |
29 | 33 |
import(foreach) |
30 | 34 |
import(ggplot2) |
... | ... |
@@ -52,7 +56,12 @@ importFrom(graphics,points) |
52 | 56 |
importFrom(graphics,screen) |
53 | 57 |
importFrom(graphics,split.screen) |
54 | 58 |
importFrom(graphics,title) |
59 |
+importFrom(methods,"slot<-") |
|
60 |
+importFrom(methods,callNextMethod) |
|
55 | 61 |
importFrom(methods,is) |
62 |
+importFrom(methods,new) |
|
63 |
+importFrom(methods,slot) |
|
64 |
+importFrom(methods,validObject) |
|
56 | 65 |
importFrom(reshape2,melt) |
57 | 66 |
importFrom(stats,D) |
58 | 67 |
importFrom(stats,as.dist) |
... | ... |
@@ -5,39 +5,43 @@ |
5 | 5 |
#'the data matrix |
6 | 6 |
#' @param D data matrix |
7 | 7 |
#' @param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
8 |
-#' @param params GapsParams object |
|
9 | 8 |
#' @return list with A and P matrix estimates |
9 |
+#' @importFrom methods new |
|
10 | 10 |
#' @export |
11 |
-CoGaps <- function(D, S, params = new('GapsParams', 7, 1000, 1000), ...) |
|
11 |
+CoGAPS <- function(D, S, ...) |
|
12 | 12 |
{ |
13 | 13 |
# process v2 style arguments for backwards compatibility |
14 |
- params <- oldParams(params, list(...)) |
|
14 |
+ extraArgs <- list(...) |
|
15 |
+ params <- oldParams(new('GapsParams', 7, 1000, 1000), extraArgs) |
|
15 | 16 |
|
16 | 17 |
# check data |
17 |
- checkParamsWithData(params, nrow(D), ncol(D), nrow(S), ncol(S)) |
|
18 |
- if (any(D) < 0 | any(S) < 0) # too slow for large matrices ? |
|
18 |
+ if (missing(D) & length(extraArgs$data)) |
|
19 |
+ D <- extraArgs$data |
|
20 |
+ if (missing(S) & length(extraArgs$unc)) |
|
21 |
+ S <- extraArgs$unc |
|
22 |
+ if (any(D < 0) | any(S < 0)) |
|
19 | 23 |
stop('D and S matrix must be non-negative') |
24 |
+ checkParamsWithData(params, nrow(D), ncol(D), nrow(S), ncol(S)) |
|
20 | 25 |
|
21 |
- # run algorithm |
|
22 |
- result <- cogaps_cpp(as.matrix(D), as.matrix(S), nFactor, alphaA, |
|
23 |
- alphaP, nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA, |
|
24 |
- max_gibbmass_paraP, fixedPatterns, whichMatrixFixed, seed, messages, |
|
25 |
- singleCellRNASeq, nOutR, numSnapshots, checkpoint_interval) |
|
26 |
+ # run algorithm with call to C++ code |
|
27 |
+ result <- cogaps_cpp(as.matrix(D), as.matrix(S), |
|
28 |
+ as.matrix(params$fixedPatterns), params) |
|
26 | 29 |
|
27 | 30 |
# backwards compatible with v2 |
28 |
- if (length(list(...)$GStoGenes)) |
|
31 |
+ if (!is.null(extraArgs$GStoGenes)) |
|
29 | 32 |
{ |
30 |
- warning('the GStoGenes argument is depracted with v3.0,') |
|
31 |
- if (list(...)$plot) |
|
33 |
+ #warning('the GStoGenes argument is deprecated with v3.0') |
|
34 |
+ if (is.null(extraArgs$plot) | extraArgs$plot) |
|
32 | 35 |
plotGAPS(result$Amean, result$Pmean) |
33 |
- GSP <- calcCoGAPSStat(result$Amean, result$Asd, list(...)$GStoGenes, |
|
34 |
- list(...)$nPerm) |
|
36 |
+ if (is.null(extraArgs$nPerm)) |
|
37 |
+ extraArgs$nPerm <- 500 |
|
38 |
+ GSP <- calcCoGAPSStat(result$Amean, result$Asd, extraArgs$GStoGenes, |
|
39 |
+ extraArgs$nPerm) |
|
35 | 40 |
result <- list(meanChi2=result$meanChi2, D=D, Sigma=S, |
36 | 41 |
Amean=result$Amean, Asd=result$Asd, Pmean=result$Pmean, |
37 | 42 |
Psd=result$Psd, GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, |
38 | 43 |
GSActEst=GSP$GSActEst) |
39 | 44 |
} |
40 |
- |
|
41 | 45 |
return(result) |
42 | 46 |
} |
43 | 47 |
|
... | ... |
@@ -52,22 +56,33 @@ CoGaps <- function(D, S, params = new('GapsParams', 7, 1000, 1000), ...) |
52 | 56 |
#' @export |
53 | 57 |
CoGapsFromCheckpoint <- function(D, S, path) |
54 | 58 |
{ |
55 |
- # TODO add checksum to make sure D,S are correct |
|
56 | 59 |
cogapsFromCheckpoint_cpp(D, S, path) |
57 | 60 |
} |
58 | 61 |
|
62 |
+GWCoGapsFromCheckpoint <- function(fname) |
|
63 |
+{ |
|
64 |
+ #TODO |
|
65 |
+} |
|
66 |
+ |
|
67 |
+#' Display Information About Package Compilation |
|
68 |
+#' @export |
|
69 |
+displayBuildReport <- function() |
|
70 |
+{ |
|
71 |
+ displayBuildReport_cpp() |
|
72 |
+} |
|
73 |
+ |
|
59 | 74 |
#' Backwards Compatibility with v2 |
60 | 75 |
#' |
61 | 76 |
#' @param D data matrix |
62 | 77 |
#' @param S uncertainty matrix |
63 | 78 |
#' @param ... v2 style parameters |
64 | 79 |
#' @return list with A and P matrix estimates |
80 |
+#' @importFrom methods new |
|
65 | 81 |
#' @export |
66 | 82 |
gapsRun <- function(D, S, ...) |
67 | 83 |
{ |
68 |
- warning('gapsRun is depracated with v3.0, use CoGAPS') |
|
69 |
- params <- new('GapsParams', 7, 1000, 1000) |
|
70 |
- params <- oldParams(params, list(...)) |
|
84 |
+ #warning('gapsRun is deprecated with v3.0, use CoGAPS') |
|
85 |
+ params <- oldParams(new('GapsParams', 7, 1000, 1000), list(...)) |
|
71 | 86 |
CoGAPS(D, S, params) |
72 | 87 |
} |
73 | 88 |
|
... | ... |
@@ -78,12 +93,12 @@ gapsRun <- function(D, S, ...) |
78 | 93 |
#' @param FP data.frame with rows giving fixed patterns for P |
79 | 94 |
#' @param ... v2 style parameters |
80 | 95 |
#' @return list with A and P matrix estimates |
96 |
+#' @importFrom methods new |
|
81 | 97 |
#' @export |
82 | 98 |
gapsMapRun <- function(D, S, FP, ...) |
83 | 99 |
{ |
84 |
- warning('gapsMapRun is depracated with v3.0, use CoGaps') |
|
85 |
- params <- new('GapsParams', 7, 1000, 1000) |
|
86 |
- params <- oldParams(params, list(...)) |
|
100 |
+ #warning('gapsMapRun is deprecated with v3.0, use CoGaps') |
|
101 |
+ params <- oldParams(new('GapsParams', 7, 1000, 1000), list(...)) |
|
87 | 102 |
CoGAPS(D, S, params) |
88 | 103 |
} |
89 | 104 |
|
... | ... |
@@ -91,14 +106,15 @@ gapsMapRun <- function(D, S, FP, ...) |
91 | 106 |
oldParams <- function(params, args) |
92 | 107 |
{ |
93 | 108 |
# standard params |
94 |
- if (length(args$nFactor)) params@nFactor <- args$nFactor |
|
95 |
- if (length(args$nEquil)) params@nEquil <- args$nEquil |
|
96 |
- if (length(args$nSample)) params@nSample <- args$nSample |
|
97 |
- if (length(args$numSnapshots)) params@nSnapshots <- args$numSnapshots |
|
109 |
+ if (length(args$nFactor)) params@nFactor <- as.integer(args$nFactor) |
|
110 |
+ if (length(args$nEquil)) params@nEquil <- as.integer(args$nEquil) |
|
111 |
+ if (length(args$nSample)) params@nSample <- as.integer(args$nSample) |
|
98 | 112 |
if (length(args$alphaA)) params@alphaA <- args$alphaA |
99 | 113 |
if (length(args$alphaP)) params@alphaP <- args$alphaP |
100 |
- if (length(args$seed)) params@seed <- args$seed |
|
114 |
+ if (length(args$seed)) params@seed <- as.integer(args$seed) |
|
101 | 115 |
if (length(args$messages)) params@messages <- args$messages |
116 |
+ if (length(args$numSnapshots)) |
|
117 |
+ params@nSnapshots <- as.integer(args$numSnapshots) |
|
102 | 118 |
|
103 | 119 |
# gapsMap params |
104 | 120 |
if (length(args$FP)) |
... | ... |
@@ -109,4 +125,4 @@ oldParams <- function(params, args) |
109 | 125 |
|
110 | 126 |
# return v3 style GapsParams |
111 | 127 |
return(params) |
112 |
-} |
|
128 |
+} |
|
113 | 129 |
\ No newline at end of file |
... | ... |
@@ -1,43 +1,42 @@ |
1 |
-#' Genome Wide CoGAPS |
|
1 |
+#' GWCoGAPS |
|
2 | 2 |
#' |
3 |
-#' @details calls the C++ MCMC code and performs Bayesian |
|
4 |
-#' matrix factorization returning the two matrices that reconstruct |
|
5 |
-#' the data matrix for whole genome data; |
|
6 |
-#' @param D data matrix |
|
7 |
-#' @param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
8 |
-#' @param params GapsParams object |
|
9 |
-#' @param nSets number of sets for parallelization |
|
10 |
-#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
|
11 |
-#' @param saveBySetResults logical indicating whether to save by intermediary by set results. Default is FALSE. |
|
12 |
-#' @param fname character string used to label file output. Default is "GWCoGAPS.AP.fixed" |
|
13 |
-#' @param PatternsMatchFN function to use for pattern matching across sets |
|
14 |
-#' @param Cut number of branches at which to cut dendrogram used in patternMatch4Parallel |
|
15 |
-#' @param minNS minimum of individual set contributions a cluster must contain |
|
16 |
-#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun} |
|
17 |
-#' @export |
|
18 |
-#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
|
3 |
+#'\code{GWCoGAPS} calls the C++ MCMC code and performs Bayesian |
|
4 |
+#'matrix factorization returning the two matrices that reconstruct |
|
5 |
+#'the data matrix for whole genome data; |
|
6 |
+#' |
|
7 |
+#'@param D data matrix |
|
8 |
+#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
9 |
+#'@param nFactor number of patterns (basis vectors, metagenes), which must be |
|
10 |
+#'greater than or equal to the number of rows of FP |
|
11 |
+#'@param nSets number of sets for parallelization |
|
12 |
+#'@param nCores number of cores for parallelization. If left to the default NA, nCores = nSets. |
|
13 |
+#'@param saveBySetResults logical indicating whether to save by intermediary by set results. Default is FALSE. |
|
14 |
+#'@param fname character string used to label file output. Default is "GWCoGAPS.AP.fixed" |
|
15 |
+#'@param PatternsMatchFN function to use for pattern matching across sets |
|
16 |
+#'@param Cut number of branches at which to cut dendrogram used in patternMatch4Parallel |
|
17 |
+#'@param minNS minimum of individual set contributions a cluster must contain |
|
18 |
+#'@param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun} |
|
19 |
+#'@export |
|
20 |
+#'@seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}} |
|
19 | 21 |
#' @examples \dontrun{ |
20 | 22 |
#' GWCoGAPS(nCores=NA, D, S, nFactor, nSets,saveBySetResults=TRUE, fname=fname, |
21 | 23 |
#' PatternsMatchFN = patternMatch4Parallel,numSnapshots=numSnapshots,minNS=minNS) |
22 | 24 |
#' } |
23 | 25 |
#' |
24 |
-GWCoGAPS <- function(D, S, params, nSets, nCores=NA, saveBySetResults=FALSE, |
|
26 |
+GWCoGAPS <- function(D, S, nFactor, nSets, nCores=NA, saveBySetResults=FALSE, |
|
25 | 27 |
fname="GWCoGAPS.AP.fixed", PatternsMatchFN = patternMatch4Parallel, Cut=NA, |
26 | 28 |
minNS=NA, ...) |
27 | 29 |
{ |
28 |
- # v2 style parameters compatibility |
|
29 |
- params <- oldParams(params, ...) |
|
30 |
- |
|
31 | 30 |
# establish the number of cores that you are able to use |
32 | 31 |
if(is.na(nCores)) |
33 |
- nCores <- nSets |
|
32 |
+ nCores<-nSets |
|
34 | 33 |
registerDoParallel(cores=nCores) |
35 | 34 |
|
36 | 35 |
# break the data into sets |
37 | 36 |
genesInSets <- createGWCoGAPSSets(data=D, nSets=nSets, keep=FALSE) |
38 | 37 |
|
39 | 38 |
# set gene min to 0 |
40 |
- D <- sweep(D, 1, apply(D,1,function(x) pmin(0,min(x)))) |
|
39 |
+ D <- sweep(D, 1, apply(D,1,function(x){pmin(0,min(x))})) |
|
41 | 40 |
|
42 | 41 |
#generate seeds for parallelization |
43 | 42 |
nut <- generateSeeds(chains=nSets, seed=-1) |
... | ... |
@@ -47,15 +46,14 @@ minNS=NA, ...) |
47 | 46 |
{ |
48 | 47 |
D <- as.matrix(D[genesInSets[[i]],]) |
49 | 48 |
S <- as.matrix(S[genesInSets[[i]],]) |
50 |
- params <- setParam(params, 'seed', nut[i]) |
|
51 |
- CoGAPS(D=D, S=S, params) |
|
49 |
+ gapsRun(D=D, S=S, nFactor=nFactor,seed=nut[i],...) |
|
52 | 50 |
} |
53 | 51 |
|
54 |
- BySet <- reOrderBySet(AP=AP, nFactor=nFactor, nSets=nSets) |
|
52 |
+ BySet <- reOrderBySet(AP=AP,nFactor=nFactor,nSets=nSets) |
|
55 | 53 |
|
56 | 54 |
#run postpattern match function |
57 | 55 |
if (is.na(Cut)) |
58 |
- Cut <- nFactor |
|
56 |
+ Cut<-nFactor |
|
59 | 57 |
matchedPs <- patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets, |
60 | 58 |
cnt=Cut, minNS=minNS, bySet=TRUE) |
61 | 59 |
|
... | ... |
@@ -63,29 +61,27 @@ minNS=NA, ...) |
63 | 61 |
class(AP) <- append(class(AP),"CoGAPS") |
64 | 62 |
if (saveBySetResults==TRUE) |
65 | 63 |
{ |
66 |
- save(AP,BySet,matchedPs,file=sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets)) |
|
64 |
+ save(AP, BySet, matchedPs, file=sprintf('APbySet.%s.nP%d.set%d.Rda', |
|
65 |
+ fname, nFactor, nSets)) |
|
67 | 66 |
message(sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets)) |
68 | 67 |
} |
69 | 68 |
|
70 | 69 |
PbySet <- matchedPs[["PBySet"]] |
71 | 70 |
matchedPs <- matchedPs[[1]] |
72 | 71 |
|
73 |
- # generate seeds for parallelization |
|
72 |
+ #generate seeds for parallelization |
|
74 | 73 |
nut <- generateSeeds(chains=nSets, seed=-1) |
75 | 74 |
|
76 |
- # final number of factors |
|
75 |
+ #final number of factors |
|
77 | 76 |
nFactorFinal <- dim(matchedPs)[1] |
78 | 77 |
|
79 | 78 |
# run fixed CoGAPS |
80 |
- params <- setParam('fixedMatrix', as.matrix(matchedPs)) |
|
81 |
- params <- setParam('whichMatrixFixed', 'P') |
|
82 |
- params <- setParam('nFactor', nFactorFinal) |
|
83 | 79 |
Fixed <- foreach(i=1:nSets) %dopar% |
84 | 80 |
{ |
85 |
- D <- as.matrix(D[genesInSets[[i]],]) |
|
86 |
- S <- as.matrix(S[genesInSets[[i]],]) |
|
87 |
- params <- setParam(params, 'seed', nut[i]) |
|
88 |
- CoGAPS(D=D, S=S, params) |
|
81 |
+ D <- as.matrix(D[genesInSets[[i]],]) |
|
82 |
+ S <- as.matrix(S[genesInSets[[i]],]) |
|
83 |
+ AP <- gapsMapRun(D, S, FP=matchedPs, nFactor=nFactorFinal, |
|
84 |
+ fixedMatrix = "P",seed=nut[i],...) |
|
89 | 85 |
} |
90 | 86 |
|
91 | 87 |
#extract A and Asds |
... | ... |
@@ -95,120 +91,6 @@ minNS=NA, ...) |
95 | 91 |
AP.fixed <- list("Amean"=As4fixPs$A, "Asd"=As4fixPs$Asd, "Pmean"=matchedPs, |
96 | 92 |
"PbySet"=PbySet) |
97 | 93 |
class(AP.fixed) <- append(class(AP.fixed),"CoGAPS") |
98 |
- save(AP.fixed, file=paste(fname, ".Rda", sep="")) |
|
99 |
- message(paste(fname, ".Rda", sep="")) |
|
100 |
-} |
|
101 |
- |
|
102 |
-#' Create Gene Sets for GWCoGAPS |
|
103 |
-#' |
|
104 |
-#'\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing; |
|
105 |
-#' |
|
106 |
-#'@param data data matrix with unique rownames |
|
107 |
-#'@param nSets number of sets for parallelization |
|
108 |
-#'@param outRDA name of output file |
|
109 |
-#'@param keep logical indicating whether or not to save gene set list. Default is TRUE. |
|
110 |
-#'@export |
|
111 |
-#'@return list with randomly generated sets of genes from whole genome data |
|
112 |
-#'@examples \dontrun{ |
|
113 |
-#'createGWCoGAPSSet(D,nSets=nSets) |
|
114 |
-#'} |
|
115 |
-createGWCoGAPSSets <- function(data, nSets, outRDA="GenesInCoGAPSSets.Rda", |
|
116 |
-keep=TRUE) |
|
117 |
-{ |
|
118 |
- genes <- rownames(data) |
|
119 |
- setSize <- floor(length(genes) / nSets) |
|
120 |
- genesInSets <- list() |
|
121 |
- for (set in 1:nSets) |
|
122 |
- { |
|
123 |
- if (set != nSets) |
|
124 |
- genesInSets[[set]] <- sample(genes,setSize) |
|
125 |
- if (set == nSets) |
|
126 |
- genesInSets[[set]] <- genes |
|
127 |
- genes <- genes[!genes %in% genesInSets[[set]]] |
|
128 |
- } |
|
129 |
- |
|
130 |
- if (!identical(sort(unlist(genesInSets)),sort(rownames(data)))) |
|
131 |
- warning("Gene identifiers not unique!") |
|
132 |
- if (keep == TRUE) |
|
133 |
- save(list=c('genesInSets'), file=outRDA) |
|
134 |
- return(genesInSets) |
|
135 |
-} |
|
136 |
- |
|
137 |
-#' Generate Seeds for Multiple Concurrent Runs |
|
138 |
-#' |
|
139 |
-#' @param chains number of seeds to generate (number of chains to run) |
|
140 |
-#' @param seed positive values are kept, negative values will be overwritten |
|
141 |
-#' by a seed generated from the current time |
|
142 |
-#' @return vector of randomly generated seeds |
|
143 |
-#' @export |
|
144 |
-#' @examples |
|
145 |
-#' generateSeeds(chains=2, seed=-1) |
|
146 |
-generateSeeds <- function(chains=2, seed=-1) |
|
147 |
-{ |
|
148 |
- if (chains < 2 | (as.integer(chains) != chains)) |
|
149 |
- stop("chains must be >= 2 and an integer") |
|
150 |
- |
|
151 |
- if (seed < 0) |
|
152 |
- { |
|
153 |
- secs <- as.numeric(difftime(Sys.time(), paste(Sys.Date(), "00:00"), |
|
154 |
- units="secs")) |
|
155 |
- return(seq_len(chains) * round(secs)) |
|
156 |
- } |
|
157 |
- else |
|
158 |
- { |
|
159 |
- return(seq_len(chains) * seed) |
|
160 |
- } |
|
161 |
-} |
|
162 |
- |
|
163 |
-#' reOrderBySet |
|
164 |
-#' |
|
165 |
-#' @description <restructures output of gapsRun into a list containing each sets solution for Amean, Pmean, and Asd> |
|
166 |
-#' @param AP output of gapsRun in parallel |
|
167 |
-#' @param nFactor number of patterns |
|
168 |
-#' @param nSets number of sets |
|
169 |
-#' |
|
170 |
-#' @return a list containing the \code{nSets} sets solution for Amean under "A", Pmean under "P", and Asd under "Asd" |
|
171 |
-#' @export |
|
172 |
-#' |
|
173 |
-#' @examples \dontrun{ |
|
174 |
-#' reOrderBySet(AP,nFactor,nSets) |
|
175 |
-#' } |
|
176 |
-#' |
|
177 |
-reOrderBySet<-function(AP, nFactor, nSets) |
|
178 |
-{ |
|
179 |
- P <- do.call(rbind,lapply(AP, function(x) x$Pmean)) |
|
180 |
- rownames(P) <- paste(rep(1:nSets,each=nFactor),rep(1:nFactor,nSets),sep=".") |
|
181 |
- A <- lapply(AP, function(x) x$Amean) |
|
182 |
- Asd <- lapply(AP, function(x) x$Asd) |
|
183 |
- names(A) <- names(Asd) <- paste(rep("Set", nSets), rep(1:nSets), sep="") |
|
184 |
- return(list("A"=A, "Asd"=Asd, "P"=P)) |
|
185 |
-} |
|
186 |
- |
|
187 |
-#' postFixed4Parallel |
|
188 |
-#' |
|
189 |
-#' @param AP.fixed output of parallel gapsMapRun calls with same FP |
|
190 |
-#' @param setPs data.frame with rows giving fixed patterns for P used as input for gapsMapRun |
|
191 |
-#' |
|
192 |
-#' @return list of two data.frames containing the A matrix estimates or their corresponding standard deviations |
|
193 |
-#' from output of parallel gapsMapRun |
|
194 |
-#' @export |
|
195 |
-postFixed4Parallel <- function(AP.fixed=NA, setPs=NA) |
|
196 |
-{ |
|
197 |
- ASummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Amean)) |
|
198 |
- Asd <- do.call(rbind,lapply(AP.fixed, function(x) x$Asd)) |
|
199 |
- #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean)) |
|
200 |
- PSummary <- AP.fixed[[1]]$Pmean |
|
201 |
- |
|
202 |
- Pmax <- apply(PSummary,1,max) |
|
203 |
- Pneu <- sweep(PSummary,1,Pmax,FUN="/") |
|
204 |
- Aneu <- sweep(ASummary,2,Pmax,FUN="*") |
|
205 |
- |
|
206 |
- X <- apply(Pneu,1,range) |
|
207 |
- Y <- apply(setPs,1,range) |
|
208 |
- colnames(X) <- colnames(Y) |
|
209 |
- if (!all.equal(X, Y, tolerance=1e-5)) |
|
210 |
- warning("Patterns do not match fixed values.") |
|
211 |
- |
|
212 |
- As4fixPs <- list("A"=Aneu, "Asd"=Asd) |
|
213 |
- return(As4fixPs) |
|
214 |
-} |
|
94 |
+ save(AP.fixed, file=paste(fname,".Rda",sep="")) |
|
95 |
+ message(paste(fname,".Rda",sep="")) |
|
96 |
+} |
|
215 | 97 |
\ No newline at end of file |
... | ... |
@@ -1,12 +1,16 @@ |
1 | 1 |
# Generated by using Rcpp::compileAttributes() -> do not edit by hand |
2 | 2 |
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 |
3 | 3 |
|
4 |
-cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval) { |
|
5 |
- .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval) |
|
4 |
+cogaps_cpp <- function(DMatrix, SMatrix, fixedPatterns, params) { |
|
5 |
+ .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', DMatrix, SMatrix, fixedPatterns, params) |
|
6 | 6 |
} |
7 | 7 |
|
8 |
-cogapsFromCheckpoint <- function(fileName) { |
|
9 |
- .Call('_CoGAPS_cogapsFromCheckpoint', PACKAGE = 'CoGAPS', fileName) |
|
8 |
+cogapsFromCheckpoint_cpp <- function(D, S, fileName) { |
|
9 |
+ .Call('_CoGAPS_cogapsFromCheckpoint_cpp', PACKAGE = 'CoGAPS', D, S, fileName) |
|
10 |
+} |
|
11 |
+ |
|
12 |
+displayBuildReport_cpp <- function() { |
|
13 |
+ invisible(.Call('_CoGAPS_displayBuildReport_cpp', PACKAGE = 'CoGAPS')) |
|
10 | 14 |
} |
11 | 15 |
|
12 | 16 |
run_catch_unit_tests <- function() { |
13 | 17 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,24 @@ |
1 |
+#' Binary Heatmap for Standardized A Matrix |
|
2 |
+#' |
|
3 |
+#' @details creates a binarized heatmap of the A matrix |
|
4 |
+#' in which the value is 1 if the value in Amean is greater than |
|
5 |
+#' threshold * Asd and 0 otherwise |
|
6 |
+#' @param Amean the mean estimate for the A matrix |
|
7 |
+#' @param Asd the standard deviations on Amean |
|
8 |
+#' @param threshold the number of standard deviations above zero |
|
9 |
+#' that an element of Amean must be to get a value of 1 |
|
10 |
+#' @export |
|
11 |
+binaryA <-function(Amean, Asd, threshold=3) |
|
12 |
+{ |
|
13 |
+ BinA_Map <- ifelse(Amean/Asd > threshold,1,0) |
|
14 |
+ colnames(BinA_Map) <- colnames(BinA_Map, do.NULL=FALSE, prefix = "Pattern ") |
|
15 |
+ rownames(BinA_Map) <- rep(" ",nrow(BinA_Map)) |
|
16 |
+ |
|
17 |
+ heatmap.2(BinA_Map, Rowv = FALSE, Colv = FALSE,dendrogram="none", |
|
18 |
+ scale="none",col = brewer.pal(3,"Blues"), trace="none", |
|
19 |
+ density.info="none",cexCol=1.3,srtCol=45, |
|
20 |
+ lmat=rbind(c(0, 3),c(2,1),c(0,4) ), |
|
21 |
+ lwid=c(1,10),lhei=c(1, 4, 1.2 ), |
|
22 |
+ main="Heatmap of Standardized A Matrix") |
|
23 |
+ mtext(paste("(Threshold = ", threshold, ")")) |
|
24 |
+} |
|
0 | 25 |
\ No newline at end of file |
1 | 26 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,127 @@ |
1 |
+#' Calculate Gene Set Statistics |
|
2 |
+#' |
|
3 |
+#' @details calculates the gene set statistics for each |
|
4 |
+#' column of A using a Z-score from the elements of the A matrix, |
|
5 |
+#' the input gene set, and permutation tests |
|
6 |
+#' @param Amean A matrix mean values |
|
7 |
+#' @param Asd A matrix standard deviations |
|
8 |
+#' @param GStoGenes data.frame or list with gene sets |
|
9 |
+#' @param numPerm number of permutations for null |
|
10 |
+#' @export |
|
11 |
+calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) |
|
12 |
+{ |
|
13 |
+ # test for std dev of zero, possible mostly in simple simulations |
|
14 |
+ if (sum(Asd==0) > 0) |
|
15 |
+ { |
|
16 |
+ #temp <- min(Asd[Asd>0]) |
|
17 |
+ Asd[Asd==0] <- .Machine$double.eps |
|
18 |
+ } |
|
19 |
+ |
|
20 |
+ # calculate Z scores |
|
21 |
+ zMatrix <- calcZ(Amean,Asd) |
|
22 |
+ |
|
23 |
+ if (is(GStoGenes, "GSA.genesets")) |
|
24 |
+ { |
|
25 |
+ names(GStoGenes$genesets) <- GStoGenes$geneset.names |
|
26 |
+ GStoGenes <- GStoGenes$genesets |
|
27 |
+ } |
|
28 |
+ else if (is(GStoGenes, "list")) |
|
29 |
+ { |
|
30 |
+ GStoGenesList <- GStoGenes |
|
31 |
+ } |
|
32 |
+ else if (is(GStoGenes, "data.frame")) |
|
33 |
+ { |
|
34 |
+ GStoGenesList <- list() |
|
35 |
+ for (i in 1:dim(GStoGenes)[2]) |
|
36 |
+ { |
|
37 |
+ GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- |
|
38 |
+ as.character(unique(GStoGenes[,i])) |
|
39 |
+ } |
|
40 |
+ } |
|
41 |
+ else |
|
42 |
+ { |
|
43 |
+ stop(paste("GStoGenes must be a data.frame, GSA.genesets, or list with", |
|
44 |
+ "format specified in the users manual.")) |
|
45 |
+ } |
|
46 |
+ |
|
47 |
+ # get dimensions |
|
48 |
+ numGS <- length(names(GStoGenesList)) |
|
49 |
+ numPatt <- dim(zMatrix)[2] |
|
50 |
+ numG <- dim(zMatrix)[1]+0.9999 |
|
51 |
+ |
|
52 |
+ # initialize matrices |
|
53 |
+ statsUp <- matrix(ncol = numGS, nrow = numPatt) |
|
54 |
+ statsDown <- matrix(ncol = numGS, nrow = numPatt) |
|
55 |
+ actEst <- matrix(ncol = numGS, nrow = numPatt) |
|
56 |
+ results <- list() |
|
57 |
+ zPerm <- matrix(ncol=numPerm,nrow=numPatt) |
|
58 |
+ |
|
59 |
+ # do permutation test |
|
60 |
+ for (gs in 1:numGS) |
|
61 |
+ { |
|
62 |
+ genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
63 |
+ index <- rownames(zMatrix) %in% genes |
|
64 |
+ zValues <- zMatrix[index,1] |
|
65 |
+ numGenes <- length(zValues) |
|
66 |
+ label <- as.character(numGenes) |
|
67 |
+ |
|
68 |
+ if (!any(names(results)==label)) |
|
69 |
+ { |
|
70 |
+ for (p in 1:numPatt) |
|
71 |
+ { |
|
72 |
+ for (j in 1:numPerm) |
|
73 |
+ { |
|
74 |
+ temp <- floor(runif(numGenes,1,numG)) |
|
75 |
+ temp2 <- zMatrix[temp,p] |
|
76 |
+ zPerm[p,j] <- mean(temp2) |
|
77 |
+ } |
|
78 |
+ } |
|
79 |
+ results[[label]] <- zPerm |
|
80 |
+ } |
|
81 |
+ } |
|
82 |
+ |
|
83 |
+ # get p-value |
|
84 |
+ for (p in 1:numPatt) |
|
85 |
+ { |
|
86 |
+ for (gs in 1:numGS) |
|
87 |
+ { |
|
88 |
+ genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
89 |
+ index <- rownames(zMatrix) %in% genes |
|
90 |
+ zValues <- zMatrix[index,p] |
|
91 |
+ zScore <- mean(zValues) |
|
92 |
+ |
|
93 |
+ numGenes <- length(zValues) |
|
94 |
+ label <- as.character(numGenes) |
|
95 |
+ |
|
96 |
+ permzValues <- results[[label]][p,] |
|
97 |
+ ordering <- order(permzValues) |
|
98 |
+ permzValues <- permzValues[ordering] |
|
99 |
+ |
|
100 |
+ statistic <- sum(zScore > permzValues) |
|
101 |
+ statUpReg <- 1 - statistic/length(permzValues) |
|
102 |
+ statsUp[p,gs] <- max(statUpReg, 1/numPerm) |
|
103 |
+ |
|
104 |
+ statistic <- sum(zScore < permzValues) |
|
105 |
+ statDownReg <- 1 - statistic/length(permzValues) |
|
106 |
+ statsDown[p,gs] <- max(statDownReg,1/numPerm) |
|
107 |
+ |
|
108 |
+ activity <- 1 - 2*max(statUpReg, 1/numPerm) |
|
109 |
+ actEst[p,gs] <- activity |
|
110 |
+ } |
|
111 |
+ } |
|
112 |
+ |
|
113 |
+ # format output |
|
114 |
+ colnames(statsUp) <- names(GStoGenesList) |
|
115 |
+ colnames(statsDown) <- names(GStoGenesList) |
|
116 |
+ colnames(actEst) <- names(GStoGenesList) |
|
117 |
+ |
|
118 |
+ rownames(statsUp) <- colnames(zMatrix) |
|
119 |
+ rownames(statsDown) <- colnames(zMatrix) |
|
120 |
+ rownames(actEst) <- colnames(zMatrix) |
|
121 |
+ |
|
122 |
+ results[['GSUpreg']] <- statsUp |
|
123 |
+ results[['GSDownreg']] <- statsDown |
|
124 |
+ results[['GSActEst']] <- actEst |
|
125 |
+ |
|
126 |
+ return(results) |
|
127 |
+} |
|
0 | 128 |
\ No newline at end of file |
1 | 129 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,80 @@ |
1 |
+#' Probability Gene Belongs in Gene Set |
|
2 |
+#' |
|
3 |
+#' @details calculates the probability that a gene |
|
4 |
+#' listed in a gene set behaves like other genes in the set within |
|
5 |
+#' the given data set |
|
6 |
+#' @param Amean A matrix mean values |
|
7 |
+#' @param Asd A matrix standard deviations |
|
8 |
+#' @param GSGenes data.frame or list with gene sets |
|
9 |
+#' @param numPerm number of permutations for null |
|
10 |
+#' @param Pw weight on genes |
|
11 |
+#' @param nullGenes - logical indicating gene adjustment |
|
12 |
+#' @export |
|
13 |
+calcGeneGSStat <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), |
|
14 |
+nullGenes=FALSE) |
|
15 |
+{ |
|
16 |
+ gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), numPerm=numPerm) |
|
17 |
+ gsStat <- gsStat$GSUpreg |
|
18 |
+ gsStat <- -log(gsStat) |
|
19 |
+ |
|
20 |
+ if (!all(is.na(Pw))) |
|
21 |
+ { |
|
22 |
+ if (length(Pw) != length(gsStat)) |
|
23 |
+ { |
|
24 |
+ stop('Invalid weighting') |
|
25 |
+ } |
|
26 |
+ gsStat <- gsStat*Pw |
|
27 |
+ } |
|
28 |
+ |
|
29 |
+ if (nullGenes) |
|
30 |
+ { |
|
31 |
+ ZD <- Amean[setdiff(row.names(Amean), GSGenes),] / |
|
32 |
+ Asd[setdiff(row.names(Amean), GSGenes),] |
|
33 |
+ } |
|
34 |
+ else |
|
35 |
+ { |
|
36 |
+ ZD <- Amean[GSGenes,]/Asd[GSGenes,] |
|
37 |
+ } |
|
38 |
+ outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat)) |
|
39 |
+ outStats <- outStats / apply(ZD,1,sum) |
|
40 |
+ outStats[which(apply(ZD,1,sum) < .Machine$double.eps)] <- 0 |
|
41 |
+ |
|
42 |
+ if (sum(gsStat) < .Machine$double.eps) |
|
43 |
+ { |
|
44 |
+ return(0) |
|
45 |
+ } |
|
46 |
+ return(outStats) |
|
47 |
+} |
|
48 |
+ |
|
49 |
+#' Compute Gene Probability |
|
50 |
+#' |
|
51 |
+#' @param Amean A matrix mean values |
|
52 |
+#' @param Asd A matrix standard deviations |
|
53 |
+#' @param GSGenes data.frame or list with gene sets |
|
54 |
+#' @param Pw weight on genes |
|
55 |
+#' @param numPerm number of permutations for null |
|
56 |
+#' @param PwNull - logical indicating gene adjustment |
|
57 |
+computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)), |
|
58 |
+numPerm=500, PwNull=FALSE) |
|
59 |
+{ |
|
60 |
+ geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw, |
|
61 |
+ GSGenes=GSGenes, numPerm=numPerm) |
|
62 |
+ |
|
63 |
+ if (PwNull) |
|
64 |
+ { |
|
65 |
+ permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, |
|
66 |
+ GSGenes=GSGenes, numPerm=numPerm, Pw=Pw, |
|
67 |
+ nullGenes=TRUE) |
|
68 |
+ } |
|
69 |
+ else |
|
70 |
+ { |
|
71 |
+ permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, |
|
72 |
+ GSGenes=GSGenes, numPerm=numPerm, |
|
73 |
+ nullGenes=TRUE) |
|
74 |
+ } |
|
75 |
+ |
|
76 |
+ finalStats <- sapply(GSGenes, function(x) |
|
77 |
+ length(which(permGSStat > geneGSStat[x])) / length(permGSStat)) |
|
78 |
+ |
|
79 |
+ return(finalStats) |
|
80 |
+} |
0 | 81 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,32 @@ |
1 |
+#' Compute Z-Score Matrix |
|
2 |
+#' |
|
3 |
+#' @details calculates the Z-score for each element based on input mean |
|
4 |
+#' and standard deviation matrices |
|
5 |
+#' @param meanMat matrix of mean values |
|
6 |
+#' @param sdMat matrix of standard deviation values |
|
7 |
+#' @export |
|
8 |
+calcZ <- function(meanMat, sdMat) |
|
9 |
+{ |
|
10 |
+ # find matrix dimensions |
|
11 |
+ nrows <- dim(meanMat)[1] |
|
12 |
+ ncols <- dim(meanMat)[2] |
|
13 |
+ |
|
14 |
+ check <- dim(sdMat)[1] |
|
15 |
+ if (nrows != check) |
|
16 |
+ { |
|
17 |
+ stop("Number of rows in the mean and standard deviation of A do not agree.") |
|
18 |
+ } |
|
19 |
+ |
|
20 |
+ check <- dim(sdMat)[2] |
|
21 |
+ if (ncols != check) |
|
22 |
+ { |
|
23 |
+ stop("Number of columns in the mean and standard deviation of A do not agree.") |
|
24 |
+ } |
|
25 |
+ |
|
26 |
+ # compute the matrix of z scores |
|
27 |
+ zMat <- meanMat/sdMat |
|
28 |
+ rownames(zMat) <- rownames(meanMat) |
|
29 |
+ colnames(zMat) <- colnames(meanMat) |
|
30 |
+ |
|
31 |
+ return(zMat) |
|
32 |
+} |
|
0 | 33 |
\ No newline at end of file |
... | ... |
@@ -23,40 +23,44 @@ |
23 | 23 |
setClass('GapsParams', slots = c( |
24 | 24 |
nFactor = 'integer', |
25 | 25 |
nEquil = 'integer', |
26 |
+ nEquilCool = 'integer', |
|
26 | 27 |
nSample = 'integer', |
27 | 28 |
nOutput = 'integer', |
28 | 29 |
nSnapshots = 'integer', |
29 |
- alphaA = 'numeric', |
|
30 |
- alphaP = 'numeric', |
|
30 |
+ alphaA = 'numeric', |
|
31 |
+ alphaP = 'numeric', |
|
31 | 32 |
maxGibbmassA = 'numeric', |
32 | 33 |
maxGibbmassP = 'numeric', |
33 | 34 |
seed = 'integer', |
34 | 35 |
messages = 'logical', |
35 |
- singleCelLRNASeq = 'logical', |
|
36 |
+ singleCellRNASeq = 'logical', |
|
36 | 37 |
fixedPatterns = 'matrix', |
37 | 38 |
whichMatrixFixed = 'character', |
38 | 39 |
checkpointInterval = 'integer', |
39 | 40 |
nCores = 'integer' |
40 | 41 |
)) |
41 | 42 |
|
43 |
+#' @importFrom methods callNextMethod |
|
42 | 44 |
setMethod('initialize', 'GapsParams', |
43 | 45 |
function(.Object, nFactor, nEquil, nSample, ...) |
44 | 46 |
{ |
45 |
- .Object@nFactor <- nFactor |
|
46 |
- .Object@nEquil <- nEquil |
|
47 |
- .Object@nSample <- nSample |
|
48 |
- .Object@nOutput <- 1000 |
|
49 |
- .Object@numSnapshots <- 0 |
|
47 |
+ .Object@nFactor <- as.integer(nFactor) |
|
48 |
+ .Object@nEquil <- as.integer(nEquil) |
|
49 |
+ .Object@nEquil <- as.integer(floor(nEquil/10)) |
|
50 |
+ .Object@nSample <- as.integer(nSample) |
|
51 |
+ .Object@nOutput <- as.integer(1000) |
|
52 |
+ .Object@nSnapshots <- as.integer(0) |
|
50 | 53 |
.Object@alphaA <- 0.01 |
51 | 54 |
.Object@alphaP <- 0.01 |
52 | 55 |
.Object@maxGibbmassA <- 100.0 |
53 | 56 |
.Object@maxGibbmassP <- 100.0 |
54 |
- .Object@seed <- -1 |
|
57 |
+ .Object@seed <- as.integer(-1) |
|
55 | 58 |
.Object@messages <- TRUE |
56 |
- .Object@singleCelLRNASeq <- FALSE |
|
59 |
+ .Object@singleCellRNASeq <- FALSE |
|
57 | 60 |
.Object@fixedPatterns <- matrix(0) |
58 | 61 |
.Object@whichMatrixFixed <- 'N' |
59 |
- .Object@checkpointInterval <- 0 |
|
62 |
+ .Object@checkpointInterval <- as.integer(0) |
|
63 |
+ .Object@nCores <- as.integer(1) |
|
60 | 64 |
|
61 | 65 |
.Object <- callNextMethod(.Object, ...) |
62 | 66 |
.Object |
... | ... |
@@ -73,14 +77,24 @@ setValidity('GapsParams', |
73 | 77 |
} |
74 | 78 |
) |
75 | 79 |
|
76 |
-#' @rdname getParam |
|
80 |
+##################### Generics ################### |
|
81 |
+ |
|
82 |
+#' @export |
|
83 |
+setGeneric('getParam', function(object, name) |
|
84 |
+ {standardGeneric('getParam')}) |
|
85 |
+ |
|
86 |
+#' @export |
|
87 |
+setGeneric('setParam', function(object, name, value) |
|
88 |
+ {standardGeneric('setParam')}) |
|
89 |
+ |
|
90 |
+##################### Methods #################### |
|
91 |
+ |
|
77 | 92 |
#' @importFrom methods slot |
78 | 93 |
setMethod("getParam", "GapsParams", function(object, name) |
79 | 94 |
{ |
80 | 95 |
slot(object, name) |
81 | 96 |
}) |
82 | 97 |
|
83 |
-#' @rdname setParam |
|
84 | 98 |
#' @importFrom methods slot<- validObject |
85 | 99 |
setMethod("setParam", "GapsParams", function(object, name, value) |
86 | 100 |
{ |
... | ... |
@@ -89,7 +103,6 @@ setMethod("setParam", "GapsParams", function(object, name, value) |
89 | 103 |
return(object) |
90 | 104 |
}) |
91 | 105 |
|
92 |
-#' @importFrom methods slotNames |
|
93 | 106 |
setMethod("show", "GapsParams", function(object) |
94 | 107 |
{ |
95 | 108 |
cat("CoGAPS Parameters Object\n") |
... | ... |
@@ -104,4 +117,4 @@ checkParamsWithData <- function(params, nRD, nCD, nRS, nCS) |
104 | 117 |
stop('invalid fixed pattern length') |
105 | 118 |
if (params@whichMatrixFixed == 'P' & ncol(params@fixedPatterns) != nCD) |
106 | 119 |
stop('invalid fixed pattern length') |
107 |
-}) |
|
120 |
+} |
108 | 121 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,34 @@ |
1 |
+#' Create Gene Sets for GWCoGAPS |
|
2 |
+#' |
|
3 |
+#' @details factors whole genome data into randomly generated sets for indexing |
|
4 |
+#' |
|
5 |
+#' @param data data matrix with unique rownames |
|
6 |
+#' @param nSets number of sets for parallelization |
|
7 |
+#' @param outRDA name of output file |
|
8 |
+#' @param keep logical indicating whether or not to save gene set list. |
|
9 |
+#' @return list with randomly generated sets of genes from whole genome data |
|
10 |
+#' @examples \dontrun{ |
|
11 |
+#'createGWCoGAPSSet(D,nSets=nSets) |
|
12 |
+#'} |
|
13 |
+#' @export |
|
14 |
+createGWCoGAPSSets<-function(data=D, nSets=nSets, |
|
15 |
+outRDA="GenesInCoGAPSSets.Rda", keep=TRUE) |
|
16 |
+{ |
|
17 |
+ genes=rownames(data) |
|
18 |
+ setSize=floor(length(genes)/nSets) |
|
19 |
+ genesInSets <- list() |
|
20 |
+ for (set in 1:nSets) |
|
21 |
+ { |
|
22 |
+ if(set!=nSets) |
|
23 |
+ genesInSets[[set]] <- sample(genes,setSize) |
|
24 |
+ if(set==nSets) |
|
25 |
+ genesInSets[[set]] <- genes |
|
26 |
+ genes=genes[!genes%in%genesInSets[[set]]] |
|
27 |
+ } |
|
28 |
+ if (!identical(sort(unlist(genesInSets)),sort(rownames(data)))) |
|
29 |
+ message("Gene identifiers not unique!") |
|
30 |
+ if (keep==TRUE) |
|
31 |
+ save(list=c('genesInSets'),file=outRDA) |
|
32 |
+ return(genesInSets) |
|
33 |
+} |
|
34 |
+ |
0 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,144 @@ |
1 |
+#Calculates significant genes in each pattern according to certain threshold |
|
2 |
+#Returns the significant gene names as well as well as the means of these matrices and number of genes in each |
|
3 |
+ |
|
4 |
+gapsInterPattern <- function(Amean, Asd, sdThreshold = 3) |
|
5 |
+{ |
|
6 |
+ #number of rows and cols of Asd |
|
7 |
+ numGenes = length(Asd[,1]); |
|
8 |
+ numCols = length(Asd[1,]); |
|
9 |
+ |
|
10 |
+ #Vector holding the number of each significant gene in each column |
|
11 |
+ sigGeneNums = data.frame(); |
|
12 |
+ |
|
13 |
+ #Temp number of sig genes in the col |
|
14 |
+ sigCount = 0; |
|
15 |
+ |
|
16 |
+ #Number to catch the amount of columns without significant genes |
|
17 |
+ numEmptyCols = 0; |
|
18 |
+ |
|
19 |
+ #Keep an array of the significant gene counts |
|
20 |
+ significantGeneNums = c(0); |
|
21 |
+ |
|
22 |
+ #Names of the genes from the data matrix |
|
23 |
+ geneNames = names(Asd[,1]); |
|
24 |
+ |
|
25 |
+ #Names of the genes that are significant from the data matrix |
|
26 |
+ sigGeneNames = "0"; |
|
27 |
+ |
|
28 |
+ #The numerator of our statistic |
|
29 |
+ dimensionStatNumerator = 0; |
|
30 |
+ |
|
31 |
+ #The denominator of our statistic |
|
32 |
+ dimensionStatDenominator = 0; |
|
33 |
+ |
|
34 |
+ #The value of our statistic |
|
35 |
+ dimensionStatistic = 0; |
|
36 |
+ |
|
37 |
+ #A matrix holding the values of our statistics |
|
38 |
+ dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols); |
|
39 |
+ |
|
40 |
+ #The mean of the statistic matrix |
|
41 |
+ sbar = 0; |
|
42 |
+ |
|
43 |
+ #A list to return the significant genes in each col and the statistic matrix |
|
44 |
+ results = list(list()); |
|
45 |
+ |
|
46 |
+ #Scan in the significant genes from each column of Asd |
|
47 |
+ #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
48 |
+ for(i in 1:numCols) |
|
49 |
+ { |
|
50 |
+ sigCount = 0; |
|
51 |
+ for(j in 1:numGenes) |
|
52 |
+ { |
|
53 |
+ if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
54 |
+ { |
|
55 |
+ sigCount = sigCount + 1; |
|
56 |
+ sigGeneNums[sigCount, i] = j; |
|
57 |
+ } |
|
58 |
+ } |
|
59 |
+ |
|
60 |
+ if(sigCount == 0) |
|
61 |
+ { |
|
62 |
+ sigGeneNums[1, i] = 0; |
|
63 |
+ numEmptyCols = numEmptyCols + 1; |
|
64 |
+ } |
|
65 |
+ |
|
66 |
+ #Save the number of sigGenes |
|
67 |
+ significantGeneNums[i] = sigCount; |
|
68 |
+ |
|
69 |
+ #Get the names and store them |
|
70 |
+ if(sigCount != 0) |
|
71 |
+ { |
|
72 |
+ for(k in 1:sigCount) |
|
73 |
+ { |
|
74 |
+ sigGeneNames[k] = geneNames[sigGeneNums[k, i]]; |
|
75 |
+ } |
|
76 |
+ results[[1]][[i]] = sigGeneNames; |
|
77 |
+ sigGeneNames = "0"; |
|
78 |
+ } |
|
79 |
+ else |
|
80 |
+ { |
|
81 |
+ results[[1]][[i]] = "None"; |
|
82 |
+ } |
|
83 |
+ } |
|
84 |
+ |
|
85 |
+ if(any(significantGeneNums == 0)) |
|
86 |
+ { |
|
87 |
+ zeroSigCols = which(significantGeneNums == 0); |
|
88 |
+ print("Warning: No Significant Genes in Pattern(s): "); |
|
89 |
+ |
|
90 |
+ for(z in 1:length(zeroSigCols)) |
|
91 |
+ { |
|
92 |
+ print(zeroSigCols[z]); |
|
93 |
+ } |
|
94 |
+ } |
|
95 |
+ |
|
96 |
+ #Now that we have the significant genes want to see if these genes are significant in other columns |
|
97 |
+ #Fill across the row then down the column |
|
98 |
+ |
|
99 |
+ #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i' |
|
100 |
+ for(i in 1:numCols) |
|
101 |
+ { |
|
102 |
+ for(j in 1:numCols) |
|
103 |
+ { |
|
104 |
+ #Grab the number of significant genes from the interested column |
|
105 |
+ sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
106 |
+ |
|
107 |
+ if(sigCount != 0) |
|
108 |
+ { |
|
109 |
+ dimensionStatDenominator = sigCount; |
|
110 |
+ dimensionStatNumerator = 0; |
|
111 |
+ |
|
112 |
+ #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd. |
|
113 |
+ #Find the significance of these genes, Amean - threshold * Asd |
|
114 |
+ for(k in 1:sigCount) |
|
115 |
+ { |
|
116 |
+ if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0) |
|
117 |
+ { |
|
118 |
+ dimensionStatNumerator = dimensionStatNumerator + 1; |
|
119 |
+ } |
|
120 |
+ } |
|
121 |
+ |
|
122 |
+ dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator; |
|
123 |
+ |
|
124 |
+ dimensionStatisticMatrix[i, j] = dimensionStatistic; |
|
125 |
+ } |
|
126 |
+ else |
|
127 |
+ { |
|
128 |
+ dimensionStatisticMatrix[i, j] = 0; |
|
129 |
+ } |
|
130 |
+ } |
|
131 |
+ } |
|
132 |
+ |
|
133 |
+ #Find mean of the matrices (excluding the diagonal elements) |
|
134 |
+ #we can subtract numCols as the diagonal elements are 1 |
|
135 |
+ sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols))); |
|
136 |
+ |
|
137 |
+ results[[2]] = significantGeneNums; |
|
138 |
+ results[[3]] = t(dimensionStatisticMatrix); |
|
139 |
+ results[[4]] = sbar; |
|
140 |
+ |
|
141 |
+ names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue"); |
|
142 |
+ |
|
143 |
+ return(results); |
|
144 |
+} |
|
0 | 145 |
\ No newline at end of file |
1 | 146 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,135 @@ |
1 |
+#Calculates significant genes in each pattern according to certain threshold |
|
2 |
+#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices |
|
3 |
+ |
|
4 |
+gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3) |
|
5 |
+{ |
|
6 |
+ #number of rows and cols of Asd |
|
7 |
+ numGenes = length(Asd[,1]); |
|
8 |
+ numCols = length(Asd[1,]); |
|
9 |
+ |
|
10 |
+ #number of samples in DMatrix |
|
11 |
+ numSamp = ncol(DMatrix); |
|
12 |
+ |
|
13 |
+ #Vector holding the number of each significant gene in each column |
|
14 |
+ sigGeneNums = data.frame(); |
|
15 |
+ |
|
16 |
+ #Temp number of sig genes in the col |
|
17 |
+ sigCount = 0; |
|
18 |
+ |
|
19 |
+ #Keep an array of the significant gene counts |
|
20 |
+ significantGeneNums = c(0); |
|
21 |
+ |
|
22 |
+ #A matrix to hold the significant genes in D for the current pattern |
|
23 |
+ #The matrix just acts as a subset of D, just eliminates non relevant rows |
|
24 |
+ tempSubsetD = matrix(); |
|
25 |
+ |
|
26 |
+ #A matrix holding the values of our correlation coefficients between genes for the current column |
|
27 |
+ tempGeneCorrMatrix = matrix(); |
|
28 |
+ |
|
29 |
+ #A list to hold all the correlation matrices |
|
30 |
+ geneCorrMatrices = list(); |
|
31 |
+ |
|
32 |
+ #A list to hold all the means |
|
33 |
+ geneCorrMatrMeans = list(); |
|
34 |
+ |
|
35 |
+ #The mean of all the correlation matrices |
|
36 |
+ cbar = 0; |
|
37 |
+ |
|
38 |
+ #A list to return the means and the matrices |
|
39 |
+ results = list(); |
|
40 |
+ |
|
41 |
+ #Scan in the significant genes from each column of Asd |
|
42 |
+ #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
43 |
+ for(i in 1:numCols) |
|
44 |
+ { |
|
45 |
+ sigCount = 0; |
|
46 |
+ for(j in 1:numGenes) |
|
47 |
+ { |
|
48 |
+ if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
49 |
+ { |
|
50 |
+ sigCount = sigCount + 1; |
|
51 |
+ sigGeneNums[sigCount, i] = j; |
|
52 |
+ } |
|
53 |
+ } |
|
54 |
+ |
|
55 |
+ if(sigCount == 0) |
|
56 |
+ { |
|
57 |
+ sigGeneNums[1, i] = 0; |
|
58 |
+ } |
|
59 |
+ |
|
60 |
+ #Save the number of sigGenes |
|
61 |
+ significantGeneNums[i] = sigCount; |
|
62 |
+ } |
|
63 |
+ |
|
64 |
+ #If a pattern has no significant genes this is clearly an error so return such |
|
65 |
+ if(any(significantGeneNums == 0)) |
|
66 |
+ { |
|
67 |
+ zeroSigCols = which(significantGeneNums == 0); |
|
68 |
+ warning("Warning: No Significant Genes in Pattern(s): "); |
|
69 |
+ |
|
70 |
+ for(z in 1:length(zeroSigCols)) |
|
71 |
+ { |
|
72 |
+ message(zeroSigCols[z]); |
|
73 |
+ } |
|
74 |
+ } |
|
75 |
+ |
|
76 |
+ |
|
77 |
+ #Now that we have the significant genes want to grab these from our original D matrix |
|
78 |
+ #and find the sigGene x sigGene correlation matrix and find its mean |
|
79 |
+ |
|
80 |
+ for(j in 1:numCols) |
|
81 |
+ { |
|
82 |
+ #Grab the number of significant genes from the interested column |
|
83 |
+ sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
84 |
+ |
|
85 |
+ if(sigCount != 0) |
|
86 |
+ { |
|
87 |
+ |
|
88 |
+ #loop through the number of significant genes and pull out the rows of D that represent these genes. |
|
89 |
+ #Then find the correlation between them with the built in R corr function |
|
90 |
+ tempSubsetD = matrix(nrow = sigCount, ncol = numSamp); |
|
91 |
+ for(k in 1:sigCount) |
|
92 |
+ { |
|
93 |
+ #Subset D based on significant Genes |
|
94 |
+ #need to transpose as it reads this in as column vector otherwise |
|
95 |
+ tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]); |
|
96 |
+ } |
|
97 |
+ |
|
98 |
+ #Find the correlation between these genes in D |
|
99 |
+ #Need to transpose as it calculates correlations between the columns |
|
100 |
+ tempGeneCorrMatrix = cor(t(tempSubsetD)); |
|
101 |
+ |
|
102 |
+ #Find the mean of this matrix |
|
103 |
+ tempGeneCorrMatrMean = mean(tempGeneCorrMatrix); |
|
104 |
+ |
|
105 |
+ } |
|
106 |
+ else |
|
107 |
+ { |
|
108 |
+ tempGeneCorrMatrix = 0; |
|
109 |
+ tempGeneCorrMatrMean = 0; |
|
110 |
+ } |
|
111 |
+ |
|
112 |
+ #Save these in the overall list |
|
113 |
+ geneCorrMatrices[[j]] = tempGeneCorrMatrix; |
|
114 |
+ geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean; |
|
115 |
+ |
|
116 |
+ } |
|
117 |
+ |
|
118 |
+ #Return as an overall list of lists |
|
119 |
+ # We return Corr Matrices themselves, their means, and the means of the means (cbar) |
|
120 |
+ results[[1]] = geneCorrMatrices; |
|
121 |
+ results[[2]] = geneCorrMatrMeans; |
|
122 |
+ |
|
123 |
+ #Return as an overall list of lists |
|
124 |
+ for(i in 1:numCols) |
|
125 |
+ { |
|
126 |
+ cbar = cbar + results[[2]][[i]]; |
|
127 |
+ } |
|
128 |
+ |
|
129 |
+ cbar = cbar/numCols; |
|
130 |
+ results[[3]] = cbar; |
|
131 |
+ |
|
132 |
+ names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue"); |
|
133 |
+ |
|
134 |
+ return(results); |
|
135 |
+} |
|
0 | 136 |
\ No newline at end of file |
1 | 137 |
deleted file mode 100644 |
... | ... |
@@ -1,237 +0,0 @@ |
1 |
-# Output: list with gene set statistics |
|
2 |
-#' Calculate Gene Set Statistics |
|
3 |
-#' |
|
4 |
-#' @details calculates the gene set statistics for each |
|
5 |
-#' column of A using a Z-score from the elements of the A matrix, |
|
6 |
-#' the input gene set, and permutation tests |
|
7 |
-#' @param Amean A matrix mean values |
|
8 |
-#' @param Asd A matrix standard deviations |
|
9 |
-#' @param GStoGenes data.frame or list with gene sets |
|
10 |
-#' @param numPerm number of permutations for null |
|
11 |
-#' @export |
|
12 |
-calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) |
|
13 |
-{ |
|
14 |
- # test for std dev of zero, possible mostly in simple simulations |
|
15 |
- if (sum(Asd==0) > 0) |
|
16 |
- Asd[Asd==0] <- .Machine$double.eps |
|
17 |
- |
|
18 |
- # calculate Z scores |
|
19 |
- zMatrix <- calcZ(Amean,Asd) |
|
20 |
- |
|
21 |
- # check input arguments |
|
22 |
- if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets")) |
|
23 |
- stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.") |
|
24 |
- |
|
25 |
- if (is(GStoGenes, "GSA.genesets")) |
|
26 |
- { |
|
27 |
- names(GStoGenes$genesets) <- GStoGenes$geneset.names |
|
28 |
- GStoGenes <- GStoGenes$genesets |
|
29 |
- } |
|
30 |
- |
|
31 |
- if (is(GStoGenes, "list")) |
|
32 |
- GStoGenesList <- GStoGenes |
|
33 |
- else |
|
34 |
- GStoGenesList <- list() |
|
35 |
- |
|
36 |
- for (i in 1:dim(GStoGenes)[2]) |
|
37 |
- { |
|
38 |
- GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i])) |
|
39 |
- } |
|
40 |
- |
|
41 |
- # get dimensions |
|
42 |
- numGS <- length(names(GStoGenesList)) |
|
43 |
- numPatt <- dim(zMatrix)[2] |
|
44 |
- numG <- dim(zMatrix)[1]+0.9999 |
|
45 |
- |
|
46 |
- # initialize matrices |
|
47 |
- statsUp <- matrix(ncol = numGS, nrow = numPatt) |
|
48 |
- statsDown <- matrix(ncol = numGS, nrow = numPatt) |
|
49 |
- actEst <- matrix(ncol = numGS, nrow = numPatt) |
|
50 |
- results <- list() |
|
51 |
- zPerm <- matrix(ncol=numPerm,nrow=numPatt) |
|
52 |
- |
|
53 |
- # do permutation test |
|
54 |
- for (gs in 1:numGS) |
|
55 |
- { |
|
56 |
- genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
57 |
- index <- rownames(zMatrix) %in% genes |
|
58 |
- zValues <- zMatrix[index,1] |
|
59 |
- numGenes <- length(zValues) |
|
60 |
- label <- as.character(numGenes) |
|
61 |
- |
|
62 |
- if (!any(names(results)==label)) |
|
63 |
- { |
|
64 |
- for (p in 1:numPatt) |
|
65 |
- { |
|
66 |
- for (j in 1:numPerm) |
|
67 |
- { |
|
68 |
- temp <- floor(runif(numGenes,1,numG)) |
|
69 |
- temp2 <- zMatrix[temp,p] |
|
70 |
- zPerm[p,j] <- mean(temp2) |
|
71 |
- } |
|
72 |
- } |
|
73 |
- results[[label]] <- zPerm |
|
74 |
- } |
|
75 |
- } |
|
76 |
- |
|
77 |
- # get p-value |
|
78 |
- for (p in 1:numPatt) |
|
79 |
- { |
|
80 |
- for (gs in 1:numGS) |
|
81 |
- { |
|
82 |
- genes <- GStoGenesList[[names(GStoGenesList)[gs]]] |
|
83 |
- index <- rownames(zMatrix) %in% genes |
|
84 |
- zValues <- zMatrix[index,p] |
|
85 |
- zScore <- mean(zValues) |
|
86 |
- |
|
87 |
- numGenes <- length(zValues) |
|
88 |
- label <- as.character(numGenes) |
|
89 |
- |
|
90 |
- permzValues <- results[[label]][p,] |
|
91 |
- ordering <- order(permzValues) |
|
92 |
- permzValues <- permzValues[ordering] |
|
93 |
- |
|
94 |
- statistic <- sum(zScore > permzValues) |
|
95 |
- statUpReg <- 1 - statistic/length(permzValues) |
|
96 |
- statsUp[p,gs] <- max(statUpReg, 1/numPerm) |
|
97 |
- |
|
98 |
- statistic <- sum(zScore < permzValues) |
|
99 |
- statDownReg <- 1 - statistic/length(permzValues) |
|
100 |
- statsDown[p,gs] <- max(statDownReg,1/numPerm) |
|
101 |
- |
|
102 |
- activity <- 1 - 2*max(statUpReg, 1/numPerm) |
|
103 |
- actEst[p,gs] <- activity |
|
104 |
- } |
|
105 |
- } |
|
106 |
- |
|
107 |
- # format output |
|
108 |
- colnames(statsUp) <- names(GStoGenesList) |
|
109 |
- colnames(statsDown) <- names(GStoGenesList) |
|
110 |
- colnames(actEst) <- names(GStoGenesList) |
|
111 |
- |
|
112 |
- rownames(statsUp) <- colnames(zMatrix) |
|
113 |
- rownames(statsDown) <- colnames(zMatrix) |
|
114 |
- rownames(actEst) <- colnames(zMatrix) |
|
115 |
- |
|
116 |
- results[['GSUpreg']] <- statsUp |
|
117 |
- results[['GSDownreg']] <- statsDown |
|
118 |
- results[['GSActEst']] <- actEst |
|
119 |
- |
|
120 |
- return(results) |
|
121 |
-} |
|
122 |
- |
|
123 |
-# calcGeneGSStat: calculate probability gene belongs in gene set |
|
124 |
-# History: v1.0 EJF original CoGAPS |
|
125 |
- |
|
126 |
-# Inputs: Amean - A matrix mean values |
|
127 |
-# Asd - A matrix standard deviations |
|
128 |
-# GStoGenes - data.frame or list with gene sets |
|
129 |
-# numPerm - number of permutations for null |
|
130 |
-# Pw - weighting on genes |
|
131 |
-# nullGenes - adjust genes |
|
132 |
- |
|
133 |
-# Output: environment with gene set statistics |
|
134 |
-# NOTE: should make into list object, env historical |
|
135 |
- |
|
136 |
-#'\code{calcGeneGSStat} calculates the probability that a gene |
|
137 |
-#'listed in a gene set behaves like other genes in the set within |
|
138 |
-#'the given data set |
|
139 |
-#' |
|
140 |
-#'@param Amean A matrix mean values |
|
141 |
-#'@param Asd A matrix standard deviations |
|
142 |
-#'@param GSGenes data.frame or list with gene sets |
|
143 |
-#'@param numPerm number of permutations for null |
|
144 |
-#'@param Pw weight on genes |
|
145 |
-#'@param nullGenes - logical indicating gene adjustment |
|
146 |
-#'@export |
|
147 |
-calcGeneGSStat <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), nullGenes=FALSE) |
|
148 |
-{ |
|
149 |
- gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), numPerm=numPerm) |
|
150 |
- gsStat <- gsStat$GSUpreg |
|
151 |
- |
|
152 |
- gsStat <- -log(gsStat) |
|
153 |
- |
|
154 |
- if (!all(is.na(Pw))) |
|
155 |
- { |
|
156 |
- if (length(Pw) != length(gsStat)) |
|
157 |
- { |
|
158 |
- stop('Invalid weighting') |
|
159 |
- } |
|
160 |
- gsStat <- gsStat*Pw |
|
161 |
- } |
|
162 |
- |
|
163 |
- if (nullGenes) |
|
164 |
- { |
|
165 |
- ZD <- Amean[setdiff(row.names(Amean), GSGenes),] / |
|
166 |
- Asd[setdiff(row.names(Amean), GSGenes),] |
|
167 |
- } |
|
168 |
- else |
|
169 |
- { |
|
170 |
- ZD <- Amean[GSGenes,]/Asd[GSGenes,] |
|
171 |
- } |
|
172 |
- outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat)) |
|
173 |
- outStats <- outStats / apply(ZD,1,sum) |
|
174 |
- outStats[which(apply(ZD,1,sum) < .Machine$double.eps)] <- 0 |
|
175 |
- |
|
176 |
- if (sum(gsStat) < .Machine$double.eps) |
|
177 |
- return(0) |
|
178 |
- return(outStats) |
|
179 |
-} |
|
180 |
- |
|
181 |
-computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)), |
|
182 |
-numPerm=500, PwNull=FALSE) |
|
183 |
-{ |
|
184 |
- geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw, |
|
185 |
- GSGenes=GSGenes, numPerm=numPerm) |
|
186 |
- |
|
187 |
- if (PwNull) |
|
188 |
- { |
|
189 |
- permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, GSGenes=GSGenes, |
|
190 |
- numPerm=numPerm, Pw=Pw, nullGenes=T) |
|
191 |
- } |
|
192 |
- else |
|
193 |
- { |
|
194 |
- permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, GSGenes=GSGenes, |
|
195 |
- numPerm=numPerm, nullGenes=T) |
|
196 |
- } |
|
197 |
- |
|
198 |
- return(sapply(GSGenes, function(x) |
|
199 |
- length(which(permGSStat > geneGSStat[x])) / length(permGSStat))) |
|
200 |
-} |
|
201 |
- |
|
202 |
-#' Compute Z-Score Matrix |
|
203 |
-#' |
|
204 |
-#' @details calculates the Z-score for each element based on input mean |
|
205 |
-#' and standard deviation matrices |
|
206 |
-#' @param meanMat matrix of mean values |
|
207 |
-#' @param sdMat matrix of standard deviation values |
|
208 |
-#' @export |
|
209 |
-calcZ <- function (meanMat, sdMat) |
|
210 |
-{ |
|
211 |
- if (nrow(meanMat) != nrow(sdMat)) |
|
212 |
- stop("Number of rows in the mean and standard deviation of A do not agree.") |
|
213 |
- if (ncol(meanMat) != ncol(sdMat)) |
|
214 |
- stop("Number of columns in the mean and standard deviation of A do not agree.") |
|
215 |
- |
|
216 |
- zMat <- meanMat / sdMat |
|
217 |
- rownames(zMat) <- rownames(meanMat) |
|
218 |
- colnames(zMat) <- colnames(meanMat) |
|
219 |
- |
|
220 |
- return(zMat) |
|
221 |
-} |
|
222 |
- |
|
223 |
-#' Reconstruct Gene |
|
224 |
-#' |
|
225 |
-#' @param A A matrix estimates |
|
226 |
-#' @param P corresponding P matrix estimates |
|
227 |
-#' @param genes an index of the gene or genes of interest |
|
228 |
-#' @return the D' estimate of a gene or set of genes |
|
229 |
-#' @export |
|
230 |
-reconstructGene<-function(A=NA, P=NA, genes=NA) |
|
231 |
-{ |
|
232 |
- Dneu <- A %*% P |
|
233 |
- if (!is.na(genes)) |
|
234 |
- Dneu <- Dneu[genes,] |
|
235 |
- return(Dneu) |
|
236 |
-} |
|
237 |
- |
238 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,30 @@ |
1 |
+#' Generate Seeds for Multiple Concurrent Runs |
|
2 |
+#' |
|
3 |
+#' @param chains number of seeds to generate (number of chains to run) |
|
4 |
+#' @param seed positive values are kept, negative values will be overwritten |
|
5 |
+#' by a seed generated from the current time |
|
6 |
+#' @return vector of randomly generated seeds |
|
7 |
+#' @export |
|
8 |
+#' @examples |
|
9 |
+#' generateSeeds(chains=2, seed=-1) |
|
10 |
+generateSeeds <- function(chains=2, seed=-1) |
|
11 |
+{ |
|
12 |
+ if (chains < 2 || (as.integer(chains) != chains)) |
|
13 |
+ { |
|
14 |
+ stop("chains must be >= 2 and an integer") |
|
15 |
+ } |
|
16 |
+ |
|
17 |
+ if (seed < 1) |
|
18 |
+ { |
|
19 |
+ secs <- as.numeric(difftime(Sys.time(), paste(Sys.Date(), "00:00"), |
|
20 |
+ units="secs")) |
|
21 |
+ secs <- round(secs) |
|
22 |
+ seeds <- seq_len(chains) * secs |
|
23 |
+ return(seeds) |
|
24 |
+ } |
|
25 |
+ else |
|
26 |
+ { |
|
27 |
+ seeds <- seq_len(chains) * seed |
|
28 |
+ return(seeds) |
|
29 |
+ } |
|
30 |
+} |
... | ... |
@@ -8,8 +8,8 @@ |
8 | 8 |
#' \tabular{ll}{ |
9 | 9 |
#' Package: \tab CoGAPS\cr |
10 | 10 |
#' Type: \tab Package\cr |
11 |
-#' Version: \tab 1.0\cr |
|
12 |
-#' Date: \tab 2014-07-23\cr |
|
11 |
+#' Version: \tab 2.99.0\cr |
|
12 |
+#' Date: \tab 2018-01-24\cr |
|
13 | 13 |
#' License: \tab LGPL\cr |
14 | 14 |
#' } |
15 | 15 |
#' @author Maintainer: Elana J. Fertig \email{ejfertig@jhmi.edu}, |
16 | 16 |
deleted file mode 100644 |
... | ... |
@@ -1,700 +0,0 @@ |
1 |
-#Calculates significant genes in each pattern according to certain threshold |
|
2 |
-#Returns the significant gene names as well as well as the means of these matrices and number of genes in each |
|
3 |
- |
|
4 |
-gapsInterPattern <- function(Amean, Asd, sdThreshold = 3) |
|
5 |
-{ |
|
6 |
- #number of rows and cols of Asd |
|
7 |
- numGenes = length(Asd[,1]); |
|
8 |
- numCols = length(Asd[1,]); |
|
9 |
- |
|
10 |
- #Vector holding the number of each significant gene in each column |
|
11 |
- sigGeneNums = data.frame(); |
|
12 |
- |
|
13 |
- #Temp number of sig genes in the col |
|
14 |
- sigCount = 0; |
|
15 |
- |
|
16 |
- #Number to catch the amount of columns without significant genes |
|
17 |
- numEmptyCols = 0; |
|
18 |
- |
|
19 |
- #Keep an array of the significant gene counts |
|
20 |
- significantGeneNums = c(0); |
|
21 |
- |
|
22 |
- #Names of the genes from the data matrix |
|
23 |
- geneNames = names(Asd[,1]); |
|
24 |
- |
|
25 |
- #Names of the genes that are significant from the data matrix |
|
26 |
- sigGeneNames = "0"; |
|
27 |
- |
|
28 |
- #The numerator of our statistic |
|
29 |
- dimensionStatNumerator = 0; |
|
30 |
- |
|
31 |
- #The denominator of our statistic |
|
32 |
- dimensionStatDenominator = 0; |
|
33 |
- |
|
34 |
- #The value of our statistic |
|
35 |
- dimensionStatistic = 0; |
|
36 |
- |
|
37 |
- #A matrix holding the values of our statistics |
|
38 |
- dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols); |
|
39 |
- |
|
40 |
- #The mean of the statistic matrix |
|
41 |
- sbar = 0; |
|
42 |
- |
|
43 |
- #A list to return the significant genes in each col and the statistic matrix |
|
44 |
- results = list(list()); |
|
45 |
- |
|
46 |
- #Scan in the significant genes from each column of Asd |
|
47 |
- #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
48 |
- for(i in 1:numCols) |
|
49 |
- { |
|
50 |
- sigCount = 0; |
|
51 |
- for(j in 1:numGenes) |
|
52 |
- { |
|
53 |
- if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
54 |
- { |
|
55 |
- sigCount = sigCount + 1; |
|
56 |
- sigGeneNums[sigCount, i] = j; |
|
57 |
- } |
|
58 |
- } |
|
59 |
- |
|
60 |
- if(sigCount == 0) |
|
61 |
- { |
|
62 |
- sigGeneNums[1, i] = 0; |
|
63 |
- numEmptyCols = numEmptyCols + 1; |
|
64 |
- } |
|
65 |
- |
|
66 |
- #Save the number of sigGenes |
|
67 |
- significantGeneNums[i] = sigCount; |
|
68 |
- |
|
69 |
- #Get the names and store them |
|
70 |
- if(sigCount != 0) |
|
71 |
- { |
|
72 |
- for(k in 1:sigCount) |
|
73 |
- { |
|
74 |
- sigGeneNames[k] = geneNames[sigGeneNums[k, i]]; |
|
75 |
- } |
|
76 |
- results[[1]][[i]] = sigGeneNames; |
|
77 |
- sigGeneNames = "0"; |
|
78 |
- } |
|
79 |
- else |
|
80 |
- { |
|
81 |
- results[[1]][[i]] = "None"; |
|
82 |
- } |
|
83 |
- } |
|
84 |
- |
|
85 |
- if(any(significantGeneNums == 0)) |
|
86 |
- { |
|
87 |
- zeroSigCols = which(significantGeneNums == 0); |
|
88 |
- print("Warning: No Significant Genes in Pattern(s): "); |
|
89 |
- |
|
90 |
- for(z in 1:length(zeroSigCols)) |
|
91 |
- { |
|
92 |
- print(zeroSigCols[z]); |
|
93 |
- } |
|
94 |
- } |
|
95 |
- |
|
96 |
- #Now that we have the significant genes want to see if these genes are significant in other columns |
|
97 |
- #Fill across the row then down the column |
|
98 |
- |
|
99 |
- #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i' |
|
100 |
- for(i in 1:numCols) |
|
101 |
- { |
|
102 |
- for(j in 1:numCols) |
|
103 |
- { |
|
104 |
- #Grab the number of significant genes from the interested column |
|
105 |
- sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
106 |
- |
|
107 |
- if(sigCount != 0) |
|
108 |
- { |
|
109 |
- dimensionStatDenominator = sigCount; |
|
110 |
- dimensionStatNumerator = 0; |
|
111 |
- |
|
112 |
- #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd. |
|
113 |
- #Find the significance of these genes, Amean - threshold * Asd |
|
114 |
- for(k in 1:sigCount) |
|
115 |
- { |
|
116 |
- if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0) |
|
117 |
- { |
|
118 |
- dimensionStatNumerator = dimensionStatNumerator + 1; |
|
119 |
- } |
|
120 |
- } |
|
121 |
- |
|
122 |
- dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator; |
|
123 |
- |
|
124 |
- dimensionStatisticMatrix[i, j] = dimensionStatistic; |
|
125 |
- } |
|
126 |
- else |
|
127 |
- { |
|
128 |
- dimensionStatisticMatrix[i, j] = 0; |
|
129 |
- } |
|
130 |
- } |
|
131 |
- } |
|
132 |
- |
|
133 |
- #Find mean of the matrices (excluding the diagonal elements) |
|
134 |
- #we can subtract numCols as the diagonal elements are 1 |
|
135 |
- sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols))); |
|
136 |
- |
|
137 |
- results[[2]] = significantGeneNums; |
|
138 |
- results[[3]] = t(dimensionStatisticMatrix); |
|
139 |
- results[[4]] = sbar; |
|
140 |
- |
|
141 |
- names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue"); |
|
142 |
- |
|
143 |
- return(results); |
|
144 |
-} |
|
145 |
- |
|
146 |
-#Calculates significant genes in each pattern according to certain threshold |
|
147 |
-#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices |
|
148 |
- |
|
149 |
-gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3) |
|
150 |
-{ |
|
151 |
- #number of rows and cols of Asd |
|
152 |
- numGenes = length(Asd[,1]); |
|
153 |
- numCols = length(Asd[1,]); |
|
154 |
- |
|
155 |
- #number of samples in DMatrix |
|
156 |
- numSamp = ncol(DMatrix); |
|
157 |
- |
|
158 |
- #Vector holding the number of each significant gene in each column |
|
159 |
- sigGeneNums = data.frame(); |
|
160 |
- |
|
161 |
- #Temp number of sig genes in the col |
|
162 |
- sigCount = 0; |
|
163 |
- |
|
164 |
- #Keep an array of the significant gene counts |
|
165 |
- significantGeneNums = c(0); |
|
166 |
- |
|
167 |
- #A matrix to hold the significant genes in D for the current pattern |
|
168 |
- #The matrix just acts as a subset of D, just eliminates non relevant rows |
|
169 |
- tempSubsetD = matrix(); |
|
170 |
- |
|
171 |
- #A matrix holding the values of our correlation coefficients between genes for the current column |
|
172 |
- tempGeneCorrMatrix = matrix(); |
|
173 |
- |
|
174 |
- #A list to hold all the correlation matrices |
|
175 |
- geneCorrMatrices = list(); |
|
176 |
- |
|
177 |
- #A list to hold all the means |
|
178 |
- geneCorrMatrMeans = list(); |
|
179 |
- |
|
180 |
- #The mean of all the correlation matrices |
|
181 |
- cbar = 0; |
|
182 |
- |
|
183 |
- #A list to return the means and the matrices |
|
184 |
- results = list(); |
|
185 |
- |
|
186 |
- #Scan in the significant genes from each column of Asd |
|
187 |
- #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
188 |
- for(i in 1:numCols) |
|
189 |
- { |
|
190 |
- sigCount = 0; |
|
191 |
- for(j in 1:numGenes) |
|
192 |
- { |
|
193 |
- if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
194 |
- { |
|
195 |
- sigCount = sigCount + 1; |
|
196 |
- sigGeneNums[sigCount, i] = j; |
|
197 |
- } |
|
198 |
- } |
|
199 |
- |
|
200 |
- if(sigCount == 0) |
|
201 |
- { |
|
202 |
- sigGeneNums[1, i] = 0; |
|
203 |
- } |
|
204 |
- |
|
205 |
- #Save the number of sigGenes |
|
206 |
- significantGeneNums[i] = sigCount; |
|
207 |
- } |
|
208 |
- |
|
209 |
- #If a pattern has no significant genes this is clearly an error so return such |
|
210 |
- if(any(significantGeneNums == 0)) |
|
211 |
- { |
|
212 |
- zeroSigCols = which(significantGeneNums == 0); |
|
213 |
- warning("Warning: No Significant Genes in Pattern(s): "); |
|
214 |
- |
|
215 |
- for(z in 1:length(zeroSigCols)) |
|
216 |
- { |
|
217 |
- message(zeroSigCols[z]); |
|
218 |
- } |
|
219 |
- } |
|
220 |
- |
|
221 |
- |
|
222 |
- #Now that we have the significant genes want to grab these from our original D matrix |
|
223 |
- #and find the sigGene x sigGene correlation matrix and find its mean |
|
224 |
- |
|
225 |
- for(j in 1:numCols) |
|
226 |
- { |
|
227 |
- #Grab the number of significant genes from the interested column |
|
228 |
- sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
229 |
- |
|
230 |
- if(sigCount != 0) |
|
231 |
- { |
|
232 |
- |
|
233 |
- #loop through the number of significant genes and pull out the rows of D that represent these genes. |
|
234 |
- #Then find the correlation between them with the built in R corr function |
|
235 |
- tempSubsetD = matrix(nrow = sigCount, ncol = numSamp); |
|
236 |
- for(k in 1:sigCount) |
|
237 |
- { |
|
238 |
- #Subset D based on significant Genes |
|
239 |
- #need to transpose as it reads this in as column vector otherwise |
|
240 |
- tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]); |
|
241 |
- } |
|
242 |
- |
|
243 |
- #Find the correlation between these genes in D |
|
244 |
- #Need to transpose as it calculates correlations between the columns |
|
245 |
- tempGeneCorrMatrix = cor(t(tempSubsetD)); |
|
246 |
- |
|
247 |
- #Find the mean of this matrix |
|
248 |
- tempGeneCorrMatrMean = mean(tempGeneCorrMatrix); |
|
249 |
- |
|
250 |
- } |
|
251 |
- else |
|
252 |
- { |
|
253 |
- tempGeneCorrMatrix = 0; |
|
254 |
- tempGeneCorrMatrMean = 0; |
|
255 |
- } |
|
256 |
- |
|
257 |
- #Save these in the overall list |
|
258 |
- geneCorrMatrices[[j]] = tempGeneCorrMatrix; |
|
259 |
- geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean; |
|
260 |
- |
|
261 |
- } |
|
262 |
- |
|
263 |
- #Return as an overall list of lists |
|
264 |
- # We return Corr Matrices themselves, their means, and the means of the means (cbar) |
|
265 |
- results[[1]] = geneCorrMatrices; |
|
266 |
- results[[2]] = geneCorrMatrMeans; |
|
267 |
- |
|
268 |
- #Return as an overall list of lists |
|
269 |
- for(i in 1:numCols) |
|
270 |
- { |
|
271 |
- cbar = cbar + results[[2]][[i]]; |
|
272 |
- } |
|
273 |
- |
|
274 |
- cbar = cbar/numCols; |
|
275 |
- results[[3]] = cbar; |
|
276 |
- |
|
277 |
- names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue"); |
|
278 |
- |
|
279 |
- return(results); |
|
280 |
-} |
|
281 |
- |
|
282 |
- |
|
283 |
-#' patternMarkers |
|
284 |
-#' |
|
285 |
-#' @param Amatrix A matrix of genes by weights resulting from CoGAPS or other NMF decomposition |
|
286 |
-#' @param scaledPmatrix logical indicating whether the corresponding pattern matrix was fixed to have max 1 during decomposition |
|
287 |
-#' @param Pmatrix the corresponding Pmatrix (patterns X samples) for the provided Amatrix (genes x patterns). This must be supplied if scaledPmatrix is FALSE. |
|
288 |
-#' @param threshold # the type of threshold to be used. The default "all" will distribute genes into pattern with the lowest ranking. The "cut" thresholding by the first gene to have a lower ranking, i.e. better fit to, a pattern. |
|
289 |
-#' @param lp a vector of weights for each pattern to be used for finding markers. If NA markers for each pattern of the A matrix will be used. |
|
290 |
-#' @param full logical indicating whether to return the ranks of each gene for each pattern |
|
291 |
-#' |
|
292 |
-#' @return By default a non-overlapping list of genes associated with each \code{lp}. If \code{full=TRUE} a data.frame of |
|
293 |
-#' genes rankings with a column for each \code{lp} will also be returned. |
|
294 |
-#' @export |
|
295 |
-#' |
|
296 |
-#' @examples \dontrun{ |
|
297 |
-#' patternMarkers(Amatrix=AP$Amean,scaledPmatrix=FALSE,Pmatrix=NA,threshold="All",full=TRUE) |
|
298 |
-#' } |
|
299 |
-#' |
|
300 |
-patternMarkers <- function( |
|
301 |
- Amatrix=NA, #A matrix of genes by weights resulting from CoGAPS or other NMF decomposition |
|
302 |
- scaledPmatrix=FALSE, # logical indicating whether the corresponding pattern matrix was fixed to have max 1 during decomposition |
|
303 |
- Pmatrix=NA, #the corresponding Pmatrix (patterns X samples) for the provided Amatrix (genes x patterns). This must be supplied if scaledPmatrix is FALSE. |
|
304 |
- threshold="all", # the type of threshold to be used. The default "All" will distribute genes into pattern with the highest ranking. |
|
305 |
- # The "cut" thresholding by the first gene to have a lower ranking, i.e. better fit to, a pattern. |
|
306 |
- lp=NA, # a vector of weights for each pattern to be used for finding markers. If NA markers for each pattern of the A matrix will be used. |
|
307 |
- full=FALSE #logical indicating whether to return the ranks of each gene for each pattern. |
|
308 |
-){ |
|
309 |
- |
|
310 |
- |
|
311 |
-if(scaledPmatrix==FALSE){ |
|
312 |
- if(!is.na(Pmatrix)){ |
|
313 |
- pscale <- apply(Pmatrix,1,max) # rescale p's to have max 1 |
|
314 |
- Amatrix <- sweep(Amatrix, 2, pscale, FUN="*") # rescale A in accordance with p's having max 1 |
|
315 |
- } |
|
316 |
- else(warning("P values must be provided if not already scaled")) |
|
317 |
- } |
|
318 |
-# find the A with the highest magnitude |
|
319 |
-Arowmax <- t(apply(Amatrix, 1, function(x) x/max(x))) |
|
320 |
-pmax<-apply(Amatrix, 1, max) |
|
321 |
-# determine which genes are most associated with each pattern |
|
322 |
-ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))#list() |
|
323 |
-ssgenes<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=NULL) |
|
324 |
-nP=dim(Amatrix)[2] |
|
325 |
-if(!is.na(lp)){ |
|
326 |
- if(length(lp)!=dim(Amatrix)[2]){ |
|
327 |
- warning("lp length must equal the number of columns of the Amatrix") |
|
328 |
- } |
|
329 |
- sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
330 |
- ssranks[order(sstat),i] <- 1:length(sstat) |
|
331 |
- ssgenes[,i]<-names(sort(sstat,decreasing=FALSE)) |
|
332 |
-} else { |
|
333 |
- for(i in 1:nP){ |
|
334 |
- lp <- rep(0,dim(Amatrix)[2]) |
|
335 |
- lp[i] <- 1 |
|
336 |
- sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))) |
|
337 |
- ssranks[order(sstat),i] <- 1:length(sstat) |
|
338 |
- ssgenes[,i]<-names(sort(sstat,decreasing=FALSE)) |
|
339 |
- } |
|
340 |
-} |
|
341 |
-if(threshold=="cut"){ |
|
342 |
- geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min)))) |
|
343 |
- ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x]) |
|
344 |
- #geneThresh <- apply(sweep(ssranks,1,t(apply(ssranks, 1, min)),"-"),2,function(x) which(x==0)) |
|
345 |
- #ssgenes.th <- lapply(geneThresh,names) |
|
346 |
-} else if(threshold=="all"){ |
|
347 |
- pIndx<-apply(ssranks,1,which.min) |
|
348 |
- gBYp <- lapply(sort(unique(pIndx)),function(x) names(pIndx[pIndx==x])) |
|
349 |
- ssgenes.th <- lapply(1:nP, function(x) ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x]) |
|
350 |
-} else {stop("Threshold arguement not viable option")} |
|
351 |
- |
|
352 |
-if(full){return(list("PatternMarkers"=ssgenes.th,"PatternRanks"=ssranks)) |
|
353 |
-} else{return("PatternMarkers"=ssgenes.th)} |
|
354 |
-} |
|
355 |
- |
|
356 |
- |
|
357 |
-#' patternMatch4Parallel |
|
358 |
-#' |
|
359 |
-#' @param Ptot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet} |
|
360 |
-#' @param nSets number of parallel sets used to generate \code{Ptot} |
|
361 |
-#' @param cnt number of branches at which to cut dendrogram |
|
362 |
-#' @param minNS minimum of individual set contributions a cluster must contain |
|
363 |
-#' @param cluster.method the agglomeration method to be used for clustering |
|
364 |
-#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE. |
|
365 |
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Ptot} |
|
366 |
-#' @param ... additional parameters for \code{agnes} |
|
367 |
-#' |
|
368 |
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each |
|
369 |
-#' concensus pattern is also returned. |
|
370 |
-#' @export |
|
371 |
-#' @seealso \code{\link{agnes}} |
|
372 |
-#' |
|
373 |
-#' |
|
374 |
- |
|
375 |
-patternMatch4Parallel <- function(Ptot, |
|
376 |
- nSets, #number of sets |
|
377 |
- cnt, # number of branches at which to cut dendrogram |
|
378 |
- minNS, # minimum of sets a cluster must contain |
|
379 |
- cluster.method="complete", |
|
380 |
- ignore.NA=FALSE, # ignore NAs from potential over dimensionalization |
|
381 |
- bySet=FALSE, |
|
382 |
- ...){ |
|
383 |
- |
|
384 |
-#### read in CoGAPS results |
|
385 |
-cdir <- getwd() |
|
386 |
-#if(!is.null(path)){setwd(path)} |
|
387 |
-if(!is.null(minNS)){minNS=nSets/2} |
|
388 |
- |
|
389 |
-if(ignore.NA==FALSE){if(anyNA(Ptot)){warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')}} |
|
390 |
-if(ignore.NA==TRUE){Ptot<-Ptot[complete.cases(Ptot),]} |
|
391 |
- |
|
392 |
-#################################################################### |
|
393 |
-# corr dist |
|
394 |
-corr.dist=cor(t(Ptot)) |
|
395 |
-corr.dist=1-corr.dist |
|
396 |
-# cluster |
|
397 |
-#library(cluster) |
|
398 |
-clust=agnes(x=corr.dist,diss=T,method=cluster.method) |
|
399 |
-cut=cutree(as.hclust(clust),k=cnt) |
|
400 |
-#save.image(file=paste("CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData")) |
|
401 |
-#################################################################### |
|
402 |
-#drop n<4 and get weighted Avg |
|
403 |
-cls=sort(unique(cut)) |
|
404 |
-cMNs=matrix(nrow=cnt,ncol=dim(Ptot)[2]) |
|
405 |
-rownames(cMNs)=cls |
|
406 |
-colnames(cMNs)=colnames(Ptot) |
|
407 |
- |
|
408 |
-RtoMeanPattern <- list() |
|
409 |
-PByClust <- list() |
|
410 |
-for(i in cls){ |
|
411 |
- if(is.null(dim(Ptot[cut == i, ]))==TRUE){ |
|
412 |
- cMNs[i,] <- Ptot[cut == i, ] |
|
413 |
- RtoMeanPattern[[i]] <- rep(1,length(Ptot[cut == i, ])) |
|
414 |
- PByClust[[i]] <- t(as.matrix(Ptot[cut == i, ])) |
|
415 |
- } |
|
416 |
- else{ |
|
417 |
- cMNs[i,]=colMeans(Ptot[cut==i,]) |
|
418 |
- PByClust[[i]] <- Ptot[cut==i,] |
|
419 |
- nIN=sum(cut==i) |
|
420 |
- RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)}) |
|
421 |
- } |
|
422 |
-} |
|
423 |
- |
|
424 |
-#drop n<minNS |
|
425 |
-PByClustDrop <- list() |
|
426 |
-RtoMPDrop <- list() |
|
427 |
-for(i in cls){ |
|
428 |
- if(is.null(dim(PByClust[[i]]))==TRUE){next} |
|
429 |
- if(dim(PByClust[[i]])[1]<minNS){next} |
|
430 |
- else{ |
|
431 |
- #indx <- which(RtoMeanPattern[[i]]>.7,arr.ind = TRUE) |
|
432 |
- PByClustDrop <- append(PByClustDrop,list(PByClust[[i]])) |
|
433 |
- RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]])) |
|
434 |
- } |
|
435 |
-} |
|
436 |
- |
|
437 |
- |
|
438 |
-### split by corr (build in drop if below minNS) |
|
439 |
-PByCDS <- list() |
|
440 |
-RtoMPDS <- list() |
|
441 |
-for(j in 1:length(PByClustDrop)){ |
|
442 |
- if(is.null(dim(PByClustDrop[[j]]))==TRUE){ |
|
443 |
- next |
|
444 |
- } |
|
445 |
- if(dim(PByClustDrop[[j]])[1]<minNS+nSets){ |
|
446 |
- PByCDS <- append(PByCDS,PByClustDrop[j]) |
|
447 |
- RtoMPDS <- append(RtoMPDS,RtoMPDrop[j]) |
|
448 |
- } |
|
449 |
- if(dim(PByClustDrop[[j]])[1]>=minNS+nSets){ |
|
450 |
- corr.distPBCD=cor(t(PByClustDrop[[j]])) |
|
451 |
- corr.distPBCD=1-corr.distPBCD |
|
452 |
- clustPBCD=agnes(x=corr.distPBCD,diss=T,method="complete") |
|
453 |
- cutPBCD=cutree(as.hclust(clustPBCD),k=2) |
|
454 |
- g1 <- PByClustDrop[[j]][cutPBCD==1,] |
|
455 |
- PByCDS <- append(PByCDS,list(g1)) |
|
456 |
- RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g1)[1],function(z) round(cor(x=g1[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==1,])),3)))) |
|
457 |
- g2 <- PByClustDrop[[j]][cutPBCD==2,] |
|
458 |
- if (is.null(dim(g2)[1])==FALSE){ |
|
459 |
- PByCDS <- append(PByCDS,list(g2)) |
|
460 |
- RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g2)[1],function(z) round(cor(x=g2[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==2,])),3)))) |
|
461 |
- } |
|
462 |
- } |
|
463 |
-#print(j) |
|
464 |
-#print(str(PByCDS)) |
|
465 |
-} |
|
466 |
- |
|
467 |
-#weighted.mean(PByClustDrop[[1]],RtoMPDrop[[1]]) |
|
468 |
-PByCDSWavg<- t(sapply(1:length(PByCDS),function(z) apply(PByCDS[[z]],2,function(x) weighted.mean(x,(RtoMPDS[[z]])^3)))) |
|
469 |
-rownames(PByCDSWavg) <- lapply(1:length(PByCDS),function(x) paste("Pattern",x)) |
|
470 |
- |
|
471 |
-#save |
|
472 |
-#save(PByCDSWavg,file=paste("PAatternSummary.UnScaled.",fname,".CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData",sep="")) |
|
473 |
- |
|
474 |
-#scale ps |
|
475 |
-Pmax <- apply(PByCDSWavg,1,max) |
|
476 |
-PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1],function(x) PByCDSWavg[x,]/Pmax[x])) |
|
477 |
-rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg) |
|
478 |
- |
|
479 |
-if(bySet){ |
|
480 |
-# return by set and final |
|
481 |
-PBySet<-PByCDS |
|
482 |
-return(list("consenusPatterns"=PByCDSWavgScaled,"PBySet"=PBySet)) |
|
483 |
-} else {return(PByCDSWavgScaled)} |
|
484 |
- |
|
485 |
-} |
|
486 |
- |
|
487 |
- |
|
488 |
-#################################################################### |
|
489 |
-#################################################################### |
|
490 |
- |
|
491 |
- |
|
492 |
-#' PatternMatcher Shiny Ap |
|
493 |
-#' |
|
494 |
-#' @param PBySet list of matched set solutions for the Pmatrix from an NMF algorithm |
|
495 |
-#' @param out optional name for saving output |
|
496 |
-#' @param order optional vector indicating order of samples for plotting. Default is NULL. |
|
497 |
-#' @param sample.color optional vector of colors of same length as colnames. Default is NULL. |
|
498 |
-#' |
|
499 |
-#' @return either an index of selected sets' contributions or the editted \code{PBySet} object |
|
500 |
-#' @export |
|
501 |
-#' |
|
502 |
-#' @examples \dontrun{ |
|
503 |
-#' patternMatcher(PBySet,out,order,sample.color) |
|
504 |
-#' } |
|
505 |
-#' |
|
506 |
-#' |
|
507 |
-patternMatcher<-function(PBySet=NULL,out=NULL,order=NULL, sample.color=NULL) { |
|
508 |
- |
|
509 |
-runApp(list( |
|
510 |
- ui = pageWithSidebar( |
|
511 |
- # Application title |
|
512 |
- headerPanel('NMF Pattern Matching'), |
|
513 |
- # Side pannel with controls |
|
514 |
- sidebarPanel( |
|
515 |
- # to upload file |
|
516 |
- fileInput('file1', |
|
517 |
- 'Choose .Rda File', |
|
518 |
- accept=c('.RData','.Rda','R data object','.rda') |
|
519 |
- ), |
|
520 |