... | ... |
@@ -1,92 +1,92 @@ |
1 |
-# CoGAPS: function to call gapsRun and calculate gene set |
|
2 |
-# statistics on result, also to produce A and P plots |
|
3 |
-# History: v1.0 - EJF original CoGAPS, MFO modified to to match |
|
4 |
-# new variable names |
|
5 |
- |
|
6 |
-# Inputs: data - data matrix |
|
7 |
-# unc - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
8 |
-# GStoGenes - data.frame or list of gene sets |
|
9 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
10 |
-# nEquil - number of iterations for burn-in |
|
11 |
-# nSample - number of iterations for sampling |
|
12 |
-# nOutR - how often to print status into R by iterations |
|
13 |
-# output_atomic - whether to write atom files (large) |
|
14 |
-# simulation_id - name to attach to atoms files if created |
|
15 |
-# plot - logical to determine if plots produced |
|
16 |
-# nPerm - number of permutations for gene set test |
|
17 |
-# alphaA, alphaP - sparsity parameters for A and P domains |
|
18 |
-# max_gibbmass_paraA(P) - limit truncated normal to max size |
|
19 |
-# nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms |
|
20 |
- |
|
21 |
- |
|
22 |
-# Output: list with matrices, mean chi-squared value, and gene set |
|
23 |
-# results |
|
24 |
- |
|
25 |
-#'\code{CoGAPS} calls the C++ MCMC code through gapsRun and performs Bayesian |
|
26 |
-#'matrix factorization returning the two matrices that reconstruct |
|
27 |
-#'the data matrix and then calls calcCoGAPSStat to estimate gene set |
|
28 |
-#'activity with nPerm set to 500 |
|
29 |
-#' |
|
30 |
-#'@param data data matrix |
|
31 |
-#'@param unc uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
32 |
-#'@param ABins a matrix of same size as A which gives relative |
|
33 |
-#' probability of that element being non-zero |
|
34 |
-#'@param PBins a matrix of same size as P which gives relative |
|
35 |
-#' probability of that element being non-zero |
|
36 |
-#'@param GStoGenes data.frame or list with gene sets |
|
37 |
-#'@param nFactor number of patterns (basis vectors, metagenes) |
|
38 |
-#'@param simulation_id name to attach to atoms files if created |
|
39 |
-#'@param nEquil number of iterations for burn-in |
|
40 |
-#'@param nSample number of iterations for sampling |
|
41 |
-#'@param nOutR how often to print status into R by iterations |
|
42 |
-#'@param output_atomic whether to write atom files (large) |
|
43 |
-#'@param fixedBinProbs Boolean for using relative probabilities |
|
44 |
-#' given in Abins and Pbins |
|
45 |
-#'@param fixedDomain character to indicate whether A or P is |
|
46 |
-#' domain for relative probabilities |
|
47 |
-#'@param sampleSnapshots Boolean to indicate whether to capture |
|
48 |
-#' individual samples from Markov chain during sampling |
|
49 |
-#'@param numSnapshots the number of individual samples to capture |
|
50 |
-#'@param plot Boolean to indicate whether to produce output graphics |
|
51 |
-#'@param nPerm number of permutations in gene set test |
|
52 |
-#'@param alphaA sparsity parameter for A domain |
|
53 |
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms |
|
54 |
-#'@param alphaP sparsity parameter for P domain |
|
55 |
-#'@param max_gibbmass_paraA limit truncated normal to max size |
|
56 |
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms |
|
57 |
-#'@param max_gibbmass_paraP limit truncated normal to max size |
|
58 |
-#'@export |
|
59 |
- |
|
60 |
-CoGAPS <- function(data, unc, ABins = data.frame(), PBins = data.frame(), GStoGenes, nFactor = 7, simulation_id="simulation", nEquil=1000, |
|
61 |
- nSample=1000, nOutR=1000, output_atomic=FALSE, |
|
62 |
- fixedBinProbs = FALSE, fixedDomain = "N", |
|
63 |
- sampleSnapshots = TRUE, numSnapshots = 100, |
|
64 |
- plot=TRUE, nPerm=500, |
|
65 |
- alphaA = 0.01, nMaxA = 100000, |
|
66 |
- max_gibbmass_paraA = 100.0, |
|
67 |
- alphaP = 0.01, nMaxP = 100000, |
|
68 |
- max_gibbmass_paraP = 100.0) { |
|
69 |
- |
|
70 |
- |
|
71 |
- # decompose the data |
|
72 |
- matrixDecomp <- gapsRun(data, unc, ABins, PBins, nFactor, simulation_id, |
|
73 |
- nEquil, nSample, nOutR, output_atomic, fixedBinProbs, |
|
74 |
- fixedDomain, sampleSnapshots, numSnapshots, alphaA, nMaxA, max_gibbmass_paraA, |
|
75 |
- alphaP, nMaxP, max_gibbmass_paraP) |
|
76 |
- |
|
77 |
- # plot patterns and show heatmap of Anorm |
|
78 |
- if (plot) { |
|
79 |
- plotGAPS(matrixDecomp$Amean, matrixDecomp$Pmean) |
|
80 |
- } |
|
81 |
- |
|
82 |
- # compute the gene set scores |
|
83 |
- GSP <- calcCoGAPSStat(matrixDecomp$Amean, matrixDecomp$Asd, GStoGenes, nPerm) |
|
84 |
- |
|
85 |
- return(list(meanChi2=matrixDecomp$calcChiSq, |
|
86 |
- D=data, Sigma=unc, |
|
87 |
- Amean=matrixDecomp$Amean, Asd=matrixDecomp$Asd, |
|
88 |
- Pmean=matrixDecomp$Pmean, Psd=matrixDecomp$Psd, |
|
89 |
- GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, GSActEst=GSP$GSActEst)) |
|
90 |
- |
|
91 |
- |
|
92 |
-} |
|
1 |
+# CoGAPS: function to call gapsRun and calculate gene set |
|
2 |
+# statistics on result, also to produce A and P plots |
|
3 |
+# History: v1.0 - EJF original CoGAPS, MFO modified to to match |
|
4 |
+# new variable names |
|
5 |
+ |
|
6 |
+# Inputs: data - data matrix |
|
7 |
+# unc - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
8 |
+# GStoGenes - data.frame or list of gene sets |
|
9 |
+# nFactor - number of patterns (basis vectors, metagenes) |
|
10 |
+# nEquil - number of iterations for burn-in |
|
11 |
+# nSample - number of iterations for sampling |
|
12 |
+# nOutR - how often to print status into R by iterations |
|
13 |
+# output_atomic - whether to write atom files (large) |
|
14 |
+# simulation_id - name to attach to atoms files if created |
|
15 |
+# plot - logical to determine if plots produced |
|
16 |
+# nPerm - number of permutations for gene set test |
|
17 |
+# alphaA, alphaP - sparsity parameters for A and P domains |
|
18 |
+# max_gibbmass_paraA(P) - limit truncated normal to max size |
|
19 |
+# nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms |
|
20 |
+ |
|
21 |
+ |
|
22 |
+# Output: list with matrices, mean chi-squared value, and gene set |
|
23 |
+# results |
|
24 |
+ |
|
25 |
+#'\code{CoGAPS} calls the C++ MCMC code through gapsRun and performs Bayesian |
|
26 |
+#'matrix factorization returning the two matrices that reconstruct |
|
27 |
+#'the data matrix and then calls calcCoGAPSStat to estimate gene set |
|
28 |
+#'activity with nPerm set to 500 |
|
29 |
+#' |
|
30 |
+#'@param data data matrix |
|
31 |
+#'@param unc uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
32 |
+#'@param ABins a matrix of same size as A which gives relative |
|
33 |
+#' probability of that element being non-zero |
|
34 |
+#'@param PBins a matrix of same size as P which gives relative |
|
35 |
+#' probability of that element being non-zero |
|
36 |
+#'@param GStoGenes data.frame or list with gene sets |
|
37 |
+#'@param nFactor number of patterns (basis vectors, metagenes) |
|
38 |
+#'@param simulation_id name to attach to atoms files if created |
|
39 |
+#'@param nEquil number of iterations for burn-in |
|
40 |
+#'@param nSample number of iterations for sampling |
|
41 |
+#'@param nOutR how often to print status into R by iterations |
|
42 |
+#'@param output_atomic whether to write atom files (large) |
|
43 |
+#'@param fixedBinProbs Boolean for using relative probabilities |
|
44 |
+#' given in Abins and Pbins |
|
45 |
+#'@param fixedDomain character to indicate whether A or P is |
|
46 |
+#' domain for relative probabilities |
|
47 |
+#'@param sampleSnapshots Boolean to indicate whether to capture |
|
48 |
+#' individual samples from Markov chain during sampling |
|
49 |
+#'@param numSnapshots the number of individual samples to capture |
|
50 |
+#'@param plot Boolean to indicate whether to produce output graphics |
|
51 |
+#'@param nPerm number of permutations in gene set test |
|
52 |
+#'@param alphaA sparsity parameter for A domain |
|
53 |
+#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms |
|
54 |
+#'@param alphaP sparsity parameter for P domain |
|
55 |
+#'@param max_gibbmass_paraA limit truncated normal to max size |
|
56 |
+#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms |
|
57 |
+#'@param max_gibbmass_paraP limit truncated normal to max size |
|
58 |
+#'@export |
|
59 |
+ |
|
60 |
+CoGAPS <- function(data, unc, ABins = data.frame(), PBins = data.frame(), GStoGenes, nFactor = 7, simulation_id="simulation", nEquil=1000, |
|
61 |
+ nSample=1000, nOutR=1000, output_atomic=FALSE, |
|
62 |
+ fixedBinProbs = FALSE, fixedDomain = "N", |
|
63 |
+ sampleSnapshots = TRUE, numSnapshots = 100, |
|
64 |
+ plot=TRUE, nPerm=500, |
|
65 |
+ alphaA = 0.01, nMaxA = 100000, |
|
66 |
+ max_gibbmass_paraA = 100.0, |
|
67 |
+ alphaP = 0.01, nMaxP = 100000, |
|
68 |
+ max_gibbmass_paraP = 100.0) { |
|
69 |
+ |
|
70 |
+ |
|
71 |
+ # decompose the data |
|
72 |
+ matrixDecomp <- gapsRun(data, unc, ABins, PBins, nFactor, simulation_id, |
|
73 |
+ nEquil, nSample, nOutR, output_atomic, fixedBinProbs, |
|
74 |
+ fixedDomain, sampleSnapshots, numSnapshots, alphaA, nMaxA, max_gibbmass_paraA, |
|
75 |
+ alphaP, nMaxP, max_gibbmass_paraP) |
|
76 |
+ |
|
77 |
+ # plot patterns and show heatmap of Anorm |
|
78 |
+ if (plot) { |
|
79 |
+ plotGAPS(matrixDecomp$Amean, matrixDecomp$Pmean) |
|
80 |
+ } |
|
81 |
+ |
|
82 |
+ # compute the gene set scores |
|
83 |
+ GSP <- calcCoGAPSStat(matrixDecomp$Amean, matrixDecomp$Asd, GStoGenes, nPerm) |
|
84 |
+ |
|
85 |
+ return(list(meanChi2=matrixDecomp$calcChiSq, |
|
86 |
+ D=data, Sigma=unc, |
|
87 |
+ Amean=matrixDecomp$Amean, Asd=matrixDecomp$Asd, |
|
88 |
+ Pmean=matrixDecomp$Pmean, Psd=matrixDecomp$Psd, |
|
89 |
+ GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, GSActEst=GSP$GSActEst)) |
|
90 |
+ |
|
91 |
+ |
|
92 |
+} |
... | ... |
@@ -10,19 +10,19 @@ |
10 | 10 |
#'that an element of Amean must be to get a value of 1 |
11 | 11 |
#'@export |
12 | 12 |
binaryA <-function(Amean, Asd, threshold=3) { |
13 |
- |
|
14 |
- |
|
13 |
+ |
|
14 |
+ |
|
15 | 15 |
BinA_Map <- ifelse(Amean/Asd > threshold,1,0) |
16 | 16 |
colnames(BinA_Map) <-colnames(BinA_Map,do.NULL=F,prefix = "Pattern ") |
17 | 17 |
rownames(BinA_Map) <- rep(" ",nrow(BinA_Map)) |
18 |
- |
|
19 |
- |
|
18 |
+ |
|
19 |
+ |
|
20 | 20 |
heatmap.2(BinA_Map, Rowv = FALSE, Colv = FALSE,dendrogram="none", |
21 | 21 |
scale="none",col = brewer.pal(3,"Blues"), trace="none", |
22 | 22 |
density.info="none",cexCol=1.3,srtCol=45, |
23 | 23 |
lmat=rbind(c(0, 3),c(2,1),c(0,4) ), |
24 |
- lwid=c(1,10),lhei=c(1, 4, 1.2 ), |
|
24 |
+ lwid=c(1,10),lhei=c(1, 4, 1.2 ), |
|
25 | 25 |
main="Heatmap of Standardized A Matrix") |
26 | 26 |
mtext(paste("(Threshold = ", threshold, ")")) |
27 |
- |
|
27 |
+ |
|
28 | 28 |
} |
... | ... |
@@ -1,12 +1,12 @@ |
1 | 1 |
# calcCoGAPSStat: calculate gene set statistics for A matrix columns |
2 |
-# History: v1.0 EJF original CoGAPS |
|
2 |
+# History: v1.0 EJF original CoGAPS |
|
3 | 3 |
|
4 | 4 |
# Inputs: Amean - A matrix mean values |
5 | 5 |
# Asd - A matrix standard deviations |
6 | 6 |
# GStoGenes - data.frame, GSA.genesets class, or list with gene sets |
7 | 7 |
# numPerm - number of permutations for null |
8 | 8 |
|
9 |
-# Output: list with gene set statistics |
|
9 |
+# Output: list with gene set statistics |
|
10 | 10 |
|
11 | 11 |
#'\code{calcCoGAPSStat} calculates the gene set statistics for each |
12 | 12 |
#'column of A using a Z-score from the elements of the A matrix, |
... | ... |
@@ -25,10 +25,10 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
25 | 25 |
#temp <- min(Asd[Asd>0]) |
26 | 26 |
Asd[Asd==0] <- .Machine$double.eps |
27 | 27 |
} |
28 |
- |
|
28 |
+ |
|
29 | 29 |
# calculate Z scores |
30 | 30 |
zMatrix <- calcZ(Amean,Asd) |
31 |
- |
|
31 |
+ |
|
32 | 32 |
# compute the p-value for each gene set belonging to each pattern |
33 | 33 |
|
34 | 34 |
# check input arguments |
... | ... |
@@ -40,7 +40,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
40 | 40 |
names(GStoGenes$genesets) <- GStoGenes$geneset.names |
41 | 41 |
GStoGenes <- GStoGenes$genesets |
42 | 42 |
} |
43 |
- |
|
43 |
+ |
|
44 | 44 |
if (is(GStoGenes, "list")) { |
45 | 45 |
GStoGenesList <- GStoGenes |
46 | 46 |
} else { |
... | ... |
@@ -49,7 +49,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
49 | 49 |
GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i])) |
50 | 50 |
} |
51 | 51 |
} |
52 |
- |
|
52 |
+ |
|
53 | 53 |
# get dimensions |
54 | 54 |
numGS <- length(names(GStoGenesList)) |
55 | 55 |
numPatt <- dim(zMatrix)[2] |
... | ... |
@@ -59,7 +59,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
59 | 59 |
statsUp <- matrix(ncol = numGS, nrow = numPatt) |
60 | 60 |
statsDown <- matrix(ncol = numGS, nrow = numPatt) |
61 | 61 |
actEst <- matrix(ncol = numGS, nrow = numPatt) |
62 |
- results <- list() |
|
62 |
+ results <- list() |
|
63 | 63 |
zPerm <- matrix(ncol=numPerm,nrow=numPatt) |
64 | 64 |
|
65 | 65 |
# do permutation test |
... | ... |
@@ -69,7 +69,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
69 | 69 |
zValues <- zMatrix[index,1] |
70 | 70 |
numGenes <- length(zValues) |
71 | 71 |
label <- as.character(numGenes) |
72 |
- |
|
72 |
+ |
|
73 | 73 |
if (!any(names(results)==label)) { |
74 | 74 |
for (p in 1:numPatt) { |
75 | 75 |
for (j in 1:numPerm) { |
... | ... |
@@ -100,11 +100,11 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
100 | 100 |
permzValues <- permzValues[ordering] |
101 | 101 |
|
102 | 102 |
statistic <- sum(zScore > permzValues) |
103 |
- statUpReg <- 1 - statistic/length(permzValues) |
|
103 |
+ statUpReg <- 1 - statistic/length(permzValues) |
|
104 | 104 |
statsUp[p,gs] <- max(statUpReg, 1/numPerm) |
105 | 105 |
|
106 | 106 |
statistic <- sum(zScore < permzValues) |
107 |
- statDownReg <- 1 - statistic/length(permzValues) |
|
107 |
+ statDownReg <- 1 - statistic/length(permzValues) |
|
108 | 108 |
statsDown[p,gs] <- max(statDownReg,1/numPerm) |
109 | 109 |
|
110 | 110 |
activity <- 1 - 2*max(statUpReg, 1/numPerm) |
... | ... |
@@ -124,7 +124,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) { |
124 | 124 |
|
125 | 125 |
results[['GSUpreg']] <- statsUp |
126 | 126 |
results[['GSDownreg']] <- statsDown |
127 |
- results[['GSActEst']] <- actEst |
|
128 |
- |
|
127 |
+ results[['GSActEst']] <- actEst |
|
128 |
+ |
|
129 | 129 |
return(results) |
130 | 130 |
} |
... | ... |
@@ -24,19 +24,19 @@ |
24 | 24 |
#'@export |
25 | 25 |
|
26 | 26 |
calcGeneGSStat <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), nullGenes=F) { |
27 |
- gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), |
|
27 |
+ gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), |
|
28 | 28 |
numPerm=numPerm) |
29 | 29 |
gsStat <- gsStat$GSUpreg |
30 |
- |
|
30 |
+ |
|
31 | 31 |
gsStat <- -log(gsStat) |
32 |
- |
|
32 |
+ |
|
33 | 33 |
if (!all(is.na(Pw))) { |
34 | 34 |
if (length(Pw) != length(gsStat)) { |
35 | 35 |
stop('Invalid weighting') |
36 | 36 |
} |
37 | 37 |
gsStat <- gsStat*Pw |
38 | 38 |
} |
39 |
- |
|
39 |
+ |
|
40 | 40 |
if (nullGenes) { |
41 | 41 |
ZD <- Amean[setdiff(row.names(Amean), GSGenes),] / |
42 | 42 |
Asd[setdiff(row.names(Amean), GSGenes),] |
... | ... |
@@ -44,18 +44,18 @@ calcGeneGSStat <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), |
44 | 44 |
ZD <- Amean[GSGenes,]/Asd[GSGenes,] |
45 | 45 |
} |
46 | 46 |
outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat)) |
47 |
- |
|
47 |
+ |
|
48 | 48 |
outStats <- outStats / apply(ZD,1,sum) |
49 |
- |
|
49 |
+ |
|
50 | 50 |
outStats[which(apply(ZD,1,sum) < .Machine$double.eps)] <- 0. |
51 |
- |
|
52 |
- |
|
51 |
+ |
|
52 |
+ |
|
53 | 53 |
if (sum(gsStat) < .Machine$double.eps) { |
54 | 54 |
return(0.) |
55 | 55 |
} |
56 |
- |
|
56 |
+ |
|
57 | 57 |
return(outStats) |
58 |
- |
|
58 |
+ |
|
59 | 59 |
} |
60 | 60 |
|
61 | 61 |
|
... | ... |
@@ -63,23 +63,23 @@ computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)), |
63 | 63 |
numPerm=500, PwNull=F) { |
64 | 64 |
geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw, |
65 | 65 |
GSGenes=GSGenes, numPerm=numPerm) |
66 |
- |
|
67 |
- |
|
68 |
- |
|
66 |
+ |
|
67 |
+ |
|
68 |
+ |
|
69 | 69 |
if (PwNull) { |
70 | 70 |
permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, |
71 | 71 |
GSGenes=GSGenes, numPerm=numPerm, Pw=Pw, |
72 | 72 |
nullGenes=T) |
73 | 73 |
} else { |
74 | 74 |
permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, |
75 |
- GSGenes=GSGenes, numPerm=numPerm, |
|
75 |
+ GSGenes=GSGenes, numPerm=numPerm, |
|
76 | 76 |
nullGenes=T) |
77 | 77 |
} |
78 |
- |
|
78 |
+ |
|
79 | 79 |
finalStats <- sapply(GSGenes, |
80 | 80 |
function(x) length(which(permGSStat > geneGSStat[x])) / length(permGSStat)) |
81 |
- |
|
81 |
+ |
|
82 | 82 |
return(finalStats) |
83 |
- |
|
84 |
- |
|
83 |
+ |
|
84 |
+ |
|
85 | 85 |
} |
... | ... |
@@ -1,144 +1,144 @@ |
1 |
-#Calculates significant genes in each pattern according to certain threshold |
|
2 |
-#Returns the significant gene names as well as well as the means of these matrices and number of genes in each |
|
3 |
- |
|
4 |
-gapsInterPattern <- function(Amean, Asd, sdThreshold = 3) |
|
5 |
-{ |
|
6 |
- #number of rows and cols of Asd |
|
7 |
- numGenes = length(Asd[,1]); |
|
8 |
- numCols = length(Asd[1,]); |
|
9 |
- |
|
10 |
- #Vector holding the number of each significant gene in each column |
|
11 |
- sigGeneNums = data.frame(); |
|
12 |
- |
|
13 |
- #Temp number of sig genes in the col |
|
14 |
- sigCount = 0; |
|
15 |
- |
|
16 |
- #Number to catch the amount of columns without significant genes |
|
17 |
- numEmptyCols = 0; |
|
18 |
- |
|
19 |
- #Keep an array of the significant gene counts |
|
20 |
- significantGeneNums = c(0); |
|
21 |
- |
|
22 |
- #Names of the genes from the data matrix |
|
23 |
- geneNames = names(Asd[,1]); |
|
24 |
- |
|
25 |
- #Names of the genes that are significant from the data matrix |
|
26 |
- sigGeneNames = "0"; |
|
27 |
- |
|
28 |
- #The numerator of our statistic |
|
29 |
- dimensionStatNumerator = 0; |
|
30 |
- |
|
31 |
- #The denominator of our statistic |
|
32 |
- dimensionStatDenominator = 0; |
|
33 |
- |
|
34 |
- #The value of our statistic |
|
35 |
- dimensionStatistic = 0; |
|
36 |
- |
|
37 |
- #A matrix holding the values of our statistics |
|
38 |
- dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols); |
|
39 |
- |
|
40 |
- #The mean of the statistic matrix |
|
41 |
- sbar = 0; |
|
42 |
- |
|
43 |
- #A list to return the significant genes in each col and the statistic matrix |
|
44 |
- results = list(list()); |
|
45 |
- |
|
46 |
- #Scan in the significant genes from each column of Asd |
|
47 |
- #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
48 |
- for(i in 1:numCols) |
|
49 |
- { |
|
50 |
- sigCount = 0; |
|
51 |
- for(j in 1:numGenes) |
|
52 |
- { |
|
53 |
- if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
54 |
- { |
|
55 |
- sigCount = sigCount + 1; |
|
56 |
- sigGeneNums[sigCount, i] = j; |
|
57 |
- } |
|
58 |
- } |
|
59 |
- |
|
60 |
- if(sigCount == 0) |
|
61 |
- { |
|
62 |
- sigGeneNums[1, i] = 0; |
|
63 |
- numEmptyCols = numEmptyCols + 1; |
|
64 |
- } |
|
65 |
- |
|
66 |
- #Save the number of sigGenes |
|
67 |
- significantGeneNums[i] = sigCount; |
|
68 |
- |
|
69 |
- #Get the names and store them |
|
70 |
- if(sigCount != 0) |
|
71 |
- { |
|
72 |
- for(k in 1:sigCount) |
|
73 |
- { |
|
74 |
- sigGeneNames[k] = geneNames[sigGeneNums[k, i]]; |
|
75 |
- } |
|
76 |
- results[[1]][[i]] = sigGeneNames; |
|
77 |
- sigGeneNames = "0"; |
|
78 |
- } |
|
79 |
- else |
|
80 |
- { |
|
81 |
- results[[1]][[i]] = "None"; |
|
82 |
- } |
|
83 |
- } |
|
84 |
- |
|
85 |
- if(any(significantGeneNums == 0)) |
|
86 |
- { |
|
87 |
- zeroSigCols = which(significantGeneNums == 0); |
|
88 |
- print("Warning: No Significant Genes in Pattern(s): "); |
|
89 |
- |
|
90 |
- for(z in 1:length(zeroSigCols)) |
|
91 |
- { |
|
92 |
- print(zeroSigCols[z]); |
|
93 |
- } |
|
94 |
- } |
|
95 |
- |
|
96 |
- #Now that we have the significant genes want to see if these genes are significant in other columns |
|
97 |
- #Fill across the row then down the column |
|
98 |
- |
|
99 |
- #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i' |
|
100 |
- for(i in 1:numCols) |
|
101 |
- { |
|
102 |
- for(j in 1:numCols) |
|
103 |
- { |
|
104 |
- #Grab the number of significant genes from the interested column |
|
105 |
- sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
106 |
- |
|
107 |
- if(sigCount != 0) |
|
108 |
- { |
|
109 |
- dimensionStatDenominator = sigCount; |
|
110 |
- dimensionStatNumerator = 0; |
|
111 |
- |
|
112 |
- #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd. |
|
113 |
- #Find the significance of these genes, Amean - threshold * Asd |
|
114 |
- for(k in 1:sigCount) |
|
115 |
- { |
|
116 |
- if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0) |
|
117 |
- { |
|
118 |
- dimensionStatNumerator = dimensionStatNumerator + 1; |
|
119 |
- } |
|
120 |
- } |
|
121 |
- |
|
122 |
- dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator; |
|
123 |
- |
|
124 |
- dimensionStatisticMatrix[i, j] = dimensionStatistic; |
|
125 |
- } |
|
126 |
- else |
|
127 |
- { |
|
128 |
- dimensionStatisticMatrix[i, j] = 0; |
|
129 |
- } |
|
130 |
- } |
|
131 |
- } |
|
132 |
- |
|
133 |
- #Find mean of the matrices (excluding the diagonal elements) |
|
134 |
- #we can subtract numCols as the diagonal elements are 1 |
|
135 |
- sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols))); |
|
136 |
- |
|
137 |
- results[[2]] = significantGeneNums; |
|
138 |
- results[[3]] = t(dimensionStatisticMatrix); |
|
139 |
- results[[4]] = sbar; |
|
140 |
- |
|
141 |
- names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue"); |
|
142 |
- |
|
143 |
- return(results); |
|
144 |
-} |
|
1 |
+#Calculates significant genes in each pattern according to certain threshold |
|
2 |
+#Returns the significant gene names as well as well as the means of these matrices and number of genes in each |
|
3 |
+ |
|
4 |
+gapsInterPattern <- function(Amean, Asd, sdThreshold = 3) |
|
5 |
+{ |
|
6 |
+ #number of rows and cols of Asd |
|
7 |
+ numGenes = length(Asd[,1]); |
|
8 |
+ numCols = length(Asd[1,]); |
|
9 |
+ |
|
10 |
+ #Vector holding the number of each significant gene in each column |
|
11 |
+ sigGeneNums = data.frame(); |
|
12 |
+ |
|
13 |
+ #Temp number of sig genes in the col |
|
14 |
+ sigCount = 0; |
|
15 |
+ |
|
16 |
+ #Number to catch the amount of columns without significant genes |
|
17 |
+ numEmptyCols = 0; |
|
18 |
+ |
|
19 |
+ #Keep an array of the significant gene counts |
|
20 |
+ significantGeneNums = c(0); |
|
21 |
+ |
|
22 |
+ #Names of the genes from the data matrix |
|
23 |
+ geneNames = names(Asd[,1]); |
|
24 |
+ |
|
25 |
+ #Names of the genes that are significant from the data matrix |
|
26 |
+ sigGeneNames = "0"; |
|
27 |
+ |
|
28 |
+ #The numerator of our statistic |
|
29 |
+ dimensionStatNumerator = 0; |
|
30 |
+ |
|
31 |
+ #The denominator of our statistic |
|
32 |
+ dimensionStatDenominator = 0; |
|
33 |
+ |
|
34 |
+ #The value of our statistic |
|
35 |
+ dimensionStatistic = 0; |
|
36 |
+ |
|
37 |
+ #A matrix holding the values of our statistics |
|
38 |
+ dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols); |
|
39 |
+ |
|
40 |
+ #The mean of the statistic matrix |
|
41 |
+ sbar = 0; |
|
42 |
+ |
|
43 |
+ #A list to return the significant genes in each col and the statistic matrix |
|
44 |
+ results = list(list()); |
|
45 |
+ |
|
46 |
+ #Scan in the significant genes from each column of Asd |
|
47 |
+ #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
48 |
+ for(i in 1:numCols) |
|
49 |
+ { |
|
50 |
+ sigCount = 0; |
|
51 |
+ for(j in 1:numGenes) |
|
52 |
+ { |
|
53 |
+ if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
54 |
+ { |
|
55 |
+ sigCount = sigCount + 1; |
|
56 |
+ sigGeneNums[sigCount, i] = j; |
|
57 |
+ } |
|
58 |
+ } |
|
59 |
+ |
|
60 |
+ if(sigCount == 0) |
|
61 |
+ { |
|
62 |
+ sigGeneNums[1, i] = 0; |
|
63 |
+ numEmptyCols = numEmptyCols + 1; |
|
64 |
+ } |
|
65 |
+ |
|
66 |
+ #Save the number of sigGenes |
|
67 |
+ significantGeneNums[i] = sigCount; |
|
68 |
+ |
|
69 |
+ #Get the names and store them |
|
70 |
+ if(sigCount != 0) |
|
71 |
+ { |
|
72 |
+ for(k in 1:sigCount) |
|
73 |
+ { |
|
74 |
+ sigGeneNames[k] = geneNames[sigGeneNums[k, i]]; |
|
75 |
+ } |
|
76 |
+ results[[1]][[i]] = sigGeneNames; |
|
77 |
+ sigGeneNames = "0"; |
|
78 |
+ } |
|
79 |
+ else |
|
80 |
+ { |
|
81 |
+ results[[1]][[i]] = "None"; |
|
82 |
+ } |
|
83 |
+ } |
|
84 |
+ |
|
85 |
+ if(any(significantGeneNums == 0)) |
|
86 |
+ { |
|
87 |
+ zeroSigCols = which(significantGeneNums == 0); |
|
88 |
+ print("Warning: No Significant Genes in Pattern(s): "); |
|
89 |
+ |
|
90 |
+ for(z in 1:length(zeroSigCols)) |
|
91 |
+ { |
|
92 |
+ print(zeroSigCols[z]); |
|
93 |
+ } |
|
94 |
+ } |
|
95 |
+ |
|
96 |
+ #Now that we have the significant genes want to see if these genes are significant in other columns |
|
97 |
+ #Fill across the row then down the column |
|
98 |
+ |
|
99 |
+ #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i' |
|
100 |
+ for(i in 1:numCols) |
|
101 |
+ { |
|
102 |
+ for(j in 1:numCols) |
|
103 |
+ { |
|
104 |
+ #Grab the number of significant genes from the interested column |
|
105 |
+ sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
106 |
+ |
|
107 |
+ if(sigCount != 0) |
|
108 |
+ { |
|
109 |
+ dimensionStatDenominator = sigCount; |
|
110 |
+ dimensionStatNumerator = 0; |
|
111 |
+ |
|
112 |
+ #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd. |
|
113 |
+ #Find the significance of these genes, Amean - threshold * Asd |
|
114 |
+ for(k in 1:sigCount) |
|
115 |
+ { |
|
116 |
+ if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0) |
|
117 |
+ { |
|
118 |
+ dimensionStatNumerator = dimensionStatNumerator + 1; |
|
119 |
+ } |
|
120 |
+ } |
|
121 |
+ |
|
122 |
+ dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator; |
|
123 |
+ |
|
124 |
+ dimensionStatisticMatrix[i, j] = dimensionStatistic; |
|
125 |
+ } |
|
126 |
+ else |
|
127 |
+ { |
|
128 |
+ dimensionStatisticMatrix[i, j] = 0; |
|
129 |
+ } |
|
130 |
+ } |
|
131 |
+ } |
|
132 |
+ |
|
133 |
+ #Find mean of the matrices (excluding the diagonal elements) |
|
134 |
+ #we can subtract numCols as the diagonal elements are 1 |
|
135 |
+ sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols))); |
|
136 |
+ |
|
137 |
+ results[[2]] = significantGeneNums; |
|
138 |
+ results[[3]] = t(dimensionStatisticMatrix); |
|
139 |
+ results[[4]] = sbar; |
|
140 |
+ |
|
141 |
+ names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue"); |
|
142 |
+ |
|
143 |
+ return(results); |
|
144 |
+} |
... | ... |
@@ -1,135 +1,135 @@ |
1 |
-#Calculates significant genes in each pattern according to certain threshold |
|
2 |
-#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices |
|
3 |
- |
|
4 |
-gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3) |
|
5 |
-{ |
|
6 |
- #number of rows and cols of Asd |
|
7 |
- numGenes = length(Asd[,1]); |
|
8 |
- numCols = length(Asd[1,]); |
|
9 |
- |
|
10 |
- #number of samples in DMatrix |
|
11 |
- numSamp = ncol(DMatrix); |
|
12 |
- |
|
13 |
- #Vector holding the number of each significant gene in each column |
|
14 |
- sigGeneNums = data.frame(); |
|
15 |
- |
|
16 |
- #Temp number of sig genes in the col |
|
17 |
- sigCount = 0; |
|
18 |
- |
|
19 |
- #Keep an array of the significant gene counts |
|
20 |
- significantGeneNums = c(0); |
|
21 |
- |
|
22 |
- #A matrix to hold the significant genes in D for the current pattern |
|
23 |
- #The matrix just acts as a subset of D, just eliminates non relevant rows |
|
24 |
- tempSubsetD = matrix(); |
|
25 |
- |
|
26 |
- #A matrix holding the values of our correlation coefficients between genes for the current column |
|
27 |
- tempGeneCorrMatrix = matrix(); |
|
28 |
- |
|
29 |
- #A list to hold all the correlation matrices |
|
30 |
- geneCorrMatrices = list(); |
|
31 |
- |
|
32 |
- #A list to hold all the means |
|
33 |
- geneCorrMatrMeans = list(); |
|
34 |
- |
|
35 |
- #The mean of all the correlation matrices |
|
36 |
- cbar = 0; |
|
37 |
- |
|
38 |
- #A list to return the means and the matrices |
|
39 |
- results = list(); |
|
40 |
- |
|
41 |
- #Scan in the significant genes from each column of Asd |
|
42 |
- #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
43 |
- for(i in 1:numCols) |
|
44 |
- { |
|
45 |
- sigCount = 0; |
|
46 |
- for(j in 1:numGenes) |
|
47 |
- { |
|
48 |
- if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
49 |
- { |
|
50 |
- sigCount = sigCount + 1; |
|
51 |
- sigGeneNums[sigCount, i] = j; |
|
52 |
- } |
|
53 |
- } |
|
54 |
- |
|
55 |
- if(sigCount == 0) |
|
56 |
- { |
|
57 |
- sigGeneNums[1, i] = 0; |
|
58 |
- } |
|
59 |
- |
|
60 |
- #Save the number of sigGenes |
|
61 |
- significantGeneNums[i] = sigCount; |
|
62 |
- } |
|
63 |
- |
|
64 |
- #If a pattern has no significant genes this is clearly an error so return such |
|
65 |
- if(any(significantGeneNums == 0)) |
|
66 |
- { |
|
67 |
- zeroSigCols = which(significantGeneNums == 0); |
|
68 |
- warning("Warning: No Significant Genes in Pattern(s): "); |
|
69 |
- |
|
70 |
- for(z in 1:length(zeroSigCols)) |
|
71 |
- { |
|
72 |
- message(zeroSigCols[z]); |
|
73 |
- } |
|
74 |
- } |
|
75 |
- |
|
76 |
- |
|
77 |
- #Now that we have the significant genes want to grab these from our original D matrix |
|
78 |
- #and find the sigGene x sigGene correlation matrix and find its mean |
|
79 |
- |
|
80 |
- for(j in 1:numCols) |
|
81 |
- { |
|
82 |
- #Grab the number of significant genes from the interested column |
|
83 |
- sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
84 |
- |
|
85 |
- if(sigCount != 0) |
|
86 |
- { |
|
87 |
- |
|
88 |
- #loop through the number of significant genes and pull out the rows of D that represent these genes. |
|
89 |
- #Then find the correlation between them with the built in R corr function |
|
90 |
- tempSubsetD = matrix(nrow = sigCount, ncol = numSamp); |
|
91 |
- for(k in 1:sigCount) |
|
92 |
- { |
|
93 |
- #Subset D based on significant Genes |
|
94 |
- #need to transpose as it reads this in as column vector otherwise |
|
95 |
- tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]); |
|
96 |
- } |
|
97 |
- |
|
98 |
- #Find the correlation between these genes in D |
|
99 |
- #Need to transpose as it calculates correlations between the columns |
|
100 |
- tempGeneCorrMatrix = cor(t(tempSubsetD)); |
|
101 |
- |
|
102 |
- #Find the mean of this matrix |
|
103 |
- tempGeneCorrMatrMean = mean(tempGeneCorrMatrix); |
|
104 |
- |
|
105 |
- } |
|
106 |
- else |
|
107 |
- { |
|
108 |
- tempGeneCorrMatrix = 0; |
|
109 |
- tempGeneCorrMatrMean = 0; |
|
110 |
- } |
|
111 |
- |
|
112 |
- #Save these in the overall list |
|
113 |
- geneCorrMatrices[[j]] = tempGeneCorrMatrix; |
|
114 |
- geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean; |
|
115 |
- |
|
116 |
- } |
|
117 |
- |
|
118 |
- #Return as an overall list of lists |
|
119 |
- # We return Corr Matrices themselves, their means, and the means of the means (cbar) |
|
120 |
- results[[1]] = geneCorrMatrices; |
|
121 |
- results[[2]] = geneCorrMatrMeans; |
|
122 |
- |
|
123 |
- #Return as an overall list of lists |
|
124 |
- for(i in 1:numCols) |
|
125 |
- { |
|
126 |
- cbar = cbar + results[[2]][[i]]; |
|
127 |
- } |
|
128 |
- |
|
129 |
- cbar = cbar/numCols; |
|
130 |
- results[[3]] = cbar; |
|
131 |
- |
|
132 |
- names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue"); |
|
133 |
- |
|
134 |
- return(results); |
|
135 |
-} |
|
1 |
+#Calculates significant genes in each pattern according to certain threshold |
|
2 |
+#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices |
|
3 |
+ |
|
4 |
+gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3) |
|
5 |
+{ |
|
6 |
+ #number of rows and cols of Asd |
|
7 |
+ numGenes = length(Asd[,1]); |
|
8 |
+ numCols = length(Asd[1,]); |
|
9 |
+ |
|
10 |
+ #number of samples in DMatrix |
|
11 |
+ numSamp = ncol(DMatrix); |
|
12 |
+ |
|
13 |
+ #Vector holding the number of each significant gene in each column |
|
14 |
+ sigGeneNums = data.frame(); |
|
15 |
+ |
|
16 |
+ #Temp number of sig genes in the col |
|
17 |
+ sigCount = 0; |
|
18 |
+ |
|
19 |
+ #Keep an array of the significant gene counts |
|
20 |
+ significantGeneNums = c(0); |
|
21 |
+ |
|
22 |
+ #A matrix to hold the significant genes in D for the current pattern |
|
23 |
+ #The matrix just acts as a subset of D, just eliminates non relevant rows |
|
24 |
+ tempSubsetD = matrix(); |
|
25 |
+ |
|
26 |
+ #A matrix holding the values of our correlation coefficients between genes for the current column |
|
27 |
+ tempGeneCorrMatrix = matrix(); |
|
28 |
+ |
|
29 |
+ #A list to hold all the correlation matrices |
|
30 |
+ geneCorrMatrices = list(); |
|
31 |
+ |
|
32 |
+ #A list to hold all the means |
|
33 |
+ geneCorrMatrMeans = list(); |
|
34 |
+ |
|
35 |
+ #The mean of all the correlation matrices |
|
36 |
+ cbar = 0; |
|
37 |
+ |
|
38 |
+ #A list to return the means and the matrices |
|
39 |
+ results = list(); |
|
40 |
+ |
|
41 |
+ #Scan in the significant genes from each column of Asd |
|
42 |
+ #The columns of sigGeneNums hold the significant genes for each col of Asd |
|
43 |
+ for(i in 1:numCols) |
|
44 |
+ { |
|
45 |
+ sigCount = 0; |
|
46 |
+ for(j in 1:numGenes) |
|
47 |
+ { |
|
48 |
+ if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0) |
|
49 |
+ { |
|
50 |
+ sigCount = sigCount + 1; |
|
51 |
+ sigGeneNums[sigCount, i] = j; |
|
52 |
+ } |
|
53 |
+ } |
|
54 |
+ |
|
55 |
+ if(sigCount == 0) |
|
56 |
+ { |
|
57 |
+ sigGeneNums[1, i] = 0; |
|
58 |
+ } |
|
59 |
+ |
|
60 |
+ #Save the number of sigGenes |
|
61 |
+ significantGeneNums[i] = sigCount; |
|
62 |
+ } |
|
63 |
+ |
|
64 |
+ #If a pattern has no significant genes this is clearly an error so return such |
|
65 |
+ if(any(significantGeneNums == 0)) |
|
66 |
+ { |
|
67 |
+ zeroSigCols = which(significantGeneNums == 0); |
|
68 |
+ warning("Warning: No Significant Genes in Pattern(s): "); |
|
69 |
+ |
|
70 |
+ for(z in 1:length(zeroSigCols)) |
|
71 |
+ { |
|
72 |
+ message(zeroSigCols[z]); |
|
73 |
+ } |
|
74 |
+ } |
|
75 |
+ |
|
76 |
+ |
|
77 |
+ #Now that we have the significant genes want to grab these from our original D matrix |
|
78 |
+ #and find the sigGene x sigGene correlation matrix and find its mean |
|
79 |
+ |
|
80 |
+ for(j in 1:numCols) |
|
81 |
+ { |
|
82 |
+ #Grab the number of significant genes from the interested column |
|
83 |
+ sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE); |
|
84 |
+ |
|
85 |
+ if(sigCount != 0) |
|
86 |
+ { |
|
87 |
+ |
|
88 |
+ #loop through the number of significant genes and pull out the rows of D that represent these genes. |
|
89 |
+ #Then find the correlation between them with the built in R corr function |
|
90 |
+ tempSubsetD = matrix(nrow = sigCount, ncol = numSamp); |
|
91 |
+ for(k in 1:sigCount) |
|
92 |
+ { |
|
93 |
+ #Subset D based on significant Genes |
|
94 |
+ #need to transpose as it reads this in as column vector otherwise |
|
95 |
+ tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]); |
|
96 |
+ } |
|
97 |
+ |
|
98 |
+ #Find the correlation between these genes in D |
|
99 |
+ #Need to transpose as it calculates correlations between the columns |
|
100 |
+ tempGeneCorrMatrix = cor(t(tempSubsetD)); |
|
101 |
+ |
|
102 |
+ #Find the mean of this matrix |
|
103 |
+ tempGeneCorrMatrMean = mean(tempGeneCorrMatrix); |
|
104 |
+ |
|
105 |
+ } |
|
106 |
+ else |
|
107 |
+ { |
|
108 |
+ tempGeneCorrMatrix = 0; |
|
109 |
+ tempGeneCorrMatrMean = 0; |
|
110 |
+ } |
|
111 |
+ |
|
112 |
+ #Save these in the overall list |
|
113 |
+ geneCorrMatrices[[j]] = tempGeneCorrMatrix; |
|
114 |
+ geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean; |
|
115 |
+ |
|
116 |
+ } |
|
117 |
+ |
|
118 |
+ #Return as an overall list of lists |
|
119 |
+ # We return Corr Matrices themselves, their means, and the means of the means (cbar) |
|
120 |
+ results[[1]] = geneCorrMatrices; |
|
121 |
+ results[[2]] = geneCorrMatrMeans; |
|
122 |
+ |
|
123 |
+ #Return as an overall list of lists |
|
124 |
+ for(i in 1:numCols) |
|
125 |
+ { |
|
126 |
+ cbar = cbar + results[[2]][[i]]; |
|
127 |
+ } |
|
128 |
+ |
|
129 |
+ cbar = cbar/numCols; |
|
130 |
+ results[[3]] = cbar; |
|
131 |
+ |
|
132 |
+ names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue"); |
|
133 |
+ |
|
134 |
+ return(results); |
|
135 |
+} |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
# Inputs: D - data matrix |
6 | 6 |
# S - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
7 | 7 |
# FP - fixed A columns or P rows |
8 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
8 |
+# nFactor - number of patterns (basis vectors, metagenes) |
|
9 | 9 |
# simulation_id - name to attach to atoms files if created |
10 | 10 |
# nEquil - number of iterations for burn-in |
11 | 11 |
# nSample - number of iterations for sampling |
... | ... |
@@ -55,29 +55,29 @@ |
55 | 55 |
#'@param max_gibbmass_paraP limit truncated normal to max size |
56 | 56 |
#'@export |
57 | 57 |
|
58 |
-gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 5, simulation_id = "simulation", |
|
59 |
- nEquil = 1000, nSample = 1000, nOutR = 1000, output_atomic = FALSE, fixedMatrix = "P", |
|
60 |
- fixedBinProbs = FALSE, fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01, |
|
58 |
+gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 5, simulation_id = "simulation", |
|
59 |
+ nEquil = 1000, nSample = 1000, nOutR = 1000, output_atomic = FALSE, fixedMatrix = "P", |
|
60 |
+ fixedBinProbs = FALSE, fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01, |
|
61 | 61 |
nMaxA = 100000, max_gibbmass_paraA = 100.0, alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0) |
62 | 62 |
{ |
63 | 63 |
#Begin data type error checking code |
64 | 64 |
charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix)) |
65 | 65 |
charCheck = c("simulation_id", "fixedDomain", "fixedMatrix") |
66 |
- |
|
66 |
+ |
|
67 | 67 |
boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs), !is.logical(sampleSnapshots)) |
68 | 68 |
boolCheck = c("output_atomic", "fixedBinProbs", "sampleSnapshots") |
69 |
- |
|
70 |
- numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), !is.numeric(numSnapshots), |
|
71 |
- !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), |
|
69 |
+ |
|
70 |
+ numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), !is.numeric(numSnapshots), |
|
71 |
+ !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), |
|
72 | 72 |
!is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP)) |
73 |
- numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "numSnapshots", "alphaA", "nMaxA", |
|
73 |
+ numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "numSnapshots", "alphaA", "nMaxA", |
|
74 | 74 |
"max_gibbmass_paraA", "alphaP", "nMaxP", "max_gibbmass_paraP") |
75 |
- |
|
75 |
+ |
|
76 | 76 |
dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP)) |
77 | 77 |
dataFrameCheck = c("D", "S", "ABins", "PBins", "FP") |
78 |
- |
|
78 |
+ |
|
79 | 79 |
matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP)) |
80 |
- |
|
80 |
+ |
|
81 | 81 |
if(any(charDataErrors)) |
82 | 82 |
{ |
83 | 83 |
#Check which of these is not a string |
... | ... |
@@ -88,10 +88,10 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa |
88 | 88 |
stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details.")) |
89 | 89 |
} |
90 | 90 |
} |
91 |
- |
|
91 |
+ |
|
92 | 92 |
return() |
93 | 93 |
} |
94 |
- |
|
94 |
+ |
|
95 | 95 |
if(any(boolDataErrors)) |
96 | 96 |
{ |
97 | 97 |
#Check which of these is not a boolean |
... | ... |
@@ -102,10 +102,10 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa |
102 | 102 |
stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details.")) |
103 | 103 |
} |
104 | 104 |
} |
105 |
- |
|
105 |
+ |
|
106 | 106 |
return() |
107 | 107 |
} |
108 |
- |
|
108 |
+ |
|
109 | 109 |
if(any(numericDataErrors)) |
110 | 110 |
{ |
111 | 111 |
#Check which of these is not an integer/double |
... | ... |
@@ -114,14 +114,14 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa |
114 | 114 |
if(numericDataErrors[i] == TRUE) |
115 | 115 |
{ |
116 | 116 |
stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details.")) |
117 |
- |
|
117 |
+ |
|
118 | 118 |
} |
119 | 119 |
} |
120 | 120 |
return() |
121 | 121 |
} |
122 |
- |
|
123 |
- |
|
124 |
- #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame |
|
122 |
+ |
|
123 |
+ |
|
124 |
+ #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame |
|
125 | 125 |
if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5]))) |
126 | 126 |
{ |
127 | 127 |
for(i in 1:length(dataFrameCheck)) |
... | ... |
@@ -129,12 +129,12 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa |
129 | 129 |
if((dataFrameErrors[i] && matrixErrors[i]) == TRUE) |
130 | 130 |
{ |
131 | 131 |
stop(paste("Error in gapsRun: Argument",dataFrameCheck[i],"is not a matrix or data.frame. Please see documentation for details.")) |
132 |
- |
|
132 |
+ |
|
133 | 133 |
} |
134 | 134 |
} |
135 | 135 |
return() |
136 | 136 |
} |
137 |
- |
|
137 |
+ |
|
138 | 138 |
#Floor the parameters that are integers to prevent allowing doubles. |
139 | 139 |
nFactor = floor(nFactor) |
140 | 140 |
nEquil = floor(nEquil) |
... | ... |
@@ -143,87 +143,87 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa |
143 | 143 |
numSnapshots = floor(numSnapshots) |
144 | 144 |
nMaxA = floor(nMaxA) |
145 | 145 |
nMaxP = floor(nMaxP) |
146 |
- |
|
146 |
+ |
|
147 | 147 |
# pass all settings to C++ within a list |
148 | 148 |
# if (is.null(P)) { |
149 | 149 |
Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix, sampleSnapshots); |
150 |
- |
|
151 |
- ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, |
|
150 |
+ |
|
151 |
+ ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, |
|
152 | 152 |
alphaP, nMaxP, max_gibbmass_paraP, numSnapshots); |
153 |
- |
|
153 |
+ |
|
154 | 154 |
#Begin logic error checking code |
155 |
- |
|
156 |
- #Check for negative or zero arguments |
|
155 |
+ |
|
156 |
+ #Check for negative or zero arguments |
|
157 | 157 |
if(any(ConfigNums <= 0)) |
158 | 158 |
{ |
159 | 159 |
stop("Error in gapsRun: Numeric Arguments cannot be non-zero!") |
160 |
- |
|
160 |
+ |
|
161 | 161 |
return() |
162 | 162 |
} |
163 |
- |
|
163 |
+ |
|
164 | 164 |
#Check for nonsensical inputs (such as numSnapshots < nEquil or nSample) |
165 | 165 |
if((numSnapshots > nEquil) || (numSnapshots > nSample)) |
166 | 166 |
{ |
167 | 167 |
stop("Error in gapsRun: Cannot have more snapshots of A and P than equilibration and/or sampling iterations.") |
168 |
- |
|
168 |
+ |
|
169 | 169 |
return() |
170 | 170 |
} |
171 |
- |
|
171 |
+ |
|
172 | 172 |
if((nOutR > nEquil) || (nOutR > nSample)) |
173 | 173 |
{ |
174 | 174 |
stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.") |
175 |
- |
|
175 |
+ |
|
176 | 176 |
return() |
177 | 177 |
} |
178 |
- |
|
178 |
+ |
|
179 | 179 |
if(ncol(FP) != ncol(D)) |
180 | 180 |
{ |
181 | 181 |
stop("Error in gapsRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.") |
182 |
- |
|
182 |
+ |
|
183 | 183 |
return() |
184 | 184 |
} |
185 |
- |
|
185 |
+ |
|
186 | 186 |
if(nFactor < (nrow(FP))) |
187 | 187 |
{ |
188 | 188 |
stop("Error in gapsRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.") |
189 |
- |
|
189 |
+ |
|
190 | 190 |
return() |
191 | 191 |
} |
192 |
- |
|
192 |
+ |
|
193 | 193 |
if(nFactor > (ncol(D))) |
194 | 194 |
{ |
195 | 195 |
warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.") |
196 | 196 |
} |
197 |
- |
|
198 |
- |
|
199 |
- |
|
197 |
+ |
|
198 |
+ |
|
199 |
+ |
|
200 | 200 |
# P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass |
201 | 201 |
# } else { |
202 | 202 |
# Config = c(nFactor, simulation_id, nEquil, nSample, nOutR, |
203 | 203 |
# output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor, |
204 | 204 |
# alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1) |
205 |
- |
|
205 |
+ |
|
206 | 206 |
# } |
207 |
- |
|
207 |
+ |
|
208 | 208 |
geneNames = rownames(D); |
209 | 209 |
sampleNames = colnames(D); |
210 |
- |
|
210 |
+ |
|
211 | 211 |
# label patterns as Patt N |
212 | 212 |
patternNames = c("0"); |
213 | 213 |
for(i in 1:nFactor) |
214 | 214 |
{ |
215 | 215 |
patternNames[i] = paste('Patt', i); |
216 | 216 |
} |
217 |
- |
|
217 |
+ |
|
218 | 218 |
# call to C++ Rcpp code |
219 | 219 |
cogapResult = cogapsMap(D, S, FP, ABins, PBins, Config, ConfigNums); |
220 |
- |
|
220 |
+ |
|
221 | 221 |
# convert returned files to matrices to simplify visualization and processing |
222 | 222 |
cogapResult$Amean = as.matrix(cogapResult$Amean); |
223 | 223 |
cogapResult$Asd = as.matrix(cogapResult$Asd); |
224 | 224 |
cogapResult$Pmean = as.matrix(cogapResult$Pmean); |
225 | 225 |
cogapResult$Psd = as.matrix(cogapResult$Psd); |
226 |
- |
|
226 |
+ |
|
227 | 227 |
# label matrices |
228 | 228 |
colnames(cogapResult$Amean) = patternNames; |
229 | 229 |
rownames(cogapResult$Amean) = geneNames; |
... | ... |
@@ -233,13 +233,13 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa |
233 | 233 |
rownames(cogapResult$Pmean) = patternNames; |
234 | 234 |
colnames(cogapResult$Psd) = sampleNames; |
235 | 235 |
rownames(cogapResult$Psd) = patternNames; |
236 |
- |
|
236 |
+ |
|
237 | 237 |
# calculate chi-squared of mean, this should be smaller than individual |
238 | 238 |
# chi-squared sample values if sampling is good |
239 | 239 |
calcChiSq = c(0); |
240 | 240 |
MMatrix = (cogapResult$Amean %*% cogapResult$Pmean); |
241 |
- |
|
242 |
- |
|
241 |
+ |
|
242 |
+ |
|
243 | 243 |
for(i in 1:(nrow(MMatrix))) |
244 | 244 |
{ |
245 | 245 |
for(j in 1:(ncol(MMatrix))) |
... | ... |
@@ -247,13 +247,13 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa |
247 | 247 |
calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2); |
248 | 248 |
} |
249 | 249 |
} |
250 |
- |
|
250 |
+ |
|
251 | 251 |
cogapResult = c(cogapResult, calcChiSq); |
252 |
- |
|
253 |
- |
|
252 |
+ |
|
253 |
+ |
|
254 | 254 |
names(cogapResult)[12] = "meanChi2"; |
255 |
- |
|
255 |
+ |
|
256 | 256 |
message(paste("Chi-Squared of Mean:",calcChiSq)) |
257 |
- |
|
257 |
+ |
|
258 | 258 |
return(cogapResult); |
259 | 259 |
} |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
# Inputs: D - data matrix |
6 | 6 |
# S - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
7 | 7 |
# FP - fixed A columns or P rows |
8 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
8 |
+# nFactor - number of patterns (basis vectors, metagenes) |
|
9 | 9 |
# simulation_id - name to attach to atoms files if created |
10 | 10 |
# nEquil - number of iterations for burn-in |
11 | 11 |
# nSample - number of iterations for sampling |
... | ... |
@@ -61,25 +61,25 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
61 | 61 |
max_gibbmass_paraA = 100.0, alphaP = 0.01, |
62 | 62 |
nMaxP = 100000, max_gibbmass_paraP = 100.0) |
63 | 63 |
{ |
64 |
- |
|
64 |
+ |
|
65 | 65 |
#Begin data type error checking code |
66 | 66 |
charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix)) |
67 | 67 |
charCheck = c("simulation_id", "fixedDomain", "fixedMatrix") |
68 |
- |
|
68 |
+ |
|
69 | 69 |
boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs)) |
70 | 70 |
boolCheck = c("output_atomic", "fixedBinProbs") |
71 |
- |
|
71 |
+ |
|
72 | 72 |
numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), |
73 |
- !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), |
|
73 |
+ !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), |
|
74 | 74 |
!is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP)) |
75 |
- numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA", |
|
75 |
+ numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA", |
|
76 | 76 |
"max_gibbmass_paraA", "alphaP", "nMaxP", "max_gibbmass_paraP") |
77 |
- |
|
77 |
+ |
|
78 | 78 |
dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP)) |
79 | 79 |
dataFrameCheck = c("D", "S", "ABins", "PBins", "FP") |
80 |
- |
|
80 |
+ |
|
81 | 81 |
matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP)) |
82 |
- |
|
82 |
+ |
|
83 | 83 |
if(any(charDataErrors)) |
84 | 84 |
{ |
85 | 85 |
#Check which of these is not a string |
... | ... |
@@ -90,10 +90,10 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
90 | 90 |
stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details.")) |
91 | 91 |
} |
92 | 92 |
} |
93 |
- |
|
93 |
+ |
|
94 | 94 |
return() |
95 | 95 |
} |
96 |
- |
|
96 |
+ |
|
97 | 97 |
if(any(boolDataErrors)) |
98 | 98 |
{ |
99 | 99 |
#Check which of these is not a boolean |
... | ... |
@@ -104,10 +104,10 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
104 | 104 |
stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details.")) |
105 | 105 |
} |
106 | 106 |
} |
107 |
- |
|
107 |
+ |
|
108 | 108 |
return() |
109 | 109 |
} |
110 |
- |
|
110 |
+ |
|
111 | 111 |
if(any(numericDataErrors)) |
112 | 112 |
{ |
113 | 113 |
#Check which of these is not an integer/double |
... | ... |
@@ -116,14 +116,14 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
116 | 116 |
if(numericDataErrors[i] == TRUE) |
117 | 117 |
{ |
118 | 118 |
stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details.")) |
119 |
- |
|
119 |
+ |
|
120 | 120 |
} |
121 | 121 |
} |
122 | 122 |
return() |
123 | 123 |
} |
124 |
- |
|
125 |
- |
|
126 |
- #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame |
|
124 |
+ |
|
125 |
+ |
|
126 |
+ #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame |
|
127 | 127 |
if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5]))) |
128 | 128 |
{ |
129 | 129 |
for(i in 1:length(dataFrameCheck)) |
... | ... |
@@ -131,12 +131,12 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
131 | 131 |
if((dataFrameErrors[i] && matrixErrors[i]) == TRUE) |
132 | 132 |
{ |
133 | 133 |
stop(paste("Error in gapsRun: Argument", dataFrameCheck[i], "is not a matrix or data.frame. Please see documentation for details.")) |
134 |
- |
|
134 |
+ |
|
135 | 135 |
} |
136 | 136 |
} |
137 | 137 |
return() |
138 | 138 |
} |
139 |
- |
|
139 |
+ |
|
140 | 140 |
#Floor the parameters that are integers to prevent allowing doubles. |
141 | 141 |
nFactor = floor(nFactor) |
142 | 142 |
nEquil = floor(nEquil) |
... | ... |
@@ -144,77 +144,77 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
144 | 144 |
nOutR = floor(nOutR) |
145 | 145 |
nMaxA = floor(nMaxA) |
146 | 146 |
nMaxP = floor(nMaxP) |
147 |
- |
|
147 |
+ |
|
148 | 148 |
# pass all settings to C++ within a list |
149 | 149 |
# if (is.null(P)) { |
150 | 150 |
Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix); |
151 |
- |
|
152 |
- ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, |
|
151 |
+ |
|
152 |
+ ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, |
|
153 | 153 |
alphaP, nMaxP, max_gibbmass_paraP); |
154 |
- |
|
154 |
+ |
|
155 | 155 |
#Begin logic error checking code |
156 |
- |
|
157 |
- #Check for negative or zero arguments |
|
156 |
+ |
|
157 |
+ #Check for negative or zero arguments |
|
158 | 158 |
if(any(ConfigNums <= 0)) |
159 | 159 |
{ |
160 | 160 |
stop("Error in gapsRun: Numeric Arguments cannot be non-zero!") |
161 |
- |
|
161 |
+ |
|
162 | 162 |
return() |
163 | 163 |
} |
164 |
- |
|
164 |
+ |
|
165 | 165 |
if((nOutR > nEquil) || (nOutR > nSample)) |
166 | 166 |
{ |
167 | 167 |
stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.") |
168 |
- |
|
168 |
+ |
|
169 | 169 |
return() |
170 | 170 |
} |
171 |
- |
|
171 |
+ |
|
172 | 172 |
if(ncol(FP) != ncol(D)) |
173 | 173 |
{ |
174 | 174 |
stop("Error in gapsRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.") |
175 |
- |
|
175 |
+ |
|
176 | 176 |
return() |
177 | 177 |
} |
178 |
- |
|
178 |
+ |
|
179 | 179 |
if(nFactor < (nrow(FP))) |
180 | 180 |
{ |
181 | 181 |
stop("Error in gapsRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.") |
182 |
- |
|
182 |
+ |
|
183 | 183 |
return() |
184 | 184 |
} |
185 |
- |
|
185 |
+ |
|
186 | 186 |
if(nFactor > (ncol(D))) |
187 | 187 |
{ |
188 | 188 |
warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.") |
189 | 189 |
} |
190 |
- |
|
190 |
+ |
|
191 | 191 |
# P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass |
192 | 192 |
# } else { |
193 | 193 |
# Config = c(nFactor, simulation_id, nEquil, nSample, nOutR, |
194 | 194 |
# output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor, |
195 | 195 |
# alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1) |
196 |
- |
|
196 |
+ |
|
197 | 197 |
# } |
198 |
- |
|
198 |
+ |
|
199 | 199 |
geneNames = rownames(D); |
200 | 200 |
sampleNames = colnames(D); |
201 |
- |
|
201 |
+ |
|
202 | 202 |
# label patterns as Patt N |
203 | 203 |
patternNames = c("0"); |
204 | 204 |
for(i in 1:nFactor) |
205 | 205 |
{ |
206 | 206 |
patternNames[i] = paste('Patt', i); |
207 | 207 |
} |
208 |
- |
|
208 |
+ |
|
209 | 209 |
# call to C++ Rcpp code |
210 | 210 |
cogapResult = cogapsMapTest(D, S, FP, ABins, PBins, Config, ConfigNums); |
211 |
- |
|
211 |
+ |
|
212 | 212 |
# convert returned files to matrices to simplify visualization and processing |
213 | 213 |
cogapResult$Amean = as.matrix(cogapResult$Amean); |
214 | 214 |
cogapResult$Asd = as.matrix(cogapResult$Asd); |
215 | 215 |
cogapResult$Pmean = as.matrix(cogapResult$Pmean); |
216 | 216 |
cogapResult$Psd = as.matrix(cogapResult$Psd); |
217 |
- |
|
217 |
+ |
|
218 | 218 |
# label matrices |
219 | 219 |
colnames(cogapResult$Amean) = patternNames; |
220 | 220 |
rownames(cogapResult$Amean) = geneNames; |
... | ... |
@@ -224,13 +224,13 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
224 | 224 |
rownames(cogapResult$Pmean) = patternNames; |
225 | 225 |
colnames(cogapResult$Psd) = sampleNames; |
226 | 226 |
rownames(cogapResult$Psd) = patternNames; |
227 |
- |
|
227 |
+ |
|
228 | 228 |
# calculate chi-squared of mean, this should be smaller than individual |
229 | 229 |
# chi-squared sample values if sampling is good |
230 | 230 |
calcChiSq = c(0); |
231 | 231 |