Browse code

Remove trailing whitespaces

Jacob Carey authored on 30/12/2015 00:47:37
Showing 18 changed files

... ...
@@ -1,92 +1,92 @@
1
-# CoGAPS: function to call gapsRun and calculate gene set
2
-#         statistics on result, also to produce A and P plots
3
-# History: v1.0 - EJF original CoGAPS, MFO modified to to match
4
-#                 new variable names
5
-
6
-# Inputs: data - data matrix
7
-#         unc - uncertainty matrix (std devs for chi-squared of Log Likelihood)
8
-#         GStoGenes - data.frame or list of gene sets
9
-#         nFactor - number of patterns (basis vectors, metagenes) 
10
-#         nEquil - number of iterations for burn-in
11
-#         nSample - number of iterations for sampling
12
-#         nOutR - how often to print status into R by iterations
13
-#         output_atomic - whether to write atom files (large)
14
-#         simulation_id - name to attach to atoms files if created
15
-#         plot - logical to determine if plots produced
16
-#         nPerm - number of permutations for gene set test
17
-#         alphaA, alphaP - sparsity parameters for A and P domains
18
-#         max_gibbmass_paraA(P) - limit truncated normal to max size
19
-#         nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms
20
-
21
-
22
-# Output: list with matrices, mean chi-squared value, and gene set
23
-#         results
24
-
25
-#'\code{CoGAPS} calls the C++ MCMC code through gapsRun and performs Bayesian
26
-#'matrix factorization returning the two matrices that reconstruct
27
-#'the data matrix and then calls calcCoGAPSStat to estimate gene set
28
-#'activity with nPerm set to 500
29
-#'
30
-#'@param data data matrix
31
-#'@param unc uncertainty matrix (std devs for chi-squared of Log Likelihood)
32
-#'@param ABins a matrix of same size as A which gives relative
33
-#' probability of that element being non-zero
34
-#'@param PBins a matrix of same size as P which gives relative
35
-#' probability of that element being non-zero
36
-#'@param GStoGenes data.frame or list with gene sets
37
-#'@param nFactor number of patterns (basis vectors, metagenes)
38
-#'@param simulation_id name to attach to atoms files if created
39
-#'@param nEquil number of iterations for burn-in
40
-#'@param nSample number of iterations for sampling
41
-#'@param nOutR how often to print status into R by iterations
42
-#'@param output_atomic whether to write atom files (large)
43
-#'@param fixedBinProbs Boolean for using relative probabilities
44
-#' given in Abins and Pbins
45
-#'@param fixedDomain character to indicate whether A or P is
46
-#' domain for relative probabilities
47
-#'@param sampleSnapshots Boolean to indicate whether to capture
48
-#' individual samples from Markov chain during sampling
49
-#'@param numSnapshots the number of individual samples to capture
50
-#'@param plot Boolean to indicate whether to produce output graphics
51
-#'@param nPerm number of permutations in gene set test
52
-#'@param alphaA sparsity parameter for A domain
53
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms
54
-#'@param alphaP sparsity parameter for P domain
55
-#'@param max_gibbmass_paraA limit truncated normal to max size
56
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms
57
-#'@param max_gibbmass_paraP limit truncated normal to max size
58
-#'@export
59
-
60
-CoGAPS <- function(data, unc, ABins = data.frame(), PBins = data.frame(), GStoGenes, nFactor = 7, simulation_id="simulation", nEquil=1000,
61
-                nSample=1000, nOutR=1000, output_atomic=FALSE,
62
-                fixedBinProbs = FALSE, fixedDomain = "N",
63
-                sampleSnapshots = TRUE, numSnapshots = 100,
64
-                plot=TRUE, nPerm=500,
65
-                alphaA = 0.01,  nMaxA = 100000,
66
-                max_gibbmass_paraA = 100.0,
67
-                alphaP = 0.01, nMaxP = 100000,
68
-                max_gibbmass_paraP = 100.0) {
69
-
70
-
71
-  # decompose the data
72
-  matrixDecomp <- gapsRun(data, unc, ABins, PBins, nFactor, simulation_id,
73
-                    nEquil, nSample, nOutR, output_atomic, fixedBinProbs, 
74
-                    fixedDomain, sampleSnapshots, numSnapshots, alphaA,  nMaxA, max_gibbmass_paraA, 
75
-                    alphaP, nMaxP, max_gibbmass_paraP)
76
-
77
-  # plot patterns and show heatmap of Anorm
78
-  if (plot) {
79
-    plotGAPS(matrixDecomp$Amean, matrixDecomp$Pmean)
80
-  }
81
-
82
-  # compute the gene set scores
83
-  GSP <- calcCoGAPSStat(matrixDecomp$Amean, matrixDecomp$Asd, GStoGenes, nPerm)
84
-
85
-  return(list(meanChi2=matrixDecomp$calcChiSq,
86
-              D=data, Sigma=unc,
87
-              Amean=matrixDecomp$Amean, Asd=matrixDecomp$Asd,
88
-              Pmean=matrixDecomp$Pmean, Psd=matrixDecomp$Psd,
89
-              GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, GSActEst=GSP$GSActEst))
90
-  
91
-  
92
-}
1
+# CoGAPS: function to call gapsRun and calculate gene set
2
+#         statistics on result, also to produce A and P plots
3
+# History: v1.0 - EJF original CoGAPS, MFO modified to to match
4
+#                 new variable names
5
+
6
+# Inputs: data - data matrix
7
+#         unc - uncertainty matrix (std devs for chi-squared of Log Likelihood)
8
+#         GStoGenes - data.frame or list of gene sets
9
+#         nFactor - number of patterns (basis vectors, metagenes)
10
+#         nEquil - number of iterations for burn-in
11
+#         nSample - number of iterations for sampling
12
+#         nOutR - how often to print status into R by iterations
13
+#         output_atomic - whether to write atom files (large)
14
+#         simulation_id - name to attach to atoms files if created
15
+#         plot - logical to determine if plots produced
16
+#         nPerm - number of permutations for gene set test
17
+#         alphaA, alphaP - sparsity parameters for A and P domains
18
+#         max_gibbmass_paraA(P) - limit truncated normal to max size
19
+#         nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms
20
+
21
+
22
+# Output: list with matrices, mean chi-squared value, and gene set
23
+#         results
24
+
25
+#'\code{CoGAPS} calls the C++ MCMC code through gapsRun and performs Bayesian
26
+#'matrix factorization returning the two matrices that reconstruct
27
+#'the data matrix and then calls calcCoGAPSStat to estimate gene set
28
+#'activity with nPerm set to 500
29
+#'
30
+#'@param data data matrix
31
+#'@param unc uncertainty matrix (std devs for chi-squared of Log Likelihood)
32
+#'@param ABins a matrix of same size as A which gives relative
33
+#' probability of that element being non-zero
34
+#'@param PBins a matrix of same size as P which gives relative
35
+#' probability of that element being non-zero
36
+#'@param GStoGenes data.frame or list with gene sets
37
+#'@param nFactor number of patterns (basis vectors, metagenes)
38
+#'@param simulation_id name to attach to atoms files if created
39
+#'@param nEquil number of iterations for burn-in
40
+#'@param nSample number of iterations for sampling
41
+#'@param nOutR how often to print status into R by iterations
42
+#'@param output_atomic whether to write atom files (large)
43
+#'@param fixedBinProbs Boolean for using relative probabilities
44
+#' given in Abins and Pbins
45
+#'@param fixedDomain character to indicate whether A or P is
46
+#' domain for relative probabilities
47
+#'@param sampleSnapshots Boolean to indicate whether to capture
48
+#' individual samples from Markov chain during sampling
49
+#'@param numSnapshots the number of individual samples to capture
50
+#'@param plot Boolean to indicate whether to produce output graphics
51
+#'@param nPerm number of permutations in gene set test
52
+#'@param alphaA sparsity parameter for A domain
53
+#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms
54
+#'@param alphaP sparsity parameter for P domain
55
+#'@param max_gibbmass_paraA limit truncated normal to max size
56
+#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms
57
+#'@param max_gibbmass_paraP limit truncated normal to max size
58
+#'@export
59
+
60
+CoGAPS <- function(data, unc, ABins = data.frame(), PBins = data.frame(), GStoGenes, nFactor = 7, simulation_id="simulation", nEquil=1000,
61
+                nSample=1000, nOutR=1000, output_atomic=FALSE,
62
+                fixedBinProbs = FALSE, fixedDomain = "N",
63
+                sampleSnapshots = TRUE, numSnapshots = 100,
64
+                plot=TRUE, nPerm=500,
65
+                alphaA = 0.01,  nMaxA = 100000,
66
+                max_gibbmass_paraA = 100.0,
67
+                alphaP = 0.01, nMaxP = 100000,
68
+                max_gibbmass_paraP = 100.0) {
69
+
70
+
71
+  # decompose the data
72
+  matrixDecomp <- gapsRun(data, unc, ABins, PBins, nFactor, simulation_id,
73
+                    nEquil, nSample, nOutR, output_atomic, fixedBinProbs,
74
+                    fixedDomain, sampleSnapshots, numSnapshots, alphaA,  nMaxA, max_gibbmass_paraA,
75
+                    alphaP, nMaxP, max_gibbmass_paraP)
76
+
77
+  # plot patterns and show heatmap of Anorm
78
+  if (plot) {
79
+    plotGAPS(matrixDecomp$Amean, matrixDecomp$Pmean)
80
+  }
81
+
82
+  # compute the gene set scores
83
+  GSP <- calcCoGAPSStat(matrixDecomp$Amean, matrixDecomp$Asd, GStoGenes, nPerm)
84
+
85
+  return(list(meanChi2=matrixDecomp$calcChiSq,
86
+              D=data, Sigma=unc,
87
+              Amean=matrixDecomp$Amean, Asd=matrixDecomp$Asd,
88
+              Pmean=matrixDecomp$Pmean, Psd=matrixDecomp$Psd,
89
+              GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, GSActEst=GSP$GSActEst))
90
+
91
+
92
+}
... ...
@@ -10,19 +10,19 @@
10 10
 #'that an element of Amean must be to get a value of 1
11 11
 #'@export
12 12
 binaryA <-function(Amean, Asd, threshold=3) {
13
-  
14
-  
13
+
14
+
15 15
   BinA_Map <- ifelse(Amean/Asd > threshold,1,0)
16 16
   colnames(BinA_Map) <-colnames(BinA_Map,do.NULL=F,prefix = "Pattern ")
17 17
   rownames(BinA_Map) <- rep(" ",nrow(BinA_Map))
18
-  
19
-  
18
+
19
+
20 20
   heatmap.2(BinA_Map, Rowv = FALSE, Colv = FALSE,dendrogram="none",
21 21
             scale="none",col = brewer.pal(3,"Blues"), trace="none",
22 22
             density.info="none",cexCol=1.3,srtCol=45,
23 23
             lmat=rbind(c(0, 3),c(2,1),c(0,4) ),
24
-            lwid=c(1,10),lhei=c(1, 4, 1.2 ), 
24
+            lwid=c(1,10),lhei=c(1, 4, 1.2 ),
25 25
             main="Heatmap of Standardized A Matrix")
26 26
   mtext(paste("(Threshold = ", threshold, ")"))
27
-  
27
+
28 28
 }
... ...
@@ -1,12 +1,12 @@
1 1
 # calcCoGAPSStat: calculate gene set statistics for A matrix columns
2
-# History: v1.0 EJF original CoGAPS 
2
+# History: v1.0 EJF original CoGAPS
3 3
 
4 4
 # Inputs: Amean - A matrix mean values
5 5
 #         Asd - A matrix standard deviations
6 6
 #         GStoGenes - data.frame, GSA.genesets class, or list with gene sets
7 7
 #         numPerm - number of permutations for null
8 8
 
9
-# Output: list with gene set statistics 
9
+# Output: list with gene set statistics
10 10
 
11 11
 #'\code{calcCoGAPSStat} calculates the gene set statistics for each
12 12
 #'column of A using a Z-score from the elements of the A matrix,
... ...
@@ -25,10 +25,10 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) {
25 25
       #temp <- min(Asd[Asd>0])
26 26
       Asd[Asd==0] <- .Machine$double.eps
27 27
   }
28
-  
28
+
29 29
   # calculate Z scores
30 30
   zMatrix <- calcZ(Amean,Asd)
31
-  
31
+
32 32
   # compute the p-value for each gene set belonging to each pattern
33 33
 
34 34
   # check input arguments
... ...
@@ -40,7 +40,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) {
40 40
     names(GStoGenes$genesets) <- GStoGenes$geneset.names
41 41
     GStoGenes <- GStoGenes$genesets
42 42
   }
43
-    
43
+
44 44
   if (is(GStoGenes, "list")) {
45 45
     GStoGenesList <- GStoGenes
46 46
   } else {
... ...
@@ -49,7 +49,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) {
49 49
       GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i]))
50 50
     }
51 51
   }
52
-  
52
+
53 53
   # get dimensions
54 54
   numGS   <- length(names(GStoGenesList))
55 55
   numPatt <- dim(zMatrix)[2]
... ...
@@ -59,7 +59,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) {
59 59
   statsUp   <- matrix(ncol = numGS, nrow = numPatt)
60 60
   statsDown <- matrix(ncol = numGS, nrow = numPatt)
61 61
   actEst    <- matrix(ncol = numGS, nrow = numPatt)
62
-  results   <- list() 
62
+  results   <- list()
63 63
   zPerm     <- matrix(ncol=numPerm,nrow=numPatt)
64 64
 
65 65
   # do permutation test
... ...
@@ -69,7 +69,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) {
69 69
     zValues <- zMatrix[index,1]
70 70
     numGenes <- length(zValues)
71 71
     label <- as.character(numGenes)
72
-    
72
+
73 73
     if (!any(names(results)==label)) {
74 74
       for (p in 1:numPatt) {
75 75
         for (j in 1:numPerm) {
... ...
@@ -100,11 +100,11 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) {
100 100
       permzValues <- permzValues[ordering]
101 101
 
102 102
       statistic <- sum(zScore > permzValues)
103
-      statUpReg <- 1 - statistic/length(permzValues) 
103
+      statUpReg <- 1 - statistic/length(permzValues)
104 104
       statsUp[p,gs] <- max(statUpReg, 1/numPerm)
105 105
 
106 106
       statistic <- sum(zScore < permzValues)
107
-      statDownReg <- 1 - statistic/length(permzValues) 
107
+      statDownReg <- 1 - statistic/length(permzValues)
108 108
       statsDown[p,gs] <- max(statDownReg,1/numPerm)
109 109
 
110 110
       activity <- 1 - 2*max(statUpReg, 1/numPerm)
... ...
@@ -124,7 +124,7 @@ calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) {
124 124
 
125 125
   results[['GSUpreg']] <- statsUp
126 126
   results[['GSDownreg']] <- statsDown
127
-  results[['GSActEst']] <- actEst 
128
-  
127
+  results[['GSActEst']] <- actEst
128
+
129 129
   return(results)
130 130
 }
... ...
@@ -24,19 +24,19 @@
24 24
 #'@export
25 25
 
26 26
 calcGeneGSStat  <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), nullGenes=F) {
27
-    gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), 
27
+    gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes),
28 28
                              numPerm=numPerm)
29 29
     gsStat <- gsStat$GSUpreg
30
-    
30
+
31 31
     gsStat <- -log(gsStat)
32
-  
32
+
33 33
   if (!all(is.na(Pw))) {
34 34
     if (length(Pw) != length(gsStat)) {
35 35
       stop('Invalid weighting')
36 36
     }
37 37
     gsStat <- gsStat*Pw
38 38
   }
39
-        
39
+
40 40
     if (nullGenes) {
41 41
         ZD <- Amean[setdiff(row.names(Amean), GSGenes),] /
42 42
          Asd[setdiff(row.names(Amean), GSGenes),]
... ...
@@ -44,18 +44,18 @@ calcGeneGSStat  <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)),
44 44
         ZD <- Amean[GSGenes,]/Asd[GSGenes,]
45 45
     }
46 46
     outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
47
-    
47
+
48 48
     outStats <- outStats / apply(ZD,1,sum)
49
-    
49
+
50 50
     outStats[which(apply(ZD,1,sum) < .Machine$double.eps)] <- 0.
51
-    
52
-    
51
+
52
+
53 53
     if (sum(gsStat) < .Machine$double.eps) {
54 54
         return(0.)
55 55
     }
56
-    
56
+
57 57
     return(outStats)
58
-    
58
+
59 59
 }
60 60
 
61 61
 
... ...
@@ -63,23 +63,23 @@ computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)),
63 63
                               numPerm=500, PwNull=F) {
64 64
     geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw,
65 65
                                  GSGenes=GSGenes, numPerm=numPerm)
66
-    
67
-    
68
-    
66
+
67
+
68
+
69 69
     if (PwNull) {
70 70
       permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd,
71 71
                                   GSGenes=GSGenes, numPerm=numPerm, Pw=Pw,
72 72
                                   nullGenes=T)
73 73
     } else {
74 74
       permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd,
75
-                                  GSGenes=GSGenes, numPerm=numPerm, 
75
+                                  GSGenes=GSGenes, numPerm=numPerm,
76 76
                                   nullGenes=T)
77 77
     }
78
-    
78
+
79 79
     finalStats <- sapply(GSGenes,
80 80
                          function(x) length(which(permGSStat > geneGSStat[x])) / length(permGSStat))
81
-    
81
+
82 82
     return(finalStats)
83
-    
84
-    
83
+
84
+
85 85
 }
... ...
@@ -1,144 +1,144 @@
1
-#Calculates significant genes in each pattern according to certain threshold
2
-#Returns the significant gene names as well as well as the means of these matrices and number of genes in each
3
-
4
-gapsInterPattern <- function(Amean, Asd, sdThreshold = 3)
5
-{
6
-    #number of rows and cols of Asd
7
-    numGenes = length(Asd[,1]);
8
-    numCols = length(Asd[1,]);
9
-    
10
-    #Vector holding the number of each significant gene in each column
11
-    sigGeneNums = data.frame();
12
-    
13
-    #Temp number of sig genes in the col
14
-    sigCount = 0;
15
-    
16
-    #Number to catch the amount of columns without significant genes
17
-    numEmptyCols = 0;
18
-    
19
-    #Keep an array of the significant gene counts
20
-    significantGeneNums = c(0);
21
-    
22
-    #Names of the genes from the data matrix
23
-    geneNames = names(Asd[,1]);
24
-    
25
-    #Names of the genes that are significant from the data matrix
26
-    sigGeneNames = "0";
27
-    
28
-    #The numerator of our statistic
29
-    dimensionStatNumerator = 0;
30
-    
31
-    #The denominator of our statistic
32
-    dimensionStatDenominator = 0;
33
-    
34
-    #The value of our statistic
35
-    dimensionStatistic = 0;
36
-    
37
-    #A matrix holding the values of our statistics
38
-    dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols);
39
-    
40
-    #The mean of the statistic matrix
41
-    sbar = 0;
42
-    
43
-    #A list to return the significant genes in each col and the statistic matrix
44
-    results = list(list());
45
-    
46
-    #Scan in the significant genes from each column of Asd
47
-    #The columns of sigGeneNums hold the significant genes for each col of Asd
48
-    for(i in 1:numCols)
49
-    {
50
-        sigCount = 0;
51
-        for(j in 1:numGenes)
52
-        {
53
-            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
54
-            {
55
-                sigCount = sigCount + 1;
56
-                sigGeneNums[sigCount, i] = j;
57
-            }
58
-        }
59
-        
60
-        if(sigCount == 0)
61
-        {
62
-            sigGeneNums[1, i] = 0;
63
-            numEmptyCols = numEmptyCols + 1;
64
-        }
65
-        
66
-        #Save the number of sigGenes
67
-        significantGeneNums[i] = sigCount;
68
-        
69
-        #Get the names and store them
70
-        if(sigCount != 0)
71
-        {
72
-            for(k in 1:sigCount)
73
-            {
74
-                sigGeneNames[k] = geneNames[sigGeneNums[k, i]];
75
-            }
76
-            results[[1]][[i]] = sigGeneNames;
77
-            sigGeneNames = "0";
78
-        }
79
-        else
80
-        {
81
-            results[[1]][[i]] = "None";
82
-        }
83
-    }
84
-    
85
-    if(any(significantGeneNums == 0))
86
-    {
87
-        zeroSigCols = which(significantGeneNums == 0);
88
-        print("Warning: No Significant Genes in Pattern(s): ");
89
-        
90
-        for(z in 1:length(zeroSigCols))
91
-        {
92
-            print(zeroSigCols[z]);
93
-        }
94
-    }
95
-    
96
-    #Now that we have the significant genes want to see if these genes are significant in other columns
97
-    #Fill across the row then down the column
98
-    
99
-    #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i' 
100
-    for(i in 1:numCols)
101
-    {
102
-        for(j in 1:numCols)
103
-        {
104
-            #Grab the number of significant genes from the interested column
105
-            sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
106
-            
107
-            if(sigCount != 0)
108
-            {
109
-                dimensionStatDenominator = sigCount;
110
-                dimensionStatNumerator = 0;
111
-                
112
-                #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd.
113
-                #Find the significance of these genes, Amean - threshold * Asd
114
-                for(k in 1:sigCount)
115
-                {
116
-                    if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0)
117
-                    {
118
-                        dimensionStatNumerator = dimensionStatNumerator + 1;
119
-                    }
120
-                }
121
-                
122
-                dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator;
123
-                
124
-                dimensionStatisticMatrix[i, j] = dimensionStatistic;                
125
-            }
126
-            else
127
-            {
128
-                dimensionStatisticMatrix[i, j] = 0;
129
-            }
130
-        }
131
-    }
132
-    
133
-    #Find mean of the matrices (excluding the diagonal elements)
134
-    #we can subtract numCols as the diagonal elements are 1 
135
-    sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols)));
136
-    
137
-    results[[2]] = significantGeneNums;
138
-    results[[3]] = t(dimensionStatisticMatrix);
139
-    results[[4]] = sbar;
140
-    
141
-    names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue");
142
-    
143
-    return(results);
144
-}
1
+#Calculates significant genes in each pattern according to certain threshold
2
+#Returns the significant gene names as well as well as the means of these matrices and number of genes in each
3
+
4
+gapsInterPattern <- function(Amean, Asd, sdThreshold = 3)
5
+{
6
+    #number of rows and cols of Asd
7
+    numGenes = length(Asd[,1]);
8
+    numCols = length(Asd[1,]);
9
+
10
+    #Vector holding the number of each significant gene in each column
11
+    sigGeneNums = data.frame();
12
+
13
+    #Temp number of sig genes in the col
14
+    sigCount = 0;
15
+
16
+    #Number to catch the amount of columns without significant genes
17
+    numEmptyCols = 0;
18
+
19
+    #Keep an array of the significant gene counts
20
+    significantGeneNums = c(0);
21
+
22
+    #Names of the genes from the data matrix
23
+    geneNames = names(Asd[,1]);
24
+
25
+    #Names of the genes that are significant from the data matrix
26
+    sigGeneNames = "0";
27
+
28
+    #The numerator of our statistic
29
+    dimensionStatNumerator = 0;
30
+
31
+    #The denominator of our statistic
32
+    dimensionStatDenominator = 0;
33
+
34
+    #The value of our statistic
35
+    dimensionStatistic = 0;
36
+
37
+    #A matrix holding the values of our statistics
38
+    dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols);
39
+
40
+    #The mean of the statistic matrix
41
+    sbar = 0;
42
+
43
+    #A list to return the significant genes in each col and the statistic matrix
44
+    results = list(list());
45
+
46
+    #Scan in the significant genes from each column of Asd
47
+    #The columns of sigGeneNums hold the significant genes for each col of Asd
48
+    for(i in 1:numCols)
49
+    {
50
+        sigCount = 0;
51
+        for(j in 1:numGenes)
52
+        {
53
+            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
54
+            {
55
+                sigCount = sigCount + 1;
56
+                sigGeneNums[sigCount, i] = j;
57
+            }
58
+        }
59
+
60
+        if(sigCount == 0)
61
+        {
62
+            sigGeneNums[1, i] = 0;
63
+            numEmptyCols = numEmptyCols + 1;
64
+        }
65
+
66
+        #Save the number of sigGenes
67
+        significantGeneNums[i] = sigCount;
68
+
69
+        #Get the names and store them
70
+        if(sigCount != 0)
71
+        {
72
+            for(k in 1:sigCount)
73
+            {
74
+                sigGeneNames[k] = geneNames[sigGeneNums[k, i]];
75
+            }
76
+            results[[1]][[i]] = sigGeneNames;
77
+            sigGeneNames = "0";
78
+        }
79
+        else
80
+        {
81
+            results[[1]][[i]] = "None";
82
+        }
83
+    }
84
+
85
+    if(any(significantGeneNums == 0))
86
+    {
87
+        zeroSigCols = which(significantGeneNums == 0);
88
+        print("Warning: No Significant Genes in Pattern(s): ");
89
+
90
+        for(z in 1:length(zeroSigCols))
91
+        {
92
+            print(zeroSigCols[z]);
93
+        }
94
+    }
95
+
96
+    #Now that we have the significant genes want to see if these genes are significant in other columns
97
+    #Fill across the row then down the column
98
+
99
+    #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i'
100
+    for(i in 1:numCols)
101
+    {
102
+        for(j in 1:numCols)
103
+        {
104
+            #Grab the number of significant genes from the interested column
105
+            sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
106
+
107
+            if(sigCount != 0)
108
+            {
109
+                dimensionStatDenominator = sigCount;
110
+                dimensionStatNumerator = 0;
111
+
112
+                #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd.
113
+                #Find the significance of these genes, Amean - threshold * Asd
114
+                for(k in 1:sigCount)
115
+                {
116
+                    if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0)
117
+                    {
118
+                        dimensionStatNumerator = dimensionStatNumerator + 1;
119
+                    }
120
+                }
121
+
122
+                dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator;
123
+
124
+                dimensionStatisticMatrix[i, j] = dimensionStatistic;
125
+            }
126
+            else
127
+            {
128
+                dimensionStatisticMatrix[i, j] = 0;
129
+            }
130
+        }
131
+    }
132
+
133
+    #Find mean of the matrices (excluding the diagonal elements)
134
+    #we can subtract numCols as the diagonal elements are 1
135
+    sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols)));
136
+
137
+    results[[2]] = significantGeneNums;
138
+    results[[3]] = t(dimensionStatisticMatrix);
139
+    results[[4]] = sbar;
140
+
141
+    names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue");
142
+
143
+    return(results);
144
+}
... ...
@@ -1,135 +1,135 @@
1
-#Calculates significant genes in each pattern according to certain threshold
2
-#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices
3
-
4
-gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3)
5
-{
6
-    #number of rows and cols of Asd
7
-    numGenes = length(Asd[,1]);
8
-    numCols = length(Asd[1,]);
9
-    
10
-    #number of samples in DMatrix
11
-    numSamp = ncol(DMatrix);
12
-    
13
-    #Vector holding the number of each significant gene in each column
14
-    sigGeneNums = data.frame();
15
-    
16
-    #Temp number of sig genes in the col
17
-    sigCount = 0;
18
-    
19
-    #Keep an array of the significant gene counts
20
-    significantGeneNums = c(0);
21
-    
22
-    #A matrix to hold the significant genes in D for the current pattern
23
-    #The matrix just acts as a subset of D, just eliminates non relevant rows
24
-    tempSubsetD = matrix();
25
-    
26
-    #A matrix holding the values of our correlation coefficients between genes for the current column
27
-    tempGeneCorrMatrix = matrix();
28
-    
29
-    #A list to hold all the correlation matrices
30
-    geneCorrMatrices = list();
31
-    
32
-    #A list to hold all the means
33
-    geneCorrMatrMeans = list();
34
-    
35
-    #The mean of all the correlation matrices
36
-    cbar = 0;
37
-    
38
-    #A list to return the means and the matrices
39
-    results = list();
40
-    
41
-    #Scan in the significant genes from each column of Asd
42
-    #The columns of sigGeneNums hold the significant genes for each col of Asd
43
-    for(i in 1:numCols)
44
-    {
45
-        sigCount = 0;
46
-        for(j in 1:numGenes)
47
-        {
48
-            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
49
-            {
50
-                sigCount = sigCount + 1;
51
-                sigGeneNums[sigCount, i] = j;
52
-            }
53
-        }
54
-        
55
-        if(sigCount == 0)
56
-        {
57
-            sigGeneNums[1, i] = 0;
58
-        }
59
-        
60
-        #Save the number of sigGenes
61
-        significantGeneNums[i] = sigCount;
62
-    }
63
-    
64
-    #If a pattern has no significant genes this is clearly an error so return such
65
-    if(any(significantGeneNums == 0))
66
-    {
67
-        zeroSigCols = which(significantGeneNums == 0);
68
-        warning("Warning: No Significant Genes in Pattern(s): ");
69
-        
70
-        for(z in 1:length(zeroSigCols))
71
-        {
72
-            message(zeroSigCols[z]);
73
-        }
74
-    }
75
-    
76
-    
77
-    #Now that we have the significant genes want to grab these from our original D matrix 
78
-    #and find the sigGene x sigGene correlation matrix and find its mean 
79
-    
80
-    for(j in 1:numCols)
81
-    {
82
-        #Grab the number of significant genes from the interested column
83
-        sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
84
-            
85
-        if(sigCount != 0)
86
-        {
87
-            
88
-            #loop through the number of significant genes and pull out the rows of D that represent these genes.
89
-            #Then find the correlation between them with the built in R corr function
90
-            tempSubsetD = matrix(nrow = sigCount, ncol = numSamp);
91
-            for(k in 1:sigCount)
92
-            {
93
-                #Subset D based on significant Genes
94
-                #need to transpose as it reads this in as column vector otherwise
95
-                tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]);
96
-            }
97
-            
98
-            #Find the correlation between these genes in D
99
-            #Need to transpose as it calculates correlations between the columns
100
-            tempGeneCorrMatrix = cor(t(tempSubsetD));
101
-            
102
-            #Find the mean of this matrix
103
-            tempGeneCorrMatrMean = mean(tempGeneCorrMatrix);
104
-                                
105
-        }
106
-        else
107
-        {
108
-            tempGeneCorrMatrix = 0;
109
-            tempGeneCorrMatrMean = 0;
110
-        }
111
-        
112
-        #Save these in the overall list
113
-        geneCorrMatrices[[j]] = tempGeneCorrMatrix;
114
-        geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean;
115
-        
116
-    }
117
-        
118
-    #Return as an overall list of lists
119
-    # We return Corr Matrices themselves, their means, and the means of the means (cbar)
120
-    results[[1]] = geneCorrMatrices;
121
-    results[[2]] = geneCorrMatrMeans;
122
-    
123
-    #Return as an overall list of lists
124
-    for(i in 1:numCols)
125
-    {
126
-        cbar = cbar + results[[2]][[i]];
127
-    }
128
-    
129
-    cbar = cbar/numCols;
130
-    results[[3]] = cbar;
131
-    
132
-    names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue");
133
-    
134
-    return(results);
135
-}
1
+#Calculates significant genes in each pattern according to certain threshold
2
+#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices
3
+
4
+gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3)
5
+{
6
+    #number of rows and cols of Asd
7
+    numGenes = length(Asd[,1]);
8
+    numCols = length(Asd[1,]);
9
+
10
+    #number of samples in DMatrix
11
+    numSamp = ncol(DMatrix);
12
+
13
+    #Vector holding the number of each significant gene in each column
14
+    sigGeneNums = data.frame();
15
+
16
+    #Temp number of sig genes in the col
17
+    sigCount = 0;
18
+
19
+    #Keep an array of the significant gene counts
20
+    significantGeneNums = c(0);
21
+
22
+    #A matrix to hold the significant genes in D for the current pattern
23
+    #The matrix just acts as a subset of D, just eliminates non relevant rows
24
+    tempSubsetD = matrix();
25
+
26
+    #A matrix holding the values of our correlation coefficients between genes for the current column
27
+    tempGeneCorrMatrix = matrix();
28
+
29
+    #A list to hold all the correlation matrices
30
+    geneCorrMatrices = list();
31
+
32
+    #A list to hold all the means
33
+    geneCorrMatrMeans = list();
34
+
35
+    #The mean of all the correlation matrices
36
+    cbar = 0;
37
+
38
+    #A list to return the means and the matrices
39
+    results = list();
40
+
41
+    #Scan in the significant genes from each column of Asd
42
+    #The columns of sigGeneNums hold the significant genes for each col of Asd
43
+    for(i in 1:numCols)
44
+    {
45
+        sigCount = 0;
46
+        for(j in 1:numGenes)
47
+        {
48
+            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
49
+            {
50
+                sigCount = sigCount + 1;
51
+                sigGeneNums[sigCount, i] = j;
52
+            }
53
+        }
54
+
55
+        if(sigCount == 0)
56
+        {
57
+            sigGeneNums[1, i] = 0;
58
+        }
59
+
60
+        #Save the number of sigGenes
61
+        significantGeneNums[i] = sigCount;
62
+    }
63
+
64
+    #If a pattern has no significant genes this is clearly an error so return such
65
+    if(any(significantGeneNums == 0))
66
+    {
67
+        zeroSigCols = which(significantGeneNums == 0);
68
+        warning("Warning: No Significant Genes in Pattern(s): ");
69
+
70
+        for(z in 1:length(zeroSigCols))
71
+        {
72
+            message(zeroSigCols[z]);
73
+        }
74
+    }
75
+
76
+
77
+    #Now that we have the significant genes want to grab these from our original D matrix
78
+    #and find the sigGene x sigGene correlation matrix and find its mean
79
+
80
+    for(j in 1:numCols)
81
+    {
82
+        #Grab the number of significant genes from the interested column
83
+        sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
84
+
85
+        if(sigCount != 0)
86
+        {
87
+
88
+            #loop through the number of significant genes and pull out the rows of D that represent these genes.
89
+            #Then find the correlation between them with the built in R corr function
90
+            tempSubsetD = matrix(nrow = sigCount, ncol = numSamp);
91
+            for(k in 1:sigCount)
92
+            {
93
+                #Subset D based on significant Genes
94
+                #need to transpose as it reads this in as column vector otherwise
95
+                tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]);
96
+            }
97
+
98
+            #Find the correlation between these genes in D
99
+            #Need to transpose as it calculates correlations between the columns
100
+            tempGeneCorrMatrix = cor(t(tempSubsetD));
101
+
102
+            #Find the mean of this matrix
103
+            tempGeneCorrMatrMean = mean(tempGeneCorrMatrix);
104
+
105
+        }
106
+        else
107
+        {
108
+            tempGeneCorrMatrix = 0;
109
+            tempGeneCorrMatrMean = 0;
110
+        }
111
+
112
+        #Save these in the overall list
113
+        geneCorrMatrices[[j]] = tempGeneCorrMatrix;
114
+        geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean;
115
+
116
+    }
117
+
118
+    #Return as an overall list of lists
119
+    # We return Corr Matrices themselves, their means, and the means of the means (cbar)
120
+    results[[1]] = geneCorrMatrices;
121
+    results[[2]] = geneCorrMatrMeans;
122
+
123
+    #Return as an overall list of lists
124
+    for(i in 1:numCols)
125
+    {
126
+        cbar = cbar + results[[2]][[i]];
127
+    }
128
+
129
+    cbar = cbar/numCols;
130
+    results[[3]] = cbar;
131
+
132
+    names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue");
133
+
134
+    return(results);
135
+}
... ...
@@ -5,7 +5,7 @@
5 5
 # Inputs: D - data matrix
6 6
 #         S - uncertainty matrix (std devs for chi-squared of Log Likelihood)
7 7
 #         FP - fixed A columns or P rows
8
-#         nFactor - number of patterns (basis vectors, metagenes) 
8
+#         nFactor - number of patterns (basis vectors, metagenes)
9 9
 #         simulation_id - name to attach to atoms files if created
10 10
 #         nEquil - number of iterations for burn-in
11 11
 #         nSample - number of iterations for sampling
... ...
@@ -55,29 +55,29 @@
55 55
 #'@param max_gibbmass_paraP limit truncated normal to max size
56 56
 #'@export
57 57
 
58
-gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 5, simulation_id = "simulation", 
59
-                       nEquil = 1000, nSample = 1000, nOutR = 1000, output_atomic = FALSE, fixedMatrix = "P", 
60
-                       fixedBinProbs = FALSE, fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01,  
58
+gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 5, simulation_id = "simulation",
59
+                       nEquil = 1000, nSample = 1000, nOutR = 1000, output_atomic = FALSE, fixedMatrix = "P",
60
+                       fixedBinProbs = FALSE, fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01,
61 61
                        nMaxA = 100000, max_gibbmass_paraA = 100.0, alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0)
62 62
 {
63 63
   #Begin data type error checking code
64 64
   charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix))
65 65
   charCheck = c("simulation_id", "fixedDomain", "fixedMatrix")
66
-  
66
+
67 67
   boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs), !is.logical(sampleSnapshots))
68 68
   boolCheck = c("output_atomic", "fixedBinProbs", "sampleSnapshots")
69
-  
70
-  numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), !is.numeric(numSnapshots), 
71
-                        !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), 
69
+
70
+  numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), !is.numeric(numSnapshots),
71
+                        !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP),
72 72
                         !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP))
73
-  numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "numSnapshots", "alphaA", "nMaxA", 
73
+  numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "numSnapshots", "alphaA", "nMaxA",
74 74
                    "max_gibbmass_paraA", "alphaP",    "nMaxP", "max_gibbmass_paraP")
75
-  
75
+
76 76
   dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP))
77 77
   dataFrameCheck = c("D", "S", "ABins", "PBins", "FP")
78
-  
78
+
79 79
   matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP))
80
-  
80
+
81 81
   if(any(charDataErrors))
82 82
   {
83 83
     #Check which of these is not a string
... ...
@@ -88,10 +88,10 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa
88 88
         stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details."))
89 89
       }
90 90
     }
91
-    
91
+
92 92
     return()
93 93
   }
94
-  
94
+
95 95
   if(any(boolDataErrors))
96 96
   {
97 97
     #Check which of these is not a boolean
... ...
@@ -102,10 +102,10 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa
102 102
         stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details."))
103 103
       }
104 104
     }
105
-    
105
+
106 106
     return()
107 107
   }
108
-  
108
+
109 109
   if(any(numericDataErrors))
110 110
   {
111 111
     #Check which of these is not an integer/double
... ...
@@ -114,14 +114,14 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa
114 114
       if(numericDataErrors[i] == TRUE)
115 115
       {
116 116
         stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details."))
117
-        
117
+
118 118
       }
119 119
     }
120 120
     return()
121 121
   }
122
-  
123
-  
124
-  #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame 
122
+
123
+
124
+  #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame
125 125
   if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5])))
126 126
   {
127 127
     for(i in 1:length(dataFrameCheck))
... ...
@@ -129,12 +129,12 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa
129 129
       if((dataFrameErrors[i] && matrixErrors[i]) == TRUE)
130 130
       {
131 131
         stop(paste("Error in gapsRun: Argument",dataFrameCheck[i],"is not a matrix or data.frame. Please see documentation for details."))
132
-        
132
+
133 133
       }
134 134
     }
135 135
     return()
136 136
   }
137
-  
137
+
138 138
   #Floor the parameters that are integers to prevent allowing doubles.
139 139
   nFactor = floor(nFactor)
140 140
   nEquil = floor(nEquil)
... ...
@@ -143,87 +143,87 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa
143 143
   numSnapshots = floor(numSnapshots)
144 144
   nMaxA = floor(nMaxA)
145 145
   nMaxP = floor(nMaxP)
146
-  
146
+
147 147
   # pass all settings to C++ within a list
148 148
   #    if (is.null(P)) {
149 149
   Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix, sampleSnapshots);
150
-  
151
-  ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, 
150
+
151
+  ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA,
152 152
                  alphaP, nMaxP, max_gibbmass_paraP, numSnapshots);
153
-  
153
+
154 154
   #Begin logic error checking code
155
-  
156
-  #Check for negative or zero arguments 
155
+
156
+  #Check for negative or zero arguments
157 157
   if(any(ConfigNums <= 0))
158 158
   {
159 159
     stop("Error in gapsRun: Numeric Arguments cannot be non-zero!")
160
-    
160
+
161 161
     return()
162 162
   }
163
-  
163
+
164 164
   #Check for nonsensical inputs (such as numSnapshots < nEquil or nSample)
165 165
   if((numSnapshots > nEquil) || (numSnapshots > nSample))
166 166
   {
167 167
     stop("Error in gapsRun: Cannot have more snapshots of A and P than equilibration and/or sampling iterations.")
168
-    
168
+
169 169
     return()
170 170
   }
171
-  
171
+
172 172
   if((nOutR > nEquil) || (nOutR > nSample))
173 173
   {
174 174
     stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.")
175
-    
175
+
176 176
     return()
177 177
   }
178
-  
178
+
179 179
   if(ncol(FP) != ncol(D))
180 180
   {
181 181
     stop("Error in gapsRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.")
182
-    
182
+
183 183
     return()
184 184
   }
185
-  
185
+
186 186
   if(nFactor < (nrow(FP)))
187 187
   {
188 188
     stop("Error in gapsRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.")
189
-    
189
+
190 190
     return()
191 191
   }
192
-  
192
+
193 193
   if(nFactor > (ncol(D)))
194 194
   {
195 195
     warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.")
196 196
   }
197
-  
198
-  
199
-  
197
+
198
+
199
+
200 200
   #        P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass
201 201
   #    } else {
202 202
   #        Config = c(nFactor, simulation_id, nEquil, nSample, nOutR,
203 203
   #        output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor,
204 204
   #        alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1)
205
-  
205
+
206 206
   #   }
207
-  
207
+
208 208
   geneNames = rownames(D);
209 209
   sampleNames = colnames(D);
210
-  
210
+
211 211
   # label patterns as Patt N
212 212
   patternNames = c("0");
213 213
   for(i in 1:nFactor)
214 214
   {
215 215
     patternNames[i] = paste('Patt', i);
216 216
   }
217
-  
217
+
218 218
   # call to C++ Rcpp code
219 219
   cogapResult = cogapsMap(D, S, FP, ABins, PBins, Config, ConfigNums);
220
-  
220
+
221 221
   # convert returned files to matrices to simplify visualization and processing
222 222
   cogapResult$Amean = as.matrix(cogapResult$Amean);
223 223
   cogapResult$Asd = as.matrix(cogapResult$Asd);
224 224
   cogapResult$Pmean = as.matrix(cogapResult$Pmean);
225 225
   cogapResult$Psd = as.matrix(cogapResult$Psd);
226
-  
226
+
227 227
   # label matrices
228 228
   colnames(cogapResult$Amean) = patternNames;
229 229
   rownames(cogapResult$Amean) = geneNames;
... ...
@@ -233,13 +233,13 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa
233 233
   rownames(cogapResult$Pmean) = patternNames;
234 234
   colnames(cogapResult$Psd) = sampleNames;
235 235
   rownames(cogapResult$Psd) = patternNames;
236
-  
236
+
237 237
   # calculate chi-squared of mean, this should be smaller than individual
238 238
   # chi-squared sample values if sampling is good
239 239
   calcChiSq = c(0);
240 240
   MMatrix = (cogapResult$Amean %*% cogapResult$Pmean);
241
-  
242
-  
241
+
242
+
243 243
   for(i in 1:(nrow(MMatrix)))
244 244
   {
245 245
     for(j in 1:(ncol(MMatrix)))
... ...
@@ -247,13 +247,13 @@ gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFa
247 247
       calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2);
248 248
     }
249 249
   }
250
-  
250
+
251 251
   cogapResult = c(cogapResult, calcChiSq);
252
-  
253
-  
252
+
253
+
254 254
   names(cogapResult)[12] = "meanChi2";
255
-  
255
+
256 256
   message(paste("Chi-Squared of Mean:",calcChiSq))
257
-  
257
+
258 258
   return(cogapResult);
259 259
 }
... ...
@@ -5,7 +5,7 @@
5 5
 # Inputs: D - data matrix
6 6
 #         S - uncertainty matrix (std devs for chi-squared of Log Likelihood)
7 7
 #         FP - fixed A columns or P rows
8
-#         nFactor - number of patterns (basis vectors, metagenes) 
8
+#         nFactor - number of patterns (basis vectors, metagenes)
9 9
 #         simulation_id - name to attach to atoms files if created
10 10
 #         nEquil - number of iterations for burn-in
11 11
 #         nSample - number of iterations for sampling
... ...
@@ -61,25 +61,25 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
61 61
                            max_gibbmass_paraA = 100.0, alphaP = 0.01,
62 62
                            nMaxP = 100000, max_gibbmass_paraP = 100.0)
63 63
 {
64
-  
64
+
65 65
   #Begin data type error checking code
66 66
   charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix))
67 67
   charCheck = c("simulation_id", "fixedDomain", "fixedMatrix")
68
-  
68
+
69 69
   boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs))
70 70
   boolCheck = c("output_atomic", "fixedBinProbs")
71
-  
71
+
72 72
   numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR),
73
-                        !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), 
73
+                        !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP),
74 74
                         !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP))
75
-  numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA", 
75
+  numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA",
76 76
                    "max_gibbmass_paraA", "alphaP",    "nMaxP", "max_gibbmass_paraP")
77
-  
77
+
78 78
   dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP))
79 79
   dataFrameCheck = c("D", "S", "ABins", "PBins", "FP")
80
-  
80
+
81 81
   matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP))
82
-  
82
+
83 83
   if(any(charDataErrors))
84 84
   {
85 85
     #Check which of these is not a string
... ...
@@ -90,10 +90,10 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
90 90
         stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details."))
91 91
       }
92 92
     }
93
-    
93
+
94 94
     return()
95 95
   }
96
-  
96
+
97 97
   if(any(boolDataErrors))
98 98
   {
99 99
     #Check which of these is not a boolean
... ...
@@ -104,10 +104,10 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
104 104
         stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details."))
105 105
       }
106 106
     }
107
-    
107
+
108 108
     return()
109 109
   }
110
-  
110
+
111 111
   if(any(numericDataErrors))
112 112
   {
113 113
     #Check which of these is not an integer/double
... ...
@@ -116,14 +116,14 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
116 116
       if(numericDataErrors[i] == TRUE)
117 117
       {
118 118
         stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details."))
119
-        
119
+
120 120
       }
121 121
     }
122 122
     return()
123 123
   }
124
-  
125
-  
126
-  #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame 
124
+
125
+
126
+  #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame
127 127
   if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5])))
128 128
   {
129 129
     for(i in 1:length(dataFrameCheck))
... ...
@@ -131,12 +131,12 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
131 131
       if((dataFrameErrors[i] && matrixErrors[i]) == TRUE)
132 132
       {
133 133
         stop(paste("Error in gapsRun: Argument", dataFrameCheck[i], "is not a matrix or data.frame. Please see documentation for details."))
134
-        
134
+
135 135
       }
136 136
     }
137 137
     return()
138 138
   }
139
-  
139
+
140 140
   #Floor the parameters that are integers to prevent allowing doubles.
141 141
   nFactor = floor(nFactor)
142 142
   nEquil = floor(nEquil)
... ...
@@ -144,77 +144,77 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
144 144
   nOutR = floor(nOutR)
145 145
   nMaxA = floor(nMaxA)
146 146
   nMaxP = floor(nMaxP)
147
-  
147
+
148 148
   # pass all settings to C++ within a list
149 149
   #    if (is.null(P)) {
150 150
   Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix);
151
-  
152
-  ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, 
151
+
152
+  ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA,
153 153
                  alphaP, nMaxP, max_gibbmass_paraP);
154
-  
154
+
155 155
   #Begin logic error checking code
156
-  
157
-  #Check for negative or zero arguments 
156
+
157
+  #Check for negative or zero arguments
158 158
   if(any(ConfigNums <= 0))
159 159
   {
160 160
     stop("Error in gapsRun: Numeric Arguments cannot be non-zero!")
161
-    
161
+
162 162
     return()
163 163
   }
164
-  
164
+
165 165
   if((nOutR > nEquil) || (nOutR > nSample))
166 166
   {
167 167
     stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.")
168
-    
168
+
169 169
     return()
170 170
   }
171
-  
171
+
172 172
   if(ncol(FP) != ncol(D))
173 173
   {
174 174
     stop("Error in gapsRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.")
175
-    
175
+
176 176
     return()
177 177
   }
178
-  
178
+
179 179
   if(nFactor < (nrow(FP)))
180 180
   {
181 181
     stop("Error in gapsRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.")
182
-    
182
+
183 183
     return()
184 184
   }
185
-  
185
+
186 186
   if(nFactor > (ncol(D)))
187 187
   {
188 188
     warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.")
189 189
   }
190
-  
190
+
191 191
   #        P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass
192 192
   #    } else {
193 193
   #        Config = c(nFactor, simulation_id, nEquil, nSample, nOutR,
194 194
   #        output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor,
195 195
   #        alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1)
196
-  
196
+
197 197
   #   }
198
-  
198
+
199 199
   geneNames = rownames(D);
200 200
   sampleNames = colnames(D);
201
-  
201
+
202 202
   # label patterns as Patt N
203 203
   patternNames = c("0");
204 204
   for(i in 1:nFactor)
205 205
   {
206 206
     patternNames[i] = paste('Patt', i);
207 207
   }
208
-  
208
+
209 209
   # call to C++ Rcpp code
210 210
   cogapResult = cogapsMapTest(D, S, FP, ABins, PBins, Config, ConfigNums);
211
-  
211
+
212 212
   # convert returned files to matrices to simplify visualization and processing
213 213
   cogapResult$Amean = as.matrix(cogapResult$Amean);
214 214
   cogapResult$Asd = as.matrix(cogapResult$Asd);
215 215
   cogapResult$Pmean = as.matrix(cogapResult$Pmean);
216 216
   cogapResult$Psd = as.matrix(cogapResult$Psd);
217
-  
217
+
218 218
   # label matrices
219 219
   colnames(cogapResult$Amean) = patternNames;
220 220
   rownames(cogapResult$Amean) = geneNames;
... ...
@@ -224,13 +224,13 @@ gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
224 224
   rownames(cogapResult$Pmean) = patternNames;
225 225
   colnames(cogapResult$Psd) = sampleNames;
226 226
   rownames(cogapResult$Psd) = patternNames;
227
-  
227
+
228 228
   # calculate chi-squared of mean, this should be smaller than individual
229 229
   # chi-squared sample values if sampling is good
230 230
   calcChiSq = c(0);
231 231