Browse code

vignette for new workflow

Tom Sherman authored on 11/07/2018 16:09:37
Showing 74 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: CoGAPS
2
-Version: 3.1.3
2
+Version: 3.3.0
3 3
 Date: 2018-04-24
4 4
 Title: Coordinated Gene Activity in Pattern Sets
5 5
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
... ...
@@ -51,31 +51,18 @@ Collate:
51 51
     'CoGAPS.R'
52 52
     'GWCoGAPS.R'
53 53
     'RcppExports.R'
54
-    'binaryA.R'
55
-    'calcCoGAPSStat.R'
56
-    'calcGeneGSStat.R'
57
-    'calcZ.R'
58 54
     'cellMatchR.R'
59 55
     'class-CogapsResult.R'
60 56
     'createGWCoGAPSSets.R'
61 57
     'createscCoGAPSSets.R'
62
-    'gapsInterPattern.R'
63
-    'gapsIntraPattern.R'
64 58
     'generateSeeds.R'
65 59
     'package.R'
66 60
     'patternMarkers.R'
67 61
     'patternMatch4Parallel.R'
68 62
     'patternMatcher.R'
69
-    'plotAtoms.R'
70
-    'plotDiag.R'
71
-    'plotGAPS.R'
72
-    'plotP.R'
73 63
     'plotPatternMarkers.R'
74
-    'plotSmoothPatterns.R'
75 64
     'postFixed4Parallel.R'
76 65
     'postFixed4SC.R'
77 66
     'reOrderBySet.R'
78
-    'reconstructGene.R'
79 67
     'reorderByPatternMatch.R'
80
-    'residuals.R'
81 68
     'scCoGAPS.R'
... ...
@@ -3,7 +3,6 @@
3 3
 export(CoGAPS)
4 4
 export(GWCoGAPS)
5 5
 export(GWCoGapsFromCheckpoint)
6
-export(MergeResultsWithSCE)
7 6
 export(binaryA)
8 7
 export(buildReport)
9 8
 export(calcCoGAPSStat)
... ...
@@ -13,14 +12,13 @@ export(cellMatchR)
13 12
 export(computeGeneGSProb)
14 13
 export(createGWCoGAPSSets)
15 14
 export(createscCoGAPSSets)
15
+export(getMeanChiSq)
16 16
 export(getParam)
17 17
 export(patternMarkers)
18 18
 export(patternMatch4Parallel)
19
-export(plotAtoms)
20
-export(plotGAPS)
21
-export(plotP)
19
+export(plot.CogapsResult)
20
+export(plotResiduals)
22 21
 export(reconstructGene)
23
-export(residuals)
24 22
 export(scCoGAPS)
25 23
 export(setParam)
26 24
 exportClasses(CogapsParams)
... ...
@@ -29,6 +27,9 @@ import(doParallel)
29 27
 import(foreach)
30 28
 import(ggplot2)
31 29
 import(shiny)
30
+importClassesFrom(S4Vectors,Annotated)
31
+importClassesFrom(S4Vectors,DataFrame)
32
+importClassesFrom(S4Vectors,character_OR_NULL)
32 33
 importClassesFrom(SingleCellExperiment,SingleCellExperiment)
33 34
 importClassesFrom(SummarizedExperiment,SummarizedExperiment)
34 35
 importFrom(RColorBrewer,brewer.pal)
... ...
@@ -178,6 +178,8 @@ checkDataMatrix <- function(data, uncertainty, params)
178 178
 {
179 179
     if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0)
180 180
         stop("negative values in data and/or uncertainty matrix")
181
-    if (nrow(data) == params@nPatterns || ncol(data) == params@nPatterns)
181
+    if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns)
182 182
         stop("nPatterns must be less than dimensions of data")
183
+    if (sum(dim(uncertainty)) != 2 & sum(uncertainty < 1e-5) > 0)
184
+        warning("small values in uncertainty matrix detected")
183 185
 }
184 186
\ No newline at end of file
... ...
@@ -20,9 +20,14 @@
20 20
 #' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
21 21
 #' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
22 22
 #' @export
23
-GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA,
23
+GWCoGAPS <- function(simulationName, nFactor=3, nCores=NA, cut=NA, minNS=NA,
24 24
 manualMatch=FALSE, consensusPatterns=NULL, saveUnmatchedPatterns=FALSE, ...)
25 25
 {
26
+    if (!is.null(list(...)$nPatterns))
27
+    {
28
+        nFactor <- list(...)$nPatterns
29
+    }
30
+
26 31
     if (!is.null(list(...)$checkpointFile))
27 32
     {
28 33
         stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter")
... ...
@@ -162,7 +167,7 @@ runInitialPhase <- function(simulationName, allDataSets, nFactor, ...)
162 167
 
163 168
 postInitialPhase <- function(initialResult, nSets, cut, minNS)
164 169
 {
165
-    nFactor <- ncol(initialResult[[1]]@Amean)
170
+    nFactor <- ncol(initialResult[[1]]@featureLoadings)
166 171
     BySet <- reOrderBySet(AP=initialResult, nFactor=nFactor, nSets=nSets)
167 172
 
168 173
     #run postpattern match function
... ...
@@ -177,7 +182,7 @@ postInitialPhase <- function(initialResult, nSets, cut, minNS)
177 182
 
178 183
 runFinalPhase <- function(simulationName, allDataSets, consensusPatterns, nCores, ...)
179 184
 {    
180
-    if (length(dim(consensusPatterns)) != 2)
185
+    if (class(consensusPatterns) != "matrix")
181 186
     {
182 187
         stop("consensusPatterns must be a matrix")
183 188
     }
... ...
@@ -197,6 +202,7 @@ runFinalPhase <- function(simulationName, allDataSets, consensusPatterns, nCores
197 202
 
198 203
     # final number of factors
199 204
     nFactorFinal <- nrow(consensusPatterns)
205
+    message(paste("final number of patterns:", nFactorFinal))
200 206
 
201 207
     # run fixed CoGAPS
202 208
     finalResult <- foreach(i=1:length(allDataSets)) %dopar%
... ...
@@ -219,6 +225,14 @@ postFinalPhase <- function(finalResult, consensusPatterns)
219 225
 {
220 226
     Aresult <- postFixed4Parallel(finalResult, consensusPatterns)
221 227
     finalResult <- list("Amean"=Aresult$A, "Asd"=Aresult$Asd,"Pmean"=consensusPatterns)
222
-    class(finalResult) <- append(class(finalResult), "CoGAPS")
223
-    return(finalResult)
228
+    
229
+    return(new("CogapsResult",
230
+        Amean       = Aresult$A,
231
+        Asd         = Aresult$Asd,
232
+        Pmean       = consensusPatterns,
233
+        Psd         = NULL,
234
+        seed        = 0,
235
+        meanChiSq   = 0,
236
+        diagnostics = list()
237
+    ))
224 238
 }
225 239
deleted file mode 100644
... ...
@@ -1,28 +0,0 @@
1
-#' Binary Heatmap for Standardized A Matrix
2
-#'
3
-#' @details creates a binarized heatmap of the A matrix
4
-#' in which the value is 1 if the value in Amean is greater than
5
-#' threshold * Asd and 0 otherwise
6
-#' @param Amean the mean estimate for the A matrix
7
-#' @param Asd the standard deviations on Amean
8
-#' @param threshold the number of standard deviations above zero
9
-#' that an element of Amean must be to get a value of 1
10
-#' @return plots a heatmap of the A Matrix
11
-#' @examples
12
-#' data(SimpSim)
13
-#' binaryA(SimpSim.result$Amean, SimpSim.result$Asd, threshold=3)
14
-#' @export
15
-binaryA <-function(Amean, Asd, threshold=3)
16
-{
17
-    BinA_Map <- ifelse(Amean/Asd > threshold,1,0)
18
-    colnames(BinA_Map) <- colnames(BinA_Map, do.NULL=FALSE, prefix = "Pattern ")
19
-    rownames(BinA_Map) <- rep(" ",nrow(BinA_Map))
20
-
21
-    heatmap.2(BinA_Map, Rowv = FALSE, Colv = FALSE,dendrogram="none",
22
-        scale="none",col = brewer.pal(3,"Blues"), trace="none",
23
-        density.info="none",cexCol=1.3,srtCol=45,
24
-        lmat=rbind(c(0, 3),c(2,1),c(0,4) ),
25
-        lwid=c(1,10),lhei=c(1, 4, 1.2 ),
26
-        main="Heatmap of Standardized A Matrix")
27
-    mtext(paste("(Threshold = ", threshold, ")"))
28
-}
29 0
deleted file mode 100644
... ...
@@ -1,130 +0,0 @@
1
-#' Calculate Gene Set Statistics
2
-#'
3
-#' @details calculates the gene set statistics for each
4
-#' column of A using a Z-score from the elements of the A matrix,
5
-#' the input gene set, and permutation tests
6
-#' @param Amean A matrix mean values
7
-#' @param Asd A matrix standard deviations
8
-#' @param GStoGenes data.frame or list with gene sets
9
-#' @param numPerm number of permutations for null
10
-#' @return gene set statistics for each column of A
11
-#' @examples
12
-#' data('SimpSim')
13
-#' calcCoGAPSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets,
14
-#' numPerm=500)
15
-#' @export
16
-calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500)
17
-{
18
-    # test for std dev of zero, possible mostly in simple simulations
19
-    if (sum(Asd==0) > 0)
20
-        Asd[Asd==0] <- 1e-6
21
-
22
-    # calculate Z scores
23
-    zMatrix <- calcZ(Amean,Asd)
24
-
25
-    # check input arguments
26
-    if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets"))
27
-    {
28
-        stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.")
29
-    }
30
-
31
-    if (is(GStoGenes, "GSA.genesets"))
32
-    {
33
-        names(GStoGenes$genesets) <- GStoGenes$geneset.names
34
-        GStoGenes <- GStoGenes$genesets
35
-    }
36
-
37
-    if (is(GStoGenes, "list"))
38
-    {
39
-        GStoGenesList <- GStoGenes
40
-    }
41
-    else
42
-    {
43
-        GStoGenesList <- list()
44
-        for (i in 1:dim(GStoGenes)[2])
45
-        {
46
-            GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i]))
47
-        }
48
-    }
49
-
50
-    # get dimensions
51
-    numGS   <- length(names(GStoGenesList))
52
-    numPatt <- dim(zMatrix)[2]
53
-    numG    <- dim(zMatrix)[1]+0.9999
54
-
55
-    # initialize matrices
56
-    statsUp   <- matrix(ncol = numGS, nrow = numPatt)
57
-    statsDown <- matrix(ncol = numGS, nrow = numPatt)
58
-    actEst    <- matrix(ncol = numGS, nrow = numPatt)
59
-    results   <- list()
60
-    zPerm     <- matrix(ncol=numPerm,nrow=numPatt)
61
-
62
-    # do permutation test
63
-    for (gs in 1:numGS)
64
-    {
65
-        genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
66
-        index <- rownames(zMatrix) %in% genes
67
-        zValues <- zMatrix[index,1]
68
-        numGenes <- length(zValues)
69
-        label <- as.character(numGenes)
70
-
71
-        if (!any(names(results)==label))
72
-        {
73
-            for (p in 1:numPatt)
74
-            {
75
-                for (j in 1:numPerm)
76
-                {
77
-                    temp <- floor(runif(numGenes,1,numG))
78
-                    temp2 <- zMatrix[temp,p]
79
-                    zPerm[p,j] <- mean(temp2)
80
-                }
81
-            }
82
-            results[[label]] <- zPerm
83
-        }
84
-    }
85
-
86
-    # get p-value
87
-    for (p in 1:numPatt)
88
-    {
89
-        for (gs in 1:numGS)
90
-        {
91
-            genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
92
-            index <- rownames(zMatrix) %in% genes
93
-            zValues <- zMatrix[index,p]
94
-            zScore <- mean(zValues)
95
-
96
-            numGenes <- length(zValues)
97
-            label <- as.character(numGenes)
98
-
99
-            permzValues <- results[[label]][p,]
100
-            ordering <- order(permzValues)
101
-            permzValues <- permzValues[ordering]
102
-
103
-            statistic <- sum(zScore > permzValues)
104
-            statUpReg <- 1 - statistic/length(permzValues)
105
-            statsUp[p,gs] <- max(statUpReg, 1/numPerm)
106
-
107
-            statistic <- sum(zScore < permzValues)
108
-            statDownReg <- 1 - statistic/length(permzValues)
109
-            statsDown[p,gs] <- max(statDownReg,1/numPerm)
110
-
111
-            activity <- 1 - 2*max(statUpReg, 1/numPerm)
112
-            actEst[p,gs] <- activity
113
-        }
114
-    }
115
-
116
-    # format output
117
-    colnames(statsUp) <- names(GStoGenesList)
118
-    colnames(statsDown) <- names(GStoGenesList)
119
-    colnames(actEst) <- names(GStoGenesList)
120
-
121
-    rownames(statsUp) <- colnames(zMatrix)
122
-    rownames(statsDown) <- colnames(zMatrix)
123
-    rownames(actEst) <- colnames(zMatrix)
124
-
125
-    results[['GSUpreg']] <- statsUp
126
-    results[['GSDownreg']] <- statsDown
127
-    results[['GSActEst']] <- actEst
128
-
129
-    return(results)
130
-}
131 0
deleted file mode 100644
... ...
@@ -1,97 +0,0 @@
1
-#' Probability Gene Belongs in Gene Set
2
-#'
3
-#' @details calculates the probability that a gene
4
-#' listed in a gene set behaves like other genes in the set within
5
-#' the given data set
6
-#' @param Amean A matrix mean values
7
-#' @param Asd A matrix standard deviations
8
-#' @param GSGenes data.frame or list with gene sets
9
-#' @param numPerm number of permutations for null
10
-#' @param Pw weight on genes
11
-#' @param nullGenes logical indicating gene adjustment
12
-#' @return gene similiarity statistic
13
-#' @examples
14
-#' data("SimpSim")
15
-#' calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]],
16
-#' numPerm=500)
17
-#' @export
18
-calcGeneGSStat  <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)),
19
-nullGenes=FALSE)
20
-{
21
-    gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), numPerm=numPerm)
22
-    gsStat <- gsStat$GSUpreg
23
-    gsStat <- -log(gsStat)
24
-
25
-    if (!all(is.na(Pw)))
26
-    {
27
-        if (length(Pw) != length(gsStat))
28
-        {
29
-            stop('Invalid weighting')
30
-        }
31
-        gsStat <- gsStat*Pw
32
-    }
33
-
34
-    if (nullGenes)
35
-    {
36
-        ZD <- Amean[setdiff(row.names(Amean), GSGenes),] /
37
-            Asd[setdiff(row.names(Amean), GSGenes),]
38
-    }
39
-    else
40
-    {
41
-        ZD <- Amean[GSGenes,]/Asd[GSGenes,]
42
-    }
43
-    outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
44
-    outStats <- outStats / apply(ZD,1,sum)
45
-    outStats[which(apply(ZD,1,sum) < 1e-6)] <- 0
46
-
47
-    if (sum(gsStat) < 1e-6)
48
-    {
49
-        return(0)
50
-    }
51
-    return(outStats)
52
-}
53
-
54
-#' Compute Gene Probability
55
-#'
56
-#' @details Computes the p-value for gene set membership using the CoGAPS-based
57
-#' statistics developed in Fertig et al. (2012).  This statistic refines set
58
-#' membership for each candidate gene in a set specified in \code{GSGenes} by
59
-#' comparing the inferred activity of that gene to the average activity of the
60
-#' set.
61
-#' @param Amean A matrix mean values
62
-#' @param Asd A matrix standard deviations
63
-#' @param GSGenes data.frame or list with gene sets
64
-#' @param Pw weight on genes
65
-#' @param numPerm number of permutations for null
66
-#' @param PwNull - logical indicating gene adjustment
67
-#' @return A vector of length GSGenes containing the p-values of set membership
68
-#' for each gene containined in the set specified in GSGenes.
69
-#' @examples
70
-#' data("SimpSim")
71
-#' computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]],
72
-#' numPerm=500)
73
-#' @export
74
-computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)),
75
-numPerm=500, PwNull=FALSE)
76
-{
77
-    geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw,
78
-        GSGenes=GSGenes, numPerm=numPerm)
79
-
80
-    if (PwNull)
81
-    {
82
-        permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd,
83
-            GSGenes=GSGenes, numPerm=numPerm, Pw=Pw,
84
-            nullGenes=TRUE)
85
-    }
86
-    else
87
-    {
88
-        permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd,
89
-            GSGenes=GSGenes, numPerm=numPerm,
90
-            nullGenes=TRUE)
91
-    }
92
-
93
-    finalStats <- sapply(GSGenes, function(x)
94
-        length(which(permGSStat > geneGSStat[x])) / length(permGSStat))
95
-
96
-    return(finalStats)
97
-}
98 0
deleted file mode 100644
... ...
@@ -1,21 +0,0 @@
1
-#' Compute Z-Score Matrix
2
-#'
3
-#' @details calculates the Z-score for each element based on input mean
4
-#' and standard deviation matrices
5
-#' @param meanMat matrix of mean values
6
-#' @param sdMat matrix of standard deviation values
7
-#' @return matrix of z-scores
8
-#' @examples
9
-#' data(SimpSim)
10
-#' calcZ(SimpSim.result$Amean, SimpSim.result$Asd)
11
-#' @export
12
-calcZ <- function(meanMat, sdMat)
13
-{
14
-    if (nrow(meanMat) != nrow(sdMat) | ncol(meanMat) != ncol(sdMat))
15
-        stop("meanMat and sdMat dimensions don't match")
16
-
17
-    zMat <- meanMat / sdMat
18
-    rownames(zMat) <- rownames(meanMat)
19
-    colnames(zMat) <- colnames(meanMat)
20
-    return(zMat)
21
-}
... ...
@@ -67,8 +67,8 @@ ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE, ...)
67 67
             }
68 68
         }
69 69
 
70
-        AByClust[sapply(AByClust,is.null)] <- NULL
71
-        RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL
70
+        #AByClust[sapply(AByClust,is.null)] <- NULL
71
+        #RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL
72 72
         return(list("RtoMeanPattern"=RtoMeanPattern, "AByClust"=AByClust))
73 73
     }   
74 74
 
... ...
@@ -123,82 +123,92 @@ setGeneric("parseDirectParams", function(object, args)
123 123
 #' @rdname setParam-methods
124 124
 #' @aliases setParam
125 125
 setMethod("setParam", signature(object="CogapsParams"),
126
-    function(object, whichParam, value)
127
-    {
128
-        slot(object, whichParam) <- value
129
-        validObject(object)
130
-        return(object)
131
-    }
132
-)
126
+function(object, whichParam, value)
127
+{
128
+    slot(object, whichParam) <- value
129
+    validObject(object)
130
+    return(object)
131
+})
133 132
 
134 133
 #' @rdname getParam-methods
135 134
 #' @aliases getParam
136 135
 setMethod("getParam", signature(object="CogapsParams"),
137
-    function(object, whichParam)
138
-    {
139
-        slot(object, whichParam)
140
-    }
141
-)
136
+function(object, whichParam)
137
+{
138
+    slot(object, whichParam)
139
+})
142 140
 
143 141
 #' @rdname parseOldParams-methods
144 142
 #' @aliases parseOldParams
145 143
 setMethod("parseOldParams", signature(object="CogapsParams"),
146
-    function(object, oldArgs)
144
+function(object, oldArgs)
145
+{
146
+    helper <- function(arg, params, newArg)
147 147
     {
148
-        helper <- function(arg, params, newArg)
148
+        if (!is.null(oldArgs[[arg]]))
149 149
         {
150
-            if (!is.null(oldArgs[[arg]]))
151
-            {
152
-                warning(paste("parameter", arg, "is deprecated, it will still",
153
-                    "work, but setting", newArg, "in the params object is the",
154
-                    "preferred method"))
155
-                params <- setParam(params, newArg, oldArgs[[arg]])
156
-                oldArgs[[arg]] <- NULL
157
-            }            
158
-            return(params)
159
-        }
150
+            warning(paste("parameter", arg, "is deprecated, it will still",
151
+                "work, but setting", newArg, "in the params object is the",
152
+                "preferred method"))
153
+            params <- setParam(params, newArg, oldArgs[[arg]])
154
+            oldArgs[[arg]] <- NULL
155
+        }            
156
+        return(params)
157
+    }
160 158
 
161
-        object <- helper("nFactor", object, "nPatterns")
162
-        object <- helper("nIter", object, "nIterations")
163
-        object <- helper("nEquil", object, "nIterations")
164
-        object <- helper("nSample", object, "nIterations")
165
-        object <- helper("nOutR", object, "outputFrequency")
166
-        object <- helper("nOutput", object, "outputFrequency")
167
-        object <- helper("maxGibbmassA", object, "maxGibbsMassA")
168
-        object <- helper("max_gibbmass_paraA", object, "maxGibbsMassA")
169
-        object <- helper("maxGibbmassP", object, "maxGibbsMassP")
170
-        object <- helper("max_gibbmass_paraP", object, "maxGibbsMassP")
171
-        object <- helper("checkpointFile", object, "checkpointOutFile")
172
-        object <- helper("singleCellRNASeq", object, "singleCell")
173
-
174
-        if (!is.null(oldArgs$nSnapshots) | !is.null(oldArgs$sampleSnapshots) | !is.null(oldArgs$numSnapshots))
175
-        {
176
-            warning("snapshots not currently supported in release build")
177
-            oldArgs$nSnapshots <- NULL
178
-            oldArgs$sampleSnapshots <- NULL
179
-            oldArgs$numSnapshots <- NULL
180
-        }
181
-        if (!is.null(oldArgs$fixedPatterns))
182
-            stop("pass fixed matrix in with 'fixedMatrix' argument")
183
-        if (!is.null(oldArgs$S))
184
-            stop("pass uncertainty matrix in with 'uncertainty', not 'S'")
159
+    object <- helper("nFactor", object, "nPatterns")
160
+    object <- helper("nIter", object, "nIterations")
161
+    object <- helper("nEquil", object, "nIterations")
162
+    object <- helper("nSample", object, "nIterations")
163
+    object <- helper("nOutR", object, "outputFrequency")
164
+    object <- helper("nOutput", object, "outputFrequency")
165
+    object <- helper("maxGibbmassA", object, "maxGibbsMassA")
166
+    object <- helper("max_gibbmass_paraA", object, "maxGibbsMassA")
167
+    object <- helper("maxGibbmassP", object, "maxGibbsMassP")
168
+    object <- helper("max_gibbmass_paraP", object, "maxGibbsMassP")
169
+    object <- helper("singleCellRNASeq", object, "singleCell")
185 170
 
186
-        return(object)
171
+    if (!is.null(oldArgs$nSnapshots) | !is.null(oldArgs$sampleSnapshots) | !is.null(oldArgs$numSnapshots))
172
+    {
173
+        warning("snapshots not currently supported in release build")
174
+        oldArgs$nSnapshots <- NULL
175
+        oldArgs$sampleSnapshots <- NULL
176
+        oldArgs$numSnapshots <- NULL
187 177
     }
188
-)
178
+    if (!is.null(oldArgs$fixedPatterns))
179
+        stop("pass fixed matrix in with 'fixedMatrix' argument")
180
+    if (!is.null(oldArgs$S))
181
+        stop("pass uncertainty matrix in with 'uncertainty', not 'S'")
182
+
183
+    return(object)
184
+})
189 185
 
190 186
 #' @rdname parseDirectParams-methods
191 187
 #' @aliases parseDirectParams
192 188
 setMethod("parseDirectParams", signature(object="CogapsParams"),
193
-    function(object, args)
189
+function(object, args)
190
+{
191
+    for (s in slotNames(object))
194 192
     {
195
-        for (s in slotNames(object))
193
+        if (!is.null(args[[s]]))
196 194
         {
197
-            if (!is.null(args[[s]]))
198
-            {
199
-                object <- setParam(object, s, args[[s]])
200
-            }
195
+            object <- setParam(object, s, args[[s]])
201 196
         }
202
-        return(object)
203 197
     }
204
-)
205 198
\ No newline at end of file
199
+    return(object)
200
+})
201
+
202
+setMethod("show", signature("CogapsParams"),
203
+function(object)
204
+{
205
+    cat("An Object of class \"CogapsParams\"\n")
206
+    cat("nPatterns          ", object@nPatterns, "\n")
207
+    cat("nIterations        ", object@nIterations, "\n")
208
+    cat("outputFrequency    ", object@outputFrequency, "\n")
209
+    cat("nCores             ", object@nCores, "\n")
210
+    cat("singleCell         ", object@singleCell, "\n")
211
+    cat("seed               ", object@seed, "\n")
212
+    cat("messages           ", object@messages, "\n")
213
+    cat("checkpointInterval ", object@checkpointInterval, "\n")
214
+    cat("checkpointOutFile  ", object@checkpointOutFile, "\n")
215
+})
206 216
\ No newline at end of file
... ...
@@ -2,65 +2,491 @@
2 2
 #' @export
3 3
 #'
4 4
 #' @description Contains all output from Cogaps run
5
-setClass("CogapsResult", slots=c(
6
-    Amean = "matrix",
7
-    Asd = "matrix",
8
-    Pmean = "matrix",
9
-    Psd = "matrix",
10
-    seed = "numeric",
11
-    meanChiSq = "numeric",
12
-    diagnostics = "list"
13
-))
5
+#' @importClassesFrom S4Vectors DataFrame Annotated character_OR_NULL
6
+setClass("CogapsResult", contains="Annotated", slots=c(
7
+    sampleFactors = "ANY",      # Pmean transpose
8
+    featureLoadings = "ANY",    # Amean
9
+    featureStdDev = "ANY",      # Asd
10
+    sampleStdDev = "ANY",       # Psd
11
+    NAMES = "character_OR_NULL",
12
+    factorData = "DataFrame"),
13
+)
14 14
 
15 15
 #' Constructor for CogapsResult
16 16
 #' @return initialized CogapsResult object
17 17
 #' @importFrom methods callNextMethod
18 18
 setMethod("initialize", "CogapsResult",
19
-function(.Object, ...)
19
+function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics, ...)
20 20
 {
21
+    .Object@featureLoadings <- Amean
22
+    .Object@sampleFactors <- t(Pmean)
23
+
24
+    if (!is.null(Asd))
25
+        .Object@featureStdDev <- Asd
26
+    if (!is.null(Psd))
27
+        .Object@sampleStdDev <- t(Psd)
28
+
29
+    .Object@metadata[["seed"]] <- seed
30
+    .Object@metadata[["meanChiSq"]] <- meanChiSq
31
+    .Object@metadata[["diagnostics"]] <- diagnostics
32
+
21 33
     .Object <- callNextMethod(.Object, ...)
22 34
     .Object
23 35
 })
24 36
 
37
+setValidity("CogapsResult",
38
+    function(object)
39
+    {
40
+        if (sum(object@featureLoadings < 0) > 0 | sum(object@featureStdDev < 0) > 0)
41
+            "fatal error - negative values in feature x factor Matrix"
42
+        if (sum(object@sampleFactors < 0) > 0 | sum(object@sampleStdDev < 0) > 0)
43
+            "fatal error - negative values in factor x sample Matrix"
44
+    }
45
+)    
46
+
47
+################################### GENERICS ###################################
48
+
49
+#' return chi-sq of final matrices
50
+#' @export
51
+#' @docType methods
52
+#' @rdname getMeanChiSq-methods
53
+#'
54
+#' @param object an object of type CogapsResult
55
+#' @return chi-sq error
56
+#' data(SimpSim)
57
+#' result <- CoGAPS(SimpSim.D)
58
+#' meanChiSq(result)
59
+setGeneric("getMeanChiSq", function(object)
60
+    {standardGeneric("getMeanChiSq")})
61
+
62
+#' compute z-score matrix
63
+#' @export
64
+#' @docType methods
65
+#' @rdname calcZ-methods
66
+#'
67
+#' @description calculates the Z-score for each element based on input mean
68
+#' and standard deviation matrices
69
+#' @param object an object of type CogapsResult
70
+#' @param which either "feature" or "sample" indicating which matrix to
71
+#' calculate the z-score for
72
+#' @return matrix of z-scores
73
+#' @examples
74
+#' data(SimpSim)
75
+#' result <- CoGAPS(SimpSim.D)
76
+#' feature_zscore <- calcZ(result)
77
+setGeneric("calcZ", function(object, which="feature")
78
+    {standardGeneric("calcZ")})
79
+
80
+#' reconstruct gene
81
+#' @export
82
+#' @docType methods
83
+#' @rdname reconstructGene-methods
84
+#'
85
+#' @param object an object of type CogapsResult
86
+#' @param genes an index of the gene or genes of interest
87
+#' @return the D' estimate of a gene or set of genes
88
+#' @examples
89
+#' data(SimpSim)
90
+#' result <- CoGAPS(SimpSim.D)
91
+#' D_estimate <- reconstructGene(result)
92
+setGeneric("reconstructGene", function(object, genes=NULL)
93
+    {standardGeneric("reconstructGene")})
94
+
95
+#' binary heatmap for standardized feature matrix
96
+#' @export
97
+#' @docType methods
98
+#' @rdname binaryA-methods
99
+#'
100
+#' @description creates a binarized heatmap of the A matrix
101
+#' in which the value is 1 if the value in Amean is greater than
102
+#' threshold * Asd and 0 otherwise
103
+#' @param object an object of type CogapsResult
104
+#' @param threshold the number of standard deviations above zero
105
+#' that an element of Amean must be to get a value of 1
106
+#' @return plots a heatmap of the A Matrix
107
+#' @examples
108
+#' data(SimpSim)
109
+#' result <- CoGAPS(SimpSim.D)
110
+#' binMatrix <- binaryA(result, threshold=3)
111
+setGeneric("binaryA", function(object, threshold=3)
112
+    {standardGeneric("binaryA")})
113
+
114
+#' plot of residuals
115
+#' @export
116
+#' @docType methods
117
+#' @rdname plotResiduals-methods
118
+#'
119
+#' @description calculate residuals and produce heatmap
120
+#' @param object an object of type CogapsResult
121
+#' @param data original data matrix run through GAPS
122
+#' @param uncertainty original standard deviation matrix run through GAPS
123
+#' @return creates a residual plot
124
+#' @examples
125
+#' data(SimpSim)
126
+#' result <- CoGAPS(SimpSim.D)
127
+#' plotResiduals(result, SimpSim.D)
128
+setGeneric("plotResiduals", function(object, data, uncertainty=NULL)
129
+    {standardGeneric("plotResiduals")})
130
+
131
+#' calculate gene set statistics
132
+#' @export
133
+#' @docType methods
134
+#' @rdname calcCoGAPSStat-methods
135
+#'
136
+#' @description calculates the gene set statistics for each
137
+#' column of A using a Z-score from the elements of the A matrix,
138
+#' the input gene set, and permutation tests
139
+#' @param object an object of type CogapsResult
140
+#' @param GStoGenes data.frame or list with gene sets
141
+#' @param numPerm number of permutations for null
142
+#' @return gene set statistics for each column of A
143
+#' @examples
144
+#' data('SimpSim')
145
+#' result <- CoGAPS(SimpSim.D)
146
+#' calcCoGAPSStat(result, GStoGenes=GSets, numPerm=500)
147
+setGeneric("calcCoGAPSStat", function(object, GStoGenes, numPerm=500)
148
+    {standardGeneric("calcCoGAPSStat")})
149
+
150
+#' probability gene belongs in gene set
151
+#' @export
152
+#' @docType methods
153
+#' @rdname calcGeneGSStat-methods
154
+#'
155
+#' @description calculates the probability that a gene
156
+#' listed in a gene set behaves like other genes in the set within
157
+#' the given data set
158
+#' @param object an object of type CogapsResult
159
+#' @param GSGenes data.frame or list with gene sets
160
+#' @param numPerm number of permutations for null
161
+#' @param Pw weight on genes
162
+#' @param nullGenes logical indicating gene adjustment
163
+#' @return gene similiarity statistic
164
+#' @examples
165
+#' data(SimpSim)
166
+#' result <- CoGAPS(SimpSim.D)
167
+#' calcGeneGSStat(result, GSGenes=GSets[[1]], numPerm=500)
168
+setGeneric("calcGeneGSStat", function(object, GStoGenes, numPerm,
169
+Pw=rep(1,ncol(Amean)), nullGenes=FALSE)
170
+    {standardGeneric("calcGeneGSStat")})
171
+
172
+#' compute gene probability
173
+#' @export
174
+#' @docType methods
175
+#' @rdname computeGeneGSProb-methods
176
+#'
177
+#' @description Computes the p-value for gene set membership using the CoGAPS-based
178
+#' statistics developed in Fertig et al. (2012).  This statistic refines set
179
+#' membership for each candidate gene in a set specified in \code{GSGenes} by
180
+#' comparing the inferred activity of that gene to the average activity of the
181
+#' set.
182
+#' @param object an object of type CogapsResult
183
+#' @param GSGenes data.frame or list with gene sets
184
+#' @param Pw weight on genes
185
+#' @param numPerm number of permutations for null
186
+#' @param PwNull - logical indicating gene adjustment
187
+#' @return A vector of length GSGenes containing the p-values of set membership
188
+#' for each gene containined in the set specified in GSGenes.
189
+#' @examples
190
+#' data(SimpSim)
191
+#' result <- CoGAPS(SimpSim.D)
192
+#' computeGeneGSProb(result, GSGenes=GSets[[1]], numPerm=500)
193
+setGeneric("computeGeneGSProb", function(object, GStoGenes, numPerm=500,
194
+Pw=rep(1,ncol(Amean)), PwNull=FALSE)
195
+    {standardGeneric("computeGeneGSProb")})
196
+
197
+################################## METHODS #####################################
198
+
25 199
 setMethod("show", signature("CogapsResult"),
26 200
 function(object)
27 201
 {
28
-    nGenes <- nrow(object@Amean)
29
-    nPatterns <- ncol(object@Amean)
30
-    nSamples <- ncol(object@Pmean)
202
+    nFeatures <- nrow(object@featureLoadings)
203
+    nSamples <- nrow(object@sampleFactors)
204
+    nPatterns <- ncol(object@featureLoadings)
31 205
 
32
-    print(paste("CogapsResult object with", nGenes, "genes and", nSamples, "samples"))
206
+    print(paste("CogapsResult object with", nFeatures, "features and", nSamples,
207
+        "samples"))
33 208
     print(paste(nPatterns, "patterns were learned"))
34 209
 })
35 210
 
36
-setMethod('plot', signature(x='CogapsResult'),
37
-function(x)
211
+#' @export
212
+#' @importFrom graphics plot
213
+plot.CogapsResult <- function(object, ...)
38 214
 {
39
-    colors <- rainbow(nrow(object@Pmean))
40
-    xlimits <- c(0, ncol(object@Pmean) + 1)
41
-    ylimits <- c(0, (max(object@Pmean) * 1.05))
215
+    nSamples <- nrow(object@sampleFactors)
216
+    nFactors <- ncol(object@sampleFactors)
217
+    colors <- rainbow(nFactors)
218
+    xlimits <- c(0, nSamples + 1)
219
+    ylimits <- c(0, max(object@sampleFactors) * 1.1)
42 220
 
43 221
     plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude")
44 222
 
45
-    for (i in 1:nrow(object@Pmean))
223
+    for (i in 1:nFactors)
46 224
     {
47
-        lines(x=1:ncol(object@Pmean), y=object@Pmean[i,], col=colors[i])
48
-        points(x=1:ncol(object@Pmean), y=object@Pmean[i,], col=colors[i], pch=i)
225
+        lines(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i])
226
+        points(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i], pch=i)
49 227
     }
50 228
 
51
-    legend("bottom", paste("Pattern", 1:nrow(object@Pmean), sep = ""),
52
-    pch = 1:nrow(object@Pmean), lty=1, cex=0.8, col=colors, bty="y", ncol=5)
229
+    legend("top", paste("Pattern", 1:nFactors, sep = ""), pch = 1:nFactors,
230
+        lty=1, cex=0.8, col=colors, bty="y", ncol=5)
231
+}
232
+
233
+#' @rdname getMeanChiSq-methods
234
+#' @aliases getMeanChiSq
235
+setMethod("getMeanChiSq", signature(object="CogapsResult"),
236
+function(object)
237
+{
238
+    object@metadata$meanChiSq
53 239
 })
54 240
 
55
-#' @export
56
-setGeneric("MergeResultsWithSCE", function(result, SCE)
57
-    {standardGeneric("MergeResultsWithSCE")})
241
+#' @rdname calcZ-methods
242
+#' @aliases calcZ
243
+setMethod("calcZ", signature(object="CogapsResult"),
244
+function(object, which)
245
+{
246
+    if (which == "feature")
247
+    {
248
+        object@featureLoadings / object@featureStdDev
249
+    }
250
+    else if (which == "sample")
251
+    {
252
+        object@sampleFactors / object@sampleStdDev
253
+    }
254
+    else
255
+    {
256
+        stop("\"which\" must be either \"feature\" or \"sample\"")
257
+    }
258
+})
259
+
260
+#' @rdname reconstructGene-methods
261
+#' @aliases reconstructGene
262
+setMethod("reconstructGene", signature(object="CogapsResult"),
263
+function(object, genes)
264
+{
265
+    D <- object@featureLoadings %*% t(object@sampleFactors)
266
+    if (!is.null(genes))
267
+    {
268
+        D <- D[genes,]
269
+    }
270
+    return(D)
271
+})
272
+
273
+#' @rdname binaryA-methods
274
+#' @aliases binaryA
275
+setMethod("binaryA", signature(object="CogapsResult"),
276
+function(object, threshold)
277
+{
278
+    binA <- ifelse(calcZ(object) > threshold, 1, 0)
279
+
280
+    heatmap.2(binA, Rowv = FALSE, Colv = FALSE, dendrogram="none",
281
+        scale="none", col = brewer.pal(3,"Blues"), trace="none",
282
+        density.info="none", cexCol=1.3, srtCol=45,
283
+        lmat=rbind(c(0, 3), c(2,1), c(0,4) ),
284
+        lwid=c(1,10), lhei=c(1, 4, 1.2 ),
285
+        main="Heatmap of Standardized Feature Matrix")
286
+    mtext(paste("(Threshold = ", threshold, ")"))
287
+})
288
+
289
+#' @rdname plotResiduals-methods
290
+#' @aliases plotResiduals
291
+setMethod("plotResiduals", signature(object="CogapsResult"),
292
+function(object, data, uncertainty)
293
+{
294
+    data <- as.matrix(data)
295
+    if (is.null(uncertainty))
296
+        uncertainty <- pmax(0.1 * data, 0.1)
297
+    uncertainty <- as.matrix(uncertainty)
298
+
299
+    M <- reconstructGene(object)
300
+    resid <- (data - M) / uncertainty
301
+    
302
+    scaledRdYlBu <- colorRampPalette(brewer.pal(9,"RdYlBu"))(100)
303
+    heatmap.2(resid, Rowv = FALSE, Colv = FALSE, dendrogram="none",
304
+        scale="none", col = scaledRdYlBu, trace="none", density.info="none",
305
+        cexCol=1.33, srtCol=45, lmat=rbind(c(0, 3),c(2,1),c(0,4) ),
306
+        lwid=c(1,10), lhei=c(1, 4, 1.2 ), main="Heatmap of Residuals")
307
+})
58 308
 
59
-#' @importClassesFrom SingleCellExperiment SingleCellExperiment
60
-setMethod("MergeResultsWithSCE", signature("CogapsResult", "SingleCellExperiment"),
61
-function(result, SCE)
309
+#' @rdname calcCoGAPSStat-methods
310
+#' @aliases calcCoGAPSStat
311
+setMethod("calcCoGAPSStat", signature(object="CogapsResult"),
312
+function(object, GStoGenes, numPerm)
62 313
 {
63
-    SCE@reducedDims <- SimpleList(Amean=result@Amean, Pmean=result@Pmean)
64
-    return(SCE)
314
+    # test for std dev of zero, possible mostly in simple simulations
315
+    if (sum(object@featureStdDev==0) > 0)
316
+    {
317
+        object@featureStdDev[object@featureStdDev==0] <- 1e-6
318
+    }
319
+
320
+    # calculate Z scores
321
+    zMatrix <- calcZ(object)
322
+
323
+    # check input arguments
324
+    if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets"))
325
+    {
326
+        stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.")
327
+    }
328
+
329
+    if (is(GStoGenes, "GSA.genesets"))
330
+    {
331
+        names(GStoGenes$genesets) <- GStoGenes$geneset.names
332
+        GStoGenes <- GStoGenes$genesets
333
+    }
334
+
335
+    if (is(GStoGenes, "list"))
336
+    {
337
+        GStoGenesList <- GStoGenes
338
+    }
339
+    else
340
+    {
341
+        GStoGenesList <- list()
342
+        for (i in 1:dim(GStoGenes)[2])
343
+        {
344
+            GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i]))
345
+        }
346
+    }
347
+
348
+    # get dimensions
349
+    numGS   <- length(names(GStoGenesList))
350
+    numPatt <- dim(zMatrix)[2]
351
+    numG    <- dim(zMatrix)[1]+0.9999
352
+
353
+    # initialize matrices
354
+    statsUp   <- matrix(ncol = numGS, nrow = numPatt)
355
+    statsDown <- matrix(ncol = numGS, nrow = numPatt)
356
+    actEst    <- matrix(ncol = numGS, nrow = numPatt)
357
+    results   <- list()
358
+    zPerm     <- matrix(ncol=numPerm,nrow=numPatt)
359
+
360
+    # do permutation test
361
+    for (gs in 1:numGS)
362
+    {
363
+        genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
364
+        index <- rownames(zMatrix) %in% genes
365
+        zValues <- zMatrix[index,1]
366
+        numGenes <- length(zValues)
367
+        label <- as.character(numGenes)
368
+
369
+        if (!any(names(results)==label))
370
+        {
371
+            for (p in 1:numPatt)
372
+            {
373
+                for (j in 1:numPerm)
374
+                {
375
+                    temp <- floor(runif(numGenes,1,numG))
376
+                    temp2 <- zMatrix[temp,p]
377
+                    zPerm[p,j] <- mean(temp2)
378
+                }
379
+            }
380
+            results[[label]] <- zPerm
381
+        }
382
+    }
383
+
384
+    # get p-value
385
+    for (p in 1:numPatt)
386
+    {
387
+        for (gs in 1:numGS)
388
+        {
389
+            genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
390
+            index <- rownames(zMatrix) %in% genes
391
+            zValues <- zMatrix[index,p]
392
+            zScore <- mean(zValues)
393
+
394
+            numGenes <- length(zValues)
395
+            label <- as.character(numGenes)
396
+
397
+            permzValues <- results[[label]][p,]
398
+            ordering <- order(permzValues)
399
+            permzValues <- permzValues[ordering]
400
+
401
+            statistic <- sum(zScore > permzValues)
402
+            statUpReg <- 1 - statistic/length(permzValues)
403
+            statsUp[p,gs] <- max(statUpReg, 1/numPerm)
404
+
405
+            statistic <- sum(zScore < permzValues)
406
+            statDownReg <- 1 - statistic/length(permzValues)
407
+            statsDown[p,gs] <- max(statDownReg,1/numPerm)
408
+
409
+            activity <- 1 - 2*max(statUpReg, 1/numPerm)
410
+            actEst[p,gs] <- activity
411
+        }
412
+    }
413
+
414
+    # format output
415
+    colnames(statsUp) <- names(GStoGenesList)
416
+    colnames(statsDown) <- names(GStoGenesList)
417
+    colnames(actEst) <- names(GStoGenesList)
418
+
419
+    rownames(statsUp) <- colnames(zMatrix)
420
+    rownames(statsDown) <- colnames(zMatrix)
421
+    rownames(actEst) <- colnames(zMatrix)
422
+
423
+    results[['GSUpreg']] <- statsUp
424
+    results[['GSDownreg']] <- statsDown
425
+    results[['GSActEst']] <- actEst
426
+
427
+    return(results)
428
+})
429
+
430
+#' @rdname calcGeneGSStat-methods
431
+#' @aliases calcGeneGSStat
432
+setMethod("calcGeneGSStat", signature(object="CogapsResult"),
433
+function(object, GStoGenes, numPerm, Pw, nullGenes)
434
+{
435
+    gsStat <- calcCoGAPSStat(object, data.frame(GSGenes), numPerm=numPerm)
436
+    gsStat <- gsStat$GSUpreg
437
+    gsStat <- -log(gsStat)
438
+
439
+    if (!all(is.na(Pw)))
440
+    {
441
+        if (length(Pw) != length(gsStat))
442
+        {
443
+            stop('Invalid weighting')
444
+        }
445
+        gsStat <- gsStat*Pw
446
+    }
447
+
448
+    if (nullGenes)
449
+    {
450
+        ZD <- object@featureLoadings[setdiff(row.names(object@featureLoadings), GSGenes),] /
451
+            object@featureStdDev[setdiff(row.names(object@featureLoadings), GSGenes),]
452
+    }
453
+    else
454
+    {
455
+        ZD <- object@featureLoadings[GSGenes,]/object@featureStdDev[GSGenes,]
456
+    }
457
+    outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
458
+    outStats <- outStats / apply(ZD,1,sum)
459
+    outStats[which(apply(ZD,1,sum) < 1e-6)] <- 0
460
+
461
+    if (sum(gsStat) < 1e-6)
462
+    {
463
+        return(0)
464
+    }
465
+    return(outStats)
466
+})
467
+
468
+#' @rdname computeGeneGSProb-methods
469
+#' @aliases computeGeneGSProb
470
+setMethod("computeGeneGSProb", signature(object="CogapsResult"),
471
+function(object, GStoGenes, numPerm, Pw, PwNull)
472
+{
473
+    geneGSStat <- calcGeneGSStat(object, Pw=Pw, GSGenes=GSGenes,
474
+        numPerm=numPerm)
475
+
476
+    if (PwNull)
477
+    {
478
+        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
479
+            Pw=Pw, nullGenes=TRUE)
480
+    }
481
+    else
482
+    {
483
+        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
484
+            nullGenes=TRUE)
485
+    }
486
+
487
+    finalStats <- sapply(GSGenes, function(x)
488
+        length(which(permGSStat > geneGSStat[x])) / length(permGSStat))
489
+
490
+    return(finalStats)
65 491
 })
66 492
 
... ...
@@ -40,8 +40,10 @@ path="", anotionObj=NULL)
40 40
                 warning("Not all celltypes will be sampled from.")
41 41
             }
42 42
             ct.indx <- lapply(unique(anotionObj), function(x) which(anotionObj == x))
43
+            names(ct.indx) <- unique(anotionObj)
43 44
             cellset <- lapply(unique(anotionObj), function(x)
44 45
                 sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE))
46
+            cellset <- unlist(cellset)
45 47
         }
46 48
 
47 49
         # partition data
48 50
deleted file mode 100644
... ...
@@ -1,143 +0,0 @@
1
-#Calculates significant genes in each pattern according to certain threshold
2
-#Returns the significant gene names as well as well as the means of these matrices and number of genes in each
3
-gapsInterPattern <- function(Amean, Asd, sdThreshold = 3)
4
-{
5
-    #number of rows and cols of Asd
6
-    numGenes = length(Asd[,1]);
7
-    numCols = length(Asd[1,]);
8
-
9
-    #Vector holding the number of each significant gene in each column
10
-    sigGeneNums = data.frame();
11
-
12
-    #Temp number of sig genes in the col
13
-    sigCount = 0;
14
-
15
-    #Number to catch the amount of columns without significant genes
16
-    numEmptyCols = 0;
17
-
18
-    #Keep an array of the significant gene counts
19
-    significantGeneNums = c(0);
20
-
21
-    #Names of the genes from the data matrix
22
-    geneNames = names(Asd[,1]);
23
-
24
-    #Names of the genes that are significant from the data matrix
25
-    sigGeneNames = "0";
26
-
27
-    #The numerator of our statistic
28
-    dimensionStatNumerator = 0;
29
-
30
-    #The denominator of our statistic
31
-    dimensionStatDenominator = 0;
32
-
33
-    #The value of our statistic
34
-    dimensionStatistic = 0;
35
-
36
-    #A matrix holding the values of our statistics
37
-    dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols);
38
-
39
-    #The mean of the statistic matrix
40
-    sbar = 0;
41
-
42
-    #A list to return the significant genes in each col and the statistic matrix
43
-    results = list(list());
44
-
45
-    #Scan in the significant genes from each column of Asd
46
-    #The columns of sigGeneNums hold the significant genes for each col of Asd
47
-    for(i in 1:numCols)
48
-    {
49
-        sigCount = 0;
50
-        for(j in 1:numGenes)
51
-        {
52
-            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
53
-            {
54
-                sigCount = sigCount + 1;
55
-                sigGeneNums[sigCount, i] = j;
56
-            }
57
-        }
58
-
59
-        if(sigCount == 0)
60
-        {
61
-            sigGeneNums[1, i] = 0;
62
-            numEmptyCols = numEmptyCols + 1;
63
-        }
64
-
65
-        #Save the number of sigGenes
66
-        significantGeneNums[i] = sigCount;
67
-
68
-        #Get the names and store them
69
-        if(sigCount != 0)
70
-        {
71
-            for(k in 1:sigCount)
72
-            {
73
-                sigGeneNames[k] = geneNames[sigGeneNums[k, i]];
74
-            }
75
-            results[[1]][[i]] = sigGeneNames;
76
-            sigGeneNames = "0";
77
-        }
78
-        else
79
-        {
80
-            results[[1]][[i]] = "None";
81
-        }
82
-    }
83
-
84
-    if(any(significantGeneNums == 0))
85
-    {
86
-        zeroSigCols = which(significantGeneNums == 0);
87
-        print("Warning: No Significant Genes in Pattern(s): ");
88
-
89
-        for(z in 1:length(zeroSigCols))
90
-        {
91
-            print(zeroSigCols[z]);
92
-        }
93
-    }
94
-
95
-    #Now that we have the significant genes want to see if these genes are significant in other columns
96
-    #Fill across the row then down the column
97
-
98
-    #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i'
99
-    for(i in 1:numCols)
100
-    {
101
-        for(j in 1:numCols)
102
-        {
103
-            #Grab the number of significant genes from the interested column
104
-            sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
105
-
106
-            if(sigCount != 0)
107
-            {
108
-                dimensionStatDenominator = sigCount;
109
-                dimensionStatNumerator = 0;
110
-
111
-                #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd.
112
-                #Find the significance of these genes, Amean - threshold * Asd
113
-                for(k in 1:sigCount)
114
-                {
115
-                    if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0)
116
-                    {
117
-                        dimensionStatNumerator = dimensionStatNumerator + 1;
118
-                    }
119
-                }
120
-
121
-                dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator;
122
-
123
-                dimensionStatisticMatrix[i, j] = dimensionStatistic;
124
-            }
125
-            else
126
-            {
127
-                dimensionStatisticMatrix[i, j] = 0;
128
-            }
129
-        }
130
-    }
131
-
132
-    #Find mean of the matrices (excluding the diagonal elements)
133
-    #we can subtract numCols as the diagonal elements are 1
134
-    sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols)));
135
-
136
-    results[[2]] = significantGeneNums;
137
-    results[[3]] = t(dimensionStatisticMatrix);
138
-    results[[4]] = sbar;
139
-
140
-    names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue");
141
-
142
-    return(results);
143
-}
144 0
deleted file mode 100644
... ...
@@ -1,134 +0,0 @@
1
-#Calculates significant genes in each pattern according to certain threshold
2
-#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices
3
-gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3)
4
-{
5
-    #number of rows and cols of Asd
6
-    numGenes = length(Asd[,1]);
7
-    numCols = length(Asd[1,]);
8
-
9
-    #number of samples in DMatrix
10
-    numSamp = ncol(DMatrix);
11
-
12
-    #Vector holding the number of each significant gene in each column
13
-    sigGeneNums = data.frame();
14
-
15
-    #Temp number of sig genes in the col
16
-    sigCount = 0;
17
-
18
-    #Keep an array of the significant gene counts
19
-    significantGeneNums = c(0);
20
-
21
-    #A matrix to hold the significant genes in D for the current pattern
22
-    #The matrix just acts as a subset of D, just eliminates non relevant rows
23
-    tempSubsetD = matrix();
24
-
25
-    #A matrix holding the values of our correlation coefficients between genes for the current column
26
-    tempGeneCorrMatrix = matrix();
27
-
28
-    #A list to hold all the correlation matrices
29
-    geneCorrMatrices = list();
30
-
31
-    #A list to hold all the means
32
-    geneCorrMatrMeans = list();
33
-
34
-    #The mean of all the correlation matrices
35
-    cbar = 0;
36
-
37
-    #A list to return the means and the matrices
38
-    results = list();
39
-
40
-    #Scan in the significant genes from each column of Asd
41
-    #The columns of sigGeneNums hold the significant genes for each col of Asd
42
-    for(i in 1:numCols)
43
-    {
44
-        sigCount = 0;
45
-        for(j in 1:numGenes)
46
-        {
47
-            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
48
-            {
49
-                sigCount = sigCount + 1;
50
-                sigGeneNums[sigCount, i] = j;
51
-            }
52
-        }
53
-
54
-        if(sigCount == 0)
55
-        {
56
-            sigGeneNums[1, i] = 0;
57
-        }
58
-
59
-        #Save the number of sigGenes
60
-        significantGeneNums[i] = sigCount;
61
-    }
62
-
63
-    #If a pattern has no significant genes this is clearly an error so return such
64
-    if(any(significantGeneNums == 0))
65
-    {
66
-        zeroSigCols = which(significantGeneNums == 0);
67
-        warning("Warning: No Significant Genes in Pattern(s): ");
68
-
69
-        for(z in 1:length(zeroSigCols))
70
-        {
71
-            message(zeroSigCols[z]);
72
-        }
73
-    }
74
-
75
-
76
-    #Now that we have the significant genes want to grab these from our original D matrix
77
-    #and find the sigGene x sigGene correlation matrix and find its mean
78
-
79
-    for(j in 1:numCols)
80
-    {
81
-        #Grab the number of significant genes from the interested column
82
-        sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
83
-
84
-        if(sigCount != 0)
85
-        {
86
-
87
-            #loop through the number of significant genes and pull out the rows of D that represent these genes.
88
-            #Then find the correlation between them with the built in R corr function
89
-            tempSubsetD = matrix(nrow = sigCount, ncol = numSamp);
90
-            for(k in 1:sigCount)
91
-            {
92
-                #Subset D based on significant Genes
93
-                #need to transpose as it reads this in as column vector otherwise
94
-                tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]);
95
-            }
96
-
97
-            #Find the correlation between these genes in D
98
-            #Need to transpose as it calculates correlations between the columns
99
-            tempGeneCorrMatrix = cor(t(tempSubsetD));
100
-
101
-            #Find the mean of this matrix
102
-            tempGeneCorrMatrMean = mean(tempGeneCorrMatrix);
103
-
104
-        }
105
-        else
106
-        {
107
-            tempGeneCorrMatrix = 0;
108
-            tempGeneCorrMatrMean = 0;
109
-        }
110