Browse code

making progress towards passing Bioc Checks

sherman5 authored on 19/04/2018 20:02:35
Showing52 changed files

1 1
deleted file mode 100644
... ...
@@ -1 +0,0 @@
1
-load("/Users/maggiewodicka/Desktop/CoGAPS/GWCoGAPS.AP.fixed.Rda")
... ...
@@ -1,9 +1,10 @@
1 1
 Package: CoGAPS
2
-Version: 2.7.0
3
-Date: 2014-08-23
2
+Version: 2.99.0
3
+Date: 2018-04-24
4 4
 Title: Coordinated Gene Activity in Pattern Sets
5 5
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
6
-    Genevieve Stein-O'Brien, Michael Considine, Maggie Wodicka, John Stansfield, Shawn Sivy, Carlo Colantuoni, Alexander Favorov, Mike Ochs, Elana Fertig
6
+    Genevieve Stein-O'Brien, Michael Considine, Maggie Wodicka, John Stansfield,
7
+    Shawn Sivy, Carlo Colantuoni, Alexander Favorov, Mike Ochs, Elana Fertig
7 8
 Description: Coordinated Gene Activity in Pattern Sets (CoGAPS)
8 9
     implements a Bayesian MCMC matrix factorization algorithm,
9 10
     GAPS, and links it to gene set statistic methods to infer biological
... ...
@@ -41,4 +42,4 @@ biocViews: GeneExpression, Transcription, GeneSetEnrichment,
41 42
     DifferentialExpression, Bayesian, Clustering, TimeCourse, RNASeq, Microarray,
42 43
     MultipleComparison, DimensionReduction
43 44
 NeedsCompilation: yes
44
-RoxygenNote: 6.0.1
45
+RoxygenNote: 5.0.1
... ...
@@ -26,6 +26,7 @@ export(reOrderBySet)
26 26
 export(reconstructGene)
27 27
 export(residuals)
28 28
 export(scCoGAPS)
29
+export(scCoGAPSFromCheckpoint)
29 30
 import(doParallel)
30 31
 import(foreach)
31 32
 import(ggplot2)
... ...
@@ -24,8 +24,6 @@
24 24
 #' @param fixedPatterns matrix of fixed values in either A or P matrix
25 25
 #' @param checkpointInterval time (in seconds) between creating a checkpoint
26 26
 #' @param checkpointFile name of the checkpoint file
27
-#' @param pumpThreshold type of threshold for pump statistic
28
-#' @param nPumpSamples number of samples used in pump statistic
29 27
 #' @param ... keeps backwards compatibility with arguments from older versions
30 28
 #' @return list with A and P matrix estimates
31 29
 #' @importFrom methods new
... ...
@@ -37,8 +35,7 @@ CoGAPS <- function(D, S, nFactor=7, nEquil=1000, nSample=1000, nOutputs=1000,
37 35
 nSnapshots=0, alphaA=0.01, alphaP=0.01, maxGibbmassA=100, maxGibbmassP=100,
38 36
 seed=-1, messages=TRUE, singleCellRNASeq=FALSE, whichMatrixFixed='N',
39 37
 fixedPatterns=matrix(0), checkpointInterval=0, 
40
-checkpointFile="gaps_checkpoint.out", pumpThreshold="unique",
41
-nPumpSamples=0, ...)
38
+checkpointFile="gaps_checkpoint.out", ...)
42 39
 {
43 40
     # get v2 arguments
44 41
     oldArgs <- list(...)
... ...
@@ -57,6 +54,14 @@ nPumpSamples=0, ...)
57 54
     if (missing(S) & !is.null(oldArgs$unc))
58 55
         S <- oldArgs$unc
59 56
 
57
+    # get pump arguments - hidden for now from user
58
+    pumpThreshold <- "unique"
59
+    nPumpSamples <- 0
60
+    if (!is.null(list(...)$pumpThreshold))
61
+        pumpThreshold <- list(...)$pumpThreshold
62
+    if (!is.null(list(...)$nPumpSamples))
63
+        pumpThreshold <- list(...)$nPumpSamples
64
+
60 65
     # check arguments
61 66
     if (class(D) != "matrix" | class(S) != "matrix")
62 67
         stop('D and S must be matrices')
... ...
@@ -149,7 +154,7 @@ output_atomic=FALSE, fixedBinProbs=FALSE, fixedDomain="N", sampleSnapshots=TRUE,
149 154
 numSnapshots=100, alphaA=0.01, nMaxA=100000, max_gibbmass_paraA=100.0,
150 155
 alphaP=0.01, nMaxP=100000, max_gibbmass_paraP=100.0, seed=-1, messages=TRUE)
151 156
 {
152
-    #warning('gapsRun is deprecated with v3.0, use CoGAPS')
157
+    warning('gapsRun is deprecated with v3.0, use CoGAPS')
153 158
     CoGAPS(D, S, nFactor=nFactor, nEquil=nEquil, nSample=nSample,
154 159
         nOutputs=nOutR, nSnapshots=ifelse(sampleSnapshots,numSnapshots,0),
155 160
         alphaA=alphaA, alphaP=alphaP, maxGibbmassA=max_gibbmass_paraA,
... ...
@@ -179,7 +184,7 @@ sampleSnapshots=TRUE, numSnapshots=100, alphaA=0.01, nMaxA=100000,
179 184
 max_gibbmass_paraA=100.0, alphaP=0.01, nMaxP=100000, max_gibbmass_paraP=100.0,
180 185
 seed=-1, messages=TRUE)
181 186
 {
182
-    #warning('gapsMapRun is deprecated with v3.0, use CoGaps')
187
+    warning('gapsMapRun is deprecated with v3.0, use CoGaps')
183 188
     CoGAPS(D, S, nFactor=nFactor, nEquil=nEquil, nSample=nSample,
184 189
         nOutputs=nOutR, nSnapshots=ifelse(sampleSnapshots,numSnapshots,0),
185 190
         alphaA=alphaA, alphaP=alphaP, maxGibbmassA=max_gibbmass_paraA,
... ...
@@ -192,7 +197,7 @@ v2CoGAPS <- function(result, ...)
192 197
 {
193 198
     if (!is.null(list(...)$GStoGenes))
194 199
     {
195
-        #warning('GStoGenes is deprecated with v3.0, see CoGAPS documentation')
200
+        warning('GStoGenes is deprecated with v3.0, see CoGAPS documentation')
196 201
         if (is.null(list(...)$plot) | list(...)$plot)
197 202
         {
198 203
             plotGAPS(result$Amean, result$Pmean)
... ...
@@ -18,16 +18,21 @@
18 18
 #' data(SimpSim)
19 19
 #' sim_name <- "example"
20 20
 #' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
21
-#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000)
21
+#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
22 22
 #' @export
23 23
 GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, manualMatch=FALSE, consensusPatterns=NULL, ...)
24 24
 {
25 25
     if (!is.null(list(...)$checkpointFile))
26
+    {
26 27
         stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter")
27
-    if (is.null(consensusPatterns)){
28
+    }
29
+
30
+    if (is.null(consensusPatterns))
31
+    {
28 32
         allDataSets <- preInitialPhase(simulationName, nCores)
29 33
         initialResult <- runInitialPhase(simulationName, allDataSets, nFactor, ...)
30
-        if(manualMatch){
34
+        if (manualMatch)
35
+        {
31 36
             saveRDS(initialResult,file=paste(simulationName,"_initial.rds", sep=""))
32 37
             stop("Please provide consensus patterns upon restarting.")
33 38
         }
... ...
@@ -107,7 +112,9 @@ preInitialPhase <- function(simulationName, nCores)
107 112
 
108 113
     # establish the number of cores that we are able to use
109 114
     if (is.na(nCores))
115
+    {
110 116
         nCores <- length(allDataSets)
117
+    }
111 118
     registerDoParallel(cores=nCores)
112 119
     return(allDataSets)
113 120
 }
... ...
@@ -139,21 +146,29 @@ postInitialPhase <- function(initialResult, nSets, cut, minNS)
139 146
 
140 147
     #run postpattern match function
141 148
     if (is.na(cut))
149
+    {
142 150
         cut <- nFactor
151
+    }
152
+
143 153
     return(patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets, cnt=cut,
144 154
         minNS=minNS, bySet=TRUE))
145 155
 }
146 156
 
147 157
 runFinalPhase <- function(simulationName, allDataSets, consensusPatterns, nCores, ...)
148 158
 {    
149
-    if(length(dim(consensusPatterns))!=2){stop("consensusPatterns must be a matrix")}
159
+    if (length(dim(consensusPatterns)) != 2)
160
+    {
161
+        stop("consensusPatterns must be a matrix")
162
+    }
150 163
 
151 164
     # find data files if providing consensus patterns
152 165
     fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="")
153 166
     allDataSets <- list.files(full.names=TRUE, pattern=fileSig)
154 167
 
155 168
     if (is.na(nCores))
156
-    nCores <- length(allDataSets)
169
+    {
170
+        nCores <- length(allDataSets)
171
+    }
157 172
     registerDoParallel(cores=nCores)
158 173
 
159 174
     # generate seeds for parallelization
... ...
@@ -1,95 +1,121 @@
1
-
2
-#' patternMatch4Parallel
1
+#' cellMatchR
2
+#' @export
3 3
 #'
4 4
 #' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
5 5
 #' @param nSets number of parallel sets used to generate \code{Atot}
6 6
 #' @param cnt  number of branches at which to cut dendrogram
7 7
 #' @param minNS minimum of individual set contributions a cluster must contain
8
+#' @param maxNS maximum of individual set contributions a cluster must contain
8 9
 #' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
9 10
 #' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot}
10
-#' @param R2mean 
11
-#' @param
11
+#' @param plotDen plot
12 12
 #' @param ... additional parameters for \code{agnes}
13 13
 #' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
14 14
 #' concensus pattern is also returned.
15
-#' @export
16
-#' @seealso \code{\link{fastcluster}}
17
-#'
18
-#'
15
+cellMatchR <- function(Atot, nSets, cnt, minNS=NA, maxNS=NA,
16
+ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE, ...)
17
+{
18
+    if (is.na(minNS))
19
+    {
20
+        minNS <- nSets / 2
21
+    }
22
+    if (is.na(maxNS))
23
+    {
24
+        maxNS <- nSets + minNS
25
+    }
26
+    if (!ignore.NA)
27
+    {
28
+        if (anyNA(Atot))
29
+        {
30
+            warning(paste("Non-sparse matrixes produced. Reducing the number",
31
+                "of patterns asked for and rerun."))
32
+        }
33
+    }
34
+    else
35
+    {
36
+        Atot <- Atot[complete.cases(Atot),]
37
+    }
19 38
 
20
-cellMatchR <- function(Atot,nSets, cnt, minNS=NULL, maxNS=NULL, ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE,...){
39
+    corcut <- function(Atot, minNS, cnt, cluster.method)
40
+    {
41
+        corr.dist <- cor(Atot)
42
+        corr.dist <- 1-corr.dist
21 43
 
22
-  if(is.null(minNS)){minNS=nSets/2}
23
-  if(is.null(maxNS)){maxNS=nSets+minNS}
44
+        clust <- agnes(x=corr.dist, diss=TRUE, cluster.method)
45
+        #clust=fastcluster::hclust(dist(corr.dist))
46
+        cut <- cutree(as.hclust(clust),k=cnt)
24 47
 
25
-  if(ignore.NA==FALSE){if(anyNA(Atot)){
26
-    warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')
27
-  }}
28
-  if(ignore.NA==TRUE){Atot<-Atot[complete.cases(Atot),]}
48
+        cls <- sort(unique(cut))
49
+        cMNs <- matrix(ncol=cnt,nrow=dim(Atot)[1])
50
+        colnames(cMNs) <- cls
51
+        rownames(cMNs) <- rownames(Atot)
29 52
 
30
-corcut<-function(Atot,minNS,cnt,cluster.method){
31
-  corr.dist=cor(Atot)
32
-  corr.dist=1-corr.dist
53
+        RtoMeanPattern <- list()
54
+        AByClust <- list()
55
+        for (i in cls)
56
+        {
57
+            if (!is.null(dim(Atot[,cut == i])))
58
+            {
59
+                if (dim(Atot[,cut == i])[2] >= minNS)
60
+                {
61
+                    cMNs[,i] <- rowMeans(Atot[,cut==i])
62
+                    AByClust[[i]] <- Atot[,cut==i]
63
+                    nIN <- sum(cut==i)
64
+                    RtoMeanPattern[[i]] <- sapply(1:nIN, function(j) 
65
+                        round(cor(x=Atot[,cut==i][,j], y=cMNs[,i]),3))
66
+                }
67
+            }
68
+        }
33 69
 
34
-  clust=agnes(x=corr.dist,diss=TRUE,cluster.method)
35
-  #clust=fastcluster::hclust(dist(corr.dist))
36
-  cut=cutree(as.hclust(clust),k=cnt)
70
+        AByClust[sapply(AByClust,is.null)] <- NULL
71
+        RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL
72
+        return(list("RtoMeanPattern"=RtoMeanPattern, "AByClust"=AByClust))
73
+    }   
37 74
 
38
-  cls=sort(unique(cut))
39
-  cMNs=matrix(ncol=cnt,nrow=dim(Atot)[1])
40
-  colnames(cMNs)=cls
41
-  rownames(cMNs)=rownames(Atot)
75
+    cc <- corcut(Atot, minNS, cnt, cluster.method)
42 76
 
43
-  RtoMeanPattern <- list()
44
-  AByClust <- list()
45
-    for(i in cls){
46
-        if (is.null(dim(Atot[,cut == i]))==TRUE){
47
-          next
48
-        } else if(dim(Atot[,cut == i])[2] < minNS){
77
+    ### split by maxNS
78
+    indx <- which(unlist(lapply(cc$AByClust, function(x) dim(x)[1] > maxNS)))
79
+    i <- 1
80
+    while (length(indx) > 0)
81
+    { 
82
+        icc <- corcut(cc$AByClust[[indx[1]]], minNS, 2, cluster.method)
83
+        if (length(icc[[2]]) == 0)
84
+        {
85
+            indx <- indx[-1]
49 86
             next
50
-        } else{
51
-            cMNs[,i]=rowMeans(Atot[,cut==i])
52
-            AByClust[[i]] <- Atot[,cut==i]
53
-            nIN=sum(cut==i)
54
-            RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Atot[,cut==i][,j],y=cMNs[,i]),3)})
55 87
         }
56
-      }
57
-    AByClust[sapply(AByClust,is.null)]<-NULL
58
-    RtoMeanPattern[sapply(RtoMeanPattern,is.null)]<-NULL
59
-    return(list("RtoMeanPattern"=RtoMeanPattern,"AByClust"=AByClust))
60
-  }   
61
-  cc<-corcut(Atot,minNS,cnt,cluster.method)
62
-
63
-    ### split by maxNS
64
-    indx<-which(unlist(lapply(cc$AByClust,function(x) dim(x)[1]>maxNS)))
65
-    i<-1
66
-    while(length(indx)>0){ 
67
-            icc<-corcut(cc$AByClust[[indx[1]]],minNS,2,cluster.method)
68
-            if(length(icc[[2]])==0){
69
-              indx<-indx[-1]
70
-              next
71
-            } else{
72
-              cc$AByClust[[indx[1]]]<-icc[[2]][[1]]
73
-              cc$RtoMeanPattern[[indx[1]]]<-icc[[1]][[1]]
74
-              if(length(icc[[2]])>1){
88
+        else
89
+        {
90
+            cc$AByClust[[indx[1]]] <- icc[[2]][[1]]
91
+            cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]]
92
+            if (length(icc[[2]]) > 1)
93
+            {
75 94
                 cc$AByClust<-append(cc$AByClust,icc[[2]][2])
76 95
                 cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2])
77
-              } 
78
-              indx<-which(unlist(lapply(cc$AByClust,function(x) dim(x)[1]>maxNS)))
79
-            }
96
+            } 
97
+            indx <- which(unlist(lapply(cc$AByClust,function(x) dim(x)[1]>maxNS)))
98
+        }
80 99
     }
81 100
 
101
+    #weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]])
102
+    AByCDSWavg <- t(sapply(1:length(cc$AByClust), function(z)
103
+        apply(cc$AByClust[[z]], 1, function(x)
104
+            weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
105
+    rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust), function(x)
106
+        paste("Pattern",x))
107
+    #scale As
108
+    Amax <- apply(AByCDSWavg, 1, max)
109
+    AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1], function(x)
110
+        AByCDSWavg[x,] / Amax[x]))
111
+    rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg)
82 112
 
83
-#weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]])
84
-AByCDSWavg<- t(sapply(1:length(cc$AByClust),function(z) apply(cc$AByClust[[z]],1,function(x) weighted.mean(x,(cc$RtoMeanPattern[[z]])^3))))
85
-rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust),function(x) paste("Pattern",x))
86
-#scale As
87
-Amax <- apply(AByCDSWavg,1,max)
88
-AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1],function(x) AByCDSWavg[x,]/Amax[x]))
89
-rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg)
90
-
91
-  if(bySet){  
92
-    return(list("consenusAs"=t(AByCDSWavgScaled),"ABySet"=cc))
93
-  } else {return(AByCDSWavgScaled)}
94
-
113
+    if (bySet)
114
+    {  
115
+        return(list("consenusAs"=t(AByCDSWavgScaled),"ABySet"=cc))
116
+    }
117
+    else
118
+    {
119
+        return(AByCDSWavgScaled)
120
+    }
95 121
 }
... ...
@@ -1,42 +1,39 @@
1
-#' createGWCoGAPSSets
1
+#' Create Gene Sets for GWCoGAPS
2 2
 #'
3
-#'\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing;
3
+#' @details factors whole genome data into randomly generated sets for indexing
4 4
 #'
5
-#'@param data data matrix with unique rownames
6
-#'@param nSets number of sets for parallelization
7
-#'@param outRDA name of output file
8
-#'@param keep logical indicating whether or not to save gene set list. Default is TRUE.
9
-#'@param path character string indicating were to save resulting data objects
10
-#'@export
11
-#'@return list with randomly generated sets of genes from whole genome data
12
-#'@examples \dontrun{
13
-#'createGWCoGAPSSet(D,nSets=nSets)
14
-#'}
15
-#'
16
-
17
-createGWCoGAPSSets <- function(D, S, nSets, simulationName, path="")
5
+#' @param D data matrix
6
+#' @param S uncertainty matrix
7
+#' @param nSets number of sets to partition the data into
8
+#' @param simulationName name used to identify files created by this simulation
9
+#' @return simulationName used to identify saved files
10
+#' @examples
11
+#' data(SimpSim)
12
+#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
13
+#' @export
14
+createGWCoGAPSSets <- function(D, S, nSets, simulationName)
18 15
 {
19 16
     # check gene names
20
-    if (length(unique(colnames(D))) != length(colnames(D)))
17
+    if (length(unique(rownames(D))) != length(rownames(D)))
21 18
     {
22
-        warning("Cell identifiers not unique!")
19
+        warning("Gene identifiers not unique!")
23 20
     }
24 21
 
25
-    # partition data by sampling random sets of cells
22
+    # partition data by sampling random sets of genes
26 23
     genes <- 1:nrow(D)
27 24
     setSize <- floor(length(genes) / nSets)
28 25
     for (set in 1:nSets)
29 26
     {
30
-        
31
-        # sample genes
32
-            sampleSize <- ifelse(set == nSets, length(genes), setSize)
33
-            geneset <- sample(genes, sampleSize, replace=FALSE)
34
-            genes <- genes[!(genes %in% geneset)]
27
+        # sample gene names
28
+        sampleSize <- ifelse(set == nSets, length(genes), setSize)
29
+        geneSet <- sample(genes, sampleSize, replace=FALSE)
30
+        genes <- genes[!(genes %in% geneSet)]
31
+
35 32
         # partition data
36
-        sampleD <- D[geneset,]
37
-        sampleS <- S[geneset,]
38
-        save(sampleD, sampleS, file=paste(path, simulationName, "_partition_", set,
33
+        sampleD <- D[geneSet,]
34
+        sampleS <- S[geneSet,]
35
+        save(sampleD, sampleS, file=paste(simulationName, "_partition_", set,
39 36
             ".RData", sep=""));
40 37
     }
41 38
     return(simulationName)
42
-}
39
+}
43 40
\ No newline at end of file
... ...
@@ -1,9 +1,8 @@
1 1
 #' Create Gene Sets for scCoGAPS
2
+#' @export
2 3
 #'
3
-#' @details factors whole genome data into randomly generated sets for indexing
4
-#'
4
+#' @description factors whole genome data into randomly generated sets for indexing
5 5
 #' @param D data matrix
6
-#' @param S uncertainty matrix
7 6
 #' @param nSets number of sets to partition the data into
8 7
 #' @param simulationName name used to identify files created by this simulation
9 8
 #' @param samplingRatio vector of relative quantities to use for sampling celltypes
... ...
@@ -12,9 +11,9 @@
12 11
 #' @return simulationName used to identify saved files
13 12
 #' @examples
14 13
 #' data(SimpSim)
15
-#' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
16
-#' @export
17
-createscCoGAPSSets <- function(D, nSets, simulationName,samplingRatio=NULL,path="",anotionObj=NULL)
14
+#' createscCoGAPSSets(SimpSim.D, nSets=2, simulationName="example")
15
+createscCoGAPSSets <- function(D, nSets, simulationName, samplingRatio=NULL,
16
+path="", anotionObj=NULL)
18 17
 {
19 18
     # check gene names
20 19
     if (length(unique(colnames(D))) != length(colnames(D)))
... ...
@@ -27,16 +26,22 @@ createscCoGAPSSets <- function(D, nSets, simulationName,samplingRatio=NULL,path=
27 26
     setSize <- floor(length(cells) / nSets)
28 27
     for (set in 1:nSets)
29 28
     {
30
-        
31
-        if(is.null(samplingRatio)){
32
-        # sample cell names
29
+        if (is.null(samplingRatio))
30
+        {
31
+            # sample cell names
33 32
             sampleSize <- ifelse(set == nSets, length(cells), setSize)
34 33
             cellset <- sample(cells, sampleSize, replace=FALSE)
35 34
             cells <- cells[!(cells %in% cellset)]
36
-        } else {
37
-        if(length(unique(anotionObj))!=length(samplingRatio)){warning("Not all celltypes will be sampled from.")}
38
-        ct.indx<-lapply(unique(anotionObj),function(x) which(anotionObj == x))
39
-        cellset<-sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE)
35
+        }
36
+        else
37
+        {
38
+            if (length(unique(anotionObj)) != length(samplingRatio))
39
+            {
40
+                warning("Not all celltypes will be sampled from.")
41
+            }
42
+            ct.indx <- lapply(unique(anotionObj), function(x) which(anotionObj == x))
43
+            cellset <- lapply(unique(anotionObj), function(x)
44
+                sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE))
40 45
         }
41 46
 
42 47
         # partition data
... ...
@@ -45,7 +50,8 @@ createscCoGAPSSets <- function(D, nSets, simulationName,samplingRatio=NULL,path=
45 50
         sampleD <- log2(sampleD+1)
46 51
         # generate S
47 52
         sampleS <- pmax(.1*sampleD, .1)
48
-        save(sampleD, sampleS, file=paste0(path,simulationName, "_partition_", set,".RData"));
53
+        save(sampleD, sampleS, file=paste0(path,simulationName, "_partition_",
54
+            set,".RData"));
49 55
     }
50 56
     return(simulationName)
51 57
 }
... ...
@@ -12,56 +12,75 @@
12 12
 patternMarkers <- function(Amatrix=NA, scaledPmatrix=FALSE, Pmatrix=NA,
13 13
 threshold="all", lp=NA, full=FALSE)
14 14
 {
15
-    if(scaledPmatrix==FALSE)
15
+    if (scaledPmatrix==FALSE)
16 16
     {
17
-        if(!is.na(Pmatrix)){
18
-        pscale <- apply(Pmatrix,1,max)   # rescale p's to have max 1
19
-        Amatrix <- sweep(Amatrix, 2, pscale, FUN="*")   # rescale A in accordance with p's having max 1
17
+        if(!is.na(Pmatrix))
18
+        {
19
+            pscale <- apply(Pmatrix,1,max)   # rescale p's to have max 1
20
+            Amatrix <- sweep(Amatrix, 2, pscale, FUN="*")   # rescale A in accordance with p's having max 1
21
+        }
22
+        else
23
+        {
24
+            warning("P values must be provided if not already scaled")
20 25
         }
21
-        else(warning("P values must be provided if not already scaled"))
22 26
     }
23 27
 
24 28
     # find the A with the highest magnitude
25 29
     Arowmax <- t(apply(Amatrix, 1, function(x) x/max(x)))
26 30
 
27
-    if(!is.na(lp))
31
+    if (!is.na(lp))
28 32
     {
29
-        if(length(lp)!=dim(Amatrix)[2]){
33
+        if (length(lp) != dim(Amatrix)[2])
34
+        {
30 35
             warning("lp length must equal the number of columns of the Amatrix")
31 36
         }
32 37
         sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))
33 38
         ssranks <-rank(sstat)
34
-        ssgenes.th <-names(sort(sstat,decreasing=FALSE,na.last=TRUE))
35
-    } else {
39
+        ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE))
40
+    }
41
+    else
42
+    {
36 43
         # determine which genes are most associated with each pattern
37 44
 
38 45
         sstat<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))
39 46
         ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))#list()
40 47
         ssgenes<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=NULL)
41
-        for(i in 1:nP){
48
+        for (i in 1:nP)
49
+        {
42 50
             lp <- rep(0,dim(Amatrix)[2])
43 51
             lp[i] <- 1
44 52
             sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))))
45 53
             ssranks[,i]<-rank(sstat[,i])
46 54
             ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE))
47 55
         }
48
-        if(threshold=="cut"){
56
+        if (threshold=="cut")
57
+        {
49 58
             geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min))))
50 59
             ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x])
51 60
         }
52
-        else if (threshold=="all"){
61
+        else if (threshold=="all")
62
+        {
53 63
             pIndx<-unlist(apply(sstat,1,which.min))
54 64
             gBYp <- list()
55
-            for(i in sort(unique(pIndx))){
65
+            for(i in sort(unique(pIndx)))
66
+            {
56 67
                 gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1])
57 68
             }
58
-            ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x) {
59
-                ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x]
60
-            })
61
-        } else{stop("Threshold arguement not viable option")}
69
+            ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x)
70
+                ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x])
71
+        }
72
+        else
73
+        {
74
+            stop("Threshold arguement not viable option")
75
+        }
62 76
     }
63 77
     if (full)
64
-        return(list("PatternMarkers"=ssgenes.th,"PatternRanks"=ssranks,"PatternMarkerScores"=sstat))
78
+    {
79
+        return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks,
80
+            "PatternMarkerScores"=sstat))
81
+    }
65 82
     else
83
+    {
66 84
         return("PatternMarkers"=ssgenes.th)
85
+    }
67 86
 }
... ...
@@ -1,96 +1,129 @@
1 1
 #' patternMatch4Parallel
2 2
 #'
3
-#' @param Ptot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
3
+#' @param Ptot a matrix containing the total by set estimates of Pmean output
4
+#' from \code{reOrderBySet}
4 5
 #' @param nSets number of parallel sets used to generate \code{Ptot}
5 6
 #' @param cnt  number of branches at which to cut dendrogram
6 7
 #' @param minNS minimum of individual set contributions a cluster must contain
7
-#' @param maxNS max of individual set contributions a cluster must contain. default is nSets+minNS
8
+#' @param maxNS max of individual set contributions a cluster must contain.
9
+#' default is nSets+minNS
8 10
 #' @param cluster.method the agglomeration method to be used for clustering
9
-#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
10
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Ptot}
11
+#' @param ignore.NA logical indicating whether or not to ignore NAs from
12
+#' potential over dimensionalization. Default is FALSE.
13
+#' @param bySet logical indicating whether to return list of matched set
14
+#' solutions from \code{Ptot}
11 15
 #' @param ... additional parameters for \code{agnes}
12
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
16
+#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then
17
+#' a list of the set contributions to each
13 18
 #' concensus pattern is also returned.
14 19
 #' @seealso \code{\link{agnes}}
15 20
 #' @export
16
-patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL, 
17
-    cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...){
18
-
19
-    if (is.null(minNS)){minNS=ceiling(nSets/2)}
20
-    if (is.null(maxNS)){maxNS=nSets+minNS}
21
-
21
+patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NA, maxNS=NULL, 
22
+cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
23
+{
24
+    if (is.na(minNS))
25
+    {
26
+        minNS <- ceiling(nSets / 2)
27
+    }
28
+    if (is.null(maxNS))
29
+    {
30
+        maxNS=nSets+minNS
31
+    }
22 32
     if (ignore.NA==FALSE & anyNA(Ptot))
23
-        warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')
33
+    {
34
+        warning(paste("Non-sparse matrixes produced. Reducing the number of",
35
+            "patterns asked for and rerun."))
36
+    }
24 37
     if (ignore.NA==TRUE)
38
+    {
25 39
         Ptot <- Ptot[complete.cases(Ptot),]
40
+    }
26 41
 
27
-    corcut<-function(Ptot,minNS,cnt,cluster.method){
28
-    # corr dist
29
-    corr.dist=cor(t(Ptot))
30
-    corr.dist=1-corr.dist
31
-    # cluster
32
-    #library(cluster)
33
-    clust=agnes(x=corr.dist,diss=TRUE,method=cluster.method)
34
-    cut=cutree(as.hclust(clust),k=cnt)
35
-    #save.image(file=paste("CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData"))
42
+    corcut <-function(Ptot,minNS,cnt,cluster.method)
43
+    {
44
+        # corr dist
45
+        corr.dist <- cor(t(Ptot))
46
+        corr.dist <- 1-corr.dist
47
+        # cluster
48
+        #library(cluster)
49
+        clust <- agnes(x=corr.dist, diss=TRUE, method=cluster.method)
50
+        cut <- cutree(as.hclust(clust), k=cnt)
51
+        #save.image(file=paste("CoGAPS.", nP, "P.", nS, "Set.CorrClustCut",
52
+            #cnt,".RData"))
36 53
 
37
-    cls=sort(unique(cut))
38
-    cMNs=matrix(nrow=cnt,ncol=dim(Ptot)[2])
39
-    rownames(cMNs)=cls
40
-    colnames(cMNs)=colnames(Ptot)
54
+        cls <- sort(unique(cut))
55
+        cMNs <- matrix(nrow=cnt, ncol=dim(Ptot)[2])
56
+        rownames(cMNs) <- cls
57
+        colnames(cMNs) <- colnames(Ptot)
41 58
 
42
-    RtoMeanPattern <- list()
43
-    PByClust <- list()
44
-    for(i in cls){
45
-        if (is.null(dim(Ptot[cut == i, ]))==TRUE){
46
-            next
47
-        } else if(dim(Ptot[cut == i, ])[1]< minNS){
48
-            next
49
-        } else {
50
-            cMNs[i,]=colMeans(Ptot[cut==i,])
51
-            PByClust[[i]] <- Ptot[cut==i,]
52
-            nIN=sum(cut==i)
53
-            RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)})
59
+        RtoMeanPattern <- list()
60
+        PByClust <- list()
61
+        for (i in cls)
62
+        {
63
+            if (!is.null(dim(Ptot[cut == i,])))
64
+            {
65
+                if (dim(Ptot[cut == i,])[1] >= minNS)
66
+                {
67
+                    cMNs[i,] <- colMeans(Ptot[cut==i,])
68
+                    PByClust[[i]] <- Ptot[cut==i,]
69
+                    nIN <- sum(cut==i)
70
+                    RtoMeanPattern[[i]] <- sapply(1:nIN, function(j)
71
+                        round(cor(x=Ptot[cut==i,][j,], y=cMNs[i,]),3)
72
+                    )
73
+                }
74
+            }
54 75
         }
55
-    }
56
-    PByClust[sapply(PByClust,is.null)]<-NULL
57
-    RtoMeanPattern[sapply(RtoMeanPattern,is.null)]<-NULL
58
-    return(list("RtoMeanPattern"=RtoMeanPattern,"PByClust"=PByClust))
76
+
77
+        PByClust[sapply(PByClust,is.null)] <- NULL
78
+        RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL
79
+        return(list("RtoMeanPattern"=RtoMeanPattern, "PByClust"=PByClust))
59 80
     }    
60 81
 
61
-    cc<-corcut(Ptot,minNS,cnt,cluster.method)
82
+    cc <- corcut(Ptot,minNS,cnt,cluster.method)
62 83
 
63 84
     ### split by maxNS
64
-    indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
65
-    while(length(indx)>0){
66
-            icc<-corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method)
67
-            if(length(icc[[2]])==0){
68
-              indx<-indx[-1]
69
-              next
70
-            } else{
71
-                cc$PByClust[[indx[1]]]<-icc[[2]][[1]]
72
-                cc$RtoMeanPattern[[indx[1]]]<-icc[[1]][[1]]
73
-                if(length(icc[[2]])>1){
74
-                    cc$PByClust<-append(cc$PByClust,icc[[2]][2])
75
-                    cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2])
76
-                } 
77
-                indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
78
-            }
85
+    indx <- which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
86
+    while(length(indx) > 0)
87
+    {
88
+        icc <- corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method)
89
+        if (length(icc[[2]])==0)
90
+        {
91
+          indx <- indx[-1]
92
+          next
93
+        }
94
+        else
95
+        {
96
+            cc$PByClust[[indx[1]]] <- icc[[2]][[1]]
97
+            cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]]
98
+            if (length(icc[[2]]) > 1)
99
+            {
100
+                cc$PByClust <- append(cc$PByClust,icc[[2]][2])
101
+                cc$RtoMeanPattern <- append(cc$RtoMeanPattern,icc[[1]][2])
102
+            } 
103
+            indx <- which(unlist(lapply(cc$PByClust, function(x)
104
+                dim(x)[1] > maxNS)))
105
+        }
79 106
     }
80 107
 
81 108
     #weighted.mean
82
-    PByCDSWavg<- t(sapply(1:length(cc$PByClust),function(z) apply(cc$PByClust[[z]],2,function(x) 
83
-        weighted.mean(x,(cc$RtoMeanPattern[[z]])^3))))
84
-    rownames(PByCDSWavg) <- lapply(1:length(cc$PByClust),function(x) paste("Pattern",x))
109
+    PByCDSWavg <- t(sapply(1:length(cc$PByClust), function(z)
110
+        apply(cc$PByClust[[z]],2, function(x)
111
+            weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
112
+    rownames(PByCDSWavg) <- lapply(1:length(cc$PByClust), function(x)
113
+        paste("Pattern", x))
85 114
 
86 115
     #scale ps
87 116
     Pmax <- apply(PByCDSWavg,1,max)
88
-    PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1],function(x) PByCDSWavg[x,]/Pmax[x]))
117
+    PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1], function(x)
118
+        PByCDSWavg[x,] / Pmax[x]))
89 119
     rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg)
90 120
 
91
-    if(bySet){
92
-        return(list("consenusPatterns"=PByCDSWavgScaled,"PBySet"=cc))
93
-    } else {
121
+    if (bySet)
122
+    {
123
+        return(list("consenusPatterns"=PByCDSWavgScaled, "PBySet"=cc))
124
+    }
125
+    else
126
+    {
94 127
         return("consenusPatterns"=PByCDSWavgScaled)
95 128
     }
96 129
 }
... ...
@@ -12,51 +12,56 @@
12 12
 #' concensus pattern is also returned.
13 13
 #' @seealso \code{\link{agnes}}
14 14
 #' @export
15
-#' 
16
-#' 
17
-
18 15
 patternMatch4singleCell <- function(Atot, nSets, cnt, minNS, 
19 16
 cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
20 17
 {
21
-    if (!is.null(minNS))
22
-        minNS=nSets/2
23
-
24
-    if (ignore.NA==FALSE & anyNA(Atot))
25
-        warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')
26
-    if (ignore.NA==TRUE)
18
+    if (is.na(minNS))
19
+    {
20
+        minNS <- nSets/2
21
+    }
22
+    if (!ignore.NA & anyNA(Atot))
23
+    {
24
+        warning(paste("Non-sparse matrixes produced. Reducing the number of",
25
+            "patterns asked for and rerun"))
26
+    }
27
+    if (ignore.NA)
28
+    {
27 29
         Atot <- Atot[complete.cases(Atot),]
30
+    }
28 31
 
29 32
     # corr dist
30
-    corr.dist=cor(Atot)
31
-    corr.dist=1-corr.dist
33
+    corr.dist <- cor(Atot)
34
+    corr.dist <- 1 - corr.dist
32 35
     # cluster
33 36
     #library(cluster)
34
-    clust=agnes(x=corr.dist,diss=TRUE,method=cluster.method)
35
-    cut=cutree(as.hclust(clust),k=cnt)
37
+    clust <- agnes(x=corr.dist, diss=TRUE, method=cluster.method)
38
+    cut <- cutree(as.hclust(clust), k=cnt)
36 39
     #save.image(file=paste("CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData"))
37 40
 
38 41
     #drop n<4 and get weighted Avg
39
-    cls=sort(unique(cut))
40
-    cMNs=matrix(nrow=cnt,ncol=dim(Atot)[1])
41
-    rownames(cMNs)=cls
42
-    colnames(cMNs)=colnames(Atot)
42
+    cls <- sort(unique(cut))
43
+    cMNs <- matrix(nrow=cnt, ncol=dim(Atot)[1])
44
+    rownames(cMNs) <- cls
45
+    colnames(cMNs) <- rownames(Atot)
43 46
 
44 47
     RtoMeanPattern <- list()
45 48
     PByClust <- list()
46 49
     for(i in cls)
47 50
     {
48
-        if (is.null(dim(Atot[,cut == i ]))==TRUE)
51
+        if (is.null(dim(Atot[,cut == i])))
49 52
         {
50
-            cMNs[i,] <- Atot[,cut == i ]
53
+            cMNs[i,] <- Atot[,cut == i]
51 54
             RtoMeanPattern[[i]] <- rep(1,length(Atot[,cut == i ]))
52 55
             PByClust[[i]] <- t(as.matrix(Atot[,cut == i ]))
53 56
         }
54 57
         else
55 58
         {
56
-            cMNs[i,]=colMeans(Atot[,cut==i])
59
+            cMNs[i,] <- rowMeans(Atot[,cut==i])
57 60
             PByClust[[i]] <- Atot[,cut==i]
58
-            nIN=sum(cut==i)
59
-            RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Atot[,cut==i][j,],y=cMNs[i,]),3)})
61
+            nIN <- sum(cut==i)
62
+            RtoMeanPattern[[i]] <- sapply(1:nIN, function(j)
63
+                round(cor(x=Atot[,cut==i][,j], y=cMNs[i,]), 3)
64
+            )
60 65
         }
61 66
     }
62 67
 
... ...
@@ -65,17 +70,14 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
65 70
     RtoMPDrop <- list()
66 71
     for(i in cls)
67 72
     {
68
-        if (is.null(dim(PByClust[[i]])) == TRUE)
69
-            next
70
-        if (dim(PByClust[[i]])[1] < minNS)
73
+        if (!is.null(dim(PByClust[[i]])))
71 74
         {
72
-            next
73
-        }
74
-        else
75
-        {
76
-            #indx <- which(RtoMeanPattern[[i]]>.7,arr.ind = TRUE)
77
-            PByClustDrop <- append(PByClustDrop,list(PByClust[[i]]))
78
-            RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]]))
75
+            if (dim(PByClust[[i]])[1] >= minNS)
76
+            {
77
+                #indx <- which(RtoMeanPattern[[i]]>.7,arr.ind = TRUE)
78
+                PByClustDrop <- append(PByClustDrop,list(PByClust[[i]]))
79
+                RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]]))
80
+            }
79 81
         }
80 82
     }
81 83
 
... ...
@@ -84,43 +86,48 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
84 86
     RtoMPDS <- list()
85 87
     for (j in 1:length(PByClustDrop))
86 88
     {
87
-        if (is.null(dim(PByClustDrop[[j]]))==TRUE)
89
+        if (is.null(dim(PByClustDrop[[j]])))
88 90
         {
89 91
             next
90 92
         }
91
-        if (dim(PByClustDrop[[j]])[1]<minNS+nSets)
93
+        if (dim(PByClustDrop[[j]])[1] < minNS + nSets)
92 94
         {
93
-            PByCDS <- append(PByCDS,PByClustDrop[j])
94
-            RtoMPDS <- append(RtoMPDS,RtoMPDrop[j])
95
+            PByCDS <- append(PByCDS, PByClustDrop[j])
96
+            RtoMPDS <- append(RtoMPDS, RtoMPDrop[j])
95 97
         }
96
-        if (dim(PByClustDrop[[j]])[1]>=minNS+nSets)
98
+        if (dim(PByClustDrop[[j]])[1] >= minNS + nSets)
97 99
         {
98
-            corr.distPBCD=cor(t(PByClustDrop[[j]]))
99
-            corr.distPBCD=1-corr.distPBCD
100
-            clustPBCD=agnes(x=corr.distPBCD,diss=TRUE,method="complete")
101
-            cutPBCD=cutree(as.hclust(clustPBCD),k=2)
100
+            corr.distPBCD <- cor(t(PByClustDrop[[j]]))
101
+            corr.distPBCD <- 1-corr.distPBCD
102
+            clustPBCD <- agnes(x=corr.distPBCD, diss=TRUE, method="complete")
103
+            cutPBCD <- cutree(as.hclust(clustPBCD),k=2)
102 104
             g1 <- PByClustDrop[[j]][cutPBCD==1,]
103 105
             PByCDS <- append(PByCDS,list(g1))
104
-            RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g1)[1],function(z) round(cor(x=g1[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==1,])),3))))
106
+            RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g1)[1], function(z)
107
+                round(cor(x=g1[z,], y=colMeans(PByClustDrop[[j]][cutPBCD==1,])),3))))
105 108
             g2 <- PByClustDrop[[j]][cutPBCD==2,]
106
-            if (is.null(dim(g2)[1])==FALSE)
109
+            if (!is.null(dim(g2)[1]))
107 110
             {
108 111
                 PByCDS <- append(PByCDS,list(g2))
109
-                RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g2)[1],function(z) round(cor(x=g2[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==2,])),3))))
112
+                RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g2)[1], function(z)
113
+                    round(cor(x=g2[z,], y=colMeans(PByClustDrop[[j]][cutPBCD==2,])),3))))
110 114
             }
111 115
         }
112 116
     }
113 117
 
114 118
     #weighted.mean(PByClustDrop[[1]],RtoMPDrop[[1]])
115
-    PByCDSWavg<- t(sapply(1:length(PByCDS),function(z) apply(PByCDS[[z]],2,function(x) weighted.mean(x,(RtoMPDS[[z]])^3))))
116
-    rownames(PByCDSWavg) <- lapply(1:length(PByCDS),function(x) paste("Pattern",x))
119
+    PByCDSWavg <- t(sapply(1:length(PByCDS),function(z) apply(PByCDS[[z]],2,
120
+        function(x) weighted.mean(x,(RtoMPDS[[z]])^3))))
121
+    rownames(PByCDSWavg) <- lapply(1:length(PByCDS),
122
+        function(x) paste("Pattern",x))
117 123
 
118 124
     #scale ps
119 125
     Pmax <- apply(PByCDSWavg,1,max)
120
-    PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1],function(x) PByCDSWavg[x,]/Pmax[x]))
126
+    PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1],
127
+        function(x) PByCDSWavg[x,] / Pmax[x]))
121 128
     rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg)
122 129
 
123
-    if(bySet)
130
+    if (bySet)
124 131
     {
125 132
         # return by set and final
126 133
         PBySet<-PByCDS
... ...
@@ -1,13 +1,15 @@
1 1
 #' Post Processing of Parallel Output
2 2
 #'
3 3
 #' @param AP.fixed output of parallel gapsMapRun calls with same FP
4
-#' @param setPs data.frame with rows giving fixed patterns for P used as input
4
+#' @param setValues data.frame with rows giving fixed patterns for P used as input
5 5
 #' for gapsMapRun
6
+#' @param setMatrix which matrix, A or P
6 7
 #' @return list of two data.frames containing the A matrix estimates or their
7 8
 #' corresponding standard deviations from output of parallel CoGAPS
8 9
 postFixed4Parallel <- function(AP.fixed, setValues, setMatrix="P")
9 10
 {
10
-    if(setMatrix=="P"){
11
+    if (setMatrix=="P")
12
+    {
11 13
         ASummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Amean))
12 14
         Asd <- do.call(rbind,lapply(AP.fixed, function(x) x$Asd))
13 15
         #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean))
... ...
@@ -18,14 +20,16 @@ postFixed4Parallel <- function(AP.fixed, setValues, setMatrix="P")
18 20
         Aneu <- sweep(ASummary,2,Pmax,FUN="*")
19 21
 
20 22
         X <- apply(Pneu,1,range)
21
-        Y <- apply(setPs,1,range)
23
+        Y <- apply(setValues,1,range)
22 24
         colnames(X) <- colnames(Y)
23 25
         if (all.equal(X,Y,tolerance=0.01) != TRUE)
24 26
             warning("Patterns do not match fixed values.")
25 27
 
26 28
         As4fixPs<-list("A"=Aneu,"Asd"=Asd)
27 29
         return(As4fixPs)
28
-    } else if(setMatrix=="A"){
30
+    }
31
+    else if (setMatrix=="A")
32
+    {
29 33
         PSummary <- do.call(cbind,lapply(AP.fixed, function(x) x$Pmean))
30 34
         Psd <- do.call(cbind,lapply(AP.fixed, function(x) x$Psd))
31 35
         #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean))
... ...
@@ -36,14 +40,16 @@ postFixed4Parallel <- function(AP.fixed, setValues, setMatrix="P")
36 40
         Pneu <- sweep(PSummary,1,Amax,FUN="*")
37 41
 
38 42
         X <- apply(Aneu,2,range)
39
-        Y <- apply(setAs,2,range)
43
+        Y <- apply(setValues,2,range)
40 44
         colnames(X) <- colnames(Y)
41 45
         if (all.equal(X,Y,tolerance=0.01) != TRUE)
42 46
             warning("As do not match fixed values.")
43 47
 
44 48
         Ps4fixAs<-list("P"=Pneu,"Psd"=Psd)
45 49
         return(Ps4fixAs)
46
-    } else{
50
+    }
51
+    else
52
+    {
47 53
         warning("setMatrix can only take values of 'A' or 'P'")
48 54
     }
49 55
 
... ...
@@ -1,7 +1,7 @@
1 1
 #' Post Processing of Parallel Output
2 2
 #'
3 3
 #' @param AP.fixed output of parallel gapsMapRun calls with same FP
4
-#' @param setPs data.frame with rows giving fixed patterns for P used as input
4
+#' @param setAs data.frame with rows giving fixed patterns for A used as input
5 5
 #' for gapsMapRun
6 6
 #' @return list of two data.frames containing the A matrix estimates or their
7 7
 #' corresponding standard deviations from output of parallel CoGAPS
... ...
@@ -17,38 +17,43 @@
17 17
 #' @examples
18 18
 #' data(SimpSim)
19 19
 #' sim_name <- "example"
20
-#' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
21
-#' result <- scCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000)
20
+#' createscCoGAPSSets(SimpSim.D, nSets=2, simulationName=sim_name)
21
+#' result <- scCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
22 22
 #' @export
23 23
 scCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, manualMatch=FALSE, consensusAs=NULL, ...)
24 24
 {
25 25
     if (!is.null(list(...)$checkpointFile))
26
+    {
26 27
         stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter")
27
-    if (is.null(consensusAs)){
28
-        allDataSets <- preInitialPhase(simulationName, nCores)
29
-        initialResult <- runInitialPhase(simulationName, allDataSets, nFactor, ...)
30
-        if(manualMatch){
28
+    }
29
+
30
+    if (is.null(consensusAs))
31
+    {
32
+        allDataSets <- sc_preInitialPhase(simulationName, nCores)
33
+        initialResult <- sc_runInitialPhase(simulationName, allDataSets, nFactor, ...)
34
+        if (manualMatch)
35
+        {
31 36
             saveRDS(initialResult,file=paste(simulationName,"_initial.rds", sep=""))
32 37
             stop("Please provide concensus gene weights upon restarting.")
33 38
         }
34
-        matchedAmplitudes <- postInitialPhase(initialResult, length(allDataSets), cut, minNS)
39
+        matchedAmplitudes <- sc_postInitialPhase(initialResult, length(allDataSets), cut, minNS)
35 40
         #save(matchedAmplitudes, file=paste(simulationName, "_matched_As.RData", sep=""))
36 41
         consensusAs<-t(matchedAmplitudes[[1]])
37 42
     } 
38
-    finalResult <- runFinalPhase(simulationName, allDataSets, consensusAs, nCores, ...)
39
-    return(postFinalPhase(finalResult, consensusAs))
43
+    finalResult <- sc_runFinalPhase(simulationName, allDataSets, consensusAs, nCores, ...)
44
+    return(sc_postFinalPhase(finalResult, consensusAs))
40 45
 }
41 46
 
42
-#' Restart a GWCoGaps Run from Checkpoint
47
+#' Restart a scCoGAPS run from a Checkpoint
43 48
 #'
44 49
 #' @inheritParams GWCoGAPS
45 50
 #' @return list of A and P estimates
46 51
 #' @importFrom utils file_test
47 52
 #' @export
48
-GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA, ...)
53
+scCoGAPSFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA, ...)
49 54
 {
50 55
     # find data files
51
-    allDataSets <- preInitialPhase(simulationName, nCores)
56
+    allDataSets <- sc_preInitialPhase(simulationName, nCores)
52 57
 
53 58
     # figure out phase from file signature
54 59
     initialCpts <- list.files(full.names=TRUE, pattern=paste(simulationName, "_initial_cpt_[0-9]+.out", sep=""))
... ...
@@ -72,7 +77,7 @@ GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA,
72 77
     {
73 78
         load(paste(simulationName, "_matched_As.RData", sep=""))
74 79
         consensusAs<-matchedAmplitudes[[1]]
75
-        finalResult <- runFinalPhase(simulationName, allDataSets, consensusAs, ...)
80
+        finalResult <- sc_runFinalPhase(simulationName, allDataSets, consensusAs, ...)
76 81
     }
77 82
     else if (length(initialCpts))
78 83
     {
... ...
@@ -87,19 +92,19 @@ GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA,
87 92
             cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", sep="")
88 93
             CoGapsFromCheckpoint(sampleD, sampleS, cptFileName)
89 94
         }
90
-        matchedAmplitudes <- postInitialPhase(initialResult, length(allDataSets), cut, minNS)
95
+        matchedAmplitudes <- sc_postInitialPhase(initialResult, length(allDataSets), cut, minNS)
91 96
         save(matchedAmplitudes, file=paste(simulationName, "_matched_As.RData", sep=""))
92 97
         consensusAs<-matchedAmplitudes[[1]]
93
-        finalResult <- runFinalPhase(simulationName, allDataSets, consensusAs, ...)
98
+        finalResult <- sc_runFinalPhase(simulationName, allDataSets, consensusAs, ...)
94 99
     }
95 100
     else
96 101
     {
97 102
         stop("no checkpoint files found")
98 103
     }
99
-    return(postFinalPhase(finalResult, matchedAmplitudes))
104
+    return(sc_postFinalPhase(finalResult, matchedAmplitudes))
100 105
 }
101 106
 
102
-preInitialPhase <- function(simulationName, nCores)
107
+sc_preInitialPhase <- function(simulationName, nCores)
103 108
 {
104 109
     # find data files
105 110
     fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="")
... ...
@@ -107,12 +112,14 @@ preInitialPhase <- function(simulationName, nCores)
107 112
 
108 113
     # establish the number of cores that we are able to use
109 114
     if (is.na(nCores))
115
+    {
110 116
         nCores <- length(allDataSets)
117
+    }
111 118
     registerDoParallel(cores=nCores)
112 119
     return(allDataSets)
113 120
 }
114 121
 
115
-runInitialPhase <- function(simulationName, allDataSets, nFactor, ...)
122
+sc_runInitialPhase <- function(simulationName, allDataSets, nFactor, ...)
116 123
 {
117 124
     #generate seeds for parallelization
118 125
     nut <- generateSeeds(chains=length(allDataSets), seed=-1)
... ...
@@ -132,28 +139,36 @@ runInitialPhase <- function(simulationName, allDataSets, nFactor, ...)
132 139
     return(initialResult)
133 140
 }
134 141
 
135
-postInitialPhase <- function(initialResult, nSets, cut, minNS)
142
+sc_postInitialPhase <- function(initialResult, nSets, cut, minNS)
136 143
 {
137 144
     nFactor <- ncol(initialResult[[1]]$Amean)
138 145
     BySet <- reOrderBySet(AP=initialResult, nFactor=nFactor, nSets=nSets,match="A")
139 146
 
140 147
     #run postpattern match function
141 148
     if (is.na(cut))
149
+    {
142 150
         cut <- nFactor
143
-    return(patternMatch4singleCell(Ptot=BySet$A, nP=nFactor, nSets=nSets, cnt=cut,
151
+    }
152
+
153
+    return(patternMatch4singleCell(Atot=BySet$A, nP=nFactor, nSets=nSets, cnt=cut,
144 154
         minNS=minNS, bySet=TRUE))
145 155
 }
146 156
 
147
-runFinalPhase <- function(simulationName, allDataSets, consensusAs, nCores, ...)
157
+sc_runFinalPhase <- function(simulationName, allDataSets, consensusAs, nCores, ...)
148 158
 {    
149
-    if(length(dim(consensusAs))!=2){stop("consensusAs must be a matrix")}
159
+    if (length(dim(consensusAs)) != 2)
160
+    {
161
+        stop("consensusAs must be a matrix")
162
+    }
150 163
 
151 164
     # find data files if providing consensus patterns
152 165
     fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="")
153 166
     allDataSets <- list.files(full.names=TRUE, pattern=fileSig)
154 167
 
155 168
     if (is.na(nCores))
156
-    nCores <- length(allDataSets)
169
+    {
170
+        nCores <- length(allDataSets)
171
+    }
157 172
     registerDoParallel(cores=nCores)
158 173
 
159 174
     # generate seeds for parallelization
... ...
@@ -179,7 +194,7 @@ runFinalPhase <- function(simulationName, allDataSets, consensusAs, nCores, ...)
179 194
     return(finalResult)
180 195
 }
181 196
 
182
-postFinalPhase <- function(finalResult, consensusAs)
197
+sc_postFinalPhase <- function(finalResult, consensusAs)
183 198
 {
184 199
     Aresult <- postFixed4Parallel(finalResult, consensusAs)
185 200
     finalResult <- list("Pmean"=Aresult$P, "Psd"=Aresult$Psd,"Amean"=consensusAs)
... ...
@@ -18,13 +18,14 @@ Date: \tab 2018-01-24\cr
18 18
 License: \tab LGPL\cr
19 19
 }
20 20
 }
21
+\author{
22
+Maintainer: Elana J. Fertig \email{ejfertig@jhmi.edu},
23
+            Michael F. Ochs \email{ochsm@tcnj.edu}
24
+}
21 25
 \references{
22 26
 Fertig EJ, Ding J, Favorov AV, Parmigiani G, Ochs MF.
23 27
 CoGAPS: an R/C++ package to identify patterns and biological
24 28
 process activity in transcriptomic data.
25 29
 Bioinformatics. 2010 Nov 1;26(21):2792-3
26 30
 }
27
-\author{
28
-Maintainer: Elana J. Fertig \email{ejfertig@jhmi.edu},
29
-            Michael F. Ochs \email{ochsm@tcnj.edu}
30
-}
31
+
... ...
@@ -9,8 +9,7 @@ CoGAPS(D, S, nFactor = 7, nEquil = 1000, nSample = 1000,
9 9
   maxGibbmassA = 100, maxGibbmassP = 100, seed = -1, messages = TRUE,
10 10
   singleCellRNASeq = FALSE, whichMatrixFixed = "N",
11 11
   fixedPatterns = matrix(0), checkpointInterval = 0,
12
-  checkpointFile = "gaps_checkpoint.out", pumpThreshold = "unique",
13
-  nPumpSamples = 0, ...)
12
+  checkpointFile = "gaps_checkpoint.out", ...)
14 13
 }
15 14
 \arguments{
16 15
 \item{D}{data matrix}
... ...
@@ -52,10 +51,6 @@ the fixed patterns}
52 51
 
53 52
 \item{checkpointFile}{name of the checkpoint file}
54 53
 
55
-\item{pumpThreshold}{type of threshold for pump statistic}
56
-
57
-\item{nPumpSamples}{number of samples used in pump statistic}
58
-
59 54
 \item{...}{keeps backwards compatibility with arguments from older versions}
60 55
 }
61 56
 \value{
... ...
@@ -73,3 +68,4 @@ the data matrix
73 68
 data(SimpSim)
74 69
 result <- CoGAPS(SimpSim.D, SimpSim.S, nFactor=3, nOutputs=250)
75 70
 }
71
+
... ...
@@ -25,3 +25,4 @@ Restart CoGAPS from Checkpoint File
25 25
 loads the state of a previous CoGAPS run from a file and
26 26
  continues the run from that point
27 27
 }
28
+
... ...
@@ -40,8 +40,9 @@ the data matrix for whole genome data;
40 40
 data(SimpSim)
41 41
 sim_name <- "example"
42 42
 createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
43
-result <- GWCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000)
43
+result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
44 44
 }
45 45
 \seealso{
46 46
 \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}}
47 47
 }
48
+
... ...
@@ -1,13 +1,9 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/GWCoGAPS.R, R/scCoGAPS.R
2
+% Please edit documentation in R/GWCoGAPS.R
3 3
 \name{GWCoGapsFromCheckpoint}
4 4
 \alias{GWCoGapsFromCheckpoint}
5
-\alias{GWCoGapsFromCheckpoint}
6 5
 \title{Restart a GWCoGaps Run from Checkpoint}
7 6
 \usage{
8
-GWCoGapsFromCheckpoint(simulationName, nCores = NA, cut = NA, minNS = NA,
9
-  ...)
10
-
11 7
 GWCoGapsFromCheckpoint(simulationName, nCores = NA, cut = NA, minNS = NA,
12 8
   ...)
13 9
 }
... ...
@@ -23,12 +19,9 @@ GWCoGapsFromCheckpoint(simulationName, nCores = NA, cut = NA, minNS = NA,
23 19
 \item{...}{additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}}
24 20
 }
25 21
 \value{
26
-list of A and P estimates
27
-
28 22
 list of A and P estimates
29 23
 }
30 24
 \description{
31
-Restart a GWCoGaps Run from Checkpoint
32
-
33 25
 Restart a GWCoGaps Run from Checkpoint
34 26
 }
27
+
... ...
@@ -29,3 +29,4 @@ threshold * Asd and 0 otherwise
29 29
 data(SimpSim)
30 30
 binaryA(SimpSim.result$Amean, SimpSim.result$Asd, threshold=3)
31 31
 }
32
+
... ...
@@ -31,3 +31,4 @@ data('SimpSim')
31 31
 calcCoGAPSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets,
32 32
 numPerm=500)
33 33
 }
34
+
... ...
@@ -36,3 +36,4 @@ data('SimpSim')
36 36
 calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]],
37 37
 numPerm=500)
38 38
 }
39
+
... ...
@@ -25,3 +25,4 @@ and standard deviation matrices
25 25
 data(SimpSim)
26 26
 calcZ(SimpSim.result$Amean, SimpSim.result$Asd)
27 27
 }
28
+
... ...
@@ -2,10 +2,10 @@
2 2
 % Please edit documentation in R/cellMatchR.R
3 3
 \name{cellMatchR}
4 4
 \alias{cellMatchR}
5
-\title{patternMatch4Parallel}
5
+\title{cellMatchR}
6 6
 \usage{
7
-cellMatchR(Atot, nSets, cnt, minNS = NULL, maxNS = NULL,
8
-  ignore.NA = FALSE, bySet = FALSE, plotDen = FALSE, ...)
7
+cellMatchR(Atot, nSets, cnt, minNS = NA, maxNS = NA, ignore.NA = FALSE,
8
+  bySet = FALSE, plotDen = FALSE, ...)
9 9
 }
10 10
 \arguments{
11 11
 \item{Atot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
... ...
@@ -16,21 +16,21 @@ cellMatchR(Atot, nSets, cnt, minNS = NULL, maxNS = NULL,
16 16
 
17 17
 \item{minNS}{minimum of individual set contributions a cluster must contain}
18 18
 
19
+\item{maxNS}{maximum of individual set contributions a cluster must contain}
20
+
19 21
 \item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
20 22
 
21 23
 \item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}}
22 24
 
23
-\item{...}{additional parameters for \code{agnes}}
25
+\item{plotDen}{plot}
24 26
 
25
-\item{R2mean}{}
27
+\item{...}{additional parameters for \code{agnes}}
26 28
 }
27 29
 \value{
28 30
 a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
29 31
 concensus pattern is also returned.
30 32
 }
31 33
 \description{
32
-patternMatch4Parallel
33
-}
34
-\seealso{
35
-\code{\link{fastcluster}}
34
+cellMatchR
36 35
 }
36
+
... ...
@@ -39,3 +39,4 @@ data('SimpSim')
39 39
 computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GSGenes=GSets[[1]],
40 40
 numPerm=500)
41 41
 }
42
+
... ...
@@ -2,30 +2,30 @@
2 2
 % Please edit documentation in R/createGWCoGAPSSets.R
3 3
 \name{createGWCoGAPSSets}
4 4
 \alias{createGWCoGAPSSets}
5
-\title{createGWCoGAPSSets}
5
+\title{Create Gene Sets for GWCoGAPS}
6 6
 \usage{
7
-createGWCoGAPSSets(D, S, nSets, simulationName, path = "")
7
+createGWCoGAPSSets(D, S, nSets, simulationName)
8 8
 }
9 9
 \arguments{
10
-\item{nSets}{number of sets for parallelization}
10
+\item{D}{data matrix}
11 11
 
12
-\item{path}{character string indicating were to save resulting data objects}
12
+\item{S}{uncertainty matrix}
13 13
 
14
-\item{data}{data matrix with unique rownames}
14
+\item{nSets}{number of sets to partition the data into}
15 15
 
16
-\item{outRDA}{name of output file}
17
-
18
-\item{keep}{logical indicating whether or not to save gene set list. Default is TRUE.}
16
+\item{simulationName}{name used to identify files created by this simulation}
19 17
 }
20 18
 \value{
21
-list with randomly generated sets of genes from whole genome data
19
+simulationName used to identify saved files
22 20
 }
23 21
 \description{
24
-\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing;
22
+Create Gene Sets for GWCoGAPS
23
+}
24
+\details{
25
+factors whole genome data into randomly generated sets for indexing
25 26
 }
26 27
 \examples{
27
-\dontrun{
28
-createGWCoGAPSSet(D,nSets=nSets)
28
+data(SimpSim)
29
+createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
29 30
 }
30 31
 
31
-}
... ...
@@ -5,7 +5,7 @@
5 5
 \title{Create Gene Sets for scCoGAPS}
6 6
 \usage{
7 7
 createscCoGAPSSets(D, nSets, simulationName, samplingRatio = NULL,
8
-  path = "")
8
+  path = "", anotionObj = NULL)
9 9
 }
10 10
 \arguments{
11 11
 \item{D}{data matrix}
... ...
@@ -18,20 +18,16 @@ createscCoGAPSSets(D, nSets, simulationName, samplingRatio = NULL,
18 18
 
19 19
 \item{path}{character string indicating were to save resulting data objects. default is current working dir}
20 20
 
21
-\item{S}{uncertainty matrix}
22
-
23
-\item{annotionObj}{vector of same length as number of columns of D}
21
+\item{anotionObj}{vector of same length as number of columns of D}
24 22
 }
25 23
 \value{
26 24
 simulationName used to identify saved files
27 25
 }
28 26
 \description{
29
-Create Gene Sets for scCoGAPS
30
-}
31
-\details{
32 27
 factors whole genome data into randomly generated sets for indexing
33 28
 }
34 29
 \examples{
35 30
 data(SimpSim)
36
-createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
31
+createscCoGAPSSets(SimpSim.D, nSets=2, simulationName="example")
37 32
 }
33
+
... ...
@@ -19,3 +19,4 @@ displays information about how the package was compiled, i.e. which
19 19
 \examples{
20 20
  CoGAPS::displayBuildReport()
21 21
 }
22
+
... ...
@@ -77,3 +77,4 @@ nC <- ncol(SimpSim.D)
77 77
 patterns <- matrix(1:nC/nC, nrow=1, ncol=nC)
78 78
 result <- gapsMapRun(SimpSim.D, SimpSim.S, FP=patterns, nFactor=3)
79 79
 }
80
+
... ...
@@ -68,3 +68,4 @@ Backwards Compatibility with v2
68 68
 data(SimpSim)
69 69
 result <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3)
70 70
 }
71
+
... ...
@@ -18,3 +18,4 @@ vector of randomly generated seeds
18 18
 \description{
19 19
 Generate Seeds for Multiple Concurrent Runs
20 20
 }
21
+
... ...
@@ -27,3 +27,4 @@ genes rankings with a column for each \code{lp} will also be returned.
27 27
 \description{
28 28
 patternMarkers
29 29
 }
30
+
... ...
@@ -4,11 +4,12 @@
4 4
 \alias{patternMatch4Parallel}
5 5
 \title{patternMatch4Parallel}
6 6
 \usage{
7
-patternMatch4Parallel(Ptot, nSets, cnt, minNS = NULL, maxNS = NULL,
7
+patternMatch4Parallel(Ptot, nSets, cnt, minNS = NA, maxNS = NULL,
8 8
   cluster.method = "complete", ignore.NA = FALSE, bySet = FALSE, ...)
9 9
 }
10 10
 \arguments{
11
-\item{Ptot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
11
+\item{Ptot}{a matrix containing the total by set estimates of Pmean output
12
+from \code{reOrderBySet}}
12 13
 
13 14
 \item{nSets}{number of parallel sets used to generate \code{Ptot}}
14 15
 
... ...
@@ -16,18 +17,22 @@ patternMatch4Parallel(Ptot, nSets, cnt, minNS = NULL, maxNS = NULL,
16 17
 
17 18
 \item{minNS}{minimum of individual set contributions a cluster must contain}
18 19
 
19
-\item{maxNS}{max of individual set contributions a cluster must contain. default is nSets+minNS}
20
+\item{maxNS}{max of individual set contributions a cluster must contain.
21
+default is nSets+minNS}
20 22
 
21 23
 \item{cluster.method}{the agglomeration method to be used for clustering}
22 24
 
23
-\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
25
+\item{ignore.NA}{logical indicating whether or not to ignore NAs from
26
+potential over dimensionalization. Default is FALSE.}
24 27
 
25
-\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Ptot}}
28
+\item{bySet}{logical indicating whether to return list of matched set
29
+solutions from \code{Ptot}}
26 30
 
27 31
 \item{...}{additional parameters for \code{agnes}}
28 32
 }
29 33
 \value{
30
-a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
34
+a matrix of concensus patterns by samples. If \code{bySet=TRUE} then
35
+a list of the set contributions to each
31 36
 concensus pattern is also returned.
32 37
 }
33 38
 \description{
... ...
@@ -36,3 +41,4 @@ patternMatch4Parallel
36 41
 \seealso{
37 42
 \code{\link{agnes}}
38 43
 }
44
+
... ...
@@ -34,3 +34,4 @@ patternMatch4singleCell
34 34
 \seealso{
35 35
 \code{\link{agnes}}
36 36
 }
37
+
... ...
@@ -22,3 +22,4 @@ either an index of selected sets' contributions or the editted \code{PBySet} obj
22 22
 \description{
23 23
 PatternMatcher Shiny Ap
24 24
 }
25
+
... ...
@@ -27,3 +27,4 @@ from one of the vectors returned with atom numbers
27 27
 data(SimpSim)
28 28
 plotAtoms(SimpSim.result, type="sampA")
29 29
 }
30
+
... ...
@@ -22,3 +22,4 @@ plots a series of diagnostic plots
22 22
 data(SimpSim)
23 23
 plotDiag(SimpSim.result)
24 24
 }
25
+
... ...
@@ -28,3 +28,4 @@ heatmap and line plot respectively
28 28
 data(SimpSim)
29 29
 plotGAPS(SimpSim.result$Amean, SimpSim.result$Pmean)
30 30
 }
31
+
... ...
@@ -24,3 +24,4 @@ plots the P matrix in a line plot with error bars
24 24
 data(SimpSim)
25 25
 plotP(SimpSim.result$Pmean, SimpSim.result$Psd)
26 26
 }
27
+
... ...
@@ -37,3 +37,4 @@ plotPatternMarkers
37 37
 \seealso{
38 38
 \code{\link{heatmap.2}}
39 39
 }
40
+
... ...
@@ -43,3 +43,4 @@ Plot Smooth Patterns
43 43
 plots the output A and P matrices as a heatmap and a
44 44
 line plot respectively
45 45
 }
46
+
... ...
@@ -4,13 +4,15 @@
4 4
 \alias{postFixed4Parallel}
5 5
 \title{Post Processing of Parallel Output}
6 6
 \usage{
7
-postFixed4Parallel(AP.fixed, setPs)
7
+postFixed4Parallel(AP.fixed, setValues, setMatrix = "P")
8 8
 }
9 9
 \arguments{
10 10
 \item{AP.fixed}{output of parallel gapsMapRun calls with same FP}
11 11
 
12
-\item{setPs}{data.frame with rows giving fixed patterns for P used as input
12
+\item{setValues}{data.frame with rows giving fixed patterns for P used as input
13 13
 for gapsMapRun}
14
+
15
+\item{setMatrix}{which matrix, A or P}
14 16
 }
15 17
 \value{
16 18
 list of two data.frames containing the A matrix estimates or their
... ...
@@ -19,3 +21,4 @@ corresponding standard deviations from output of parallel CoGAPS
19 21
 \description{
20 22
 Post Processing of Parallel Output
21 23
 }
24
+
... ...
@@ -9,7 +9,7 @@ postFixed4SC(AP.fixed, setAs)
9 9
 \arguments{
10 10
 \item{AP.fixed}{output of parallel gapsMapRun calls with same FP}
11 11
 
12
-\item{setPs}{data.frame with rows giving fixed patterns for P used as input
12
+\item{setAs}{data.frame with rows giving fixed patterns for A used as input
13 13
 for gapsMapRun}
14 14
 }
15 15
 \value{
... ...
@@ -19,3 +19,4 @@ corresponding standard deviations from output of parallel CoGAPS
19 19
 \description{
20 20
 Post Processing of Parallel Output
21 21
 }
22
+
... ...
@@ -21,3 +21,4 @@ a list containing the \code{nSets} sets solution for Amean under "A", Pmean unde
21 21
 \description{
22 22
 <restructures output of gapsRun into a list containing each sets solution for Amean, Pmean, and Asd>
23 23
 }
24
+
... ...
@@ -23,3 +23,4 @@ Reconstruct Gene
23 23
 data(SimpSim)
24 24
 reconstructGene(SimpSim.result$Amean, SimpSim.result$Pmean)
25 25
 }
26
+
... ...
@@ -17,3 +17,4 @@ matched patterns
17 17
 \description{
18 18
 Reorder By Pattern Match
19 19
 }
20
+
... ...
@@ -28,3 +28,4 @@ calculate residuals and produce heatmap
28 28
 data(SimpSim)
29 29
 residuals(SimpSim.result$Amean, SimpSim.result$Pmean, SimpSim.D, SimpSim.S)
30 30
 }
31
+
... ...
@@ -39,9 +39,10 @@ the data matrix for whole genome data;
39 39
 \examples{
40 40
 data(SimpSim)
41 41
 sim_name <- "example"
42
-createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
43
-result <- scCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000)
42
+createscCoGAPSSets(SimpSim.D, nSets=2, simulationName=sim_name)
43
+result <- scCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
44 44
 }
45 45
 \seealso{
46 46
 \code{\link{gapsRun}}, \code{\link{patternMatch4singleCell}}, and \code{\link{gapsMapRun}}
47 47
 }
48
+
48 49
new file mode 100644
... ...
@@ -0,0 +1,27 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/scCoGAPS.R
3
+\name{scCoGAPSFromCheckpoint}
4
+\alias{scCoGAPSFromCheckpoint}
5
+\title{Restart a scCoGAPS run from a Checkpoint}
6
+\usage{
7
+scCoGAPSFromCheckpoint(simulationName, nCores = NA, cut = NA, minNS = NA,
8
+  ...)
9
+}
10
+\arguments{
11
+\item{simulationName}{name of this simulation}
12
+
13
+\item{nCores}{number of cores for parallelization. If left to the default NA, nCores = nSets.}
14
+
15
+\item{cut}{number of branches at which to cut dendrogram used in patternMatch4Parallel}
16
+
17
+\item{minNS}{minimum of individual set contributions a cluster must contain}
18
+
19
+\item{...}{additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}}
20
+}
21
+\value{
22
+list of A and P estimates
23
+}
24
+\description{
25
+Restart a scCoGAPS run from a Checkpoint
26
+}
27
+
... ...
@@ -7213,6 +7213,7 @@ namespace Catch {
7213 7213
 #include <set>
7214 7214
 #include <sstream>
7215 7215
 #include <algorithm>
7216
+//#include <R.h>
7216 7217
 
7217 7218
 namespace Catch {
7218 7219
 
... ...
@@ -12,6 +12,10 @@ output:
12 12
   BiocStyle::html_document
13 13
 ---
14 14
 
15
+```{r include=FALSE, cache=FALSE}
16
+library(CoGAPS)
17
+```
18
+
15 19
 # Introduction
16 20
 
17 21
 NMF algorithms associate gene expression changes with biological processes (e.g., time-course dynamics or disease subtypes). Compared with univariate gene associations, the relative gene weights of NMF solutions do not clearly identify gene biomarkers for these processes. Therefore, we developed a novel PatternMarkers statistic to extract genes uniquely representative of NMF patterns for enhanced visualization and subsequent biological validation. Finding unbiased gene markers with PatternMarkers requires whole-genome data. However, NMF algorithms typically do not converge for the tens of thousands of genes in genome-wide profiling. Therefore, we also developed GWCoGAPS to simultaneously cut runtime and ensure convergence for whole genome NMF with a sparse, MCMC NMF algorithm CoGAPS. The software includes a shiny application PatternMatcher to compare NMF patterns. 
... ...
@@ -23,59 +27,44 @@ pipeline for genome wide NMF analysis.
23 27
 
24 28
 ## GWCoGAPS
25 29
 
26
-### Methods
27
-The GWCoGAPS algorithm is run by calling the GWCoGAPS function in the CoGAPS
28
-R package as follows:
30
+Before calling *GWCoGAPS*, the user must partition the data into random subsets
31
+of genes. In this example we use the sample data provided in the package.
29 32
 
30 33
 ```{r}
31
-library(CoGAPS)
34
+data(SimpSim)
35
+simulationName <- "example"
36
+createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, simulationName)
32 37
 ```
33
-***
34
-GWCoGAPS(D, S, nFactor, nSets, nCores, saveBySetResults, fname,
35
-    PatternsMatchFN = patternMatch4Parallel, Cut, minNS, ...)
36
-***
37 38
 
38
-**Input Arguments**
39
-The inputs that must be set each time are only the nSets, nFactor, and data and standard deviation matrices, with all other inputs having default values. Additional inputs to the gapsRun function, as outlined previously, can be used to taylor the analysis based on the expected dimensionality of the data. GWCoGAPS specific arguments are as follows:
40
-
41
-\begin{description}
42
-\item[D]{data matrix}
43
-\item[S]{uncertainty matrix (std devs for chi-squared of Log Likelihood)}
44
-\item[nFactor]{number of patterns (basis vectors, metagenes), which must be
45
-greater than or equal to the number of rows of FP}
46
-\item[nSets]{number of sets for parallelization}
47
-\item[nCores]{number of cores for parallelization. If left to the default NA, nCores = nSets. }
48
-\item[saveBySetResults]{logical indicating whether to save by intermediary by set results. Default is FALSE.}
49
-\item[fname]{character string used to label file output. Default is "GWCoGAPS.out"}
50
-\item[PatternsMatchFN]{function to use for pattern matching across sets. Default is patternMatch4Parallel.}
51
-\item[Cut]{number of branches at which to cut dendrogram used in patternMatch4Parallel}
52
-\item[minNS]{minimum of individual set contributions a cluster must contain used in patternMatch4Parallel}
53
-\end{description}
39
+This will break up our data into 2 separate *.RData* files which we will pass
40
+to *GWCoGAPS* by providing the *simulationName* parameter of
41
+*createGWCoGAPSSets*.
54 42
 
55
-\par Once the GWCoGAPS algorithm has been run, the inferred patterns and corresponding amplitudes can processed in the same way as output from gapsRun, gapsMapsRun, or CoGAPS.
43
+```{r}
44
+#result <- GWCoGAPS(simulationName, nFactor=3, nEquil=500, nSample=500)
45
+```
56 46
 
57
-### Example: Simulated data
47
+Note that *nFactor* is required by *GWCoGAPS* but *nEquil* and *nSample* are
48
+optional. Any parameters passed in the ... argument of *GWCoGAPS* will be 
49
+forwarded to the internal call to *CoGAPS*.
58 50
 
59
-\par In this example, we use the same simulated data in SimpSim (SimpSim.D), as previously described, with three known patterns (SimpSim.P) and corresponding amplitude (SimpSim.A) with specified activity in two gene sets (GSets).
51
+There are additional optional parameters that can be passed to *GWCoGAPS*:
60 52
 
61
-***
62
-library('CoGAPS')
63
-data('SimpSim')
64
-GWCoGAPS(SimpSim.D, SimpSim.S, nFactor=3, nCores=NA, nSets=2,
65
-            fname="test" ,PatternsMatchFN = postCoGAPSPatternMatch,
66
-            sampleSnapshots = "TRUE", numSnapshots = 3)
67
-plotGAPS(AP.Fixed$A, AP.Fixed$P, 'ModSimFigs')
68
-***
53
+*nCores* - number of cores for parallelization, if not set then nCores = nSets
54
+*cut* - number of branches at which to cut dendrogram used in 
55
+    *patternMatch4Parallel*
56
+*minNS* - minimum of individual set contributions a cluster must contain used
57
+    in *patternMatch4Parallel*
58
+*manualMatch* - logical indicating whether or not to stop after initial phase
59
+    for manual pattern matching
60
+*consensusPatterns* - fixed pattern matrix to be used to ensure reciprocity of
61
+    A weights accross sets 
69 62
 
70
-Figure \ref{fig:ModSim} shows the results from plotting the GWCoGAPS estimates of ${\bf{A}}$ and ${\bf{P}}$ using \texttt{plotGAPS}.  
71
-\begin{figure}[ht]
72
-  \begin{center}
73
-      \includegraphics[width=0.45\linewidth]{ModSimFigs-Amplitude}\includegraphics[width=0.45\linewidth]{ModSimFigs-Patterns}
74
-  \end{center}
75
-  \caption{Results from GWCoGAPS on simulated data set with known true patterns.}
76
-  \label{fig:ModSim}
77
-\end{figure}
63
+Once the GWCoGAPS algorithm has been run, the inferred patterns and corresponding amplitudes can processed in the same way as output from CoGAPS.
78 64
 
65
+```{r}
66
+#plotGaps(result$Amean, result$Pmean)
67
+```
79 68
 
80 69
 ## PatternMatcher Shiny App
81 70
 
82 71
deleted file mode 100644
... ...
@@ -1,233 +0,0 @@
1
-title: "GWCoGAPS and PatternMarkers Vignette"
2
-author: "Genevieve L. Stein-O'Brien"
3
-date: \today
4
-output: BiocStyle::pdf_document
5
-bibliography: AppNote.bib
6
-vignette: >
7
-  %\VignetteIndexEntry{Overview of CNPBayes package}
8
-  %\VignetteEngine{knitr::rmarkdown}
9
-  %\usepackage[utf8]{inputenc} 
10
-
11
-# Introduction
12
-
13
-NMF algorithms associate gene expression changes with biological processes (e.g., time-course dynamics or disease subtypes). Compared with univariate gene associations, the relative gene weights of NMF solutions do not clearly identify gene biomarkers for these processes. Therefore, we developed a novel PatternMarkers statistic to extract genes uniquely representative of NMF patterns for enhanced visualization and subsequent biological validation. Finding unbiased gene markers with PatternMarkers requires whole-genome data. However, NMF algorithms typically do not converge for the tens of thousands of genes in genome-wide profiling. Therefore, we also developed GWCoGAPS to simultaneously cut runtime and ensure convergence for whole genome NMF with a sparse, MCMC NMF algorithm CoGAPS. The software includes a shiny application PatternMatcher to compare NMF patterns. 
14
-
15
-# Executing GWCoGAPS
16
-
17
-In this chapter, we describe how to run both the GWCoGAPS algorithm and a manual
18
-pipeline for genome wide NMF analysis.
19
-
20
-## GWCoGAPS
21
-
22
-### Methods
23
-The GWCoGAPS algorithm is run by calling the GWCoGAPS function in the CoGAPS
24
-R package as follows:
25
-
26
-
27
-```r
28
-library(CoGAPS) 
29
-GWCoGAPS(D, S, nFactor, nSets, nCores, saveBySetResults, fname,
30
-    PatternsMatchFN = patternMatch4Parallel, Cut, minNS, ...)
31
-```
32
-
33
-**Input Arguments**
34
-The inputs that must be set each time are only the nSets, nFactor, and data and standard deviation matrices, with all other inputs having default values. Additional inputs to the gapsRun function, as outlined previously, can be used to taylor the analysis based on the expected dimensionality of the data. GWCoGAPS specific arguments are as follows:
35
-
36
-\begin{description}
37
-\item[D]{data matrix}
38
-\item[S]{uncertainty matrix (std devs for chi-squared of Log Likelihood)}
39
-\item[nFactor]{number of patterns (basis vectors, metagenes), which must be
40
-greater than or equal to the number of rows of FP}
41
-\item[nSets]{number of sets for parallelization}
42
-\item[nCores]{number of cores for parallelization. If left to the default NA, nCores = nSets. }
43
-\item[saveBySetResults]{logical indicating whether to save by intermediary by set results. Default is FALSE.}
44
-\item[fname]{character string used to label file output. Default is "GWCoGAPS.out"}
45
-\item[PatternsMatchFN]{function to use for pattern matching across sets. Default is patternMatch4Parallel.}
46
-\item[Cut]{number of branches at which to cut dendrogram used in patternMatch4Parallel}
47
-\item[minNS]{minimum of individual set contributions a cluster must contain used in patternMatch4Parallel}
48
-\end{description}
49
-
50
-\par Once the GWCoGAPS algorithm has been run, the inferred patterns and corresponding amplitudes can processed in the same way as output from gapsRun, gapsMapsRun, or CoGAPS.
51
-
52
-### Example: Simulated data
53
-
54
-\par In this example, we use the same simulated data in SimpSim (SimpSim.D), as previously described, with three known patterns (SimpSim.P) and corresponding amplitude (SimpSim.A) with specified activity in two gene sets (GSets).
55
-
56
-
57
-```r
58
-library('CoGAPS')
59
-data('SimpSim')
60
-GWCoGAPS(SimpSim.D, SimpSim.S, nFactor=3, nCores=NA, nSets=2,
61
-            fname="test" ,PatternsMatchFN = postCoGAPSPatternMatch,
62
-            sampleSnapshots = "TRUE", numSnapshots = 3)
63
-plotGAPS(AP.Fixed$A, AP.Fixed$P, 'ModSimFigs')
64
-```
65
-
66
-
67
-## Manual Pipeline
68
-
69
-The functions that compose the core of the GWCoGAPS algorithm are also provided in the CoGAPS R package and can be used for a manual pipeline of the same analysis or to create a custom pipeline. Additionally, a web based shiny application is included for visualizing and editing the pattern matching process.
70
-
71
-### PatternMatcher Shiny App
72
-
73
-**Input Arguments**
74
-The inputs that must be set each time is the PBySet. Additionally, the order and sample.color inputs are only available when calling patternMatcher in an interactive R session.
75
-
76
-\begin{description}
77
-\item[PBySet]{list of matched set solutions for the Pmatrix from an NMF algorithm}
78
-\item[out]{optional name for saving output}
79
-\item[order]{optional vector indicating order of samples for plotting. Default is NULL.}
80
-\item[sample.color]{optional vector of colors of same lenght as colnames. Default is NULL.}
81
-\end{description}
82
-
83
-![A screenshot of the PatternMatcher Shiny App plotting the by-set results for pattern 1 of the simulated data](ShinyApp.pdf)
84
-
85
-### Example: Simulated data}
86
-
87
-\par This example will manually generate the same result as calling the GWCoGAPS function as given in the example in the previous section.
88
-
89
-
90
-```r
91
-data('SimpSim')
92
-D<-SimpSim.D
93
-S<-SimpSim.S
94
-nFactor=3
95
-nSets<-2
96
-nCores=NA
97
-numSnapshots=3
98
-saveBySetResults=TRUE
99
-fname="test"
100
-minNS=NA
101
-
102
-# break the data into sets
103
-genesInSets<-createGWCoGAPSSets(data=D, nSets=nSets, keep=FALSE)
104
-
105
-#generate seeds for parallelization
106
-nut<-generateSeeds(chains=nSets, seed=-1)
107
-
108
-# establish the number of cores that you are able to use
109
-if(is.na(nCores)){nCores<-nSets}
110
-registerDoParallel(cores=nCores)
111
-
112
-# run CoGAPS for each set
113
-AP <- foreach(i=1:nSets) %dopar% {
114
-  D <- as.matrix(D[genesInSets[[i]],])
115
-  S <- as.matrix(S[genesInSets[[i]],])
116
-  gapsRun(D=D, S=S, nFactor=nFactor,seed=nut[i],numSnapshots=numSnapshots)
117
-}
118
-
119
-BySet<-reOrderBySet(AP=AP,nFactor=nFactor,nSets=nSets)
120
-matchedPs<-patternMatch4Parallel(Ptot=BySet$P,nP=nFactor,nS=nSets,cnt=nFactor,minNS=minNS,bySet=TRUE)
121
-
122
-# use shiny for pattern matching
123
-selectPBySet<-PatternMatcher(PBySet=matchedPs[["PBySet"]])
124
-library(reshape2)
125
-selectPBySet<-dcast(selectPBySet,  BySet ~ Samples)
126
-rownames(selectPBySet)<-selectPBySet$BySet
127
-selectPBySet<-as.matrix(selectPBySet[,-1])
128
-matchedPs<-patternMatch4Parallel(Ptot=selectPBySet,nP=nFactor,nS=nSets,cnt=nFactor,minNS=minNS,bySet=FALSE)
129
-
130
-#generate seeds for parallelization
131
-nut<-generateSeeds(chains=nSets, seed=-1)
132
-#final number of factors
133
-nFactorFinal<-dim(matchedPs)[1]
134
-
135
-# run fixed CoGAPS
136
-Fixed <- foreach(i=1:nSets) %dopar% {
137
-  D <- as.matrix(D[genesInSets[[i]],])
138
-  S <- as.matrix(S[genesInSets[[i]],])
139
-  AP <- gapsMapRun(D, S, FP=matchedPs, nFactor=nFactorFinal, fixedMatrix = "P",seed=nut[i],numSnapshots=numSnapshots)
140
-}
141
-
142
-#extract A and Asds
143
-As4fixPs <- postFixed(AP.fixed=Fixed,setPs=matchedPs)
144
-```
145
-
146
-
147
-# PatternMarkers
148
-
149
-In this chapter, we describe the PatternMarkers statistic.
150
-
151
-## PatternMarkers
152
-
153
-The PatternMarkers statistic finds the genes most uniquely associated with a given pattern or linear combination of patterns by computing
154
-
155
-\begin{equation}
156
-\sqrt{\left(\bf{A}_{i}-l_{p}\right)^{\textit{t}} \left(\bf{A}_{i}-l_{p}\right)}
157
-\label{eq:PatternMarkers}
158
-\end{equation}
159
-
160
-where  are the elements of the $\bf{A}$ matrix for the $\textit{i}^{th}$ gene scaled to have a maximum of one and \textit{l} is the $\textit{p}^{th}$ user specified norm. The genes are then ranked. In the case where \textit{l} is the identity vector, the ranking is run separately for each of the K patterns. Unique sets are generated by thresholding using the first gene to have a lower ranking, i.e. better fit to, another patterns.
161
-
162
-### Methods
163
-
164
-The PatternMarkers statistic is run by calling thepatternMarkers function in the CoGAPS R package as follows:
165
-
166
-
167
-```r
168
- patternMarkers(Amatrix = AP$Amean, scaledPmatrix = FALSE, Pmatrix = NA,
169
-  threshold = "cut", lp = NA, full = FALSE, ...)
170
-```
171
-
172
-**Input Arguments**
173
-\begin{description}
174
-\item[Amatrix]{A matrix of genes by weights resulting from CoGAPS or other NMF decomposition}
175
-\item[scaledPmatrix]{logical indicating whether the corresponding pattern matrix was fixed to have max 1 during decomposition}
176
-\item[Pmatrix]{the corresponding Pmatrix (patterns X samples) for the provided Amatrix (genes x patterns). This must be supplied if scaledPmatrix is FALSE.}
177
-\item[threshold]{the type of threshold to be used. The default "cut" will thresholding by the first gene to have a lower ranking, i.e. better fit to, a pattern. Alternatively, threshold="all" will return all of the genes in rank order for each pattern.}
178
-\item[lp]{a vector of weights for each pattern to be used for finding markers. If NA markers for each pattern of the A matrix will be used.}
179
-\item[full]{logical indicating whether to return the ranks of each gene for each pattern}
180
-\end{description}
181
-
182
-Once the PatternMarkers statistic has been run, a heatmap of each markers expression level can be displayed using the plotPatternMarkers function as follows:
183
-
184
-
185
-```r
186
-plotPatternMarkers(data = NA, patternMarkers = PatternMarkers,
187
-      patternPalette = NA, sampleNames = NA, samplePalette = NA,
188
-      colDenogram = TRUE, heatmapCol = "bluered", scale = "row", ...)
189
-```
190
- 
191
-**Input Arguments**
192
-\begin{description}
193
-\item[data]{the dataset from which the patterns where generated}
194
-\item[patternMarkers]{the list of genes generated from the patternMarkers function}
195
-\item[patternPalette]{a vector indicating what color should be used for each pattern}
196
-\item[sampleNames]{names of the samples to use for labeling }
197
-\item[samplePalette]{a vector indicating what color should be used for each sample}
198
-\end{description}
199
-
200
-### Example: Simulated data
201
-
202
-
203
-```r
204
-# GWCoGAPS output
205
-PatternMarkers<-patternMarkers(Amatrix=AP.fixed$A,scaledPmatrix=TRUE,threshold="cut")
206
-plotPatternMarkers(data=D,patternMarkers=PatternMarkers,patternPalette=c("grey","navy","orange"))
207
-# manual pipeline output
208
-PatternMarkers<-patternMarkers(Amatrix=As4fixPs$A,scaledPmatrix=TRUE,threshold="cut")
209
-plotPatternMarkers(data=D,patternMarkers=PatternMarkers,patternPalette=c("grey","navy","orange"))
210
-```
211
-
212
-Figure \ref{fig:PM1} shows the results from running plotPatternMarkers on the PatternMarkers generated from the the GWCoGAPS (left) and manual pipeline (right) results generated from the simulated data as previously illustrated.
213
-\begin{figure}[h]
214
-\begin{center}
215
-\includegraphics[width=0.4\linewidth]{GWCoGAPSPMs}
216
-\includegraphics[width=0.4\linewidth]{ManPipePMs}
217
-\end{center}
218
-\caption{Heatmap of PatternMarkers expression levels for simulated data.}
219
-\label{fig:PM1}
220
-\end{figure}
221
-
222
-# Feedback
223