Browse code

cleaned up directory

Tom Sherman authored on 26/07/2018 18:15:21
Showing55 changed files

... ...
@@ -17,24 +17,12 @@ Depends:
17 17
     R (>= 3.4.0),
18 18
     Rcpp (>= 0.11.0)
19 19
 Imports:
20
-    RColorBrewer (>= 1.0.5),
21
-    gplots (>= 2.8.0),
22
-    graphics,
23
-    grDevices,
24 20
     methods,
25
-    cluster,
26
-    shiny,
27
-    stats,
28
-    utils,
29
-    doParallel,
30
-    foreach,
31
-    ggplot2,
32
-    reshape2,
33
-    SingleCellExperiment,
34
-    SummarizedExperiment
21
+    gplots,
22
+    graphics,
23
+    S4Vectors
35 24
 Suggests:
36 25
     testthat,
37
-    lintr,
38 26
     knitr,
39 27
     rmarkdown,
40 28
     BiocStyle
... ...
@@ -49,21 +37,9 @@ RoxygenNote: 5.0.1
49 37
 Collate:
50 38
     'class-CogapsParams.R'
51 39
     'CoGAPS.R'
52
-    'GWCoGAPS.R'
40
+    'DistributedCogaps.R'
41
+    'Package.R'
53 42
     'RcppExports.R'
54
-    'cellMatchR.R'
55 43
     'class-CogapsResult.R'
56
-    'createGWCoGAPSSets.R'
57
-    'createscCoGAPSSets.R'
58
-    'distributedCogaps.R'
59
-    'generateSeeds.R'
60
-    'package.R'
61
-    'patternMarkers.R'
62
-    'patternMatch4Parallel.R'
63
-    'patternMatcher.R'
64
-    'plotPatternMarkers.R'
65
-    'postFixed4Parallel.R'
66
-    'postFixed4SC.R'
67
-    'reOrderBySet.R'
68
-    'reorderByPatternMatch.R'
69
-    'scCoGAPS.R'
44
+    'methods-CogapsParams.R'
45
+    'methods-CogapsResult.R'
... ...
@@ -1,79 +1,28 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 export(CoGAPS)
4
-export(GWCoGAPS)
5
-export(GWCoGapsFromCheckpoint)
6 4
 export(binaryA)
7 5
 export(buildReport)
8 6
 export(calcCoGAPSStat)
9 7
 export(calcGeneGSStat)
10 8
 export(calcZ)
11
-export(cellMatchR)
12 9
 export(computeGeneGSProb)
13
-export(createGWCoGAPSSets)
14
-export(createscCoGAPSSets)
15 10
 export(getMeanChiSq)
16 11
 export(getParam)
17 12
 export(patternMarkers)
18 13
 export(patternMatch)
19
-export(patternMatch4Parallel)
20 14
 export(plot.CogapsResult)
15
+export(plotPatternMarkers)
21 16
 export(plotResiduals)
22 17
 export(reconstructGene)
23
-export(scCoGAPS)
24 18
 export(setParam)
25 19
 exportClasses(CogapsParams)
26 20
 exportClasses(CogapsResult)
27
-import(doParallel)
28
-import(foreach)
29
-import(ggplot2)
30
-import(shiny)
31 21
 importClassesFrom(S4Vectors,Annotated)
32 22
 importClassesFrom(S4Vectors,DataFrame)
33 23
 importClassesFrom(S4Vectors,character_OR_NULL)
34
-importFrom(RColorBrewer,brewer.pal)
35
-importFrom(Rcpp,evalCpp)
36
-importFrom(cluster,agnes)
37
-importFrom(gplots,heatmap.2)
38
-importFrom(gplots,plotCI)
39
-importFrom(grDevices,colorRampPalette)
40
-importFrom(grDevices,dev.new)
41
-importFrom(grDevices,dev.off)
42
-importFrom(grDevices,pdf)
43
-importFrom(grDevices,rainbow)
44
-importFrom(graphics,abline)
45
-importFrom(graphics,close.screen)
46
-importFrom(graphics,hist)
47
-importFrom(graphics,legend)
48
-importFrom(graphics,lines)
49
-importFrom(graphics,matplot)
50
-importFrom(graphics,mtext)
51
-importFrom(graphics,par)
24
+importFrom(gplots,bluered)
52 25
 importFrom(graphics,plot)
53
-importFrom(graphics,points)
54
-importFrom(graphics,screen)
55
-importFrom(graphics,split.screen)
56
-importFrom(graphics,title)
57 26
 importFrom(methods,callNextMethod)
58
-importFrom(methods,is)
59 27
 importFrom(methods,new)
60
-importFrom(reshape2,melt)
61
-importFrom(stats,D)
62
-importFrom(stats,as.dist)
63
-importFrom(stats,as.hclust)
64
-importFrom(stats,complete.cases)
65
-importFrom(stats,cor)
66
-importFrom(stats,cutree)
67
-importFrom(stats,hclust)
68
-importFrom(stats,heatmap)
69
-importFrom(stats,loess)
70
-importFrom(stats,runif)
71
-importFrom(stats,sd)
72
-importFrom(stats,update)
73
-importFrom(stats,variable.names)
74
-importFrom(stats,weighted.mean)
75
-importFrom(utils,file_test)
76
-importFrom(utils,read.table)
77
-importFrom(utils,str)
78
-importFrom(utils,write.table)
79 28
 useDynLib(CoGAPS)
80 29
similarity index 93%
81 30
rename from R/distributedCogaps.R
82 31
rename to R/DistributedCogaps.R
... ...
@@ -127,7 +127,7 @@ patternMatch <- function(allPatterns, allParams)
127 127
         indx <- which(sapply(cc$PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS))
128 128
     }
129 129
 
130
-    # created matrix of mean patterns - weighted by R closeness of fit
130
+    # create matrix of mean patterns - weighted by coefficient of determination
131 131
     PatsByCDSWavg <- t(sapply(1:length(cc$PatsByClust), function(z)
132 132
         apply(cc$PatsByClust[[z]], 1, function(x) weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
133 133
     rownames(PatsByCDSWavg) <- lapply(1:length(cc$PatsByClust), function(x) paste("Pattern", x))
... ...
@@ -137,13 +137,15 @@ patternMatch <- function(allPatterns, allParams)
137 137
     return(apply(PatsByCDSWavg, 1, function(row) row / max(row)))
138 138
 }
139 139
 
140
+#' @importFrom cluster agnes
141
+#' @importFrom stats cutree as.hclust
140 142
 corcut <- function(allPatterns, allParams)
141 143
 {
142 144
     corr.dist <- cor(allPatterns)
143 145
     corr.dist <- 1 - corr.dist
144 146
 
145
-    clust <- agnes(x=corr.dist, diss=TRUE, "complete")
146
-    patternIds <- cutree(as.hclust(clust), k=allParams$gaps@cut)
147
+    clust <- cluster::agnes(x=corr.dist, diss=TRUE, "complete")
148
+    patternIds <- stats::cutree(stats::as.hclust(clust), k=allParams$gaps@cut)
147 149
 
148 150
     RtoMeanPattern <- list()
149 151
     PatsByClust <- list()
150 152
deleted file mode 100644
... ...
@@ -1,238 +0,0 @@
1
-#' GWCoGAPS
2
-#'
3
-#' @details calls the C++ MCMC code and performs Bayesian
4
-#' matrix factorization returning the two matrices that reconstruct
5
-#' the data matrix for whole genome data;
6
-#' @param simulationName name of this simulation
7
-#' @param nFactor number of patterns (basis vectors, metagenes), which must be
8
-#' greater than or equal to the number of rows of FP
9
-#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets.
10
-#' @param cut number of branches at which to cut dendrogram used in patternMatch4Parallel
11
-#' @param minNS minimum of individual set contributions a cluster must contain
12
-#' @param manualMatch logical indicating whether or not to stop after initial phase for manual pattern matching
13
-#' @param consensusPatterns fixed pattern matrix to be used to ensure reciprocity of A weights accross sets 
14
-#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}
15
-#' @return list of A and P estimates
16
-#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}}
17
-#' @examples
18
-#' data(SimpSim)
19
-#' sim_name <- "example"
20
-#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
21
-#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
22
-#' @export
23
-GWCoGAPS <- function(simulationName, nFactor=3, nCores=NA, cut=NA, minNS=NA,
24
-manualMatch=FALSE, consensusPatterns=NULL, saveUnmatchedPatterns=FALSE, ...)
25
-{
26
-    if (!is.null(list(...)$nPatterns))
27
-    {
28
-        nFactor <- list(...)$nPatterns
29
-    }
30
-
31
-    if (!is.null(list(...)$checkpointFile))
32
-    {
33
-        stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter")
34
-    }
35
-
36
-    if (is.null(consensusPatterns))
37
-    {
38
-        allDataSets <- preInitialPhase(simulationName, nCores)
39
-        unmatchedPatterns <- runInitialPhase(simulationName, allDataSets, nFactor, ...)
40
-        if (saveUnmatchedPatterns)
41
-        {
42
-            save(unmatchedPatterns, file=paste(simulationName, "_unmatched_patterns.RData"))
43
-        }
44
-        if (manualMatch)
45
-        {
46
-            saveRDS(unmatchedPatterns,file=paste(simulationName,"_initial.rds", sep=""))
47
-            stop("Please provide consensus patterns upon restarting.")
48
-        }
49
-        matchedPatternSets <- postInitialPhase(unmatchedPatterns, length(allDataSets), cut, minNS)
50
-        save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", sep=""))
51
-        consensusPatterns <- matchedPatternSets[[1]]
52
-    } 
53
-    finalResult <- runFinalPhase(simulationName, allDataSets, consensusPatterns, nCores, ...)
54
-    return(postFinalPhase(finalResult, consensusPatterns))
55
-}
56
-
57
-#' Restart a GWCoGaps Run from Checkpoint
58
-#'
59
-#' @inheritParams GWCoGAPS
60
-#' @return list of A and P estimates
61
-#' @importFrom utils file_test
62
-#' @examples
63
-#' data(SimpSim)
64
-#' sim_name <- "example"
65
-#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
66
-#' trash <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
67
-#' result <- GWCoGapsFromCheckpoint(sim_name, 2)
68
-#' @export
69
-GWCoGapsFromCheckpoint <- function(simulationName, nCores, cut=NA, minNS=NA, ...)
70
-{
71
-    # find data files
72
-    allDataSets <- preInitialPhase(simulationName, nCores)
73
-
74
-    # figure out phase from file signature
75
-    initialCpts <- list.files(full.names=TRUE, pattern=paste(simulationName,
76
-        "_initial_cpt_[0-9]+.out", sep=""))
77
-    finalCpts <- list.files(full.names=TRUE, pattern=paste(simulationName,
78
-        "_final_cpt_[0-9]+.out", sep=""))
79
-
80
-    if (length(finalCpts))
81
-    {
82
-        finalResult <- foreach(i=1:length(allDataSets)) %dopar%
83
-        {
84
-            # load data set and shift values so gene minimum is zero
85
-            load(allDataSets[[i]])
86
-            sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x)
87
-                pmin(0,min(x))))
88
-    
89
-            # run CoGAPS with fixed patterns
90
-            cptFileName <- paste(simulationName, "_final_cpt_", i, ".out",
91
-                sep="")
92
-            CoGapsFromCheckpoint(sampleD, sampleS, cptFileName)
93
-        }
94
-        load(paste(simulationName, "_matched_ps.RData", sep=""))
95
-    }
96
-    else if (file_test("-f", paste(simulationName, "_matched_ps.RData", sep="")))
97
-    {
98
-        load(paste(simulationName, "_matched_ps.RData", sep=""))
99
-        consensusPatterns<-matchedPatternSets[[1]]
100
-        finalResult <- runFinalPhase(simulationName, allDataSets,
101
-            consensusPatterns, nCores, ...)
102
-    }
103
-    else if (length(initialCpts))
104
-    {
105
-        # initial phase - always needs to be run to get matchedPatternSets
106
-        initialResult <- foreach(i=1:length(allDataSets)) %dopar%
107
-        {
108
-            # load data set and shift values so gene minimum is zero
109
-            load(allDataSets[[i]])
110
-            sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x)
111
-                pmin(0,min(x))))
112
-
113
-            # run CoGAPS from checkpoint
114
-            cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out",
115
-                sep="")
116
-            CoGapsFromCheckpoint(sampleD, sampleS, cptFileName)
117
-        }
118
-        matchedPatternSets <- postInitialPhase(initialResult,
119
-            length(allDataSets), cut, minNS)
120
-        save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData",
121
-            sep=""))
122
-        consensusPatterns<-matchedPatternSets[[1]]
123
-        finalResult <- runFinalPhase(simulationName, allDataSets,
124
-            consensusPatterns, nCores, ...)
125
-    }
126
-    else
127
-    {
128
-        stop("no checkpoint files found")
129
-    }
130
-    return(postFinalPhase(finalResult, consensusPatterns))
131
-}
132
-
133
-preInitialPhase <- function(simulationName, nCores)
134
-{
135
-    # find data files
136
-    fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="")
137
-    allDataSets <- list.files(full.names=TRUE, pattern=fileSig)
138
-
139
-    # establish the number of cores that we are able to use
140
-    if (is.na(nCores))
141
-    {
142
-        nCores <- length(allDataSets)
143
-    }
144
-    registerDoParallel(cores=nCores)
145
-    return(allDataSets)
146
-}
147
-
148
-runInitialPhase <- function(simulationName, allDataSets, nFactor, ...)
149
-{
150
-    #generate seeds for parallelization
151
-    nut <- generateSeeds(chains=length(allDataSets), seed=-1)
152
-
153
-    # run CoGAPS for each set
154
-    initialResult <- foreach(i=1:length(allDataSets)) %dopar%
155
-    {
156
-        # load data set and shift values so gene minimum is zero
157
-        load(allDataSets[[i]])
158
-        sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x))))
159
-
160
-        # run CoGAPS without any fixed patterns
161
-        cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", sep="")
162
-        CoGAPS(sampleD, uncertainty=sampleS, nFactor=nFactor, seed=nut[i],
163
-            checkpointFile=cptFileName)
164
-    }
165
-    return(initialResult)
166
-}
167
-
168
-postInitialPhase <- function(initialResult, nSets, cut, minNS)
169
-{
170
-    nFactor <- ncol(initialResult[[1]]@featureLoadings)
171
-    BySet <- reOrderBySet(AP=initialResult, nFactor=nFactor, nSets=nSets)
172
-
173
-    #run postpattern match function
174
-    if (is.na(cut))
175
-    {
176
-        cut <- nFactor
177
-    }
178
-
179
-    return(patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets, cnt=cut,
180
-        minNS=minNS, bySet=TRUE))
181
-}
182
-
183
-runFinalPhase <- function(simulationName, allDataSets, consensusPatterns, nCores, ...)
184
-{    
185
-    if (class(consensusPatterns) != "matrix")
186
-    {
187
-        stop("consensusPatterns must be a matrix")
188
-    }
189
-
190
-    # find data files if providing consensus patterns
191
-    fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="")
192
-    allDataSets <- list.files(full.names=TRUE, pattern=fileSig)
193
-
194
-    if (is.na(nCores))
195
-    {
196
-        nCores <- length(allDataSets)
197
-    }
198
-    registerDoParallel(cores=nCores)
199
-
200
-    # generate seeds for parallelization
201
-    nut <- generateSeeds(chains=length(allDataSets), seed=-1)
202
-
203
-    # final number of factors
204
-    nFactorFinal <- nrow(consensusPatterns)
205
-    message(paste("final number of patterns:", nFactorFinal))
206
-
207
-    # run fixed CoGAPS
208
-    finalResult <- foreach(i=1:length(allDataSets)) %dopar%
209
-    {
210
-        # load data set and shift values so gene minimum is zero
211
-        load(allDataSets[[i]])
212
-        sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x))))
213
-
214
-        # run CoGAPS with fixed patterns
215
-        cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", sep="")
216
-        CoGAPS(sampleD, uncertainty=sampleS, fixedMatrix=consensusPatterns,
217
-            nFactor=nFactorFinal, seed=nut[i], checkpointFile=cptFileName,
218
-            whichMatrixFixed='P')
219
-
220
-    }
221
-    return(finalResult)
222
-}
223
-
224
-postFinalPhase <- function(finalResult, consensusPatterns)
225
-{
226
-    Aresult <- postFixed4Parallel(finalResult, consensusPatterns)
227
-    finalResult <- list("Amean"=Aresult$A, "Asd"=Aresult$Asd,"Pmean"=consensusPatterns)
228
-    
229
-    return(new("CogapsResult",
230
-        Amean       = Aresult$A,
231
-        Asd         = Aresult$Asd,
232
-        Pmean       = consensusPatterns,
233
-        Psd         = NULL,
234
-        seed        = 0,
235
-        meanChiSq   = 0,
236
-        diagnostics = list()
237
-    ))
238
-}
239 0
similarity index 76%
240 1
rename from R/package.R
241 2
rename to R/Package.R
... ...
@@ -21,21 +21,6 @@
21 21
 #' Bioinformatics. 2010 Nov 1;26(21):2792-3
22 22
 #' @docType package
23 23
 #' @name CoGAPS-package
24
-#' @importFrom Rcpp evalCpp
25
-#' @importFrom gplots heatmap.2 plotCI
26
-#' @importFrom stats variable.names sd update heatmap runif
27
-#' @importFrom graphics matplot title abline close.screen hist legend lines mtext par plot points screen split.screen
28
-#' @importFrom grDevices dev.new dev.off pdf colorRampPalette rainbow
29
-#' @importFrom methods is
30
-#' @importFrom utils read.table write.table str
31
-#' @importFrom RColorBrewer brewer.pal
32
-#' @importFrom stats cor loess as.dist cutree as.hclust complete.cases D hclust weighted.mean
33
-#' @import foreach
34
-#' @importFrom cluster agnes 
35
-#' @import doParallel
36
-#' @import shiny
37
-#' @importFrom reshape2 melt
38
-#' @import ggplot2
39 24
 #' @useDynLib CoGAPS
40 25
 NULL
41 26
 
42 27
deleted file mode 100644
... ...
@@ -1,125 +0,0 @@
1
-#' cellMatchR
2
-#' @export
3
-#'
4
-#' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
5
-#' @param nSets number of parallel sets used to generate \code{Atot}
6
-#' @param cnt  number of branches at which to cut dendrogram
7
-#' @param minNS minimum of individual set contributions a cluster must contain
8
-#' @param maxNS maximum of individual set contributions a cluster must contain
9
-#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
10
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot}
11
-#' @param plotDen plot
12
-#' @param ... additional parameters for \code{agnes}
13
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
14
-#' concensus pattern is also returned.
15
-cellMatchR <- function(Atot, nSets, cnt, minNS=NA, maxNS=NA,
16
-ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE, ...)
17
-{
18
-    if (is.na(minNS))
19
-    {
20
-        minNS <- nSets / 2
21
-    }
22
-    if (is.na(maxNS))
23
-    {
24
-        maxNS <- nSets + minNS
25
-    }
26
-    if (!ignore.NA)
27
-    {
28
-        if (anyNA(Atot))
29
-        {
30
-            warning(paste("Non-sparse matrixes produced. Reducing the number",
31
-                "of patterns asked for and rerun."))
32
-        }
33
-    }
34
-    else
35
-    {
36
-        Atot <- Atot[complete.cases(Atot),]
37
-    }
38
-
39
-    corcut <- function(Atot, minNS, cnt, cluster.method)
40
-    {
41
-        corr.dist <- cor(Atot)
42
-        corr.dist <- 1-corr.dist
43
-
44
-        clust <- agnes(x=corr.dist, diss=TRUE, cluster.method)
45
-        #clust=fastcluster::hclust(dist(corr.dist))
46
-        cut <- cutree(as.hclust(clust),k=cnt)
47
-
48
-        cls <- sort(unique(cut))
49
-        cMNs <- matrix(ncol=cnt,nrow=dim(Atot)[1])
50
-        colnames(cMNs) <- cls
51
-        rownames(cMNs) <- rownames(Atot)
52
-
53
-        RtoMeanPattern <- list()
54
-        AByClust <- list()
55
-        for (i in cls)
56
-        {
57
-            if (!is.null(dim(Atot[,cut == i])))
58
-            {
59
-                if (dim(Atot[,cut == i])[2] >= minNS)
60
-                {
61
-                    cMNs[,i] <- rowMeans(Atot[,cut==i])
62
-                    AByClust[[i]] <- Atot[,cut==i]
63
-                    nIN <- sum(cut==i)
64
-                    RtoMeanPattern[[i]] <- sapply(1:nIN, function(j) 
65
-                        round(cor(x=Atot[,cut==i][,j], y=cMNs[,i]),3))
66
-                }
67
-            }
68
-        }
69
-
70
-        #AByClust[sapply(AByClust,is.null)] <- NULL
71
-        #RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL
72
-        return(list("RtoMeanPattern"=RtoMeanPattern, "AByClust"=AByClust))
73
-    }   
74
-
75
-    cc <- corcut(Atot, minNS, cnt, cluster.method)
76
-
77
-    ### split by maxNS
78
-    indx <- which(unlist(lapply(cc$AByClust, function(x) dim(x)[1] > maxNS)))
79
-    i <- 1
80
-    while (length(indx) > 0)
81
-    { 
82
-        icc <- corcut(cc$AByClust[[indx[1]]], minNS, 2, cluster.method)
83
-        if (length(icc[[2]]) == 0)
84
-        {
85
-            indx <- indx[-1]
86
-            next
87
-        }
88
-        else
89
-        {
90
-            cc$AByClust[[indx[1]]] <- icc[[2]][[1]]
91
-            cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]]
92
-            if (length(icc[[2]]) > 1)
93
-            {
94
-                cc$AByClust<-append(cc$AByClust,icc[[2]][2])
95
-                cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2])
96
-            } 
97
-            indx <- which(unlist(lapply(cc$AByClust,function(x) dim(x)[1]>maxNS)))
98
-        }
99
-    }
100
-
101
-    #weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]])
102
-    AByCDSWavg <- t(sapply(1:length(cc$AByClust), function(z)
103
-        apply(cc$AByClust[[z]], 1, function(x)
104
-            weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
105
-    rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust), function(x)
106
-        paste("Pattern",x))
107
-    #scale As
108
-    Amax <- apply(AByCDSWavg, 1, max)
109
-    AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1], function(x)
110
-        AByCDSWavg[x,] / Amax[x]))
111
-    rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg)
112
-
113
-    if (bySet)
114
-    {  
115
-        return(list("consenusAs"=t(AByCDSWavgScaled),"ABySet"=cc))
116
-    }
117
-    else
118
-    {
119
-        return(AByCDSWavgScaled)
120
-    }
121
-}
122
-
123
-# pattern first then file name
124
-# dont overwrite
125
-# some elements are negative
126 0
\ No newline at end of file
... ...
@@ -120,95 +120,3 @@ setGeneric("parseOldParams", function(object, oldArgs)
120 120
 setGeneric("parseDirectParams", function(object, args)
121 121
     {standardGeneric("parseDirectParams")})
122 122
 
123
-#' @rdname setParam-methods
124
-#' @aliases setParam
125
-setMethod("setParam", signature(object="CogapsParams"),
126
-function(object, whichParam, value)
127
-{
128
-    slot(object, whichParam) <- value
129
-    validObject(object)
130
-    return(object)
131
-})
132
-
133
-#' @rdname getParam-methods
134
-#' @aliases getParam
135
-setMethod("getParam", signature(object="CogapsParams"),
136
-function(object, whichParam)
137
-{
138
-    slot(object, whichParam)
139
-})
140
-
141
-#' @rdname parseOldParams-methods
142
-#' @aliases parseOldParams
143
-setMethod("parseOldParams", signature(object="CogapsParams"),
144
-function(object, oldArgs)
145
-{
146
-    helper <- function(arg, params, newArg)
147
-    {
148
-        if (!is.null(oldArgs[[arg]]))
149
-        {
150
-            warning(paste("parameter", arg, "is deprecated, it will still",
151
-                "work, but setting", newArg, "in the params object is the",
152
-                "preferred method"))
153
-            params <- setParam(params, newArg, oldArgs[[arg]])
154
-            oldArgs[[arg]] <- NULL
155
-        }            
156
-        return(params)
157
-    }
158
-
159
-    object <- helper("nFactor", object, "nPatterns")
160
-    object <- helper("nIter", object, "nIterations")
161
-    object <- helper("nEquil", object, "nIterations")
162
-    object <- helper("nSample", object, "nIterations")
163
-    object <- helper("nOutR", object, "outputFrequency")
164
-    object <- helper("nOutput", object, "outputFrequency")
165
-    object <- helper("maxGibbmassA", object, "maxGibbsMassA")
166
-    object <- helper("max_gibbmass_paraA", object, "maxGibbsMassA")
167
-    object <- helper("maxGibbmassP", object, "maxGibbsMassP")
168
-    object <- helper("max_gibbmass_paraP", object, "maxGibbsMassP")
169
-    object <- helper("singleCellRNASeq", object, "singleCell")
170
-
171
-    if (!is.null(oldArgs$nSnapshots) | !is.null(oldArgs$sampleSnapshots) | !is.null(oldArgs$numSnapshots))
172
-    {
173
-        warning("snapshots not currently supported in release build")
174
-        oldArgs$nSnapshots <- NULL
175
-        oldArgs$sampleSnapshots <- NULL
176
-        oldArgs$numSnapshots <- NULL
177
-    }
178
-    if (!is.null(oldArgs$fixedPatterns))
179
-        stop("pass fixed matrix in with 'fixedMatrix' argument")
180
-    if (!is.null(oldArgs$S))
181
-        stop("pass uncertainty matrix in with 'uncertainty', not 'S'")
182
-
183
-    return(object)
184
-})
185
-
186
-#' @rdname parseDirectParams-methods
187
-#' @aliases parseDirectParams
188
-setMethod("parseDirectParams", signature(object="CogapsParams"),
189
-function(object, args)
190
-{
191
-    for (s in slotNames(object))
192
-    {
193
-        if (!is.null(args[[s]]))
194
-        {
195
-            object <- setParam(object, s, args[[s]])
196
-        }
197
-    }
198
-    return(object)
199
-})
200
-
201
-setMethod("show", signature("CogapsParams"),
202
-function(object)
203
-{
204
-    cat("An Object of class \"CogapsParams\"\n")
205
-    cat("nPatterns          ", object@nPatterns, "\n")
206
-    cat("nIterations        ", object@nIterations, "\n")
207
-    cat("outputFrequency    ", object@outputFrequency, "\n")
208
-    cat("nCores             ", object@nCores, "\n")
209
-    cat("singleCell         ", object@singleCell, "\n")
210
-    cat("seed               ", object@seed, "\n")
211
-    cat("messages           ", object@messages, "\n")
212
-    cat("checkpointInterval ", object@checkpointInterval, "\n")
213
-    cat("checkpointOutFile  ", object@checkpointOutFile, "\n")
214
-})
... ...
@@ -3,14 +3,10 @@
3 3
 #'
4 4
 #' @description Contains all output from Cogaps run
5 5
 #' @importClassesFrom S4Vectors DataFrame Annotated character_OR_NULL
6
-setClass("CogapsResult", contains="Annotated", slots=c(
7
-    sampleFactors = "ANY",      # Pmean transpose
8
-    featureLoadings = "ANY",    # Amean
9
-    featureStdDev = "ANY",      # Asd
10
-    sampleStdDev = "ANY",       # Psd
11
-    NAMES = "character_OR_NULL",
12
-    factorData = "DataFrame"),
13
-)
6
+setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c(
7
+    sampleStdDev = "ANY",   # Psd transpose
8
+    featureStdDev = "ANY",  # Asd
9
+))
14 10
 
15 11
 #' Constructor for CogapsResult
16 12
 #' @return initialized CogapsResult object
... ...
@@ -194,369 +190,21 @@ setGeneric("computeGeneGSProb", function(object, GStoGenes, numPerm=500,
194 190
 Pw=rep(1,ncol(Amean)), PwNull=FALSE)
195 191
     {standardGeneric("computeGeneGSProb")})
196 192
 
197
-################################## METHODS #####################################
198
-
199
-setMethod("show", signature("CogapsResult"),
200
-function(object)
201
-{
202
-    nFeatures <- nrow(object@featureLoadings)
203
-    nSamples <- nrow(object@sampleFactors)
204
-    nPatterns <- ncol(object@featureLoadings)
205
-
206
-    print(paste("CogapsResult object with", nFeatures, "features and", nSamples,
207
-        "samples"))
208
-    print(paste(nPatterns, "patterns were learned"))
209
-})
210
-
193
+#' compute pattern markers statistic
211 194
 #' @export
212
-#' @importFrom graphics plot
213
-plot.CogapsResult <- function(object, ...)
214
-{
215
-    nSamples <- nrow(object@sampleFactors)
216
-    nFactors <- ncol(object@sampleFactors)
217
-    colors <- rainbow(nFactors)
218
-    xlimits <- c(0, nSamples + 1)
219
-    ylimits <- c(0, max(object@sampleFactors) * 1.1)
220
-
221
-    plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude")
222
-
223
-    for (i in 1:nFactors)
224
-    {
225
-        lines(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i])
226
-        points(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i], pch=i)
227
-    }
228
-
229
-    legend("top", paste("Pattern", 1:nFactors, sep = ""), pch = 1:nFactors,
230
-        lty=1, cex=0.8, col=colors, bty="y", ncol=5)
231
-}
232
-
233
-#' @rdname getMeanChiSq-methods
234
-#' @aliases getMeanChiSq
235
-setMethod("getMeanChiSq", signature(object="CogapsResult"),
236
-function(object)
237
-{
238
-    object@metadata$meanChiSq
239
-})
240
-
241
-#' @rdname calcZ-methods
242
-#' @aliases calcZ
243
-setMethod("calcZ", signature(object="CogapsResult"),
244
-function(object, which)
245
-{
246
-    if (which == "feature")
247
-    {
248
-        object@featureLoadings / object@featureStdDev
249
-    }
250
-    else if (which == "sample")
251
-    {
252
-        object@sampleFactors / object@sampleStdDev
253
-    }
254
-    else
255
-    {
256
-        stop("\"which\" must be either \"feature\" or \"sample\"")
257
-    }
258
-})
259
-
260
-#' @rdname reconstructGene-methods
261
-#' @aliases reconstructGene
262
-setMethod("reconstructGene", signature(object="CogapsResult"),
263
-function(object, genes)
264
-{
265
-    D <- object@featureLoadings %*% t(object@sampleFactors)
266
-    if (!is.null(genes))
267
-    {
268
-        D <- D[genes,]
269
-    }
270
-    return(D)
271
-})
272
-
273
-#' @rdname binaryA-methods
274
-#' @aliases binaryA
275
-setMethod("binaryA", signature(object="CogapsResult"),
276
-function(object, threshold)
277
-{
278
-    binA <- ifelse(calcZ(object) > threshold, 1, 0)
279
-
280
-    heatmap.2(binA, Rowv = FALSE, Colv = FALSE, dendrogram="none",
281
-        scale="none", col = brewer.pal(3,"Blues"), trace="none",
282
-        density.info="none", cexCol=1.3, srtCol=45,
283
-        lmat=rbind(c(0, 3), c(2,1), c(0,4) ),
284
-        lwid=c(1,10), lhei=c(1, 4, 1.2 ),
285
-        main="Heatmap of Standardized Feature Matrix")
286
-    mtext(paste("(Threshold = ", threshold, ")"))
287
-})
288
-
289
-#' @rdname plotResiduals-methods
290
-#' @aliases plotResiduals
291
-setMethod("plotResiduals", signature(object="CogapsResult"),
292
-function(object, data, uncertainty)
293
-{
294
-    data <- as.matrix(data)
295
-    if (is.null(uncertainty))
296
-        uncertainty <- pmax(0.1 * data, 0.1)
297
-    uncertainty <- as.matrix(uncertainty)
298
-
299
-    M <- reconstructGene(object)
300
-    resid <- (data - M) / uncertainty
301
-    
302
-    scaledRdYlBu <- colorRampPalette(brewer.pal(9,"RdYlBu"))(100)
303
-    heatmap.2(resid, Rowv = FALSE, Colv = FALSE, dendrogram="none",
304
-        scale="none", col = scaledRdYlBu, trace="none", density.info="none",
305
-        cexCol=1.33, srtCol=45, lmat=rbind(c(0, 3),c(2,1),c(0,4) ),
306
-        lwid=c(1,10), lhei=c(1, 4, 1.2 ), main="Heatmap of Residuals")
307
-})
308
-
309
-setMethod("patternMarkers", signature(object="CogapsResult"),
310
-function(object, threshold="all", lp=NA, full=FALSE)
311
-{
312
-    nGenes <- nrow(object@featureLoadings)
313
-    nPatterns <- ncol(object@featureLoadings)
314
-
315
-    # find the A with the highest magnitude
316
-    Arowmax <- t(apply(object@featureLoadings, 1, function(x) x/max(x)))
317
-
318
-    if (!is.na(lp))
319
-    {
320
-        if (length(lp) != nPatterns)
321
-        {
322
-            warning("lp length must equal the number of columns of the Amatrix")
323
-        }
324
-        sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))
325
-        ssranks <-rank(sstat)
326
-        ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE))
327
-    }
328
-    else
329
-    {
330
-        # determine which genes are most associated with each pattern
331
-
332
-        sstat <- matrix(NA, nrow=nGenes, ncol=nPatterns,
333
-            dimnames=dimnames(object@featureLoadings))
334
-        ssranks <- matrix(NA, nrow=nGenes, ncol=nPatterns,
335
-            dimnames=dimnames(object@featureLoadings))
336
-        ssgenes <- matrix(NA, nrow=nGenes, ncol=nPatterns, dimnames=NULL)
337
-        for (i in 1:nP)
338
-        {
339
-            lp <- rep(0,dim(Amatrix)[2])
340
-            lp[i] <- 1
341
-            sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))))
342
-            ssranks[,i]<-rank(sstat[,i])
343
-            ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE))
344
-        }
345
-        if (threshold=="cut")
346
-        {
347
-            geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min))))
348
-            ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x])
349
-        }
350
-        else if (threshold=="all")
351
-        {
352
-            pIndx<-unlist(apply(sstat,1,which.min))
353
-            gBYp <- list()
354
-            for(i in sort(unique(pIndx)))
355
-            {
356
-                gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1])
357
-            }
358
-            ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x)
359
-                ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x])
360
-        }
361
-        else
362
-        {
363
-            stop("Threshold arguement not viable option")
364
-        }
365
-    }
366
-    if (full)
367
-    {
368
-        return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks,
369
-            "PatternMarkerScores"=sstat))
370
-    }
371
-    else
372
-    {
373
-        return("PatternMarkers"=ssgenes.th)
374
-    }
375
-})
376
-
377
-plotPatternMarkers
378
-
379
-#' @rdname calcCoGAPSStat-methods
380
-#' @aliases calcCoGAPSStat
381
-setMethod("calcCoGAPSStat", signature(object="CogapsResult"),
382
-function(object, GStoGenes, numPerm)
383
-{
384
-    # test for std dev of zero, possible mostly in simple simulations
385
-    if (sum(object@featureStdDev==0) > 0)
386
-    {
387
-        object@featureStdDev[object@featureStdDev==0] <- 1e-6
388
-    }
389
-
390
-    # calculate Z scores
391
-    zMatrix <- calcZ(object)
392
-
393
-    # check input arguments
394
-    if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets"))
395
-    {
396
-        stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.")
397
-    }
398
-
399
-    if (is(GStoGenes, "GSA.genesets"))
400
-    {
401
-        names(GStoGenes$genesets) <- GStoGenes$geneset.names
402
-        GStoGenes <- GStoGenes$genesets
403
-    }
404
-
405
-    if (is(GStoGenes, "list"))
406
-    {
407
-        GStoGenesList <- GStoGenes
408
-    }
409
-    else
410
-    {
411
-        GStoGenesList <- list()
412
-        for (i in 1:dim(GStoGenes)[2])
413
-        {
414
-            GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i]))
415
-        }
416
-    }
417
-
418
-    # get dimensions
419
-    numGS   <- length(names(GStoGenesList))
420
-    numPatt <- dim(zMatrix)[2]
421
-    numG    <- dim(zMatrix)[1]+0.9999
422
-
423
-    # initialize matrices
424
-    statsUp   <- matrix(ncol = numGS, nrow = numPatt)
425
-    statsDown <- matrix(ncol = numGS, nrow = numPatt)
426
-    actEst    <- matrix(ncol = numGS, nrow = numPatt)
427
-    results   <- list()
428
-    zPerm     <- matrix(ncol=numPerm,nrow=numPatt)
429
-
430
-    # do permutation test
431
-    for (gs in 1:numGS)
432
-    {
433
-        genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
434
-        index <- rownames(zMatrix) %in% genes
435
-        zValues <- zMatrix[index,1]
436
-        numGenes <- length(zValues)
437
-        label <- as.character(numGenes)
438
-
439
-        if (!any(names(results)==label))
440
-        {
441
-            for (p in 1:numPatt)
442
-            {
443
-                for (j in 1:numPerm)
444
-                {
445
-                    temp <- floor(runif(numGenes,1,numG))
446
-                    temp2 <- zMatrix[temp,p]
447
-                    zPerm[p,j] <- mean(temp2)
448
-                }
449
-            }
450
-            results[[label]] <- zPerm
451
-        }
452
-    }
453
-
454
-    # get p-value
455
-    for (p in 1:numPatt)
456
-    {
457
-        for (gs in 1:numGS)
458
-        {
459
-            genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
460
-            index <- rownames(zMatrix) %in% genes
461
-            zValues <- zMatrix[index,p]
462
-            zScore <- mean(zValues)
463
-
464
-            numGenes <- length(zValues)
465
-            label <- as.character(numGenes)
466
-
467
-            permzValues <- results[[label]][p,]
468
-            ordering <- order(permzValues)
469
-            permzValues <- permzValues[ordering]
470
-
471
-            statistic <- sum(zScore > permzValues)
472
-            statUpReg <- 1 - statistic/length(permzValues)
473
-            statsUp[p,gs] <- max(statUpReg, 1/numPerm)
474
-
475
-            statistic <- sum(zScore < permzValues)
476
-            statDownReg <- 1 - statistic/length(permzValues)
477
-            statsDown[p,gs] <- max(statDownReg,1/numPerm)
478
-
479
-            activity <- 1 - 2*max(statUpReg, 1/numPerm)
480
-            actEst[p,gs] <- activity
481
-        }
482
-    }
483
-
484
-    # format output
485
-    colnames(statsUp) <- names(GStoGenesList)
486
-    colnames(statsDown) <- names(GStoGenesList)
487
-    colnames(actEst) <- names(GStoGenesList)
488
-
489
-    rownames(statsUp) <- colnames(zMatrix)
490
-    rownames(statsDown) <- colnames(zMatrix)
491
-    rownames(actEst) <- colnames(zMatrix)
492
-
493
-    results[['GSUpreg']] <- statsUp
494
-    results[['GSDownreg']] <- statsDown
495
-    results[['GSActEst']] <- actEst
496
-
497
-    return(results)
498
-})
499
-
500
-#' @rdname calcGeneGSStat-methods
501
-#' @aliases calcGeneGSStat
502
-setMethod("calcGeneGSStat", signature(object="CogapsResult"),
503
-function(object, GStoGenes, numPerm, Pw, nullGenes)
504
-{
505
-    gsStat <- calcCoGAPSStat(object, data.frame(GSGenes), numPerm=numPerm)
506
-    gsStat <- gsStat$GSUpreg
507
-    gsStat <- -log(gsStat)
508
-
509
-    if (!all(is.na(Pw)))
510
-    {
511
-        if (length(Pw) != length(gsStat))
512
-        {
513
-            stop('Invalid weighting')
514
-        }
515
-        gsStat <- gsStat*Pw
516
-    }
517
-
518
-    if (nullGenes)
519
-    {
520
-        ZD <- object@featureLoadings[setdiff(row.names(object@featureLoadings), GSGenes),] /
521
-            object@featureStdDev[setdiff(row.names(object@featureLoadings), GSGenes),]
522
-    }
523
-    else
524
-    {
525
-        ZD <- object@featureLoadings[GSGenes,]/object@featureStdDev[GSGenes,]
526
-    }
527
-    outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
528
-    outStats <- outStats / apply(ZD,1,sum)
529
-    outStats[which(apply(ZD,1,sum) < 1e-6)] <- 0
530
-
531
-    if (sum(gsStat) < 1e-6)
532
-    {
533
-        return(0)
534
-    }
535
-    return(outStats)
536
-})
537
-
538
-#' @rdname computeGeneGSProb-methods
539
-#' @aliases computeGeneGSProb
540
-setMethod("computeGeneGSProb", signature(object="CogapsResult"),
541
-function(object, GStoGenes, numPerm, Pw, PwNull)
542
-{
543
-    geneGSStat <- calcGeneGSStat(object, Pw=Pw, GSGenes=GSGenes,
544
-        numPerm=numPerm)
545
-
546
-    if (PwNull)
547
-    {
548
-        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
549
-            Pw=Pw, nullGenes=TRUE)
550
-    }
551
-    else
552
-    {
553
-        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
554
-            nullGenes=TRUE)
555
-    }
556
-
557
-    finalStats <- sapply(GSGenes, function(x)
558
-        length(which(permGSStat > geneGSStat[x])) / length(permGSStat))
559
-
560
-    return(finalStats)
561
-})
195
+#' @docType methods
196
+#' @rdname patternMarkers-methods
197
+#'
198
+#' @description calculate the most associated pattern for each gene
199
+#' @param object an object of type CogapsResult
200
+#' @param threshold the type of threshold to be used. The default "all" will
201
+#' distribute genes into pattern with the lowest ranking. The "cut" thresholds
202
+#' by the first gene to have a lower ranking, i.e. better fit to, a pattern.
203
+#' @param lp a vector of weights for each pattern to be used for finding
204
+#' markers. If NA markers for each pattern of the A matrix will be used.
205
+#' @return By default a non-overlapping list of genes associated with each
206
+#' \code{lp}. If \code{full=TRUE} a data.frame of genes rankings with a column
207
+#' for each \code{lp} will also be returned.
208
+setGeneric("patternMarkers", function(object, threshold="all", lp=NA, full=FALSE)
209
+    {standardGeneric("patternMarkers")})
562 210
 
563 211
deleted file mode 100644
... ...
@@ -1,39 +0,0 @@
1
-#' Create Gene Sets for GWCoGAPS
2
-#'
3
-#' @details factors whole genome data into randomly generated sets for indexing
4
-#'
5
-#' @param D data matrix
6
-#' @param S uncertainty matrix
7
-#' @param nSets number of sets to partition the data into
8
-#' @param simulationName name used to identify files created by this simulation
9
-#' @return simulationName used to identify saved files
10
-#' @examples
11
-#' data(SimpSim)
12
-#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
13
-#' @export
14
-createGWCoGAPSSets <- function(D, S, nSets, simulationName)
15
-{
16
-    # check gene names
17
-    if (length(unique(rownames(D))) != length(rownames(D)))
18
-    {
19
-        warning("Gene identifiers not unique!")
20
-    }
21
-
22
-    # partition data by sampling random sets of genes
23
-    genes <- 1:nrow(D)
24
-    setSize <- floor(length(genes) / nSets)
25
-    for (set in 1:nSets)
26
-    {
27
-        # sample gene names
28
-        sampleSize <- ifelse(set == nSets, length(genes), setSize)
29
-        geneSet <- sample(genes, sampleSize, replace=FALSE)
30
-        genes <- genes[!(genes %in% geneSet)]
31
-
32
-        # partition data
33
-        sampleD <- D[geneSet,]
34
-        sampleS <- S[geneSet,]
35
-        save(sampleD, sampleS, file=paste(simulationName, "_partition_", set,
36
-            ".RData", sep=""));
37
-    }
38
-    return(simulationName)
39
-}
40 0
deleted file mode 100644
... ...
@@ -1,59 +0,0 @@
1
-#' Create Gene Sets for scCoGAPS
2
-#' @export
3
-#'
4
-#' @description factors whole genome data into randomly generated sets for indexing
5
-#' @param D data matrix
6
-#' @param nSets number of sets to partition the data into
7
-#' @param simulationName name used to identify files created by this simulation
8
-#' @param samplingRatio vector of relative quantities to use for sampling celltypes
9
-#' @param anotionObj vector of same length as number of columns of D 
10
-#' @param path character string indicating were to save resulting data objects. default is current working dir
11
-#' @return simulationName used to identify saved files
12
-#' @examples
13
-#' data(SimpSim)
14
-#' createscCoGAPSSets(SimpSim.D, nSets=2, simulationName="example")
15
-createscCoGAPSSets <- function(D, nSets, simulationName, samplingRatio=NULL,
16
-path="", anotionObj=NULL)
17
-{
18
-    # check gene names
19
-    if (length(unique(colnames(D))) != length(colnames(D)))
20
-    {
21
-        warning("Cell identifiers not unique!")
22
-    }
23
-
24
-    # partition data by sampling random sets of cells
25
-    cells <- 1:ncol(D)
26
-    setSize <- floor(length(cells) / nSets)
27
-    for (set in 1:nSets)
28
-    {
29
-        if (is.null(samplingRatio))
30
-        {
31
-            # sample cell names
32
-            sampleSize <- ifelse(set == nSets, length(cells), setSize)
33
-            cellset <- sample(cells, sampleSize, replace=FALSE)
34
-            cells <- cells[!(cells %in% cellset)]
35
-        }
36
-        else
37
-        {
38
-            if (length(unique(anotionObj)) != length(samplingRatio))
39
-            {
40
-                warning("Not all celltypes will be sampled from.")
41
-            }
42
-            ct.indx <- lapply(unique(anotionObj), function(x) which(anotionObj == x))
43
-            names(ct.indx) <- unique(anotionObj)
44
-            cellset <- lapply(unique(anotionObj), function(x)
45
-                sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE))
46
-            cellset <- unlist(cellset)
47
-        }
48
-
49
-        # partition data
50
-        sampleD <- D[,cellset]
51
-        #log transform 
52
-        sampleD <- log2(sampleD+1)
53
-        # generate S
54
-        sampleS <- pmax(.1*sampleD, .1)
55
-        save(sampleD, sampleS, file=paste0(path,simulationName, "_partition_",
56
-            set,".RData"));
57
-    }
58
-    return(simulationName)
59
-}
60 0
deleted file mode 100644
... ...
@@ -1,27 +0,0 @@
1
-#' Generate Seeds for Multiple Concurrent Runs
2
-#'
3
-#' @param chains number of seeds to generate (number of chains to run)
4
-#' @param seed positive values are kept, negative values will be overwritten
5
-#' by a seed generated from the current time
6
-#' @return vector of randomly generated seeds
7
-generateSeeds <- function(chains=2, seed=-1)
8
-{
9
-    if (chains < 2 || (as.integer(chains) != chains))
10
-    {
11
-        stop("chains must be >= 2 and an integer")
12
-    }
13
-
14
-    if (seed < 1)
15
-    {
16
-        secs <- as.numeric(difftime(Sys.time(), paste(Sys.Date(), "00:00"),
17
-            units="secs"))
18
-        secs <- round(secs)
19
-        seeds <- seq_len(chains) * secs
20
-        return(seeds)
21
-    }
22
-    else
23
-    {
24
-        seeds <- seq_len(chains) * seed
25
-        return(seeds)
26
-    }
27
-}
28 0
new file mode 100644
... ...
@@ -0,0 +1,93 @@
1
+#' @rdname setParam-methods
2
+#' @aliases setParam
3
+setMethod("setParam", signature(object="CogapsParams"),
4
+function(object, whichParam, value)
5
+{
6
+    slot(object, whichParam) <- value
7
+    validObject(object)
8
+    return(object)
9
+})
10
+
11
+#' @rdname getParam-methods
12
+#' @aliases getParam
13
+setMethod("getParam", signature(object="CogapsParams"),
14
+function(object, whichParam)
15
+{
16
+    slot(object, whichParam)
17
+})
18
+
19
+#' @rdname parseOldParams-methods
20
+#' @aliases parseOldParams
21
+setMethod("parseOldParams", signature(object="CogapsParams"),
22
+function(object, oldArgs)
23
+{
24
+    helper <- function(arg, params, newArg)
25
+    {
26
+        if (!is.null(oldArgs[[arg]]))
27
+        {
28
+            warning(paste("parameter", arg, "is deprecated, it will still",
29
+                "work, but setting", newArg, "in the params object is the",
30
+                "preferred method"))
31
+            params <- setParam(params, newArg, oldArgs[[arg]])
32
+            oldArgs[[arg]] <- NULL
33
+        }            
34
+        return(params)
35
+    }
36
+
37
+    object <- helper("nFactor", object, "nPatterns")
38
+    object <- helper("nIter", object, "nIterations")
39
+    object <- helper("nEquil", object, "nIterations")
40
+    object <- helper("nSample", object, "nIterations")
41
+    object <- helper("nOutR", object, "outputFrequency")
42
+    object <- helper("nOutput", object, "outputFrequency")
43
+    object <- helper("maxGibbmassA", object, "maxGibbsMassA")
44
+    object <- helper("max_gibbmass_paraA", object, "maxGibbsMassA")
45
+    object <- helper("maxGibbmassP", object, "maxGibbsMassP")
46
+    object <- helper("max_gibbmass_paraP", object, "maxGibbsMassP")
47
+    object <- helper("singleCellRNASeq", object, "singleCell")
48
+
49
+    if (!is.null(oldArgs$nSnapshots) | !is.null(oldArgs$sampleSnapshots) | !is.null(oldArgs$numSnapshots))
50
+    {
51
+        warning("snapshots not currently supported in release build")
52
+        oldArgs$nSnapshots <- NULL
53
+        oldArgs$sampleSnapshots <- NULL
54
+        oldArgs$numSnapshots <- NULL
55
+    }
56
+    if (!is.null(oldArgs$fixedPatterns))
57
+        stop("pass fixed matrix in with 'fixedMatrix' argument")
58
+    if (!is.null(oldArgs$S))
59
+        stop("pass uncertainty matrix in with 'uncertainty', not 'S'")
60
+
61
+    return(object)
62
+})
63
+
64
+#' @rdname parseDirectParams-methods
65
+#' @aliases parseDirectParams
66
+setMethod("parseDirectParams", signature(object="CogapsParams"),
67
+function(object, args)
68
+{
69
+    for (s in slotNames(object))
70
+    {
71
+        if (!is.null(args[[s]]))
72
+        {
73
+            object <- setParam(object, s, args[[s]])
74
+        }
75
+    }
76
+    return(object)
77
+})
78
+
79
+setMethod("show", signature("CogapsParams"),
80
+function(object)
81
+{
82
+    cat("An Object of class \"CogapsParams\"\n")
83
+    cat("nPatterns          ", object@nPatterns, "\n")
84
+    cat("nIterations        ", object@nIterations, "\n")
85
+    cat("outputFrequency    ", object@outputFrequency, "\n")
86
+    cat("nCores             ", object@nCores, "\n")
87
+    cat("singleCell         ", object@singleCell, "\n")
88
+    cat("seed               ", object@seed, "\n")
89
+    cat("messages           ", object@messages, "\n")
90
+    cat("checkpointInterval ", object@checkpointInterval, "\n")
91
+    cat("checkpointOutFile  ", object@checkpointOutFile, "\n")
92
+})
93
+
0 94
new file mode 100644
... ...
@@ -0,0 +1,407 @@
1
+setMethod("show", signature("CogapsResult"),
2
+function(object)
3
+{
4
+    nFeatures <- nrow(object@featureLoadings)
5
+    nSamples <- nrow(object@sampleFactors)
6
+    nPatterns <- ncol(object@featureLoadings)
7
+
8
+    print(paste("CogapsResult object with", nFeatures, "features and", nSamples,
9
+        "samples"))
10
+    print(paste(nPatterns, "patterns were learned"))
11
+})
12
+
13
+#' @export
14
+#' @importFrom graphics plot
15
+plot.CogapsResult <- function(x, ...)
16
+{
17
+    nSamples <- nrow(object@sampleFactors)
18
+    nFactors <- ncol(object@sampleFactors)
19
+    colors <- rainbow(nFactors)
20
+    xlimits <- c(0, nSamples + 1)
21
+    ylimits <- c(0, max(object@sampleFactors) * 1.1)
22
+
23
+    plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude")
24
+
25
+    for (i in 1:nFactors)
26
+    {
27
+        lines(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i])
28
+        points(x=1:nSamples, y=object@sampleFactors[,i], col=colors[i], pch=i)
29
+    }
30
+
31
+    legend("top", paste("Pattern", 1:nFactors, sep = ""), pch = 1:nFactors,
32
+        lty=1, cex=0.8, col=colors, bty="y", ncol=5)
33
+}
34
+
35
+#' @rdname getMeanChiSq-methods
36
+#' @aliases getMeanChiSq
37
+setMethod("getMeanChiSq", signature(object="CogapsResult"),
38
+function(object)
39
+{
40
+    object@metadata$meanChiSq
41
+})
42
+
43
+#' @rdname calcZ-methods
44
+#' @aliases calcZ
45
+setMethod("calcZ", signature(object="CogapsResult"),
46
+function(object, which)
47
+{
48
+    if (which == "feature")
49
+    {
50
+        object@featureLoadings / object@featureStdDev
51
+    }
52
+    else if (which == "sample")
53
+    {
54
+        object@sampleFactors / object@sampleStdDev
55
+    }
56
+    else
57
+    {
58
+        stop("\"which\" must be either \"feature\" or \"sample\"")
59
+    }
60
+})
61
+
62
+#' @rdname reconstructGene-methods
63
+#' @aliases reconstructGene
64
+setMethod("reconstructGene", signature(object="CogapsResult"),
65
+function(object, genes)
66
+{
67
+    D <- object@featureLoadings %*% t(object@sampleFactors)
68
+    if (!is.null(genes))
69
+    {
70
+        D <- D[genes,]
71
+    }
72
+    return(D)
73
+})
74
+
75
+#' @rdname binaryA-methods
76
+#' @aliases binaryA
77
+#' @importFrom gplots heatmap.2
78
+setMethod("binaryA", signature(object="CogapsResult"),
79
+function(object, threshold)
80
+{
81
+    binA <- ifelse(calcZ(object) > threshold, 1, 0)
82
+
83
+    gplots::heatmap.2(binA, Rowv = FALSE, Colv = FALSE, dendrogram="none",
84
+        scale="none", col = brewer.pal(3,"Blues"), trace="none",
85
+        density.info="none", cexCol=1.3, srtCol=45,
86
+        lmat=rbind(c(0, 3), c(2,1), c(0,4) ),
87
+        lwid=c(1,10), lhei=c(1, 4, 1.2 ),
88
+        main="Heatmap of Standardized Feature Matrix")
89
+    mtext(paste("(Threshold = ", threshold, ")"))
90
+})
91
+
92
+#' @rdname plotResiduals-methods
93
+#' @aliases plotResiduals
94
+#' @importFrom gplots heatmap.2
95
+setMethod("plotResiduals", signature(object="CogapsResult"),
96
+function(object, data, uncertainty)
97
+{
98
+    data <- as.matrix(data)
99
+    if (is.null(uncertainty))
100
+        uncertainty <- pmax(0.1 * data, 0.1)
101
+    uncertainty <- as.matrix(uncertainty)
102
+
103
+    M <- reconstructGene(object)
104
+    resid <- (data - M) / uncertainty
105
+    
106
+    scaledRdYlBu <- colorRampPalette(brewer.pal(9,"RdYlBu"))(100)
107
+    gplots::heatmap.2(resid, Rowv = FALSE, Colv = FALSE, dendrogram="none",
108
+        scale="none", col = scaledRdYlBu, trace="none", density.info="none",
109
+        cexCol=1.33, srtCol=45, lmat=rbind(c(0, 3),c(2,1),c(0,4) ),
110
+        lwid=c(1,10), lhei=c(1, 4, 1.2 ), main="Heatmap of Residuals")
111
+})
112
+
113
+#' @rdname patternMarkers-methods
114
+#' @aliases patternMarkers
115
+setMethod("patternMarkers", signature(object="CogapsResult"),
116
+function(object, threshold, lp)
117
+{
118
+    nGenes <- nrow(object@featureLoadings)
119
+    nPatterns <- ncol(object@featureLoadings)
120
+
121
+    # find the A with the highest magnitude
122
+    Arowmax <- t(apply(object@featureLoadings, 1, function(x) x/max(x)))
123
+
124
+    if (!is.na(lp))
125
+    {
126
+        if (length(lp) != nPatterns)
127
+        {
128
+            warning("lp length must equal the number of columns of the Amatrix")
129
+        }
130
+        sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))
131
+        ssranks <-rank(sstat)
132
+        ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE))
133
+    }
134
+    else
135
+    {
136
+        # determine which genes are most associated with each pattern
137
+        sstat <- matrix(NA, nrow=nGenes, ncol=nPatterns,
138
+            dimnames=dimnames(object@featureLoadings))
139
+        ssranks <- matrix(NA, nrow=nGenes, ncol=nPatterns,
140
+            dimnames=dimnames(object@featureLoadings))
141
+        ssgenes <- matrix(NA, nrow=nGenes, ncol=nPatterns, dimnames=NULL)
142
+        for (i in 1:nP)
143
+        {
144
+            lp <- rep(0,dim(Amatrix)[2])
145
+            lp[i] <- 1
146
+            sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))))
147
+            ssranks[,i]<-rank(sstat[,i])
148
+            ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE))
149
+        }
150
+        if (threshold=="cut")
151
+        {
152
+            geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min))))
153
+            ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x])
154
+        }
155
+        else if (threshold=="all")
156
+        {
157
+            pIndx<-unlist(apply(sstat,1,which.min))
158
+            gBYp <- list()
159
+            for(i in sort(unique(pIndx)))
160
+            {
161
+                gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1])
162
+            }
163
+            ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x)
164
+                ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x])
165
+        }
166
+        else
167
+        {
168
+            stop("Threshold arguement not viable option")
169
+        }
170
+    }
171
+    return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks,
172
+        "PatternMarkerScores"=sstat))
173
+})
174
+
175
+#' heatmap of original data clustered by pattern markers statistic
176
+#' @export
177
+#' @docType methods
178
+#' @rdname plotPatternMarkers-methods
179
+#'
180
+#' @param object an object of type CogapsResult
181
+#' @param data the original data as a matrix
182
+#' @param patternPalette a vector indicating what color should be used
183
+#' for each pattern
184
+#' @param sampleNames names of the samples to use for labeling 
185
+#' @param samplePalette  a vector indicating what color should be used
186
+#' for each sample
187
+#' @param colDenogram logical indicating whether to display sample denogram
188
+#' @param heatmapCol pallelet giving color scheme for heatmap
189
+#' @param scale character indicating if the values should be centered and scaled
190
+#' in either the row direction or the column direction, or none. The
191
+#' default is "row". 
192
+#' @param ... additional graphical parameters to be passed to \code{heatmap.2}
193
+#' @return heatmap of the \code{data} values for the \code{patternMarkers}
194
+#' @seealso  \code{\link{heatmap.2}}
195
+#' @importFrom gplots bluered
196
+#' @importFrom gplots heatmap.2
197
+#' @importFrom stats hclust
198
+plotPatternMarkers <- function(object, data, patternPalette, sampleNames,
199
+samplePalette=NULL, heatmapCol=bluered, colDenogram=TRUE, scale="row", ...)
200
+{
201
+    if (is.null(samplePalette))
202
+        samplePalette <- rep("black", ncol(data))
203
+
204
+    ### coloring of genes
205
+    patternCols <- rep(NA, length(unlist(patternMarkers)))
206
+    names(patternCols) <- unlist(patternMarkers)
207
+    for (i in 1:length(patternMarkers))
208
+    {
209
+        patternCols[patternMarkers[[i]]] <- patternPalette[i]
210
+    }
211
+
212
+    gplots::heatmap.2(as.matrix(data[unlist(patternMarkers),],...),
213
+        symkey=FALSE,
214
+        symm=FALSE, 
215
+        scale=scale,
216
+        col=heatmapCol,
217
+        distfun=function(x) as.dist((1-cor(t(x)))/2),
218
+        hclustfun=function(x) stats::hclust(x,method="average"),
219
+        Rowv=FALSE,
220
+        Colv=colDenogram,
221
+        trace='none',
222
+        RowSideColors = as.character(patternCols[unlist(patternMarkers)]),
223
+        labCol= sampleNames,
224
+        cexCol=0.8,
225
+        ColSideColors=as.character(samplePalette),
226
+        rowsep=cumsum(sapply(patternMarkers,length))
227
+    )
228
+})
229
+
230
+#' @rdname calcCoGAPSStat-methods
231
+#' @aliases calcCoGAPSStat
232
+setMethod("calcCoGAPSStat", signature(object="CogapsResult"),
233
+function(object, GStoGenes, numPerm)
234
+{
235
+    # test for std dev of zero, possible mostly in simple simulations
236
+    if (sum(object@featureStdDev==0) > 0)
237
+    {
238
+        object@featureStdDev[object@featureStdDev==0] <- 1e-6
239
+    }
240
+
241
+    # calculate Z scores
242
+    zMatrix <- calcZ(object)
243
+
244
+    # check input arguments
245
+    if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets"))
246
+    {
247
+        stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.")
248
+    }
249
+
250
+    if (is(GStoGenes, "GSA.genesets"))
251
+    {
252
+        names(GStoGenes$genesets) <- GStoGenes$geneset.names
253
+        GStoGenes <- GStoGenes$genesets
254
+    }
255
+
256
+    if (is(GStoGenes, "list"))
257
+    {
258
+        GStoGenesList <- GStoGenes
259
+    }
260
+    else
261
+    {
262
+        GStoGenesList <- list()
263
+        for (i in 1:dim(GStoGenes)[2])
264
+        {
265
+            GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i]))
266
+        }
267
+    }
268
+
269
+    # get dimensions
270
+    numGS   <- length(names(GStoGenesList))
271
+    numPatt <- dim(zMatrix)[2]
272
+    numG    <- dim(zMatrix)[1]+0.9999
273
+
274
+    # initialize matrices
275
+    statsUp   <- matrix(ncol = numGS, nrow = numPatt)
276
+    statsDown <- matrix(ncol = numGS, nrow = numPatt)
277
+    actEst    <- matrix(ncol = numGS, nrow = numPatt)
278
+    results   <- list()
279
+    zPerm     <- matrix(ncol=numPerm,nrow=numPatt)
280
+
281
+    # do permutation test
282
+    for (gs in 1:numGS)
283
+    {
284
+        genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
285
+        index <- rownames(zMatrix) %in% genes
286
+        zValues <- zMatrix[index,1]
287
+        numGenes <- length(zValues)
288
+        label <- as.character(numGenes)
289
+
290
+        if (!any(names(results)==label))
291
+        {
292
+            for (p in 1:numPatt)
293
+            {
294
+                for (j in 1:numPerm)
295
+                {
296
+                    temp <- floor(runif(numGenes,1,numG))
297
+                    temp2 <- zMatrix[temp,p]
298
+                    zPerm[p,j] <- mean(temp2)
299
+                }
300
+            }
301
+            results[[label]] <- zPerm
302
+        }
303
+    }
304
+
305
+    # get p-value
306
+    for (p in 1:numPatt)
307
+    {
308
+        for (gs in 1:numGS)
309
+        {
310
+            genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
311
+            index <- rownames(zMatrix) %in% genes
312
+            zValues <- zMatrix[index,p]
313
+            zScore <- mean(zValues)
314
+
315
+            numGenes <- length(zValues)
316
+            label <- as.character(numGenes)
317
+
318
+            permzValues <- results[[label]][p,]
319
+            ordering <- order(permzValues)
320
+            permzValues <- permzValues[ordering]
321
+
322
+            statUpReg <- 1 - sum(zScore > permzValues) / length(permzValues)
323
+            statsUp[p,gs] <- max(statUpReg, 1 / numPerm)
324
+
325
+            statDownReg <- 1 - sum(zScore < permzValues) / length(permzValues)
326
+            statsDown[p,gs] <- max(statDownReg, 1 / numPerm)
327
+
328
+            activity <- 1 - 2*max(statUpReg, 1/numPerm)
329
+            actEst[p,gs] <- activity
330
+        }
331
+    }
332
+
333
+    # format output
334
+    rownames(statsUp) <- rownames(statsDown) <- rownames(actEst) <- colnames(zMatrix)
335
+    colnames(statsUp) <- colnames(statsDown) <- colnames(actEst) <- names(GStoGenesList)
336
+
337
+    results[['GSUpreg']] <- statsUp
338
+    results[['GSDownreg']] <- statsDown
339
+    results[['GSActEst']] <- actEst
340
+
341
+    return(results)
342
+})
343
+
344
+#' @rdname calcGeneGSStat-methods
345
+#' @aliases calcGeneGSStat
346
+setMethod("calcGeneGSStat", signature(object="CogapsResult"),
347
+function(object, GStoGenes, numPerm, Pw, nullGenes)
348
+{
349
+    gsStat <- calcCoGAPSStat(object, data.frame(GSGenes), numPerm=numPerm)
350
+    gsStat <- gsStat$GSUpreg
351
+    gsStat <- -log(gsStat)
352
+
353
+    if (!all(is.na(Pw)))
354
+    {
355
+        if (length(Pw) != length(gsStat))
356
+        {
357
+            stop('Invalid weighting')
358
+        }
359
+        gsStat <- gsStat*Pw
360
+    }
361
+
362
+    if (nullGenes)
363
+    {
364
+        ZD <- object@featureLoadings[setdiff(row.names(object@featureLoadings), GSGenes),] /
365
+            object@featureStdDev[setdiff(row.names(object@featureLoadings), GSGenes),]
366
+    }
367
+    else
368
+    {
369
+        ZD <- object@featureLoadings[GSGenes,]/object@featureStdDev[GSGenes,]
370
+    }
371
+    outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
372
+    outStats <- outStats / apply(ZD,1,sum)
373
+    outStats[which(apply(ZD,1,sum) < 1e-6)] <- 0
374
+
375
+    if (sum(gsStat) < 1e-6)
376
+    {
377
+        return(0)
378
+    }
379
+    return(outStats)
380
+})
381
+
382
+#' @rdname computeGeneGSProb-methods
383
+#' @aliases computeGeneGSProb
384
+setMethod("computeGeneGSProb", signature(object="CogapsResult"),
385
+function(object, GStoGenes, numPerm, Pw, PwNull)
386
+{
387
+    geneGSStat <- calcGeneGSStat(object, Pw=Pw, GSGenes=GSGenes,
388
+        numPerm=numPerm)
389
+
390
+    if (PwNull)
391
+    {
392
+        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
393
+            Pw=Pw, nullGenes=TRUE)
394
+    }
395
+    else
396
+    {
397
+        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
398
+            nullGenes=TRUE)
399
+    }
400
+
401
+    finalStats <- sapply(GSGenes, function(x)
402
+        length(which(permGSStat > geneGSStat[x])) / length(permGSStat))
403
+
404
+    return(finalStats)
405
+})
406
+
407
+
0 408
deleted file mode 100644
... ...
@@ -1,86 +0,0 @@
1
-#' patternMarkers
2
-#'
3
-#' @param Amatrix A matrix of genes by weights resulting from CoGAPS or other NMF decomposition
4
-#' @param scaledPmatrix logical indicating whether the corresponding pattern matrix was fixed to have max 1 during decomposition
5
-#' @param Pmatrix the corresponding Pmatrix (patterns X samples) for the provided Amatrix (genes x patterns). This must be supplied if scaledPmatrix is FALSE.
6
-#' @param threshold # the type of threshold to be used. The default "all" will distribute genes into pattern with the lowest ranking. The "cut" thresholding by the first gene to have a lower ranking, i.e. better fit to, a pattern.
7
-#' @param lp a vector of weights for each pattern to be used for finding markers. If NA markers for each pattern of the A matrix will be used.
8
-#' @param full logical indicating whether to return the ranks of each gene for each pattern
9
-#' @return By default a non-overlapping list of genes associated with each \code{lp}. If \code{full=TRUE} a data.frame of
10
-#' genes rankings with a column for each \code{lp} will also be returned.
11
-#' @export
12
-patternMarkers <- function(Amatrix=NA, scaledPmatrix=FALSE, Pmatrix=NA,
13
-threshold="all", lp=NA, full=FALSE)
14
-{
15
-    if (scaledPmatrix==FALSE)
16
-    {
17
-        if(!is.na(Pmatrix))
18
-        {
19
-            pscale <- apply(Pmatrix,1,max)   # rescale p's to have max 1
20
-            Amatrix <- sweep(Amatrix, 2, pscale, FUN="*")   # rescale A in accordance with p's having max 1
21
-        }
22
-        else
23
-        {
24
-            warning("P values must be provided if not already scaled")
25
-        }
26
-    }
27
-
28
-    # find the A with the highest magnitude
29
-    Arowmax <- t(apply(Amatrix, 1, function(x) x/max(x)))
30
-
31
-    if (!is.na(lp))
32
-    {
33
-        if (length(lp) != dim(Amatrix)[2])
34
-        {
35
-            warning("lp length must equal the number of columns of the Amatrix")
36
-        }
37
-        sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))
38
-        ssranks <-rank(sstat)
39
-        ssgenes.th <- names(sort(sstat,decreasing=FALSE,na.last=TRUE))
40
-    }
41
-    else
42
-    {
43
-        # determine which genes are most associated with each pattern
44
-
45
-        sstat<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))
46
-        ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))#list()
47
-        ssgenes<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=NULL)
48
-        for (i in 1:nP)
49
-        {
50
-            lp <- rep(0,dim(Amatrix)[2])
51
-            lp[i] <- 1
52
-            sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))))
53
-            ssranks[,i]<-rank(sstat[,i])
54
-            ssgenes[,i]<-names(sort(sstat[,i],decreasing=FALSE,na.last=TRUE))
55
-        }
56
-        if (threshold=="cut")
57
-        {
58
-            geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min))))
59
-            ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x])
60
-        }
61
-        else if (threshold=="all")
62
-        {
63
-            pIndx<-unlist(apply(sstat,1,which.min))
64
-            gBYp <- list()
65
-            for(i in sort(unique(pIndx)))
66
-            {
67
-                gBYp[[i]]<-sapply(strsplit(names(pIndx[pIndx==i]),"[.]"),function(x) x[[1]][1])
68
-            }
69
-            ssgenes.th <- lapply(1:max(sort(unique(pIndx))), function(x)
70
-                ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x])
71
-        }
72
-        else
73
-        {
74
-            stop("Threshold arguement not viable option")
75
-        }
76
-    }
77
-    if (full)
78
-    {
79
-        return(list("PatternMarkers"=ssgenes.th, "PatternRanks"=ssranks,
80
-            "PatternMarkerScores"=sstat))
81
-    }
82
-    else
83
-    {
84
-        return("PatternMarkers"=ssgenes.th)
85
-    }
86
-}
87 0
deleted file mode 100644
... ...
@@ -1,129 +0,0 @@
1
-#' patternMatch4Parallel
2
-#'
3
-#' @param Ptot a matrix containing the total by set estimates of Pmean output
4
-#' from \code{reOrderBySet}
5
-#' @param nSets number of parallel sets used to generate \code{Ptot}
6
-#' @param cnt  number of branches at which to cut dendrogram
7
-#' @param minNS minimum of individual set contributions a cluster must contain
8
-#' @param maxNS max of individual set contributions a cluster must contain.
9
-#' default is nSets+minNS
10
-#' @param cluster.method the agglomeration method to be used for clustering
11
-#' @param ignore.NA logical indicating whether or not to ignore NAs from
12
-#' potential over dimensionalization. Default is FALSE.
13
-#' @param bySet logical indicating whether to return list of matched set
14
-#' solutions from \code{Ptot}
15
-#' @param ... additional parameters for \code{agnes}
16
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then
17
-#' a list of the set contributions to each
18
-#' concensus pattern is also returned.
19
-#' @seealso \code{\link{agnes}}
20
-#' @export
21
-patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NA, maxNS=NULL, 
22
-cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
23
-{
24
-    if (is.na(minNS))
25
-    {
26
-        minNS <- ceiling(nSets / 2)
27
-    }
28
-    if (is.null(maxNS))
29
-    {
30
-        maxNS=nSets+minNS
31
-    }
32
-    if (ignore.NA==FALSE & anyNA(Ptot))
33
-    {
34
-        warning(paste("Non-sparse matrixes produced. Reducing the number of",
35
-            "patterns asked for and rerun."))
36
-    }
37
-    if (ignore.NA==TRUE)
38
-    {
39
-        Ptot <- Ptot[complete.cases(Ptot),]
40
-    }
41
-
42
-    corcut <-function(Ptot,minNS,cnt,cluster.method)
43
-    {
44
-        # corr dist
45
-        corr.dist <- cor(t(Ptot))
46
-        corr.dist <- 1-corr.dist
47
-        # cluster
48
-        #library(cluster)
49
-        clust <- agnes(x=corr.dist, diss=TRUE, method=cluster.method)
50
-        cut <- cutree(as.hclust(clust), k=cnt)
51
-        #save.image(file=paste("CoGAPS.", nP, "P.", nS, "Set.CorrClustCut",
52
-            #cnt,".RData"))
53
-
54
-        cls <- sort(unique(cut))
55
-        cMNs <- matrix(nrow=cnt, ncol=dim(Ptot)[2])
56
-        rownames(cMNs) <- cls
57
-        colnames(cMNs) <- colnames(Ptot)
58
-
59
-        RtoMeanPattern <- list()
60
-        PByClust <- list()
61
-        for (i in cls)
62
-        {
63
-            if (!is.null(dim(Ptot[cut == i,])))
64
-            {
65
-                if (dim(Ptot[cut == i,])[1] >= minNS)
66
-                {
67
-                    cMNs[i,] <- colMeans(Ptot[cut==i,])
68
-                    PByClust[[i]] <- Ptot[cut==i,]
69
-                    nIN <- sum(cut==i)
70
-                    RtoMeanPattern[[i]] <- sapply(1:nIN, function(j)
71
-                        round(cor(x=Ptot[cut==i,][j,], y=cMNs[i,]),3)
72
-                    )
73
-                }
74
-            }
75
-        }
76
-
77
-        #PByClust[sapply(PByClust,is.null)] <- NULL
78
-        #RtoMeanPattern[sapply(RtoMeanPattern,is.null)] <- NULL
79
-        return(list("RtoMeanPattern"=RtoMeanPattern, "PByClust"=PByClust))
80
-    }    
81
-
82
-    cc <- corcut(Ptot,minNS,cnt,cluster.method)
83
-
84
-    ### split by maxNS
85
-    indx <- which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
86
-    while(length(indx) > 0)
87
-    {
88
-        icc <- corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method)
89
-        if (length(icc[[2]])==0)
90
-        {
91
-          indx <- indx[-1]
92
-          next
93
-        }
94
-        else
95
-        {
96
-            cc$PByClust[[indx[1]]] <- icc[[2]][[1]]
97
-            cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]]
98
-            if (length(icc[[2]]) > 1)
99
-            {
100
-                cc$PByClust <- append(cc$PByClust,icc[[2]][2])
101
-                cc$RtoMeanPattern <- append(cc$RtoMeanPattern,icc[[1]][2])
102
-            } 
103
-            indx <- which(unlist(lapply(cc$PByClust, function(x)
104
-                dim(x)[1] > maxNS)))
105
-        }
106
-    }
107
-
108
-    #weighted.mean
109
-    PByCDSWavg <- t(sapply(1:length(cc$PByClust), function(z)
110
-        apply(cc$PByClust[[z]],2, function(x)
111
-            weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
112
-    rownames(PByCDSWavg) <- lapply(1:length(cc$PByClust), function(x)
113
-        paste("Pattern", x))
114
-
115
-    #scale ps
116
-    Pmax <- apply(PByCDSWavg,1,max)
117
-    PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1], function(x)
118
-        PByCDSWavg[x,] / Pmax[x]))
119
-    rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg)
120
-
121
-    if (bySet)
122
-    {
123
-        return(list("consenusPatterns"=PByCDSWavgScaled, "PBySet"=cc))
124
-    }
125
-    else
126
-    {
127
-        return("consenusPatterns"=PByCDSWavgScaled)
128
-    }
129
-}
130 0
deleted file mode 100644
... ...
@@ -1,151 +0,0 @@
1
-#' PatternMatcher Shiny Ap
2
-#'
3
-#' @param PBySet list of matched set solutions for the Pmatrix from an NMF algorithm
4
-#' @param out optional name for saving output
5
-#' @param order optional vector indicating order of samples for plotting. Default is NULL.
6
-#' @param sample.color optional vector of colors of same length as colnames. Default is NULL.
7
-#' @return either an index of selected sets' contributions or the editted \code{PBySet} object
8
-patternMatcher<-function(PBySet=NULL,out=NULL,order=NULL, sample.color=NULL)
9
-{
10
-    runApp(list(
11
-        ui = pageWithSidebar(
12
-            # Application title
13
-            headerPanel('NMF Pattern Matching'),
14
-            # Side pannel with controls
15
-            sidebarPanel(
16
-                # to upload file
17
-                fileInput('file1', 'Choose .Rda File',
18
-                    accept=c('.RData','.Rda','R data object','.rda')),
19
-                uiOutput("pickplot"),
20
-                uiOutput("checkbs"),
21
-                downloadButton('downloadData', 'Download'),
22
-                actionButton("end", "Return")
23
-            ),
24
-            # Main panel with plots
25
-            mainPanel(plotOutput('plot1'))
26
-        ),
27
-
28
-        server = function(input, output, session)
29
-        {
30
-            #load in the data
31
-            df<-reactive({
32
-                if(!is.null(PBySet))
33
-                {
34
-                    df<-PBySet
35
-                    return(df)
36
-                }
37
-                inFile <- input$file1 # get the path to the input file on the server
38
-                if (is.null(inFile)){return(NULL)}
39
-                load(inFile$datapath) #load it
40
-                df <- get(load(inFile$datapath))# get the name of the object that was loaded
41
-                return(df)