Browse code

restored createGWCoGAPSsets

Genevieve Stein-O'Brien authored on 28/02/2018 17:06:47
Showing 8 changed files

... ...
@@ -1,5 +1,6 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
+export("")
3 4
 export(CoGAPS)
4 5
 export(CoGapsFromCheckpoint)
5 6
 export(GWCoGAPS)
... ...
@@ -10,10 +11,12 @@ export(calcGeneGSStat)
10 11
 export(calcZ)
11 12
 export(computeGeneGSProb)
12 13
 export(createGWCoGAPSSets)
14
+export(createscCoGAPSSets)
13 15
 export(displayBuildReport)
14 16
 export(gapsMapRun)
15 17
 export(gapsRun)
16 18
 export(patternMatch4Parallel)
19
+export(patternMatch4singleCell)
17 20
 export(plotAtoms)
18 21
 export(plotDiag)
19 22
 export(plotGAPS)
... ...
@@ -13,22 +13,29 @@
13 13
 #'}
14 14
 #'
15 15
 
16
-createGWCoGAPSSets<-function(data=D, #data matrix with unique rownames
17
-	nSets=nSets, #number of sets for parallelization
18
-	outRDA="GenesInCoGAPSSets.Rda", #name of output file
19
-	keep=TRUE #logical indicating whether or not to save gene set list. Default is TRUE.
20
-	){
21
-genes=rownames(data)
22
-setSize=floor(length(genes)/nSets)
23
-genesInSets <- list()
24
-for (set in 1:nSets) {
25
-  if(set!=nSets){genesInSets[[set]] <- sample(genes,setSize)}
26
-  if(set==nSets){genesInSets[[set]] <- genes}
27
-  genes=genes[!genes%in%genesInSets[[set]]]
28
-}
29
-if(!identical(sort(unlist(genesInSets)),sort(rownames(data)))){print("Gene identifiers not unique!")}
30
-if(keep==TRUE){save(list=c('genesInSets'),file=outRDA)}
31
-return(genesInSets)
32
-}
33
-
16
+createGWCoGAPSSets <- function(D, S, nSets, simulationName)
17
+{
18
+    # check gene names
19
+    if (length(unique(colnames(D))) != length(colnames(D)))
20
+    {
21
+        warning("Cell identifiers not unique!")
22
+    }
34 23
 
24
+    # partition data by sampling random sets of cells
25
+    genes <- 1:nrow(D)
26
+    setSize <- floor(length(genes) / nSets)
27
+    for (set in 1:nSets)
28
+    {
29
+        
30
+        # sample genes
31
+            sampleSize <- ifelse(set == nSets, length(genes), setSize)
32
+            geneset <- sample(genes, sampleSize, replace=FALSE)
33
+            genes <- genes[!(genes %in% geneset)]
34
+        # partition data
35
+        sampleD <- D[geneset,]
36
+        sampleS <- S[geneset,]
37
+        save(sampleD, sampleS, file=paste(simulationName, "_partition_", set,
38
+            ".RData", sep=""));
39
+    }
40
+    return(simulationName)
41
+}
... ...
@@ -1,12 +1,12 @@
1 1
 #' patternMatch4singleCell
2 2
 #'
3
-#' @param Ptot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
4
-#' @param nSets number of parallel sets used to generate \code{Ptot}
3
+#' @param Atot a matrix containing the total by set estimates of Amean output from \code{reOrderBySet}
4
+#' @param nSets number of parallel sets used to generate \code{Atot}
5 5
 #' @param cnt  number of branches at which to cut dendrogram
6 6
 #' @param minNS minimum of individual set contributions a cluster must contain
7 7
 #' @param cluster.method the agglomeration method to be used for clustering
8 8
 #' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
9
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Ptot}
9
+#' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot}
10 10
 #' @param ... additional parameters for \code{agnes}
11 11
 #' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
12 12
 #' concensus pattern is also returned.
... ...
@@ -15,19 +15,19 @@
15 15
 #' 
16 16
 #' 
17 17
 
18
-patternMatch4singleCell <- function(Ptot, nSets, cnt, minNS, 
18
+patternMatch4singleCell <- function(Atot, nSets, cnt, minNS, 
19 19
 cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
20 20
 {
21 21
     if (!is.null(minNS))
22 22
         minNS=nSets/2
23 23
 
24
-    if (ignore.NA==FALSE & anyNA(Ptot))
24
+    if (ignore.NA==FALSE & anyNA(Atot))
25 25
         warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')
26 26
     if (ignore.NA==TRUE)
27
-        Ptot <- Ptot[complete.cases(Ptot),]
27
+        Atot <- Atot[complete.cases(Atot),]
28 28
 
29 29
     # corr dist
30
-    corr.dist=cor(Ptot)
30
+    corr.dist=cor(Atot)
31 31
     corr.dist=1-corr.dist
32 32
     # cluster
33 33
     #library(cluster)
... ...
@@ -37,26 +37,26 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
37 37
 
38 38
     #drop n<4 and get weighted Avg
39 39
     cls=sort(unique(cut))
40
-    cMNs=matrix(nrow=cnt,ncol=dim(Ptot)[2])
40
+    cMNs=matrix(nrow=cnt,ncol=dim(Atot)[2])
41 41
     rownames(cMNs)=cls
42
-    colnames(cMNs)=colnames(Ptot)
42
+    colnames(cMNs)=colnames(Atot)
43 43
 
44 44
     RtoMeanPattern <- list()
45 45
     PByClust <- list()
46 46
     for(i in cls)
47 47
     {
48
-        if (is.null(dim(Ptot[cut == i, ]))==TRUE)
48
+        if (is.null(dim(Atot[cut == i, ]))==TRUE)
49 49
         {
50
-            cMNs[i,] <- Ptot[cut == i, ]
51
-            RtoMeanPattern[[i]] <- rep(1,length(Ptot[cut == i, ]))
52
-            PByClust[[i]] <- t(as.matrix(Ptot[cut == i, ]))
50
+            cMNs[i,] <- Atot[cut == i, ]
51
+            RtoMeanPattern[[i]] <- rep(1,length(Atot[cut == i, ]))
52
+            PByClust[[i]] <- t(as.matrix(Atot[cut == i, ]))
53 53
         }
54 54
         else
55 55
         {
56
-            cMNs[i,]=colMeans(Ptot[cut==i,])
57
-            PByClust[[i]] <- Ptot[cut==i,]
56
+            cMNs[i,]=colMeans(Atot[cut==i,])
57
+            PByClust[[i]] <- Atot[cut==i,]
58 58
             nIN=sum(cut==i)
59
-            RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)})
59
+            RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Atot[cut==i,][j,],y=cMNs[i,]),3)})
60 60
         }
61 61
     }
62 62
 
... ...
@@ -124,7 +124,7 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
124 124
     {
125 125
         # return by set and final
126 126
         PBySet<-PByCDS
127
-        return(list("consenusPatterns"=PByCDSWavgScaled,"PBySet"=PBySet,"RtoMPDS"=RtoMPDS))
127
+        return(list("consenusAs"=PByCDSWavgScaled,"ABySet"=PBySet,"RtoMPDS"=RtoMPDS))
128 128
     }
129 129
     else
130 130
     {
... ...
@@ -66,11 +66,11 @@ GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA,
66 66
             cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", sep="")
67 67
             CoGapsFromCheckpoint(sampleD, sampleS, cptFileName)
68 68
         }
69
-        load(paste(simulationName, "_matched_ps.RData", sep=""))
69
+        load(paste(simulationName, "_matched_As.RData", sep=""))
70 70
     }
71
-    else if (file_test("-f", paste(simulationName, "_matched_ps.RData", sep="")))
71
+    else if (file_test("-f", paste(simulationName, "_matched_As.RData", sep="")))
72 72
     {
73
-        load(paste(simulationName, "_matched_ps.RData", sep=""))
73
+        load(paste(simulationName, "_matched_As.RData", sep=""))
74 74
         consensusAs<-matchedAmplitudes[[1]]
75 75
         finalResult <- runFinalPhase(simulationName, allDataSets, consensusAs, ...)
76 76
     }
... ...
@@ -88,7 +88,7 @@ GWCoGapsFromCheckpoint <- function(simulationName, nCores=NA, cut=NA, minNS=NA,
88 88
             CoGapsFromCheckpoint(sampleD, sampleS, cptFileName)
89 89
         }
90 90
         matchedAmplitudes <- postInitialPhase(initialResult, length(allDataSets), cut, minNS)
91
-        save(matchedAmplitudes, file=paste(simulationName, "_matched_ps.RData", sep=""))
91
+        save(matchedAmplitudes, file=paste(simulationName, "_matched_As.RData", sep=""))
92 92
         consensusAs<-matchedAmplitudes[[1]]
93 93
         finalResult <- runFinalPhase(simulationName, allDataSets, consensusAs, ...)
94 94
     }
... ...
@@ -182,7 +182,7 @@ runFinalPhase <- function(simulationName, allDataSets, consensusAs, nCores, ...)
182 182
 postFinalPhase <- function(finalResult, consensusAs)
183 183
 {
184 184
     Aresult <- postFixed4Parallel(finalResult, consensusAs)
185
-    finalResult <- list("Amean"=Aresult$A, "Asd"=Aresult$Asd,"Pmean"=consensusAs)
185
+    finalResult <- list("Pmean"=Aresult$P, "Psd"=Aresult$Psd,"Amean"=consensusAs)
186 186
     class(finalResult) <- append(class(finalResult), "CoGAPS")
187 187
     return(finalResult)
188 188
 }
... ...
@@ -2,29 +2,29 @@
2 2
 % Please edit documentation in R/createGWCoGAPSSets.R
3 3
 \name{createGWCoGAPSSets}
4 4
 \alias{createGWCoGAPSSets}
5
-\title{Create Gene Sets for GWCoGAPS}
5
+\title{createGWCoGAPSSets}
6 6
 \usage{
7
-createGWCoGAPSSets(D, S, nSets, simulationName)
7
+createGWCoGAPSSets(data = D, nSets = nSets,
8
+  outRDA = "GenesInCoGAPSSets.Rda", keep = TRUE)
8 9
 }
9 10
 \arguments{
10
-\item{D}{data matrix}
11
+\item{data}{data matrix with unique rownames}
11 12
 
12
-\item{S}{uncertainty matrix}
13
+\item{nSets}{number of sets for parallelization}
13 14
 
14
-\item{nSets}{number of sets to partition the data into}
15
+\item{outRDA}{name of output file}
15 16
 
16
-\item{simulationName}{name used to identify files created by this simulation}
17
+\item{keep}{logical indicating whether or not to save gene set list. Default is TRUE.}
17 18
 }
18 19
 \value{
19
-simulationName used to identify saved files
20
+list with randomly generated sets of genes from whole genome data
20 21
 }
21 22
 \description{
22
-Create Gene Sets for GWCoGAPS
23
-}
24
-\details{
25
-factors whole genome data into randomly generated sets for indexing
23
+\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing;
26 24
 }
27 25
 \examples{
28
-data(SimpSim)
29
-createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
26
+\dontrun{
27
+createGWCoGAPSSet(D,nSets=nSets)
28
+}
29
+
30 30
 }
31 31
new file mode 100644
... ...
@@ -0,0 +1,34 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/createscCoGAPSSets.R
3
+\name{createscCoGAPSSets}
4
+\alias{createscCoGAPSSets}
5
+\title{Create Gene Sets for scCoGAPS}
6
+\usage{
7
+createscCoGAPSSets(D, S, nSets, simulationName, samplingRatio = NULL)
8
+}
9
+\arguments{
10
+\item{D}{data matrix}
11
+
12
+\item{S}{uncertainty matrix}
13
+
14
+\item{nSets}{number of sets to partition the data into}
15
+
16
+\item{simulationName}{name used to identify files created by this simulation}
17
+
18
+\item{samplingRatio}{vector of relative quantities to use for sampling celltypes}
19
+
20
+\item{annotionObj}{vector of same length as number of columns of D}
21
+}
22
+\value{
23
+simulationName used to identify saved files
24
+}
25
+\description{
26
+Create Gene Sets for scCoGAPS
27
+}
28
+\details{
29
+factors whole genome data into randomly generated sets for indexing
30
+}
31
+\examples{
32
+data(SimpSim)
33
+createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
34
+}
0 35
new file mode 100644
... ...
@@ -0,0 +1,36 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/patternMatch4singleCell.R
3
+\name{patternMatch4singleCell}
4
+\alias{patternMatch4singleCell}
5
+\title{patternMatch4singleCell}
6
+\usage{
7
+patternMatch4singleCell(Ptot, nSets, cnt, minNS, cluster.method = "complete",
8
+  ignore.NA = FALSE, bySet = FALSE, ...)
9
+}
10
+\arguments{
11
+\item{Ptot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
12
+
13
+\item{nSets}{number of parallel sets used to generate \code{Ptot}}
14
+
15
+\item{cnt}{number of branches at which to cut dendrogram}
16
+
17
+\item{minNS}{minimum of individual set contributions a cluster must contain}
18
+
19
+\item{cluster.method}{the agglomeration method to be used for clustering}
20
+
21
+\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
22
+
23
+\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Ptot}}
24
+
25
+\item{...}{additional parameters for \code{agnes}}
26
+}
27
+\value{
28
+a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
29
+concensus pattern is also returned.
30
+}
31
+\description{
32
+patternMatch4singleCell
33
+}
34
+\seealso{
35
+\code{\link{agnes}}
36
+}
... ...
@@ -15,7 +15,7 @@ greater than or equal to the number of rows of FP}
15 15
 
16 16
 \item{nCores}{number of cores for parallelization. If left to the default NA, nCores = nSets.}
17 17
 
18
-\item{cut}{number of branches at which to cut dendrogram used in patternMatch4Parallel}
18
+\item{cut}{number of branches at which to cut dendrogram used in patternMatch4singleCell}
19 19
 
20 20
 \item{minNS}{minimum of individual set contributions a cluster must contain}
21 21
 
... ...
@@ -40,8 +40,8 @@ the data matrix for whole genome data;
40 40
 data(SimpSim)
41 41
 sim_name <- "example"
42 42
 createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
43
-result <- GWCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000)
43
+result <- scCoGAPS(sim_name, nFactor=3, nEquil=1000, nSample=1000)
44 44
 }
45 45
 \seealso{
46
-\code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}}
46
+\code{\link{gapsRun}}, \code{\link{patternMatch4singleCell}}, and \code{\link{gapsMapRun}}
47 47
 }