... | ... |
@@ -1,6 +1,5 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 |
-export("") |
|
4 | 3 |
export(CoGAPS) |
5 | 4 |
export(CoGapsFromCheckpoint) |
6 | 5 |
export(GWCoGAPS) |
... | ... |
@@ -9,12 +8,14 @@ export(binaryA) |
9 | 8 |
export(calcCoGAPSStat) |
10 | 9 |
export(calcGeneGSStat) |
11 | 10 |
export(calcZ) |
11 |
+export(cellMatchR) |
|
12 | 12 |
export(computeGeneGSProb) |
13 | 13 |
export(createGWCoGAPSSets) |
14 | 14 |
export(createscCoGAPSSets) |
15 | 15 |
export(displayBuildReport) |
16 | 16 |
export(gapsMapRun) |
17 | 17 |
export(gapsRun) |
18 |
+export(patternMarkers) |
|
18 | 19 |
export(patternMatch4Parallel) |
19 | 20 |
export(patternMatch4singleCell) |
20 | 21 |
export(plotAtoms) |
21 | 22 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,91 @@ |
1 |
+ |
|
2 |
+#' patternMatch4Parallel |
|
3 |
+#' |
|
4 |
+#' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet} |
|
5 |
+#' @param nSets number of parallel sets used to generate \code{Atot} |
|
6 |
+#' @param cnt number of branches at which to cut dendrogram |
|
7 |
+#' @param minNS minimum of individual set contributions a cluster must contain |
|
8 |
+#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE. |
|
9 |
+#' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot} |
|
10 |
+#' @param R2mean |
|
11 |
+#' @param |
|
12 |
+#' @param ... additional parameters for \code{agnes} |
|
13 |
+#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each |
|
14 |
+#' concensus pattern is also returned. |
|
15 |
+#' @export |
|
16 |
+#' @seealso \code{\link{fastcluster}} |
|
17 |
+#' |
|
18 |
+#' |
|
19 |
+ |
|
20 |
+cellMatchR <- function(Atot,nSets, cnt, minNS=NULL, maxNS=NULL, ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE,...){ |
|
21 |
+ |
|
22 |
+ if(is.null(minNS)){minNS=nSets/2} |
|
23 |
+ if(is.null(maxNS)){maxNS=nSets+minNS} |
|
24 |
+ |
|
25 |
+ if(ignore.NA==FALSE){if(anyNA(Atot)){ |
|
26 |
+ warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.') |
|
27 |
+ }} |
|
28 |
+ if(ignore.NA==TRUE){Atot<-Atot[complete.cases(Atot),]} |
|
29 |
+ |
|
30 |
+ |
|
31 |
+corcut<-function(Atot,minNS,cnt,cluster.method){ |
|
32 |
+ corr.dist=cor(Atot) |
|
33 |
+ corr.dist=1-corr.dist |
|
34 |
+ |
|
35 |
+ clust=agnes(x=corr.dist,diss=TRUE,cluster.method) |
|
36 |
+ #clust=fastcluster::hclust(dist(corr.dist)) |
|
37 |
+ cut=cutree(as.hclust(clust),k=cnt) |
|
38 |
+ |
|
39 |
+ cls=sort(unique(cut)) |
|
40 |
+ cMNs=matrix(ncol=cnt,nrow=dim(Atot)[1]) |
|
41 |
+ colnames(cMNs)=cls |
|
42 |
+ rownames(cMNs)=rownames(Atot) |
|
43 |
+ |
|
44 |
+ RtoMeanPattern <- list() |
|
45 |
+ AByClust <- list() |
|
46 |
+ for(i in cls){ |
|
47 |
+ if (is.null(dim(Atot[,cut == i]))==TRUE){ |
|
48 |
+ next |
|
49 |
+ } else if(dim(Atot[,cut == i])[2] < minNS){ |
|
50 |
+ next |
|
51 |
+ } else{ |
|
52 |
+ cMNs[,i]=rowMeans(Atot[,cut==i]) |
|
53 |
+ AByClust[[i]] <- Atot[,cut==i] |
|
54 |
+ nIN=sum(cut==i) |
|
55 |
+ RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Atot[,cut==i][,j],y=cMNs[,i]),3)}) |
|
56 |
+ } |
|
57 |
+ } |
|
58 |
+ PByClust[sapply(PByClust,is.null)]<-NULL |
|
59 |
+ RtoMeanPattern[sapply(RtoMeanPattern,is.null)]<-NULL |
|
60 |
+ return(list("RtoMeanPattern"=RtoMeanPattern,"AByClust"=AByClust)) |
|
61 |
+ } |
|
62 |
+ |
|
63 |
+ cc<-corcut(Atot,minNS,cnt,cluster.method) |
|
64 |
+ |
|
65 |
+ ### split by maxNS |
|
66 |
+ indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS))) |
|
67 |
+ while(length(indx)>0){ |
|
68 |
+ icc<-corcut(cc$AByClust[[indx[1]]],minNS,2,cluster.method) |
|
69 |
+ cc$AByClust[[indx[1]]]<-icc[[2]][[2]] |
|
70 |
+ cc$RtoMeanPattern[[indx[1]]]<-icc[[1]][[2]] |
|
71 |
+ if(length(icc[[2]])>1){ |
|
72 |
+ cc$AByClust<-append(cc$AByClust,icc[[2]][1]) |
|
73 |
+ cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][1]) |
|
74 |
+ } |
|
75 |
+ indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS))) |
|
76 |
+ } |
|
77 |
+ |
|
78 |
+#weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]]) |
|
79 |
+AByCDSWavg<- t(sapply(1:length(cc$AByClust),function(z) apply(cc$AByClust[[z]],1,function(x) weighted.mean(x,(cc$RtoMeanPattern[[z]])^3)))) |
|
80 |
+rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust),function(x) paste("Pattern",x)) |
|
81 |
+ |
|
82 |
+#scale As |
|
83 |
+Amax <- apply(AByCDSWavg,1,max) |
|
84 |
+AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1],function(x) AByCDSWavg[x,]/Amax[x])) |
|
85 |
+rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg) |
|
86 |
+ |
|
87 |
+ if(bySet){ |
|
88 |
+ return(list("consenusAs"=t(AByCDSWavgScaled),"ABySet"=cc)) |
|
89 |
+ } else {return(AByCDSWavgScaled)} |
|
90 |
+ |
|
91 |
+} |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
#' data(SimpSim) |
15 | 15 |
#' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example") |
16 | 16 |
#' @export |
17 |
-createscCoGAPSSets <- function(D, nSets, simulationName,samplingRatio=NULL,path="") |
|
17 |
+createscCoGAPSSets <- function(D, nSets, simulationName,samplingRatio=NULL,path="",annotionObj=NULL) |
|
18 | 18 |
{ |
19 | 19 |
# check gene names |
20 | 20 |
if (length(unique(colnames(D))) != length(colnames(D))) |
... | ... |
@@ -16,8 +16,8 @@ |
16 | 16 |
patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL, |
17 | 17 |
cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...){ |
18 | 18 |
|
19 |
- if (!is.null(minNS)){minNS=nSets/2} |
|
20 |
- if (!is.null(maxNS)){maxNS=nSets+minNS} |
|
19 |
+ if (is.null(minNS)){minNS=ceiling(nSets/2)} |
|
20 |
+ if (is.null(maxNS)){maxNS=nSets+minNS} |
|
21 | 21 |
|
22 | 22 |
if (ignore.NA==FALSE & anyNA(Ptot)) |
23 | 23 |
warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.') |
... | ... |
@@ -53,6 +53,8 @@ patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL, |
53 | 53 |
RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)}) |
54 | 54 |
} |
55 | 55 |
} |
56 |
+ PByClust[sapply(PByClust,is.null)]<-NULL |
|
57 |
+ RtoMeanPattern[sapply(RtoMeanPattern,is.null)]<-NULL |
|
56 | 58 |
return(list("RtoMeanPattern"=RtoMeanPattern,"PByClust"=PByClust)) |
57 | 59 |
} |
58 | 60 |
|
... | ... |
@@ -64,14 +66,11 @@ patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL, |
64 | 66 |
icc<-corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method) |
65 | 67 |
cc$PByClust[[indx[1]]]<-icc[[2]][[1]] |
66 | 68 |
cc$RtoMeanPattern[[indx[1]]]<-icc[[1]][[1]] |
67 |
- if(is.null(icc[[2]][[2]])){ |
|
68 |
- indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS))) |
|
69 |
- next |
|
70 |
- } else { |
|
69 |
+ if(length(icc[[2]])>1){ |
|
71 | 70 |
cc$PByClust<-append(cc$PByClust,icc[[2]][2]) |
72 |
- cc$RtoMeanPattern<-append(cc$PByClust,icc[[1]][[2]]) |
|
73 |
- indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS))) |
|
74 |
- } |
|
71 |
+ cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2]) |
|
72 |
+ } |
|
73 |
+ indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS))) |
|
75 | 74 |
} |
76 | 75 |
|
77 | 76 |
#weighted.mean |
78 | 77 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,36 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/cellMatchR.R |
|
3 |
+\name{cellMatchR} |
|
4 |
+\alias{cellMatchR} |
|
5 |
+\title{patternMatch4Parallel} |
|
6 |
+\usage{ |
|
7 |
+cellMatchR(Atot, nSets, cnt, minNS = NULL, maxNS = NULL, |
|
8 |
+ ignore.NA = FALSE, bySet = FALSE, plotDen = FALSE, ...) |
|
9 |
+} |
|
10 |
+\arguments{ |
|
11 |
+\item{Atot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}} |
|
12 |
+ |
|
13 |
+\item{nSets}{number of parallel sets used to generate \code{Atot}} |
|
14 |
+ |
|
15 |
+\item{cnt}{number of branches at which to cut dendrogram} |
|
16 |
+ |
|
17 |
+\item{minNS}{minimum of individual set contributions a cluster must contain} |
|
18 |
+ |
|
19 |
+\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.} |
|
20 |
+ |
|
21 |
+\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}} |
|
22 |
+ |
|
23 |
+\item{...}{additional parameters for \code{agnes}} |
|
24 |
+ |
|
25 |
+\item{R2mean}{} |
|
26 |
+} |
|
27 |
+\value{ |
|
28 |
+a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each |
|
29 |
+concensus pattern is also returned. |
|
30 |
+} |
|
31 |
+\description{ |
|
32 |
+patternMatch4Parallel |
|
33 |
+} |
|
34 |
+\seealso{ |
|
35 |
+\code{\link{fastcluster}} |
|
36 |
+} |
... | ... |
@@ -4,14 +4,15 @@ |
4 | 4 |
\alias{createGWCoGAPSSets} |
5 | 5 |
\title{createGWCoGAPSSets} |
6 | 6 |
\usage{ |
7 |
-createGWCoGAPSSets(data = D, nSets = nSets, |
|
8 |
- outRDA = "GenesInCoGAPSSets.Rda", keep = TRUE) |
|
7 |
+createGWCoGAPSSets(D, S, nSets, simulationName, path = "") |
|
9 | 8 |
} |
10 | 9 |
\arguments{ |
11 |
-\item{data}{data matrix with unique rownames} |
|
12 |
- |
|
13 | 10 |
\item{nSets}{number of sets for parallelization} |
14 | 11 |
|
12 |
+\item{path}{character string indicating were to save resulting data objects} |
|
13 |
+ |
|
14 |
+\item{data}{data matrix with unique rownames} |
|
15 |
+ |
|
15 | 16 |
\item{outRDA}{name of output file} |
16 | 17 |
|
17 | 18 |
\item{keep}{logical indicating whether or not to save gene set list. Default is TRUE.} |
... | ... |
@@ -4,19 +4,22 @@ |
4 | 4 |
\alias{createscCoGAPSSets} |
5 | 5 |
\title{Create Gene Sets for scCoGAPS} |
6 | 6 |
\usage{ |
7 |
-createscCoGAPSSets(D, S, nSets, simulationName, samplingRatio = NULL) |
|
7 |
+createscCoGAPSSets(D, nSets, simulationName, samplingRatio = NULL, |
|
8 |
+ path = "") |
|
8 | 9 |
} |
9 | 10 |
\arguments{ |
10 | 11 |
\item{D}{data matrix} |
11 | 12 |
|
12 |
-\item{S}{uncertainty matrix} |
|
13 |
- |
|
14 | 13 |
\item{nSets}{number of sets to partition the data into} |
15 | 14 |
|
16 | 15 |
\item{simulationName}{name used to identify files created by this simulation} |
17 | 16 |
|
18 | 17 |
\item{samplingRatio}{vector of relative quantities to use for sampling celltypes} |
19 | 18 |
|
19 |
+\item{path}{character string indicating were to save resulting data objects. default is current working dir} |
|
20 |
+ |
|
21 |
+\item{S}{uncertainty matrix} |
|
22 |
+ |
|
20 | 23 |
\item{annotionObj}{vector of same length as number of columns of D} |
21 | 24 |
} |
22 | 25 |
\value{ |
... | ... |
@@ -4,8 +4,8 @@ |
4 | 4 |
\alias{patternMatch4Parallel} |
5 | 5 |
\title{patternMatch4Parallel} |
6 | 6 |
\usage{ |
7 |
-patternMatch4Parallel(Ptot, nSets, cnt, minNS, cluster.method = "complete", |
|
8 |
- ignore.NA = FALSE, bySet = FALSE, ...) |
|
7 |
+patternMatch4Parallel(Ptot, nSets, cnt, minNS = NULL, maxNS = NULL, |
|
8 |
+ cluster.method = "complete", ignore.NA = FALSE, bySet = FALSE, ...) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{Ptot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}} |
... | ... |
@@ -16,6 +16,8 @@ patternMatch4Parallel(Ptot, nSets, cnt, minNS, cluster.method = "complete", |
16 | 16 |
|
17 | 17 |
\item{minNS}{minimum of individual set contributions a cluster must contain} |
18 | 18 |
|
19 |
+\item{maxNS}{max of individual set contributions a cluster must contain. default is nSets+minNS} |
|
20 |
+ |
|
19 | 21 |
\item{cluster.method}{the agglomeration method to be used for clustering} |
20 | 22 |
|
21 | 23 |
\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.} |
... | ... |
@@ -4,13 +4,13 @@ |
4 | 4 |
\alias{patternMatch4singleCell} |
5 | 5 |
\title{patternMatch4singleCell} |
6 | 6 |
\usage{ |
7 |
-patternMatch4singleCell(Ptot, nSets, cnt, minNS, cluster.method = "complete", |
|
7 |
+patternMatch4singleCell(Atot, nSets, cnt, minNS, cluster.method = "complete", |
|
8 | 8 |
ignore.NA = FALSE, bySet = FALSE, ...) |
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{Ptot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}} |
|
11 |
+\item{Atot}{a matrix containing the total by set estimates of Amean output from \code{reOrderBySet}} |
|
12 | 12 |
|
13 |
-\item{nSets}{number of parallel sets used to generate \code{Ptot}} |
|
13 |
+\item{nSets}{number of parallel sets used to generate \code{Atot}} |
|
14 | 14 |
|
15 | 15 |
\item{cnt}{number of branches at which to cut dendrogram} |
16 | 16 |
|
... | ... |
@@ -20,7 +20,7 @@ patternMatch4singleCell(Ptot, nSets, cnt, minNS, cluster.method = "complete", |
20 | 20 |
|
21 | 21 |
\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.} |
22 | 22 |
|
23 |
-\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Ptot}} |
|
23 |
+\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}} |
|
24 | 24 |
|
25 | 25 |
\item{...}{additional parameters for \code{agnes}} |
26 | 26 |
} |
27 | 27 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,21 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/postFixed4SC.R |
|
3 |
+\name{postFixed4SC} |
|
4 |
+\alias{postFixed4SC} |
|
5 |
+\title{Post Processing of Parallel Output} |
|
6 |
+\usage{ |
|
7 |
+postFixed4SC(AP.fixed, setAs) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{AP.fixed}{output of parallel gapsMapRun calls with same FP} |
|
11 |
+ |
|
12 |
+\item{setPs}{data.frame with rows giving fixed patterns for P used as input |
|
13 |
+for gapsMapRun} |
|
14 |
+} |
|
15 |
+\value{ |
|
16 |
+list of two data.frames containing the A matrix estimates or their |
|
17 |
+corresponding standard deviations from output of parallel CoGAPS |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+Post Processing of Parallel Output |
|
21 |
+} |