Browse code

createscCoGAPSsets add annotationobj to function call

Genevieve Stein-O'Brien authored on 30/03/2018 17:15:41
Showing 10 changed files

... ...
@@ -1,6 +1,5 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
-export("")
4 3
 export(CoGAPS)
5 4
 export(CoGapsFromCheckpoint)
6 5
 export(GWCoGAPS)
... ...
@@ -9,12 +8,14 @@ export(binaryA)
9 8
 export(calcCoGAPSStat)
10 9
 export(calcGeneGSStat)
11 10
 export(calcZ)
11
+export(cellMatchR)
12 12
 export(computeGeneGSProb)
13 13
 export(createGWCoGAPSSets)
14 14
 export(createscCoGAPSSets)
15 15
 export(displayBuildReport)
16 16
 export(gapsMapRun)
17 17
 export(gapsRun)
18
+export(patternMarkers)
18 19
 export(patternMatch4Parallel)
19 20
 export(patternMatch4singleCell)
20 21
 export(plotAtoms)
21 22
new file mode 100644
... ...
@@ -0,0 +1,91 @@
1
+
2
+#' patternMatch4Parallel
3
+#'
4
+#' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
5
+#' @param nSets number of parallel sets used to generate \code{Atot}
6
+#' @param cnt  number of branches at which to cut dendrogram
7
+#' @param minNS minimum of individual set contributions a cluster must contain
8
+#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
9
+#' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot}
10
+#' @param R2mean 
11
+#' @param
12
+#' @param ... additional parameters for \code{agnes}
13
+#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
14
+#' concensus pattern is also returned.
15
+#' @export
16
+#' @seealso \code{\link{fastcluster}}
17
+#'
18
+#'
19
+
20
+cellMatchR <- function(Atot,nSets, cnt, minNS=NULL, maxNS=NULL, ignore.NA=FALSE, bySet=FALSE, plotDen=FALSE,...){
21
+
22
+  if(is.null(minNS)){minNS=nSets/2}
23
+  if(is.null(maxNS)){maxNS=nSets+minNS}
24
+
25
+  if(ignore.NA==FALSE){if(anyNA(Atot)){
26
+    warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')
27
+  }}
28
+  if(ignore.NA==TRUE){Atot<-Atot[complete.cases(Atot),]}
29
+
30
+
31
+corcut<-function(Atot,minNS,cnt,cluster.method){
32
+  corr.dist=cor(Atot)
33
+  corr.dist=1-corr.dist
34
+
35
+  clust=agnes(x=corr.dist,diss=TRUE,cluster.method)
36
+  #clust=fastcluster::hclust(dist(corr.dist))
37
+  cut=cutree(as.hclust(clust),k=cnt)
38
+
39
+  cls=sort(unique(cut))
40
+  cMNs=matrix(ncol=cnt,nrow=dim(Atot)[1])
41
+  colnames(cMNs)=cls
42
+  rownames(cMNs)=rownames(Atot)
43
+
44
+  RtoMeanPattern <- list()
45
+  AByClust <- list()
46
+    for(i in cls){
47
+        if (is.null(dim(Atot[,cut == i]))==TRUE){
48
+          next
49
+        } else if(dim(Atot[,cut == i])[2] < minNS){
50
+            next
51
+        } else{
52
+            cMNs[,i]=rowMeans(Atot[,cut==i])
53
+            AByClust[[i]] <- Atot[,cut==i]
54
+            nIN=sum(cut==i)
55
+            RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Atot[,cut==i][,j],y=cMNs[,i]),3)})
56
+        }
57
+      }
58
+    PByClust[sapply(PByClust,is.null)]<-NULL
59
+    RtoMeanPattern[sapply(RtoMeanPattern,is.null)]<-NULL
60
+    return(list("RtoMeanPattern"=RtoMeanPattern,"AByClust"=AByClust))
61
+  }   
62
+
63
+  cc<-corcut(Atot,minNS,cnt,cluster.method)
64
+
65
+    ### split by maxNS
66
+    indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
67
+    while(length(indx)>0){
68
+          icc<-corcut(cc$AByClust[[indx[1]]],minNS,2,cluster.method)
69
+          cc$AByClust[[indx[1]]]<-icc[[2]][[2]]
70
+          cc$RtoMeanPattern[[indx[1]]]<-icc[[1]][[2]]
71
+          if(length(icc[[2]])>1){
72
+                cc$AByClust<-append(cc$AByClust,icc[[2]][1])
73
+                cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][1])
74
+          }
75
+      indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
76
+    }
77
+
78
+#weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]])
79
+AByCDSWavg<- t(sapply(1:length(cc$AByClust),function(z) apply(cc$AByClust[[z]],1,function(x) weighted.mean(x,(cc$RtoMeanPattern[[z]])^3))))
80
+rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust),function(x) paste("Pattern",x))
81
+
82
+#scale As
83
+Amax <- apply(AByCDSWavg,1,max)
84
+AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1],function(x) AByCDSWavg[x,]/Amax[x]))
85
+rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg)
86
+
87
+  if(bySet){  
88
+    return(list("consenusAs"=t(AByCDSWavgScaled),"ABySet"=cc))
89
+  } else {return(AByCDSWavgScaled)}
90
+
91
+}
... ...
@@ -14,7 +14,7 @@
14 14
 #' data(SimpSim)
15 15
 #' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
16 16
 #' @export
17
-createscCoGAPSSets <- function(D, nSets, simulationName,samplingRatio=NULL,path="")
17
+createscCoGAPSSets <- function(D, nSets, simulationName,samplingRatio=NULL,path="",annotionObj=NULL)
18 18
 {
19 19
     # check gene names
20 20
     if (length(unique(colnames(D))) != length(colnames(D)))
... ...
@@ -16,8 +16,8 @@
16 16
 patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL, 
17 17
     cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...){
18 18
 
19
-    if (!is.null(minNS)){minNS=nSets/2}
20
-    if (!is.null(maxNS)){maxNS=nSets+minNS}
19
+    if (is.null(minNS)){minNS=ceiling(nSets/2)}
20
+    if (is.null(maxNS)){maxNS=nSets+minNS}
21 21
 
22 22
     if (ignore.NA==FALSE & anyNA(Ptot))
23 23
         warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')
... ...
@@ -53,6 +53,8 @@ patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL,
53 53
             RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)})
54 54
         }
55 55
     }
56
+    PByClust[sapply(PByClust,is.null)]<-NULL
57
+    RtoMeanPattern[sapply(RtoMeanPattern,is.null)]<-NULL
56 58
     return(list("RtoMeanPattern"=RtoMeanPattern,"PByClust"=PByClust))
57 59
     }    
58 60
 
... ...
@@ -64,14 +66,11 @@ patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL,
64 66
             icc<-corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method)
65 67
             cc$PByClust[[indx[1]]]<-icc[[2]][[1]]
66 68
             cc$RtoMeanPattern[[indx[1]]]<-icc[[1]][[1]]
67
-            if(is.null(icc[[2]][[2]])){ 
68
-                indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
69
-                next
70
-            } else {
69
+            if(length(icc[[2]])>1){
71 70
                 cc$PByClust<-append(cc$PByClust,icc[[2]][2])
72
-                cc$RtoMeanPattern<-append(cc$PByClust,icc[[1]][[2]])
73
-                indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
74
-            }
71
+                cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2])
72
+            } 
73
+            indx<-which(unlist(lapply(cc$PByClust,function(x) dim(x)[1]>maxNS)))
75 74
     }
76 75
 
77 76
     #weighted.mean
78 77
new file mode 100644
... ...
@@ -0,0 +1,36 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/cellMatchR.R
3
+\name{cellMatchR}
4
+\alias{cellMatchR}
5
+\title{patternMatch4Parallel}
6
+\usage{
7
+cellMatchR(Atot, nSets, cnt, minNS = NULL, maxNS = NULL,
8
+  ignore.NA = FALSE, bySet = FALSE, plotDen = FALSE, ...)
9
+}
10
+\arguments{
11
+\item{Atot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
12
+
13
+\item{nSets}{number of parallel sets used to generate \code{Atot}}
14
+
15
+\item{cnt}{number of branches at which to cut dendrogram}
16
+
17
+\item{minNS}{minimum of individual set contributions a cluster must contain}
18
+
19
+\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
20
+
21
+\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}}
22
+
23
+\item{...}{additional parameters for \code{agnes}}
24
+
25
+\item{R2mean}{}
26
+}
27
+\value{
28
+a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
29
+concensus pattern is also returned.
30
+}
31
+\description{
32
+patternMatch4Parallel
33
+}
34
+\seealso{
35
+\code{\link{fastcluster}}
36
+}
... ...
@@ -4,14 +4,15 @@
4 4
 \alias{createGWCoGAPSSets}
5 5
 \title{createGWCoGAPSSets}
6 6
 \usage{
7
-createGWCoGAPSSets(data = D, nSets = nSets,
8
-  outRDA = "GenesInCoGAPSSets.Rda", keep = TRUE)
7
+createGWCoGAPSSets(D, S, nSets, simulationName, path = "")
9 8
 }
10 9
 \arguments{
11
-\item{data}{data matrix with unique rownames}
12
-
13 10
 \item{nSets}{number of sets for parallelization}
14 11
 
12
+\item{path}{character string indicating were to save resulting data objects}
13
+
14
+\item{data}{data matrix with unique rownames}
15
+
15 16
 \item{outRDA}{name of output file}
16 17
 
17 18
 \item{keep}{logical indicating whether or not to save gene set list. Default is TRUE.}
... ...
@@ -4,19 +4,22 @@
4 4
 \alias{createscCoGAPSSets}
5 5
 \title{Create Gene Sets for scCoGAPS}
6 6
 \usage{
7
-createscCoGAPSSets(D, S, nSets, simulationName, samplingRatio = NULL)
7
+createscCoGAPSSets(D, nSets, simulationName, samplingRatio = NULL,
8
+  path = "")
8 9
 }
9 10
 \arguments{
10 11
 \item{D}{data matrix}
11 12
 
12
-\item{S}{uncertainty matrix}
13
-
14 13
 \item{nSets}{number of sets to partition the data into}
15 14
 
16 15
 \item{simulationName}{name used to identify files created by this simulation}
17 16
 
18 17
 \item{samplingRatio}{vector of relative quantities to use for sampling celltypes}
19 18
 
19
+\item{path}{character string indicating were to save resulting data objects. default is current working dir}
20
+
21
+\item{S}{uncertainty matrix}
22
+
20 23
 \item{annotionObj}{vector of same length as number of columns of D}
21 24
 }
22 25
 \value{
... ...
@@ -4,8 +4,8 @@
4 4
 \alias{patternMatch4Parallel}
5 5
 \title{patternMatch4Parallel}
6 6
 \usage{
7
-patternMatch4Parallel(Ptot, nSets, cnt, minNS, cluster.method = "complete",
8
-  ignore.NA = FALSE, bySet = FALSE, ...)
7
+patternMatch4Parallel(Ptot, nSets, cnt, minNS = NULL, maxNS = NULL,
8
+  cluster.method = "complete", ignore.NA = FALSE, bySet = FALSE, ...)
9 9
 }
10 10
 \arguments{
11 11
 \item{Ptot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
... ...
@@ -16,6 +16,8 @@ patternMatch4Parallel(Ptot, nSets, cnt, minNS, cluster.method = "complete",
16 16
 
17 17
 \item{minNS}{minimum of individual set contributions a cluster must contain}
18 18
 
19
+\item{maxNS}{max of individual set contributions a cluster must contain. default is nSets+minNS}
20
+
19 21
 \item{cluster.method}{the agglomeration method to be used for clustering}
20 22
 
21 23
 \item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
... ...
@@ -4,13 +4,13 @@
4 4
 \alias{patternMatch4singleCell}
5 5
 \title{patternMatch4singleCell}
6 6
 \usage{
7
-patternMatch4singleCell(Ptot, nSets, cnt, minNS, cluster.method = "complete",
7
+patternMatch4singleCell(Atot, nSets, cnt, minNS, cluster.method = "complete",
8 8
   ignore.NA = FALSE, bySet = FALSE, ...)
9 9
 }
10 10
 \arguments{
11
-\item{Ptot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
11
+\item{Atot}{a matrix containing the total by set estimates of Amean output from \code{reOrderBySet}}
12 12
 
13
-\item{nSets}{number of parallel sets used to generate \code{Ptot}}
13
+\item{nSets}{number of parallel sets used to generate \code{Atot}}
14 14
 
15 15
 \item{cnt}{number of branches at which to cut dendrogram}
16 16
 
... ...
@@ -20,7 +20,7 @@ patternMatch4singleCell(Ptot, nSets, cnt, minNS, cluster.method = "complete",
20 20
 
21 21
 \item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
22 22
 
23
-\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Ptot}}
23
+\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}}
24 24
 
25 25
 \item{...}{additional parameters for \code{agnes}}
26 26
 }
27 27
new file mode 100644
... ...
@@ -0,0 +1,21 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/postFixed4SC.R
3
+\name{postFixed4SC}
4
+\alias{postFixed4SC}
5
+\title{Post Processing of Parallel Output}
6
+\usage{
7
+postFixed4SC(AP.fixed, setAs)
8
+}
9
+\arguments{
10
+\item{AP.fixed}{output of parallel gapsMapRun calls with same FP}
11
+
12
+\item{setPs}{data.frame with rows giving fixed patterns for P used as input
13
+for gapsMapRun}
14
+}
15
+\value{
16
+list of two data.frames containing the A matrix estimates or their
17
+corresponding standard deviations from output of parallel CoGAPS
18
+}
19
+\description{
20
+Post Processing of Parallel Output
21
+}