Browse code

fixed recursion for patternmatcher and cellmatcher

Genevieve Stein-O'Brien authored on 29/03/2018 17:48:59
Showing 1 changed files

... ...
@@ -4,6 +4,7 @@
4 4
 #' @param nSets number of parallel sets used to generate \code{Ptot}
5 5
 #' @param cnt  number of branches at which to cut dendrogram
6 6
 #' @param minNS minimum of individual set contributions a cluster must contain
7
+#' @param maxNS max of individual set contributions a cluster must contain. default is nSets+minNS
7 8
 #' @param cluster.method the agglomeration method to be used for clustering
8 9
 #' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
9 10
 #' @param bySet logical indicating whether to return list of matched set solutions from \code{Ptot}
... ...
@@ -12,17 +13,18 @@
12 13
 #' concensus pattern is also returned.
13 14
 #' @seealso \code{\link{agnes}}
14 15
 #' @export
15
-patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS, 
16
-cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
17
-{
18
-    if (!is.null(minNS))
19
-        minNS=nSets/2
16
+patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS=NULL, maxNS=NULL, 
17
+    cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...){
18
+
19
+    if (!is.null(minNS)){minNS=nSets/2}
20
+    if (!is.null(maxNS)){maxNS=nSets+minNS}
20 21
 
21 22
     if (ignore.NA==FALSE & anyNA(Ptot))
22 23
         warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')
23 24
     if (ignore.NA==TRUE)
24 25
         Ptot <- Ptot[complete.cases(Ptot),]
25 26
 
27
+    corcut<-function(Ptot,minNS,cnt,cluster.method){
26 28
     # corr dist
27 29
     corr.dist=cor(t(Ptot))
28 30
     corr.dist=1-corr.dist
... ...
@@ -32,7 +34,6 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
32 34
     cut=cutree(as.hclust(clust),k=cnt)
33 35
     #save.image(file=paste("CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData"))
34 36
 
35
-    #drop n<4 and get weighted Avg
36 37
     cls=sort(unique(cut))
37 38
     cMNs=matrix(nrow=cnt,ncol=dim(Ptot)[2])
38 39
     rownames(cMNs)=cls
... ...
@@ -40,91 +41,52 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
40 41
 
41 42
     RtoMeanPattern <- list()
42 43
     PByClust <- list()
43
-    for(i in cls)
44
-    {
45
-        if (is.null(dim(Ptot[cut == i, ]))==TRUE)
46
-        {
47
-            cMNs[i,] <- Ptot[cut == i, ]
48
-            RtoMeanPattern[[i]] <- rep(1,length(Ptot[cut == i, ]))
49
-            PByClust[[i]] <- t(as.matrix(Ptot[cut == i, ]))
50
-        }
51
-        else
52
-        {
44
+    for(i in cls){
45
+        if (is.null(dim(Ptot[cut == i, ]))==TRUE){
46
+            next
47
+        } else if(dim(Ptot[cut == i, ])[1]< minNS){
48
+            next
49
+        } else {
53 50
             cMNs[i,]=colMeans(Ptot[cut==i,])
54 51
             PByClust[[i]] <- Ptot[cut==i,]
55 52
             nIN=sum(cut==i)
56 53
             RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)})
57 54
         }
58 55
     }
56
+    return(list("RtoMeanPattern"=RtoMeanPattern,"PByClust"=PByClust))
57
+    }    
59 58
 
60
-    #drop n<minNS 
61
-    PByClustDrop <- list()
62
-    RtoMPDrop <- list()
63
-    for(i in cls)
64
-    {
65
-        if (is.null(dim(PByClust[[i]])) == TRUE)
66
-            next
67
-        if (dim(PByClust[[i]])[1] < minNS)
68
-        {
69
-            next
70
-        }
71
-        else
72
-        {
73
-            #indx <- which(RtoMeanPattern[[i]]>.7,arr.ind = TRUE)
74
-            PByClustDrop <- append(PByClustDrop,list(PByClust[[i]]))
75
-            RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]]))
76
-        }
77
-    }
59
+    cc<-corcut(Ptot,minNS,cnt,cluster.method)
78 60
 
79
-    ### split by corr  (build in drop if below minNS)
80
-    PByCDS <- list()
81
-    RtoMPDS <- list()
82
-    for (j in 1:length(PByClustDrop))
83
-    {
84
-        if (is.null(dim(PByClustDrop[[j]]))==TRUE)
85
-        {
86
-            next
87
-        }
88
-        if (dim(PByClustDrop[[j]])[1]<minNS+nSets)
89
-        {
90
-            PByCDS <- append(PByCDS,PByClustDrop[j])
91
-            RtoMPDS <- append(RtoMPDS,RtoMPDrop[j])
92
-        }
93
-        if (dim(PByClustDrop[[j]])[1]>=minNS+nSets)
94
-        {
95
-            corr.distPBCD=cor(t(PByClustDrop[[j]]))
96
-            corr.distPBCD=1-corr.distPBCD
97
-            clustPBCD=agnes(x=corr.distPBCD,diss=TRUE,method="complete")
98
-            cutPBCD=cutree(as.hclust(clustPBCD),k=2)
99
-            g1 <- PByClustDrop[[j]][cutPBCD==1,]
100
-            PByCDS <- append(PByCDS,list(g1))
101
-            RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g1)[1],function(z) round(cor(x=g1[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==1,])),3))))
102
-            g2 <- PByClustDrop[[j]][cutPBCD==2,]
103
-            if (is.null(dim(g2)[1])==FALSE)
104
-            {
105
-                PByCDS <- append(PByCDS,list(g2))
106
-                RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g2)[1],function(z) round(cor(x=g2[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==2,])),3))))
61
+    ### split by maxNS
62
+    indx<-unlist(sapply(cc$PByClust,function(x) which(dim(x)[1]>maxNS)))
63
+    while(length(indx)>0){
64
+            icc<-corcut(cc$PByClust[[indx[1]]],minNS,2,cluster.method)
65
+            cc$PByClust[[indx[1]]]<-icc[[2]][[2]]
66
+            cc$RtoMeanPattern[[indx[1]]]<-icc[[1]][[2]]
67
+            if(is.null(icc[[2]][[1]])){ 
68
+                indx<-unlist(sapply(cc$PByClust,function(x) which(dim(x)[1]>maxNS)))
69
+                next
70
+            } else {
71
+                cc$PByClust<-append(cc$PByClust,icc[[2]][1])
72
+                cc$RtoMeanPattern<-append(cc$PByClust,icc[[1]][[1]])
73
+                indx<-unlist(sapply(cc$PByClust,function(x) which(dim(x)[1]>maxNS)))
107 74
             }
108
-        }
109 75
     }
110 76
 
111
-    #weighted.mean(PByClustDrop[[1]],RtoMPDrop[[1]])
112
-    PByCDSWavg<- t(sapply(1:length(PByCDS),function(z) apply(PByCDS[[z]],2,function(x) weighted.mean(x,(RtoMPDS[[z]])^3))))
113
-    rownames(PByCDSWavg) <- lapply(1:length(PByCDS),function(x) paste("Pattern",x))
77
+    #weighted.mean
78
+    PByCDSWavg<- t(sapply(1:length(cc$PByClust),function(z) apply(cc$PByClust[[z]],2,function(x) 
79
+        weighted.mean(x,(cc$RtoMeanPattern[[z]])^3))))
80
+    rownames(PByCDSWavg) <- lapply(1:length(cc$PByClust),function(x) paste("Pattern",x))
114 81
 
115 82
     #scale ps
116 83
     Pmax <- apply(PByCDSWavg,1,max)
117 84
     PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1],function(x) PByCDSWavg[x,]/Pmax[x]))
118 85
     rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg)
119 86
 
120
-    if(bySet)
121
-    {
122
-        # return by set and final
123
-        PBySet<-PByCDS
124
-        return(list("consenusPatterns"=PByCDSWavgScaled,"PBySet"=PBySet,"RtoMPDS"=RtoMPDS))
125
-    }
126
-    else
127
-    {
128
-        return(PByCDSWavgScaled)
87
+    if(bySet){
88
+        return(list("consenusPatterns"=PByCDSWavgScaled,"PBySet"=cc))
89
+    } else {
90
+        return("consenusPatterns"=PByCDSWavgScaled)
129 91
     }
130 92
 }