Browse code

Fixed bug related to factors/integers for z/y cluster labels within internal functions

Joshua D. Campbell authored on 22/10/2022 00:27:29
Showing 1 changed files
... ...
@@ -47,7 +47,7 @@
47 47
       verbose = FALSE,
48 48
       reorder = FALSE
49 49
     )
50
-    clustSplit[[i]] <- celdaClusters(clustLabel)$z
50
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$z)
51 51
   }
52 52
 
53 53
   ## Find second best assignment give current assignments for each cell
... ...
@@ -205,7 +205,7 @@
205 205
       verbose = FALSE,
206 206
       reorder = FALSE
207 207
     )
208
-    clustSplit[[i]] <- celdaClusters(clustLabel)$z
208
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$z)
209 209
   }
210 210
 
211 211
   ## Find second best assignment give current assignments for each cell
... ...
@@ -399,7 +399,7 @@
399 399
         verbose = FALSE,
400 400
         reorder = FALSE
401 401
       )
402
-      tempZ[ix] <- celdaClusters(clustLabel)$z + currentTopZ
402
+      tempZ[ix] <- as.integer(celdaClusters(clustLabel)$z) + currentTopZ
403 403
     }
404 404
     currentTopZ <- max(tempZ, na.rm = TRUE)
405 405
   }
... ...
@@ -443,7 +443,7 @@
443 443
       verbose = FALSE,
444 444
       reorder = FALSE
445 445
     )
446
-    clustSplit[[i]] <- celdaClusters(clustLabel)$y
446
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$y)
447 447
   }
448 448
 
449 449
   ## Find second best assignment give current assignments for each cell
... ...
@@ -637,7 +637,7 @@
637 637
       verbose = FALSE,
638 638
       reorder = FALSE
639 639
     )
640
-    clustSplit[[i]] <- celdaClusters(clustLabel)$y
640
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$y)
641 641
   }
642 642
 
643 643
   ## Find second best assignment give current assignments for each cell
Browse code

Refactored functions performing matrix operations on different classes (sparse, numeric, integer) to be more centralized in the matrixSums.R file.

Joshua D. Campbell authored on 05/04/2021 18:20:44
Showing 1 changed files
... ...
@@ -44,7 +44,8 @@
44 44
       maxIter = 5,
45 45
       splitOnIter = -1,
46 46
       splitOnLast = FALSE,
47
-      verbose = FALSE
47
+      verbose = FALSE,
48
+      reorder = FALSE
48 49
     )
49 50
     clustSplit[[i]] <- celdaClusters(clustLabel)$z
50 51
   }
... ...
@@ -201,7 +202,8 @@
201 202
       maxIter = 5,
202 203
       splitOnIter = -1,
203 204
       splitOnLast = FALSE,
204
-      verbose = FALSE
205
+      verbose = FALSE,
206
+      reorder = FALSE
205 207
     )
206 208
     clustSplit[[i]] <- celdaClusters(clustLabel)$z
207 209
   }
... ...
@@ -394,7 +396,8 @@
394 396
         maxIter = 5,
395 397
         splitOnIter = -1,
396 398
         splitOnLast = FALSE,
397
-        verbose = FALSE
399
+        verbose = FALSE,
400
+        reorder = FALSE
398 401
       )
399 402
       tempZ[ix] <- celdaClusters(clustLabel)$z + currentTopZ
400 403
     }
... ...
@@ -437,7 +440,8 @@
437 440
       maxIter = 5,
438 441
       splitOnIter = -1,
439 442
       splitOnLast = FALSE,
440
-      verbose = FALSE
443
+      verbose = FALSE,
444
+      reorder = FALSE
441 445
     )
442 446
     clustSplit[[i]] <- celdaClusters(clustLabel)$y
443 447
   }
... ...
@@ -630,7 +634,8 @@
630 634
       maxIter = 5,
631 635
       splitOnIter = -1,
632 636
       splitOnLast = FALSE,
633
-      verbose = FALSE
637
+      verbose = FALSE,
638
+      reorder = FALSE
634 639
     )
635 640
     clustSplit[[i]] <- celdaClusters(clustLabel)$y
636 641
   }
Browse code

sce StateHeatmap

zhewa authored on 19/05/2020 07:37:46
Showing 1 changed files
... ...
@@ -46,7 +46,7 @@
46 46
       splitOnLast = FALSE,
47 47
       verbose = FALSE
48 48
     )
49
-    clustSplit[[i]] <- clustLabel@clusters$z
49
+    clustSplit[[i]] <- celdaClusters(clustLabel)$z
50 50
   }
51 51
 
52 52
   ## Find second best assignment give current assignments for each cell
... ...
@@ -203,7 +203,7 @@
203 203
       splitOnLast = FALSE,
204 204
       verbose = FALSE
205 205
     )
206
-    clustSplit[[i]] <- clustLabel@clusters$z
206
+    clustSplit[[i]] <- celdaClusters(clustLabel)$z
207 207
   }
208 208
 
209 209
   ## Find second best assignment give current assignments for each cell
... ...
@@ -396,7 +396,7 @@
396 396
         splitOnLast = FALSE,
397 397
         verbose = FALSE
398 398
       )
399
-      tempZ[ix] <- clustLabel@clusters$z + currentTopZ
399
+      tempZ[ix] <- celdaClusters(clustLabel)$z + currentTopZ
400 400
     }
401 401
     currentTopZ <- max(tempZ, na.rm = TRUE)
402 402
   }
... ...
@@ -439,7 +439,7 @@
439 439
       splitOnLast = FALSE,
440 440
       verbose = FALSE
441 441
     )
442
-    clustSplit[[i]] <- clustLabel@clusters$y
442
+    clustSplit[[i]] <- celdaClusters(clustLabel)$y
443 443
   }
444 444
 
445 445
   ## Find second best assignment give current assignments for each cell
... ...
@@ -632,7 +632,7 @@
632 632
       splitOnLast = FALSE,
633 633
       verbose = FALSE
634 634
     )
635
-    clustSplit[[i]] <- clustLabel@clusters$y
635
+    clustSplit[[i]] <- celdaClusters(clustLabel)$y
636 636
   }
637 637
 
638 638
   ## Find second best assignment give current assignments for each cell
Browse code

sce celdaGridSearch update docs

zhewa authored on 10/05/2020 13:27:03
Showing 1 changed files
... ...
@@ -46,7 +46,7 @@
46 46
       splitOnLast = FALSE,
47 47
       verbose = FALSE
48 48
     )
49
-    clustSplit[[i]] <- clusters(clustLabel)$z
49
+    clustSplit[[i]] <- clustLabel@clusters$z
50 50
   }
51 51
 
52 52
   ## Find second best assignment give current assignments for each cell
... ...
@@ -203,7 +203,7 @@
203 203
       splitOnLast = FALSE,
204 204
       verbose = FALSE
205 205
     )
206
-    clustSplit[[i]] <- clusters(clustLabel)$z
206
+    clustSplit[[i]] <- clustLabel@clusters$z
207 207
   }
208 208
 
209 209
   ## Find second best assignment give current assignments for each cell
... ...
@@ -396,7 +396,7 @@
396 396
         splitOnLast = FALSE,
397 397
         verbose = FALSE
398 398
       )
399
-      tempZ[ix] <- clusters(clustLabel)$z + currentTopZ
399
+      tempZ[ix] <- clustLabel@clusters$z + currentTopZ
400 400
     }
401 401
     currentTopZ <- max(tempZ, na.rm = TRUE)
402 402
   }
... ...
@@ -439,7 +439,7 @@
439 439
       splitOnLast = FALSE,
440 440
       verbose = FALSE
441 441
     )
442
-    clustSplit[[i]] <- clusters(clustLabel)$y
442
+    clustSplit[[i]] <- clustLabel@clusters$y
443 443
   }
444 444
 
445 445
   ## Find second best assignment give current assignments for each cell
... ...
@@ -632,7 +632,7 @@
632 632
       splitOnLast = FALSE,
633 633
       verbose = FALSE
634 634
     )
635
-    clustSplit[[i]] <- clusters(clustLabel)$y
635
+    clustSplit[[i]] <- clustLabel@clusters$y
636 636
   }
637 637
 
638 638
   ## Find second best assignment give current assignments for each cell
Browse code

Ran styler and fixed lints

Joshua D. Campbell authored on 06/04/2020 23:58:56
Showing 1 changed files
... ...
@@ -1,205 +1,296 @@
1 1
 # .cCCalcLL = function(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta)
2 2
 .cCSplitZ <- function(counts,
3
-    mCPByS,
4
-    nGByCP,
5
-    nCP,
6
-    s,
7
-    z,
8
-    K,
9
-    nS,
10
-    nG,
11
-    alpha,
12
-    beta,
13
-    zProb,
14
-    maxClustersToTry = 10,
15
-    minCell = 3) {
16
-
17
-    ## Identify clusters to split
18
-    zTa <- tabulate(z, K)
19
-    zToSplit <- which(zTa >= minCell)
20
-    zNonEmpty <- which(zTa > 0)
21
-
22
-    if (length(zToSplit) == 0) {
23
-        m <- paste0(date(),
24
-            " .... Cluster sizes too small. No additional splitting was",
25
-            " performed.")
26
-        return(list(z = z,
27
-            mCPByS,
28
-            nGByCP,
29
-            nCP = nCP,
30
-            message = m))
31
-    }
32
-
33
-    ## Loop through each split-able Z and perform split
34
-    clustSplit <- vector("list", K)
35
-    for (i in zToSplit) {
36
-        clustLabel <- .celda_C(
37
-            counts[, z == i],
38
-            K = 2,
39
-            zInitialize = "random",
40
-            maxIter = 5,
41
-            splitOnIter = -1,
42
-            splitOnLast = FALSE,
43
-            verbose = FALSE
44
-        )
45
-        clustSplit[[i]] <- clusters(clustLabel)$z
46
-    }
47
-
48
-    ## Find second best assignment give current assignments for each cell
49
-    zProb[cbind(seq(nrow(zProb)), z)] <- NA
50
-    zSecond <- apply(zProb, 1, which.max)
51
-
52
-    ## Set up initial variables
53
-    zSplit <- matrix(NA,
54
-        nrow = length(z),
55
-        ncol = length(zToSplit) * maxClustersToTry)
56
-    zSplitLl <- rep(NA, times = length(zToSplit) * maxClustersToTry)
57
-    zSplitLl[1] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta)
58
-    zSplit[, 1] <- z
59
-
60
-    ## Select worst clusters to test for reshuffling
61
-    previousZ <- z
62
-    llShuffle <- rep(NA, K)
63
-    for (i in zNonEmpty) {
64
-        ix <- z == i
65
-        newZ <- z
66
-        newZ[ix] <- zSecond[ix]
67
-
68
-        p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K)
69
-        nGByCP <- p$nGByCP
70
-        mCPByS <- p$mCPByS
71
-        llShuffle[i] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta)
72
-        previousZ <- newZ
73
-    }
74
-    zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA),
75
-        n = maxClustersToTry)
76
-
77
-    pairs <- c(NA, NA)
78
-    splitIx <- 2
79
-    for (i in zToShuffle) {
80
-        otherClusters <- setdiff(zToSplit, i)
81
-
82
-        for (j in otherClusters) {
83
-            newZ <- z
84
-
85
-            ## Assign cluster i to the next most similar cluster (excluding
86
-            ## cluster j)
87
-            ## as defined above by the correlation
88
-            ixToMove <- z == i
89
-            newZ[ixToMove] <- zSecond[ixToMove]
90
-
91
-            ## Split cluster j according to the clustering defined above
92
-            ixToSplit <- z == j
93
-            newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i)
94
-
95
-            p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K)
96
-            nGByCP <- p$nGByCP
97
-            mCPByS <- p$mCPByS
98
-
99
-            ## Calculate likelihood of split
100
-            zSplitLl[splitIx] <- .cCCalcLL(mCPByS,
101
-                nGByCP,
102
-                s,
103
-                z,
104
-                K,
105
-                nS,
106
-                nG,
107
-                alpha,
108
-                beta)
109
-            zSplit[, splitIx] <- newZ
110
-            splitIx <- splitIx + 1L
111
-            previousZ <- newZ
112
-
113
-            pairs <- rbind(pairs, c(i, j))
114
-        }
115
-    }
116
-
117
-    select <- which.max(zSplitLl)
3
+                      mCPByS,
4
+                      nGByCP,
5
+                      nCP,
6
+                      s,
7
+                      z,
8
+                      K,
9
+                      nS,
10
+                      nG,
11
+                      alpha,
12
+                      beta,
13
+                      zProb,
14
+                      maxClustersToTry = 10,
15
+                      minCell = 3) {
16
+
17
+  ## Identify clusters to split
18
+  zTa <- tabulate(z, K)
19
+  zToSplit <- which(zTa >= minCell)
20
+  zNonEmpty <- which(zTa > 0)
21
+
22
+  if (length(zToSplit) == 0) {
23
+    m <- paste0(
24
+      date(),
25
+      " .... Cluster sizes too small. No additional splitting was",
26
+      " performed."
27
+    )
28
+    return(list(
29
+      z = z,
30
+      mCPByS,
31
+      nGByCP,
32
+      nCP = nCP,
33
+      message = m
34
+    ))
35
+  }
36
+
37
+  ## Loop through each split-able Z and perform split
38
+  clustSplit <- vector("list", K)
39
+  for (i in zToSplit) {
40
+    clustLabel <- .celda_C(
41
+      counts[, z == i],
42
+      K = 2,
43
+      zInitialize = "random",
44
+      maxIter = 5,
45
+      splitOnIter = -1,
46
+      splitOnLast = FALSE,
47
+      verbose = FALSE
48
+    )
49
+    clustSplit[[i]] <- clusters(clustLabel)$z
50
+  }
51
+
52
+  ## Find second best assignment give current assignments for each cell
53
+  zProb[cbind(seq(nrow(zProb)), z)] <- NA
54
+  zSecond <- apply(zProb, 1, which.max)
55
+
56
+  ## Set up initial variables
57
+  zSplit <- matrix(NA,
58
+    nrow = length(z),
59
+    ncol = length(zToSplit) * maxClustersToTry
60
+  )
61
+  zSplitLl <- rep(NA, times = length(zToSplit) * maxClustersToTry)
62
+  zSplitLl[1] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta)
63
+  zSplit[, 1] <- z
64
+
65
+  ## Select worst clusters to test for reshuffling
66
+  previousZ <- z
67
+  llShuffle <- rep(NA, K)
68
+  for (i in zNonEmpty) {
69
+    ix <- z == i
70
+    newZ <- z
71
+    newZ[ix] <- zSecond[ix]
72
+
73
+    p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K)
74
+    nGByCP <- p$nGByCP
75
+    mCPByS <- p$mCPByS
76
+    llShuffle[i] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta)
77
+    previousZ <- newZ
78
+  }
79
+  zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA),
80
+    n = maxClustersToTry
81
+  )
82
+
83
+  pairs <- c(NA, NA)
84
+  splitIx <- 2
85
+  for (i in zToShuffle) {
86
+    otherClusters <- setdiff(zToSplit, i)
87
+
88
+    for (j in otherClusters) {
89
+      newZ <- z
90
+
91
+      ## Assign cluster i to the next most similar cluster (excluding
92
+      ## cluster j)
93
+      ## as defined above by the correlation
94
+      ixToMove <- z == i
95
+      newZ[ixToMove] <- zSecond[ixToMove]
96
+
97
+      ## Split cluster j according to the clustering defined above
98
+      ixToSplit <- z == j
99
+      newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i)
100
+
101
+      p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K)
102
+      nGByCP <- p$nGByCP
103
+      mCPByS <- p$mCPByS
104
+
105
+      ## Calculate likelihood of split
106
+      zSplitLl[splitIx] <- .cCCalcLL(
107
+        mCPByS,
108
+        nGByCP,
109
+        s,
110
+        z,
111
+        K,
112
+        nS,
113
+        nG,
114
+        alpha,
115
+        beta
116
+      )
117
+      zSplit[, splitIx] <- newZ
118
+      splitIx <- splitIx + 1L
119
+      previousZ <- newZ
118 120
 
119
-    if (select == 1) {
120
-        m <- paste0(date(), " .... No additional splitting was performed.")
121
-    } else {
122
-        m <- paste0(date(),
123
-            " .... Cluster ",
124
-            pairs[select, 1],
125
-            " was reassigned and cluster ",
126
-            pairs[select, 2],
127
-            " was split in two."
128
-        )
121
+      pairs <- rbind(pairs, c(i, j))
129 122
     }
130
-
131
-    p <- .cCReDecomposeCounts(counts, s, zSplit[, select], previousZ, nGByCP, K)
132
-    return(list(z = zSplit[, select],
133
-        mCPByS = p$mCPByS,
134
-        nGByCP = p$nGByCP,
135
-        nCP = p$nCP,
136
-        message = m))
123
+  }
124
+
125
+  select <- which.max(zSplitLl)
126
+
127
+  if (select == 1) {
128
+    m <- paste0(date(), " .... No additional splitting was performed.")
129
+  } else {
130
+    m <- paste0(
131
+      date(),
132
+      " .... Cluster ",
133
+      pairs[select, 1],
134
+      " was reassigned and cluster ",
135
+      pairs[select, 2],
136
+      " was split in two."
137
+    )
138
+  }
139
+
140
+  p <- .cCReDecomposeCounts(counts, s, zSplit[, select], previousZ, nGByCP, K)
141
+  return(list(
142
+    z = zSplit[, select],
143
+    mCPByS = p$mCPByS,
144
+    nGByCP = p$nGByCP,
145
+    nCP = p$nCP,
146
+    message = m
147
+  ))
137 148
 }
138 149
 
139 150
 
140 151
 # .cCGCalcLL = function(K, L, mCPByS, nTSByCP, nByG, nByTS, nGByTS,
141 152
 # nS, nG, alpha, beta, delta, gamma)
142 153
 .cCGSplitZ <- function(counts,
154
+                       mCPByS,
155
+                       nTSByC,
156
+                       nTSByCP,
157
+                       nByG,
158
+                       nByTS,
159
+                       nGByTS,
160
+                       nCP,
161
+                       s,
162
+                       z,
163
+                       K,
164
+                       L,
165
+                       nS,
166
+                       nG,
167
+                       alpha,
168
+                       beta,
169
+                       delta,
170
+                       gamma,
171
+                       zProb,
172
+                       maxClustersToTry = 10,
173
+                       minCell = 3) {
174
+
175
+  ## Identify clusters to split
176
+  zTa <- tabulate(z, K)
177
+  zToSplit <- which(zTa >= minCell)
178
+  zNonEmpty <- which(zTa > 0)
179
+
180
+  if (length(zToSplit) == 0) {
181
+    m <- paste0(
182
+      date(),
183
+      " .... Cluster sizes too small. No additional splitting was",
184
+      " performed."
185
+    )
186
+    return(list(
187
+      z = z,
188
+      mCPByS = mCPByS,
189
+      nTSByCP = nTSByCP,
190
+      nCP = nCP,
191
+      message = m
192
+    ))
193
+  }
194
+
195
+  ## Loop through each split-able Z and perform split
196
+  clustSplit <- vector("list", K)
197
+  for (i in zToSplit) {
198
+    clustLabel <- .celda_C(counts[, z == i],
199
+      K = 2,
200
+      zInitialize = "random",
201
+      maxIter = 5,
202
+      splitOnIter = -1,
203
+      splitOnLast = FALSE,
204
+      verbose = FALSE
205
+    )
206
+    clustSplit[[i]] <- clusters(clustLabel)$z
207
+  }
208
+
209
+  ## Find second best assignment give current assignments for each cell
210
+  zProb[cbind(seq(nrow(zProb)), z)] <- NA
211
+  zSecond <- apply(zProb, 1, which.max)
212
+
213
+  ## Set up initial variables
214
+  zSplit <- matrix(NA,
215
+    nrow = length(z),
216
+    ncol = length(zToSplit) * maxClustersToTry
217
+  )
218
+  zSplitLl <- rep(NA, ncol = length(zToSplit) * maxClustersToTry)
219
+  zSplitLl[1] <- .cCGCalcLL(
220
+    K,
221
+    L,
143 222
     mCPByS,
144
-    nTSByC,
145 223
     nTSByCP,
146 224
     nByG,
147 225
     nByTS,
148 226
     nGByTS,
149
-    nCP,
150
-    s,
151
-    z,
152
-    K,
153
-    L,
154 227
     nS,
155 228
     nG,
156 229
     alpha,
157 230
     beta,
158 231
     delta,
159
-    gamma,
160
-    zProb,
161
-    maxClustersToTry = 10,
162
-    minCell = 3) {
163
-
164
-    ## Identify clusters to split
165
-    zTa <- tabulate(z, K)
166
-    zToSplit <- which(zTa >= minCell)
167
-    zNonEmpty <- which(zTa > 0)
168
-
169
-    if (length(zToSplit) == 0) {
170
-        m <- paste0(date(),
171
-            " .... Cluster sizes too small. No additional splitting was",
172
-            " performed.")
173
-        return(list(z = z,
174
-            mCPByS = mCPByS,
175
-            nTSByCP = nTSByCP,
176
-            nCP = nCP,
177
-            message = m))
178
-    }
179
-
180
-    ## Loop through each split-able Z and perform split
181
-    clustSplit <- vector("list", K)
182
-    for (i in zToSplit) {
183
-        clustLabel <- .celda_C(counts[, z == i],
184
-            K = 2,
185
-            zInitialize = "random",
186
-            maxIter = 5,
187
-            splitOnIter = -1,
188
-            splitOnLast = FALSE,
189
-            verbose = FALSE)
190
-        clustSplit[[i]] <- clusters(clustLabel)$z
191
-    }
192
-
193
-    ## Find second best assignment give current assignments for each cell
194
-    zProb[cbind(seq(nrow(zProb)), z)] <- NA
195
-    zSecond <- apply(zProb, 1, which.max)
196
-
197
-    ## Set up initial variables
198
-    zSplit <- matrix(NA,
199
-        nrow = length(z),
200
-        ncol = length(zToSplit) * maxClustersToTry)
201
-    zSplitLl <- rep(NA, ncol = length(zToSplit) * maxClustersToTry)
202
-    zSplitLl[1] <- .cCGCalcLL(K,
232
+    gamma
233
+  )
234
+  zSplit[, 1] <- z
235
+
236
+  ## Select worst clusters to test for reshuffling
237
+  previousZ <- z
238
+  llShuffle <- rep(NA, K)
239
+  for (i in zNonEmpty) {
240
+    ix <- z == i
241
+    newZ <- z
242
+    newZ[ix] <- zSecond[ix]
243
+
244
+    p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K)
245
+    nTSByCP <- p$nGByCP
246
+    mCPByS <- p$mCPByS
247
+    llShuffle[i] <- .cCGCalcLL(
248
+      K,
249
+      L,
250
+      mCPByS,
251
+      nTSByCP,
252
+      nByG,
253
+      nByTS,
254
+      nGByTS,
255
+      nS,
256
+      nG,
257
+      alpha,
258
+      beta,
259
+      delta,
260
+      gamma
261
+    )
262
+    previousZ <- newZ
263
+  }
264
+  zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA),
265
+    n = maxClustersToTry
266
+  )
267
+
268
+
269
+  pairs <- c(NA, NA)
270
+  splitIx <- 2
271
+  for (i in zToShuffle) {
272
+    otherClusters <- setdiff(zToSplit, i)
273
+
274
+    for (j in otherClusters) {
275
+      newZ <- z
276
+
277
+      ## Assign cluster i to the next most similar cluster (excluding
278
+      ## cluster j)
279
+      ## as defined above by the correlation
280
+      ixToMove <- z == i
281
+      newZ[ixToMove] <- zSecond[ixToMove]
282
+
283
+      ## Split cluster j according to the clustering defined above
284
+      ixToSplit <- z == j
285
+      newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i)
286
+
287
+      p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K)
288
+      nTSByCP <- p$nGByCP
289
+      mCPByS <- p$mCPByS
290
+
291
+      ## Calculate likelihood of split
292
+      zSplitLl[splitIx] <- .cCGCalcLL(
293
+        K,
203 294
         L,
204 295
         mCPByS,
205 296
         nTSByCP,
... ...
@@ -211,211 +302,233 @@
211 302
         alpha,
212 303
         beta,
213 304
         delta,
214
-        gamma)
215
-    zSplit[, 1] <- z
216
-
217
-    ## Select worst clusters to test for reshuffling
218
-    previousZ <- z
219
-    llShuffle <- rep(NA, K)
220
-    for (i in zNonEmpty) {
221
-        ix <- z == i
222
-        newZ <- z
223
-        newZ[ix] <- zSecond[ix]
224
-
225
-        p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K)
226
-        nTSByCP <- p$nGByCP
227
-        mCPByS <- p$mCPByS
228
-        llShuffle[i] <- .cCGCalcLL(K,
229
-            L,
230
-            mCPByS,
231
-            nTSByCP,
232
-            nByG,
233
-            nByTS,
234
-            nGByTS,
235
-            nS,
236
-            nG,
237
-            alpha,
238
-            beta,
239
-            delta,
240
-            gamma)
241
-        previousZ <- newZ
242
-    }
243
-    zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA),
244
-        n = maxClustersToTry)
245
-
246
-
247
-    pairs <- c(NA, NA)
248
-    splitIx <- 2
249
-    for (i in zToShuffle) {
250
-        otherClusters <- setdiff(zToSplit, i)
251
-
252
-        for (j in otherClusters) {
253
-            newZ <- z
254
-
255
-            ## Assign cluster i to the next most similar cluster (excluding
256
-            ## cluster j)
257
-            ## as defined above by the correlation
258
-            ixToMove <- z == i
259
-            newZ[ixToMove] <- zSecond[ixToMove]
260
-
261
-            ## Split cluster j according to the clustering defined above
262
-            ixToSplit <- z == j
263
-            newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i)
264
-
265
-            p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K)
266
-            nTSByCP <- p$nGByCP
267
-            mCPByS <- p$mCPByS
268
-
269
-            ## Calculate likelihood of split
270
-            zSplitLl[splitIx] <- .cCGCalcLL(K,
271
-                L,
272
-                mCPByS,
273
-                nTSByCP,
274
-                nByG,
275
-                nByTS,
276
-                nGByTS,
277
-                nS,
278
-                nG,
279
-                alpha,
280
-                beta,
281
-                delta,
282
-                gamma)
283
-            zSplit[, splitIx] <- newZ
284
-            splitIx <- splitIx + 1L
285
-            previousZ <- newZ
286
-
287
-            pairs <- rbind(pairs, c(i, j))
288
-        }
289
-    }
305
+        gamma
306
+      )
307
+      zSplit[, splitIx] <- newZ
308
+      splitIx <- splitIx + 1L
309
+      previousZ <- newZ
290 310
 
291
-    select <- which.max(zSplitLl)
292
-
293
-    if (select == 1) {
294
-        m <- paste0(date(), " .... No additional splitting was performed.")
295
-    } else {
296
-        m <- paste0(date(),
297
-            " .... Cluster ",
298
-            pairs[select, 1],
299
-            " was reassigned and cluster ",
300
-            pairs[select, 2],
301
-            " was split in two.")
311
+      pairs <- rbind(pairs, c(i, j))
302 312
     }
303
-
304
-    p <- .cCReDecomposeCounts(nTSByC,
305
-        s,
306
-        zSplit[, select],
307
-        previousZ,
308
-        nTSByCP,
309
-        K)
310
-    return(list(z = zSplit[, select],
311
-        mCPByS = p$mCPByS,
312
-        nTSByCP = p$nGByCP,
313
-        nCP = p$nCP,
314
-        message = m))
313
+  }
314
+
315
+  select <- which.max(zSplitLl)
316
+
317
+  if (select == 1) {
318
+    m <- paste0(date(), " .... No additional splitting was performed.")
319
+  } else {
320
+    m <- paste0(
321
+      date(),
322
+      " .... Cluster ",
323
+      pairs[select, 1],
324
+      " was reassigned and cluster ",
325
+      pairs[select, 2],
326
+      " was split in two."
327
+    )
328
+  }
329
+
330
+  p <- .cCReDecomposeCounts(
331
+    nTSByC,
332
+    s,
333
+    zSplit[, select],
334
+    previousZ,
335
+    nTSByCP,
336
+    K
337
+  )
338
+  return(list(
339
+    z = zSplit[, select],
340
+    mCPByS = p$mCPByS,
341
+    nTSByCP = p$nGByCP,
342
+    nCP = p$nCP,
343
+    message = m
344
+  ))
315 345
 }
316 346
 
317 347
 
318 348
 .cCGSplitY <- function(counts,
319
-    y,
349
+                       y,
350
+                       mCPByS,
351
+                       nGByCP,
352
+                       nTSByC,
353
+                       nTSByCP,
354
+                       nByG,
355
+                       nByTS,
356
+                       nGByTS,
357
+                       nCP,
358
+                       s,
359
+                       z,
360
+                       K,
361
+                       L,
362
+                       nS,
363
+                       nG,
364
+                       alpha,
365
+                       beta,
366
+                       delta,
367
+                       gamma,
368
+                       yProb,
369
+                       maxClustersToTry = 10,
370
+                       KSubclusters = 10,
371
+                       minCell = 3) {
372
+
373
+  #########################
374
+  ## First, the cell dimension of the original matrix will be reduced by
375
+  ## splitting each z cluster into 'KSubclusters'.
376
+  #########################
377
+
378
+  ## This will not be as big as the original matrix (which can take a lot of
379
+  ## time to process with large number of cells), but not as small as the
380
+  ## 'nGByCP' with current z assignments
381
+
382
+  zTa <- tabulate(z, K)
383
+  zNonEmpty <- which(zTa > 0)
384
+  tempZ <- rep(0, length(z))
385
+  currentTopZ <- 0
386
+  for (i in zNonEmpty) {
387
+    ix <- z == i
388
+    if (zTa[i] <= KSubclusters) {
389
+      tempZ[ix] <- seq(currentTopZ + 1, currentTopZ + zTa[i])
390
+    } else {
391
+      clustLabel <- .celda_C(counts[, z == i],
392
+        K = KSubclusters,
393
+        zInitialize = "random",
394
+        maxIter = 5,
395
+        splitOnIter = -1,
396
+        splitOnLast = FALSE,
397
+        verbose = FALSE
398
+      )
399
+      tempZ[ix] <- clusters(clustLabel)$z + currentTopZ
400
+    }
401
+    currentTopZ <- max(tempZ, na.rm = TRUE)
402
+  }
403
+
404
+  ## Decompose counts according to new/temp z labels
405
+  tempNGByCP <- .colSumByGroup(counts, group = tempZ, K = currentTopZ)
406
+
407
+  #########################
408
+  ## Second, different y splits will be estimated and tested
409
+  #########################
410
+
411
+  ## Identify clusters to split
412
+  yTa <- tabulate(y, L)
413
+  yToSplit <- which(yTa >= minCell)
414
+  yNonEmpty <- which(yTa > 0)
415
+
416
+  if (length(yToSplit) == 0) {
417
+    m <- paste0(
418
+      date(),
419
+      " .... Cluster sizes too small. No additional splitting was",
420
+      " performed."
421
+    )
422
+    return(list(
423
+      y = y,
424
+      mCPByS = mCPByS,
425
+      nTSByCP = nTSByCP,
426
+      nCP = nCP,
427
+      message = m
428
+    ))
429
+  }
430
+
431
+  ## Loop through each split-able Z and perform split
432
+  clustSplit <- vector("list", L)
433
+  for (i in yToSplit) {
434
+    clustLabel <- .celda_G(tempNGByCP[y == i, ],
435
+      L = 2,
436
+      yInitialize = "random",
437
+      maxIter = 5,
438
+      splitOnIter = -1,
439
+      splitOnLast = FALSE,
440
+      verbose = FALSE
441
+    )
442
+    clustSplit[[i]] <- clusters(clustLabel)$y
443
+  }
444
+
445
+  ## Find second best assignment give current assignments for each cell