... | ... |
@@ -47,7 +47,7 @@ |
47 | 47 |
verbose = FALSE, |
48 | 48 |
reorder = FALSE |
49 | 49 |
) |
50 |
- clustSplit[[i]] <- celdaClusters(clustLabel)$z |
|
50 |
+ clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$z) |
|
51 | 51 |
} |
52 | 52 |
|
53 | 53 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -205,7 +205,7 @@ |
205 | 205 |
verbose = FALSE, |
206 | 206 |
reorder = FALSE |
207 | 207 |
) |
208 |
- clustSplit[[i]] <- celdaClusters(clustLabel)$z |
|
208 |
+ clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$z) |
|
209 | 209 |
} |
210 | 210 |
|
211 | 211 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -399,7 +399,7 @@ |
399 | 399 |
verbose = FALSE, |
400 | 400 |
reorder = FALSE |
401 | 401 |
) |
402 |
- tempZ[ix] <- celdaClusters(clustLabel)$z + currentTopZ |
|
402 |
+ tempZ[ix] <- as.integer(celdaClusters(clustLabel)$z) + currentTopZ |
|
403 | 403 |
} |
404 | 404 |
currentTopZ <- max(tempZ, na.rm = TRUE) |
405 | 405 |
} |
... | ... |
@@ -443,7 +443,7 @@ |
443 | 443 |
verbose = FALSE, |
444 | 444 |
reorder = FALSE |
445 | 445 |
) |
446 |
- clustSplit[[i]] <- celdaClusters(clustLabel)$y |
|
446 |
+ clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$y) |
|
447 | 447 |
} |
448 | 448 |
|
449 | 449 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -637,7 +637,7 @@ |
637 | 637 |
verbose = FALSE, |
638 | 638 |
reorder = FALSE |
639 | 639 |
) |
640 |
- clustSplit[[i]] <- celdaClusters(clustLabel)$y |
|
640 |
+ clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$y) |
|
641 | 641 |
} |
642 | 642 |
|
643 | 643 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -44,7 +44,8 @@ |
44 | 44 |
maxIter = 5, |
45 | 45 |
splitOnIter = -1, |
46 | 46 |
splitOnLast = FALSE, |
47 |
- verbose = FALSE |
|
47 |
+ verbose = FALSE, |
|
48 |
+ reorder = FALSE |
|
48 | 49 |
) |
49 | 50 |
clustSplit[[i]] <- celdaClusters(clustLabel)$z |
50 | 51 |
} |
... | ... |
@@ -201,7 +202,8 @@ |
201 | 202 |
maxIter = 5, |
202 | 203 |
splitOnIter = -1, |
203 | 204 |
splitOnLast = FALSE, |
204 |
- verbose = FALSE |
|
205 |
+ verbose = FALSE, |
|
206 |
+ reorder = FALSE |
|
205 | 207 |
) |
206 | 208 |
clustSplit[[i]] <- celdaClusters(clustLabel)$z |
207 | 209 |
} |
... | ... |
@@ -394,7 +396,8 @@ |
394 | 396 |
maxIter = 5, |
395 | 397 |
splitOnIter = -1, |
396 | 398 |
splitOnLast = FALSE, |
397 |
- verbose = FALSE |
|
399 |
+ verbose = FALSE, |
|
400 |
+ reorder = FALSE |
|
398 | 401 |
) |
399 | 402 |
tempZ[ix] <- celdaClusters(clustLabel)$z + currentTopZ |
400 | 403 |
} |
... | ... |
@@ -437,7 +440,8 @@ |
437 | 440 |
maxIter = 5, |
438 | 441 |
splitOnIter = -1, |
439 | 442 |
splitOnLast = FALSE, |
440 |
- verbose = FALSE |
|
443 |
+ verbose = FALSE, |
|
444 |
+ reorder = FALSE |
|
441 | 445 |
) |
442 | 446 |
clustSplit[[i]] <- celdaClusters(clustLabel)$y |
443 | 447 |
} |
... | ... |
@@ -630,7 +634,8 @@ |
630 | 634 |
maxIter = 5, |
631 | 635 |
splitOnIter = -1, |
632 | 636 |
splitOnLast = FALSE, |
633 |
- verbose = FALSE |
|
637 |
+ verbose = FALSE, |
|
638 |
+ reorder = FALSE |
|
634 | 639 |
) |
635 | 640 |
clustSplit[[i]] <- celdaClusters(clustLabel)$y |
636 | 641 |
} |
... | ... |
@@ -46,7 +46,7 @@ |
46 | 46 |
splitOnLast = FALSE, |
47 | 47 |
verbose = FALSE |
48 | 48 |
) |
49 |
- clustSplit[[i]] <- clustLabel@clusters$z |
|
49 |
+ clustSplit[[i]] <- celdaClusters(clustLabel)$z |
|
50 | 50 |
} |
51 | 51 |
|
52 | 52 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -203,7 +203,7 @@ |
203 | 203 |
splitOnLast = FALSE, |
204 | 204 |
verbose = FALSE |
205 | 205 |
) |
206 |
- clustSplit[[i]] <- clustLabel@clusters$z |
|
206 |
+ clustSplit[[i]] <- celdaClusters(clustLabel)$z |
|
207 | 207 |
} |
208 | 208 |
|
209 | 209 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -396,7 +396,7 @@ |
396 | 396 |
splitOnLast = FALSE, |
397 | 397 |
verbose = FALSE |
398 | 398 |
) |
399 |
- tempZ[ix] <- clustLabel@clusters$z + currentTopZ |
|
399 |
+ tempZ[ix] <- celdaClusters(clustLabel)$z + currentTopZ |
|
400 | 400 |
} |
401 | 401 |
currentTopZ <- max(tempZ, na.rm = TRUE) |
402 | 402 |
} |
... | ... |
@@ -439,7 +439,7 @@ |
439 | 439 |
splitOnLast = FALSE, |
440 | 440 |
verbose = FALSE |
441 | 441 |
) |
442 |
- clustSplit[[i]] <- clustLabel@clusters$y |
|
442 |
+ clustSplit[[i]] <- celdaClusters(clustLabel)$y |
|
443 | 443 |
} |
444 | 444 |
|
445 | 445 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -632,7 +632,7 @@ |
632 | 632 |
splitOnLast = FALSE, |
633 | 633 |
verbose = FALSE |
634 | 634 |
) |
635 |
- clustSplit[[i]] <- clustLabel@clusters$y |
|
635 |
+ clustSplit[[i]] <- celdaClusters(clustLabel)$y |
|
636 | 636 |
} |
637 | 637 |
|
638 | 638 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -46,7 +46,7 @@ |
46 | 46 |
splitOnLast = FALSE, |
47 | 47 |
verbose = FALSE |
48 | 48 |
) |
49 |
- clustSplit[[i]] <- clusters(clustLabel)$z |
|
49 |
+ clustSplit[[i]] <- clustLabel@clusters$z |
|
50 | 50 |
} |
51 | 51 |
|
52 | 52 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -203,7 +203,7 @@ |
203 | 203 |
splitOnLast = FALSE, |
204 | 204 |
verbose = FALSE |
205 | 205 |
) |
206 |
- clustSplit[[i]] <- clusters(clustLabel)$z |
|
206 |
+ clustSplit[[i]] <- clustLabel@clusters$z |
|
207 | 207 |
} |
208 | 208 |
|
209 | 209 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -396,7 +396,7 @@ |
396 | 396 |
splitOnLast = FALSE, |
397 | 397 |
verbose = FALSE |
398 | 398 |
) |
399 |
- tempZ[ix] <- clusters(clustLabel)$z + currentTopZ |
|
399 |
+ tempZ[ix] <- clustLabel@clusters$z + currentTopZ |
|
400 | 400 |
} |
401 | 401 |
currentTopZ <- max(tempZ, na.rm = TRUE) |
402 | 402 |
} |
... | ... |
@@ -439,7 +439,7 @@ |
439 | 439 |
splitOnLast = FALSE, |
440 | 440 |
verbose = FALSE |
441 | 441 |
) |
442 |
- clustSplit[[i]] <- clusters(clustLabel)$y |
|
442 |
+ clustSplit[[i]] <- clustLabel@clusters$y |
|
443 | 443 |
} |
444 | 444 |
|
445 | 445 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -632,7 +632,7 @@ |
632 | 632 |
splitOnLast = FALSE, |
633 | 633 |
verbose = FALSE |
634 | 634 |
) |
635 |
- clustSplit[[i]] <- clusters(clustLabel)$y |
|
635 |
+ clustSplit[[i]] <- clustLabel@clusters$y |
|
636 | 636 |
} |
637 | 637 |
|
638 | 638 |
## Find second best assignment give current assignments for each cell |
... | ... |
@@ -1,205 +1,296 @@ |
1 | 1 |
# .cCCalcLL = function(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta) |
2 | 2 |
.cCSplitZ <- function(counts, |
3 |
- mCPByS, |
|
4 |
- nGByCP, |
|
5 |
- nCP, |
|
6 |
- s, |
|
7 |
- z, |
|
8 |
- K, |
|
9 |
- nS, |
|
10 |
- nG, |
|
11 |
- alpha, |
|
12 |
- beta, |
|
13 |
- zProb, |
|
14 |
- maxClustersToTry = 10, |
|
15 |
- minCell = 3) { |
|
16 |
- |
|
17 |
- ## Identify clusters to split |
|
18 |
- zTa <- tabulate(z, K) |
|
19 |
- zToSplit <- which(zTa >= minCell) |
|
20 |
- zNonEmpty <- which(zTa > 0) |
|
21 |
- |
|
22 |
- if (length(zToSplit) == 0) { |
|
23 |
- m <- paste0(date(), |
|
24 |
- " .... Cluster sizes too small. No additional splitting was", |
|
25 |
- " performed.") |
|
26 |
- return(list(z = z, |
|
27 |
- mCPByS, |
|
28 |
- nGByCP, |
|
29 |
- nCP = nCP, |
|
30 |
- message = m)) |
|
31 |
- } |
|
32 |
- |
|
33 |
- ## Loop through each split-able Z and perform split |
|
34 |
- clustSplit <- vector("list", K) |
|
35 |
- for (i in zToSplit) { |
|
36 |
- clustLabel <- .celda_C( |
|
37 |
- counts[, z == i], |
|
38 |
- K = 2, |
|
39 |
- zInitialize = "random", |
|
40 |
- maxIter = 5, |
|
41 |
- splitOnIter = -1, |
|
42 |
- splitOnLast = FALSE, |
|
43 |
- verbose = FALSE |
|
44 |
- ) |
|
45 |
- clustSplit[[i]] <- clusters(clustLabel)$z |
|
46 |
- } |
|
47 |
- |
|
48 |
- ## Find second best assignment give current assignments for each cell |
|
49 |
- zProb[cbind(seq(nrow(zProb)), z)] <- NA |
|
50 |
- zSecond <- apply(zProb, 1, which.max) |
|
51 |
- |
|
52 |
- ## Set up initial variables |
|
53 |
- zSplit <- matrix(NA, |
|
54 |
- nrow = length(z), |
|
55 |
- ncol = length(zToSplit) * maxClustersToTry) |
|
56 |
- zSplitLl <- rep(NA, times = length(zToSplit) * maxClustersToTry) |
|
57 |
- zSplitLl[1] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta) |
|
58 |
- zSplit[, 1] <- z |
|
59 |
- |
|
60 |
- ## Select worst clusters to test for reshuffling |
|
61 |
- previousZ <- z |
|
62 |
- llShuffle <- rep(NA, K) |
|
63 |
- for (i in zNonEmpty) { |
|
64 |
- ix <- z == i |
|
65 |
- newZ <- z |
|
66 |
- newZ[ix] <- zSecond[ix] |
|
67 |
- |
|
68 |
- p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K) |
|
69 |
- nGByCP <- p$nGByCP |
|
70 |
- mCPByS <- p$mCPByS |
|
71 |
- llShuffle[i] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta) |
|
72 |
- previousZ <- newZ |
|
73 |
- } |
|
74 |
- zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA), |
|
75 |
- n = maxClustersToTry) |
|
76 |
- |
|
77 |
- pairs <- c(NA, NA) |
|
78 |
- splitIx <- 2 |
|
79 |
- for (i in zToShuffle) { |
|
80 |
- otherClusters <- setdiff(zToSplit, i) |
|
81 |
- |
|
82 |
- for (j in otherClusters) { |
|
83 |
- newZ <- z |
|
84 |
- |
|
85 |
- ## Assign cluster i to the next most similar cluster (excluding |
|
86 |
- ## cluster j) |
|
87 |
- ## as defined above by the correlation |
|
88 |
- ixToMove <- z == i |
|
89 |
- newZ[ixToMove] <- zSecond[ixToMove] |
|
90 |
- |
|
91 |
- ## Split cluster j according to the clustering defined above |
|
92 |
- ixToSplit <- z == j |
|
93 |
- newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i) |
|
94 |
- |
|
95 |
- p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K) |
|
96 |
- nGByCP <- p$nGByCP |
|
97 |
- mCPByS <- p$mCPByS |
|
98 |
- |
|
99 |
- ## Calculate likelihood of split |
|
100 |
- zSplitLl[splitIx] <- .cCCalcLL(mCPByS, |
|
101 |
- nGByCP, |
|
102 |
- s, |
|
103 |
- z, |
|
104 |
- K, |
|
105 |
- nS, |
|
106 |
- nG, |
|
107 |
- alpha, |
|
108 |
- beta) |
|
109 |
- zSplit[, splitIx] <- newZ |
|
110 |
- splitIx <- splitIx + 1L |
|
111 |
- previousZ <- newZ |
|
112 |
- |
|
113 |
- pairs <- rbind(pairs, c(i, j)) |
|
114 |
- } |
|
115 |
- } |
|
116 |
- |
|
117 |
- select <- which.max(zSplitLl) |
|
3 |
+ mCPByS, |
|
4 |
+ nGByCP, |
|
5 |
+ nCP, |
|
6 |
+ s, |
|
7 |
+ z, |
|
8 |
+ K, |
|
9 |
+ nS, |
|
10 |
+ nG, |
|
11 |
+ alpha, |
|
12 |
+ beta, |
|
13 |
+ zProb, |
|
14 |
+ maxClustersToTry = 10, |
|
15 |
+ minCell = 3) { |
|
16 |
+ |
|
17 |
+ ## Identify clusters to split |
|
18 |
+ zTa <- tabulate(z, K) |
|
19 |
+ zToSplit <- which(zTa >= minCell) |
|
20 |
+ zNonEmpty <- which(zTa > 0) |
|
21 |
+ |
|
22 |
+ if (length(zToSplit) == 0) { |
|
23 |
+ m <- paste0( |
|
24 |
+ date(), |
|
25 |
+ " .... Cluster sizes too small. No additional splitting was", |
|
26 |
+ " performed." |
|
27 |
+ ) |
|
28 |
+ return(list( |
|
29 |
+ z = z, |
|
30 |
+ mCPByS, |
|
31 |
+ nGByCP, |
|
32 |
+ nCP = nCP, |
|
33 |
+ message = m |
|
34 |
+ )) |
|
35 |
+ } |
|
36 |
+ |
|
37 |
+ ## Loop through each split-able Z and perform split |
|
38 |
+ clustSplit <- vector("list", K) |
|
39 |
+ for (i in zToSplit) { |
|
40 |
+ clustLabel <- .celda_C( |
|
41 |
+ counts[, z == i], |
|
42 |
+ K = 2, |
|
43 |
+ zInitialize = "random", |
|
44 |
+ maxIter = 5, |
|
45 |
+ splitOnIter = -1, |
|
46 |
+ splitOnLast = FALSE, |
|
47 |
+ verbose = FALSE |
|
48 |
+ ) |
|
49 |
+ clustSplit[[i]] <- clusters(clustLabel)$z |
|
50 |
+ } |
|
51 |
+ |
|
52 |
+ ## Find second best assignment give current assignments for each cell |
|
53 |
+ zProb[cbind(seq(nrow(zProb)), z)] <- NA |
|
54 |
+ zSecond <- apply(zProb, 1, which.max) |
|
55 |
+ |
|
56 |
+ ## Set up initial variables |
|
57 |
+ zSplit <- matrix(NA, |
|
58 |
+ nrow = length(z), |
|
59 |
+ ncol = length(zToSplit) * maxClustersToTry |
|
60 |
+ ) |
|
61 |
+ zSplitLl <- rep(NA, times = length(zToSplit) * maxClustersToTry) |
|
62 |
+ zSplitLl[1] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta) |
|
63 |
+ zSplit[, 1] <- z |
|
64 |
+ |
|
65 |
+ ## Select worst clusters to test for reshuffling |
|
66 |
+ previousZ <- z |
|
67 |
+ llShuffle <- rep(NA, K) |
|
68 |
+ for (i in zNonEmpty) { |
|
69 |
+ ix <- z == i |
|
70 |
+ newZ <- z |
|
71 |
+ newZ[ix] <- zSecond[ix] |
|
72 |
+ |
|
73 |
+ p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K) |
|
74 |
+ nGByCP <- p$nGByCP |
|
75 |
+ mCPByS <- p$mCPByS |
|
76 |
+ llShuffle[i] <- .cCCalcLL(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta) |
|
77 |
+ previousZ <- newZ |
|
78 |
+ } |
|
79 |
+ zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA), |
|
80 |
+ n = maxClustersToTry |
|
81 |
+ ) |
|
82 |
+ |
|
83 |
+ pairs <- c(NA, NA) |
|
84 |
+ splitIx <- 2 |
|
85 |
+ for (i in zToShuffle) { |
|
86 |
+ otherClusters <- setdiff(zToSplit, i) |
|
87 |
+ |
|
88 |
+ for (j in otherClusters) { |
|
89 |
+ newZ <- z |
|
90 |
+ |
|
91 |
+ ## Assign cluster i to the next most similar cluster (excluding |
|
92 |
+ ## cluster j) |
|
93 |
+ ## as defined above by the correlation |
|
94 |
+ ixToMove <- z == i |
|
95 |
+ newZ[ixToMove] <- zSecond[ixToMove] |
|
96 |
+ |
|
97 |
+ ## Split cluster j according to the clustering defined above |
|
98 |
+ ixToSplit <- z == j |
|
99 |
+ newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i) |
|
100 |
+ |
|
101 |
+ p <- .cCReDecomposeCounts(counts, s, newZ, previousZ, nGByCP, K) |
|
102 |
+ nGByCP <- p$nGByCP |
|
103 |
+ mCPByS <- p$mCPByS |
|
104 |
+ |
|
105 |
+ ## Calculate likelihood of split |
|
106 |
+ zSplitLl[splitIx] <- .cCCalcLL( |
|
107 |
+ mCPByS, |
|
108 |
+ nGByCP, |
|
109 |
+ s, |
|
110 |
+ z, |
|
111 |
+ K, |
|
112 |
+ nS, |
|
113 |
+ nG, |
|
114 |
+ alpha, |
|
115 |
+ beta |
|
116 |
+ ) |
|
117 |
+ zSplit[, splitIx] <- newZ |
|
118 |
+ splitIx <- splitIx + 1L |
|
119 |
+ previousZ <- newZ |
|
118 | 120 |
|
119 |
- if (select == 1) { |
|
120 |
- m <- paste0(date(), " .... No additional splitting was performed.") |
|
121 |
- } else { |
|
122 |
- m <- paste0(date(), |
|
123 |
- " .... Cluster ", |
|
124 |
- pairs[select, 1], |
|
125 |
- " was reassigned and cluster ", |
|
126 |
- pairs[select, 2], |
|
127 |
- " was split in two." |
|
128 |
- ) |
|
121 |
+ pairs <- rbind(pairs, c(i, j)) |
|
129 | 122 |
} |
130 |
- |
|
131 |
- p <- .cCReDecomposeCounts(counts, s, zSplit[, select], previousZ, nGByCP, K) |
|
132 |
- return(list(z = zSplit[, select], |
|
133 |
- mCPByS = p$mCPByS, |
|
134 |
- nGByCP = p$nGByCP, |
|
135 |
- nCP = p$nCP, |
|
136 |
- message = m)) |
|
123 |
+ } |
|
124 |
+ |
|
125 |
+ select <- which.max(zSplitLl) |
|
126 |
+ |
|
127 |
+ if (select == 1) { |
|
128 |
+ m <- paste0(date(), " .... No additional splitting was performed.") |
|
129 |
+ } else { |
|
130 |
+ m <- paste0( |
|
131 |
+ date(), |
|
132 |
+ " .... Cluster ", |
|
133 |
+ pairs[select, 1], |
|
134 |
+ " was reassigned and cluster ", |
|
135 |
+ pairs[select, 2], |
|
136 |
+ " was split in two." |
|
137 |
+ ) |
|
138 |
+ } |
|
139 |
+ |
|
140 |
+ p <- .cCReDecomposeCounts(counts, s, zSplit[, select], previousZ, nGByCP, K) |
|
141 |
+ return(list( |
|
142 |
+ z = zSplit[, select], |
|
143 |
+ mCPByS = p$mCPByS, |
|
144 |
+ nGByCP = p$nGByCP, |
|
145 |
+ nCP = p$nCP, |
|
146 |
+ message = m |
|
147 |
+ )) |
|
137 | 148 |
} |
138 | 149 |
|
139 | 150 |
|
140 | 151 |
# .cCGCalcLL = function(K, L, mCPByS, nTSByCP, nByG, nByTS, nGByTS, |
141 | 152 |
# nS, nG, alpha, beta, delta, gamma) |
142 | 153 |
.cCGSplitZ <- function(counts, |
154 |
+ mCPByS, |
|
155 |
+ nTSByC, |
|
156 |
+ nTSByCP, |
|
157 |
+ nByG, |
|
158 |
+ nByTS, |
|
159 |
+ nGByTS, |
|
160 |
+ nCP, |
|
161 |
+ s, |
|
162 |
+ z, |
|
163 |
+ K, |
|
164 |
+ L, |
|
165 |
+ nS, |
|
166 |
+ nG, |
|
167 |
+ alpha, |
|
168 |
+ beta, |
|
169 |
+ delta, |
|
170 |
+ gamma, |
|
171 |
+ zProb, |
|
172 |
+ maxClustersToTry = 10, |
|
173 |
+ minCell = 3) { |
|
174 |
+ |
|
175 |
+ ## Identify clusters to split |
|
176 |
+ zTa <- tabulate(z, K) |
|
177 |
+ zToSplit <- which(zTa >= minCell) |
|
178 |
+ zNonEmpty <- which(zTa > 0) |
|
179 |
+ |
|
180 |
+ if (length(zToSplit) == 0) { |
|
181 |
+ m <- paste0( |
|
182 |
+ date(), |
|
183 |
+ " .... Cluster sizes too small. No additional splitting was", |
|
184 |
+ " performed." |
|
185 |
+ ) |
|
186 |
+ return(list( |
|
187 |
+ z = z, |
|
188 |
+ mCPByS = mCPByS, |
|
189 |
+ nTSByCP = nTSByCP, |
|
190 |
+ nCP = nCP, |
|
191 |
+ message = m |
|
192 |
+ )) |
|
193 |
+ } |
|
194 |
+ |
|
195 |
+ ## Loop through each split-able Z and perform split |
|
196 |
+ clustSplit <- vector("list", K) |
|
197 |
+ for (i in zToSplit) { |
|
198 |
+ clustLabel <- .celda_C(counts[, z == i], |
|
199 |
+ K = 2, |
|
200 |
+ zInitialize = "random", |
|
201 |
+ maxIter = 5, |
|
202 |
+ splitOnIter = -1, |
|
203 |
+ splitOnLast = FALSE, |
|
204 |
+ verbose = FALSE |
|
205 |
+ ) |
|
206 |
+ clustSplit[[i]] <- clusters(clustLabel)$z |
|
207 |
+ } |
|
208 |
+ |
|
209 |
+ ## Find second best assignment give current assignments for each cell |
|
210 |
+ zProb[cbind(seq(nrow(zProb)), z)] <- NA |
|
211 |
+ zSecond <- apply(zProb, 1, which.max) |
|
212 |
+ |
|
213 |
+ ## Set up initial variables |
|
214 |
+ zSplit <- matrix(NA, |
|
215 |
+ nrow = length(z), |
|
216 |
+ ncol = length(zToSplit) * maxClustersToTry |
|
217 |
+ ) |
|
218 |
+ zSplitLl <- rep(NA, ncol = length(zToSplit) * maxClustersToTry) |
|
219 |
+ zSplitLl[1] <- .cCGCalcLL( |
|
220 |
+ K, |
|
221 |
+ L, |
|
143 | 222 |
mCPByS, |
144 |
- nTSByC, |
|
145 | 223 |
nTSByCP, |
146 | 224 |
nByG, |
147 | 225 |
nByTS, |
148 | 226 |
nGByTS, |
149 |
- nCP, |
|
150 |
- s, |
|
151 |
- z, |
|
152 |
- K, |
|
153 |
- L, |
|
154 | 227 |
nS, |
155 | 228 |
nG, |
156 | 229 |
alpha, |
157 | 230 |
beta, |
158 | 231 |
delta, |
159 |
- gamma, |
|
160 |
- zProb, |
|
161 |
- maxClustersToTry = 10, |
|
162 |
- minCell = 3) { |
|
163 |
- |
|
164 |
- ## Identify clusters to split |
|
165 |
- zTa <- tabulate(z, K) |
|
166 |
- zToSplit <- which(zTa >= minCell) |
|
167 |
- zNonEmpty <- which(zTa > 0) |
|
168 |
- |
|
169 |
- if (length(zToSplit) == 0) { |
|
170 |
- m <- paste0(date(), |
|
171 |
- " .... Cluster sizes too small. No additional splitting was", |
|
172 |
- " performed.") |
|
173 |
- return(list(z = z, |
|
174 |
- mCPByS = mCPByS, |
|
175 |
- nTSByCP = nTSByCP, |
|
176 |
- nCP = nCP, |
|
177 |
- message = m)) |
|
178 |
- } |
|
179 |
- |
|
180 |
- ## Loop through each split-able Z and perform split |
|
181 |
- clustSplit <- vector("list", K) |
|
182 |
- for (i in zToSplit) { |
|
183 |
- clustLabel <- .celda_C(counts[, z == i], |
|
184 |
- K = 2, |
|
185 |
- zInitialize = "random", |
|
186 |
- maxIter = 5, |
|
187 |
- splitOnIter = -1, |
|
188 |
- splitOnLast = FALSE, |
|
189 |
- verbose = FALSE) |
|
190 |
- clustSplit[[i]] <- clusters(clustLabel)$z |
|
191 |
- } |
|
192 |
- |
|
193 |
- ## Find second best assignment give current assignments for each cell |
|
194 |
- zProb[cbind(seq(nrow(zProb)), z)] <- NA |
|
195 |
- zSecond <- apply(zProb, 1, which.max) |
|
196 |
- |
|
197 |
- ## Set up initial variables |
|
198 |
- zSplit <- matrix(NA, |
|
199 |
- nrow = length(z), |
|
200 |
- ncol = length(zToSplit) * maxClustersToTry) |
|
201 |
- zSplitLl <- rep(NA, ncol = length(zToSplit) * maxClustersToTry) |
|
202 |
- zSplitLl[1] <- .cCGCalcLL(K, |
|
232 |
+ gamma |
|
233 |
+ ) |
|
234 |
+ zSplit[, 1] <- z |
|
235 |
+ |
|
236 |
+ ## Select worst clusters to test for reshuffling |
|
237 |
+ previousZ <- z |
|
238 |
+ llShuffle <- rep(NA, K) |
|
239 |
+ for (i in zNonEmpty) { |
|
240 |
+ ix <- z == i |
|
241 |
+ newZ <- z |
|
242 |
+ newZ[ix] <- zSecond[ix] |
|
243 |
+ |
|
244 |
+ p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K) |
|
245 |
+ nTSByCP <- p$nGByCP |
|
246 |
+ mCPByS <- p$mCPByS |
|
247 |
+ llShuffle[i] <- .cCGCalcLL( |
|
248 |
+ K, |
|
249 |
+ L, |
|
250 |
+ mCPByS, |
|
251 |
+ nTSByCP, |
|
252 |
+ nByG, |
|
253 |
+ nByTS, |
|
254 |
+ nGByTS, |
|
255 |
+ nS, |
|
256 |
+ nG, |
|
257 |
+ alpha, |
|
258 |
+ beta, |
|
259 |
+ delta, |
|
260 |
+ gamma |
|
261 |
+ ) |
|
262 |
+ previousZ <- newZ |
|
263 |
+ } |
|
264 |
+ zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA), |
|
265 |
+ n = maxClustersToTry |
|
266 |
+ ) |
|
267 |
+ |
|
268 |
+ |
|
269 |
+ pairs <- c(NA, NA) |
|
270 |
+ splitIx <- 2 |
|
271 |
+ for (i in zToShuffle) { |
|
272 |
+ otherClusters <- setdiff(zToSplit, i) |
|
273 |
+ |
|
274 |
+ for (j in otherClusters) { |
|
275 |
+ newZ <- z |
|
276 |
+ |
|
277 |
+ ## Assign cluster i to the next most similar cluster (excluding |
|
278 |
+ ## cluster j) |
|
279 |
+ ## as defined above by the correlation |
|
280 |
+ ixToMove <- z == i |
|
281 |
+ newZ[ixToMove] <- zSecond[ixToMove] |
|
282 |
+ |
|
283 |
+ ## Split cluster j according to the clustering defined above |
|
284 |
+ ixToSplit <- z == j |
|
285 |
+ newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i) |
|
286 |
+ |
|
287 |
+ p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K) |
|
288 |
+ nTSByCP <- p$nGByCP |
|
289 |
+ mCPByS <- p$mCPByS |
|
290 |
+ |
|
291 |
+ ## Calculate likelihood of split |
|
292 |
+ zSplitLl[splitIx] <- .cCGCalcLL( |
|
293 |
+ K, |
|
203 | 294 |
L, |
204 | 295 |
mCPByS, |
205 | 296 |
nTSByCP, |
... | ... |
@@ -211,211 +302,233 @@ |
211 | 302 |
alpha, |
212 | 303 |
beta, |
213 | 304 |
delta, |
214 |
- gamma) |
|
215 |
- zSplit[, 1] <- z |
|
216 |
- |
|
217 |
- ## Select worst clusters to test for reshuffling |
|
218 |
- previousZ <- z |
|
219 |
- llShuffle <- rep(NA, K) |
|
220 |
- for (i in zNonEmpty) { |
|
221 |
- ix <- z == i |
|
222 |
- newZ <- z |
|
223 |
- newZ[ix] <- zSecond[ix] |
|
224 |
- |
|
225 |
- p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K) |
|
226 |
- nTSByCP <- p$nGByCP |
|
227 |
- mCPByS <- p$mCPByS |
|
228 |
- llShuffle[i] <- .cCGCalcLL(K, |
|
229 |
- L, |
|
230 |
- mCPByS, |
|
231 |
- nTSByCP, |
|
232 |
- nByG, |
|
233 |
- nByTS, |
|
234 |
- nGByTS, |
|
235 |
- nS, |
|
236 |
- nG, |
|
237 |
- alpha, |
|
238 |
- beta, |
|
239 |
- delta, |
|
240 |
- gamma) |
|
241 |
- previousZ <- newZ |
|
242 |
- } |
|
243 |
- zToShuffle <- utils::head(order(llShuffle, decreasing = TRUE, na.last = NA), |
|
244 |
- n = maxClustersToTry) |
|
245 |
- |
|
246 |
- |
|
247 |
- pairs <- c(NA, NA) |
|
248 |
- splitIx <- 2 |
|
249 |
- for (i in zToShuffle) { |
|
250 |
- otherClusters <- setdiff(zToSplit, i) |
|
251 |
- |
|
252 |
- for (j in otherClusters) { |
|
253 |
- newZ <- z |
|
254 |
- |
|
255 |
- ## Assign cluster i to the next most similar cluster (excluding |
|
256 |
- ## cluster j) |
|
257 |
- ## as defined above by the correlation |
|
258 |
- ixToMove <- z == i |
|
259 |
- newZ[ixToMove] <- zSecond[ixToMove] |
|
260 |
- |
|
261 |
- ## Split cluster j according to the clustering defined above |
|
262 |
- ixToSplit <- z == j |
|
263 |
- newZ[ixToSplit] <- ifelse(clustSplit[[j]] == 1, j, i) |
|
264 |
- |
|
265 |
- p <- .cCReDecomposeCounts(nTSByC, s, newZ, previousZ, nTSByCP, K) |
|
266 |
- nTSByCP <- p$nGByCP |
|
267 |
- mCPByS <- p$mCPByS |
|
268 |
- |
|
269 |
- ## Calculate likelihood of split |
|
270 |
- zSplitLl[splitIx] <- .cCGCalcLL(K, |
|
271 |
- L, |
|
272 |
- mCPByS, |
|
273 |
- nTSByCP, |
|
274 |
- nByG, |
|
275 |
- nByTS, |
|
276 |
- nGByTS, |
|
277 |
- nS, |
|
278 |
- nG, |
|
279 |
- alpha, |
|
280 |
- beta, |
|
281 |
- delta, |
|
282 |
- gamma) |
|
283 |
- zSplit[, splitIx] <- newZ |
|
284 |
- splitIx <- splitIx + 1L |
|
285 |
- previousZ <- newZ |
|
286 |
- |
|
287 |
- pairs <- rbind(pairs, c(i, j)) |
|
288 |
- } |
|
289 |
- } |
|
305 |
+ gamma |
|
306 |
+ ) |
|
307 |
+ zSplit[, splitIx] <- newZ |
|
308 |
+ splitIx <- splitIx + 1L |
|
309 |
+ previousZ <- newZ |
|
290 | 310 |
|
291 |
- select <- which.max(zSplitLl) |
|
292 |
- |
|
293 |
- if (select == 1) { |
|
294 |
- m <- paste0(date(), " .... No additional splitting was performed.") |
|
295 |
- } else { |
|
296 |
- m <- paste0(date(), |
|
297 |
- " .... Cluster ", |
|
298 |
- pairs[select, 1], |
|
299 |
- " was reassigned and cluster ", |
|
300 |
- pairs[select, 2], |
|
301 |
- " was split in two.") |
|
311 |
+ pairs <- rbind(pairs, c(i, j)) |
|
302 | 312 |
} |
303 |
- |
|
304 |
- p <- .cCReDecomposeCounts(nTSByC, |
|
305 |
- s, |
|
306 |
- zSplit[, select], |
|
307 |
- previousZ, |
|
308 |
- nTSByCP, |
|
309 |
- K) |
|
310 |
- return(list(z = zSplit[, select], |
|
311 |
- mCPByS = p$mCPByS, |
|
312 |
- nTSByCP = p$nGByCP, |
|
313 |
- nCP = p$nCP, |
|
314 |
- message = m)) |
|
313 |
+ } |
|
314 |
+ |
|
315 |
+ select <- which.max(zSplitLl) |
|
316 |
+ |
|
317 |
+ if (select == 1) { |
|
318 |
+ m <- paste0(date(), " .... No additional splitting was performed.") |
|
319 |
+ } else { |
|
320 |
+ m <- paste0( |
|
321 |
+ date(), |
|
322 |
+ " .... Cluster ", |
|
323 |
+ pairs[select, 1], |
|
324 |
+ " was reassigned and cluster ", |
|
325 |
+ pairs[select, 2], |
|
326 |
+ " was split in two." |
|
327 |
+ ) |
|
328 |
+ } |
|
329 |
+ |
|
330 |
+ p <- .cCReDecomposeCounts( |
|
331 |
+ nTSByC, |
|
332 |
+ s, |
|
333 |
+ zSplit[, select], |
|
334 |
+ previousZ, |
|
335 |
+ nTSByCP, |
|
336 |
+ K |
|
337 |
+ ) |
|
338 |
+ return(list( |
|
339 |
+ z = zSplit[, select], |
|
340 |
+ mCPByS = p$mCPByS, |
|
341 |
+ nTSByCP = p$nGByCP, |
|
342 |
+ nCP = p$nCP, |
|
343 |
+ message = m |
|
344 |
+ )) |
|
315 | 345 |
} |
316 | 346 |
|
317 | 347 |
|
318 | 348 |
.cCGSplitY <- function(counts, |
319 |
- y, |
|
349 |
+ y, |
|
350 |
+ mCPByS, |
|
351 |
+ nGByCP, |
|
352 |
+ nTSByC, |
|
353 |
+ nTSByCP, |
|
354 |
+ nByG, |
|
355 |
+ nByTS, |
|
356 |
+ nGByTS, |
|
357 |
+ nCP, |
|
358 |
+ s, |
|
359 |
+ z, |
|
360 |
+ K, |
|
361 |
+ L, |
|
362 |
+ nS, |
|
363 |
+ nG, |
|
364 |
+ alpha, |
|
365 |
+ beta, |
|
366 |
+ delta, |
|
367 |
+ gamma, |
|
368 |
+ yProb, |
|
369 |
+ maxClustersToTry = 10, |
|
370 |
+ KSubclusters = 10, |
|
371 |
+ minCell = 3) { |
|
372 |
+ |
|
373 |
+ ######################### |
|
374 |
+ ## First, the cell dimension of the original matrix will be reduced by |
|
375 |
+ ## splitting each z cluster into 'KSubclusters'. |
|
376 |
+ ######################### |
|
377 |
+ |
|
378 |
+ ## This will not be as big as the original matrix (which can take a lot of |
|
379 |
+ ## time to process with large number of cells), but not as small as the |
|
380 |
+ ## 'nGByCP' with current z assignments |
|
381 |
+ |
|
382 |
+ zTa <- tabulate(z, K) |
|
383 |
+ zNonEmpty <- which(zTa > 0) |
|
384 |
+ tempZ <- rep(0, length(z)) |
|
385 |
+ currentTopZ <- 0 |
|
386 |
+ for (i in zNonEmpty) { |
|
387 |
+ ix <- z == i |
|
388 |
+ if (zTa[i] <= KSubclusters) { |
|
389 |
+ tempZ[ix] <- seq(currentTopZ + 1, currentTopZ + zTa[i]) |
|
390 |
+ } else { |
|
391 |
+ clustLabel <- .celda_C(counts[, z == i], |
|
392 |
+ K = KSubclusters, |
|
393 |
+ zInitialize = "random", |
|
394 |
+ maxIter = 5, |
|
395 |
+ splitOnIter = -1, |
|
396 |
+ splitOnLast = FALSE, |
|
397 |
+ verbose = FALSE |
|
398 |
+ ) |
|
399 |
+ tempZ[ix] <- clusters(clustLabel)$z + currentTopZ |
|
400 |
+ } |
|
401 |
+ currentTopZ <- max(tempZ, na.rm = TRUE) |
|
402 |
+ } |
|
403 |
+ |
|
404 |
+ ## Decompose counts according to new/temp z labels |
|
405 |
+ tempNGByCP <- .colSumByGroup(counts, group = tempZ, K = currentTopZ) |
|
406 |
+ |
|
407 |
+ ######################### |
|
408 |
+ ## Second, different y splits will be estimated and tested |
|
409 |
+ ######################### |
|
410 |
+ |
|
411 |
+ ## Identify clusters to split |
|
412 |
+ yTa <- tabulate(y, L) |
|
413 |
+ yToSplit <- which(yTa >= minCell) |
|
414 |
+ yNonEmpty <- which(yTa > 0) |
|
415 |
+ |
|
416 |
+ if (length(yToSplit) == 0) { |
|
417 |
+ m <- paste0( |
|
418 |
+ date(), |
|
419 |
+ " .... Cluster sizes too small. No additional splitting was", |
|
420 |
+ " performed." |
|
421 |
+ ) |
|
422 |
+ return(list( |
|
423 |
+ y = y, |
|
424 |
+ mCPByS = mCPByS, |
|
425 |
+ nTSByCP = nTSByCP, |
|
426 |
+ nCP = nCP, |
|
427 |
+ message = m |
|
428 |
+ )) |
|
429 |
+ } |
|
430 |
+ |
|
431 |
+ ## Loop through each split-able Z and perform split |
|
432 |
+ clustSplit <- vector("list", L) |
|
433 |
+ for (i in yToSplit) { |
|
434 |
+ clustLabel <- .celda_G(tempNGByCP[y == i, ], |
|
435 |
+ L = 2, |
|
436 |
+ yInitialize = "random", |
|
437 |
+ maxIter = 5, |
|
438 |
+ splitOnIter = -1, |
|
439 |
+ splitOnLast = FALSE, |
|
440 |
+ verbose = FALSE |
|
441 |
+ ) |
|
442 |
+ clustSplit[[i]] <- clusters(clustLabel)$y |
|
443 |
+ } |
|
444 |
+ |
|
445 |
+ ## Find second best assignment give current assignments for each cell |
|