* Changed default parameters for distinct_colors.
* Fixed bug in subtracting 1 to make the factor indices zero-based
* Changed EM function to use the faster colSumByGroupChange function to shift counts between clusters after z labels have been altered
* Updated documentation
* Added 'src/*.so' statement
* Changed 'split.each.z' to 'cC.splitZ', which uses the rowSumByGroupChange function to greatly increase speed in celda_C. However, a separate function will be needed for the cells in celda_CG
* Changed decomposed matrices to have TS as rows rather than columns (e.g. n.C.by.TS => n.TS.by.C)
* Added function for splitting clusters for celda_CG
* Reverted code so that any matrix with genes (G) or transcriptional states (TS) has those as the rows. This eliminates the need for the counts.t matrix
* Reverted code so that any matrix has genes (G) or transcriptional states (TS) as the rows and cells (C) or cell populations (CP) as the columns. This eliminates the need for transposing matrices back and forth
* Debugged the new function 'cCG.splitZ', which performs the cell splits for celda_CG much more efficiently
* Refactor rowSumByGroupChange
* Modified the split for celda_G to use the faster rowSumByGroupChange function instead of rowSumByGroup
* Updated celda_CG y splitting function
* Fixed bug in using colSumByGroupChange
* Updated NAMESPACE
* Updated testthat to reflect new defaults for the distinct_colors function
* Added statement to load in previously simulated results
* Added missing lines in testthat for celda_C
* Removed dependency on Rmpfr by using functions from the matrixStats package (see the sketch after this message)
* Changed default gamma in simulateCells functions to 5. Removed the 'precision' argument from the perplexity functions
* Added 'seed' parameter to celdaTsne
* Updated NAMESPACE
Former-commit-id: 3e7a458f955513bef374fc98ed23c567155b13eb
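A minimal sketch of the Rmpfr-to-matrixStats swap mentioned above, with a made-up log-probability matrix (in the package, inner.log.prob holds per-cluster log-probabilities with clusters as rows and cells as columns):

    library(matrixStats)

    # Hypothetical values; numbers this negative underflow to 0 under a naive
    # exp(), which is why the old code needed Rmpfr's 128-bit floats.
    inner.log.prob <- matrix(c(-700, -705, -710, -720,
                               -701, -702, -715, -718,
                               -703, -704, -712, -719), nrow = 3, byrow = TRUE)

    # logSumExp() computes log(sum(exp(x))) stably without leaving log space,
    # one cell (column) at a time.
    log.px <- sum(apply(inner.log.prob, 2, matrixStats::logSumExp))

    # Perplexity as in calculatePerplexity; total.counts stands in for
    # sum(counts) on real data.
    total.counts <- 1e4
    perplexity <- exp(-(log.px / total.counts))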
... | ... |
@@ -91,21 +91,21 @@ celda_C = function(counts, sample.label=NULL, K, alpha=1, beta=1, |
91 | 91 |
num.iter.without.improvement = 0L |
92 | 92 |
do.cell.split = TRUE |
93 | 93 |
while(iter <= max.iter & num.iter.without.improvement <= stop.iter) { |
94 |
- message(paste("Iteration: ", iter)) |
|
95 | 94 |
|
96 | 95 |
# next.z = cC.calcGibbsProbZ(counts=counts, m.CP.by.S=m.CP.by.S, n.G.by.CP=n.G.by.CP, n.by.C=n.by.C, n.CP=n.CP, z=z, s=s, K=K, nG=nG, nM=nM, alpha=alpha, beta=beta) |
97 | 96 |
# next.z = cC.calcEMProbZ(counts=counts, m.CP.by.S=m.CP.by.S, n.G.by.CP=n.G.by.CP, n.by.C=n.by.C, n.CP=n.CP, z=z, s=s, K=K, nG=nG, nM=nM, alpha=alpha, beta=beta) |
98 | 97 |
next.z = do.call(algorithm.fun, list(counts=counts, m.CP.by.S=m.CP.by.S, n.G.by.CP=n.G.by.CP, n.by.C=n.by.C, n.CP=n.CP, z=z, s=s, K=K, nG=nG, nM=nM, alpha=alpha, beta=beta)) |
98 |
+ |
|
99 | 99 |
m.CP.by.S = next.z$m.CP.by.S |
100 | 100 |
n.G.by.CP = next.z$n.G.by.CP |
101 | 101 |
n.CP = next.z$n.CP |
102 | 102 |
z = next.z$z |
103 |
- |
|
103 |
+ |
|
104 | 104 |
## Perform split on i-th iteration of no improvement in log likelihood |
105 | 105 |
if(K > 2 & (((iter == max.iter | num.iter.without.improvement == stop.iter) & isTRUE(split.on.last)) | (split.on.iter > 0 & iter %% split.on.iter == 0 & isTRUE(do.cell.split)))) { |
106 | 106 |
|
107 | 107 |
logMessages(date(), " ... Determining if any cell clusters should be split.", logfile=logfile, append=TRUE, sep="") |
108 |
- res = split.each.z(counts=counts, z=z, K=K, z.prob=t(next.z$probs), alpha=alpha, beta=beta, s=s, LLFunction="calculateLoglikFromVariables.celda_C") |
|
108 |
+ res = cC.splitZ(counts, m.CP.by.S, n.G.by.CP, s, z, K, nS, nG, alpha, beta, z.prob=t(next.z$probs), max.clusters.to.try=10, min.cell=3) |
|
109 | 109 |
logMessages(res$message, logfile=logfile, append=TRUE) |
110 | 110 |
|
111 | 111 |
# Reset convergence counter if a split occurred |
... | ... |
@@ -118,9 +118,12 @@ celda_C = function(counts, sample.label=NULL, K, alpha=1, beta=1, |
118 | 118 |
|
119 | 119 |
## Re-calculate variables |
120 | 120 |
z = res$z |
121 |
- m.CP.by.S = matrix(as.integer(table(factor(z, levels=1:K), s)), ncol=nS) |
|
122 |
- n.G.by.CP = colSumByGroup(counts, group=z, K=K) |
|
123 |
- n.CP = as.integer(colSums(n.G.by.CP)) |
|
121 |
+ #m.CP.by.S = matrix(as.integer(table(factor(z, levels=1:K), s)), ncol=nS) |
|
122 |
+ #n.G.by.CP = colSumByGroup(counts, group=z, K=K) |
|
123 |
+ #n.CP = as.integer(colSums(n.G.by.CP)) |
|
124 |
+ m.CP.by.S = res$m.CP.by.S |
|
125 |
+ n.G.by.CP = res$n.G.by.CP |
|
126 |
+ n.CP = res$n.CP |
|
124 | 127 |
} |
125 | 128 |
|
126 | 129 |
|
... | ... |
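The do.call(algorithm.fun, ...) line above dispatches to either the Gibbs or the EM updater by name. A toy sketch of the pattern (the two stand-in functions are hypothetical placeholders, not the package's updaters):

    # Placeholders standing in for cC.calcGibbsProbZ / cC.calcEMProbZ.
    gibbs.update <- function(z, K) list(z = z)
    em.update    <- function(z, K) list(z = z)

    algorithm.fun <- "em.update"  # in celda_C this comes from the 'algorithm' argument
    next.z <- do.call(algorithm.fun, list(z = c(1, 2, 1), K = 2))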
@@ -225,14 +228,16 @@ cC.calcEMProbZ = function(counts, m.CP.by.S, n.G.by.CP, n.by.C, n.CP, z, s, K, n |
225 | 228 |
## Maximization to find best label for each cell |
226 | 229 |
probs = eigenMatMultInt(phi, counts) + theta[, s] |
227 | 230 |
#probs = (t(phi) %*% counts) + theta[, s] |
231 |
+ |
|
232 |
+ z.previous = z |
|
228 | 233 |
z = apply(probs, 2, which.max) |
229 | 234 |
|
230 | 235 |
## Recalculate counts based on new label |
231 |
- p = cC.decomposeCounts(counts, s, z, K) |
|
236 |
+ #p = cC.decomposeCounts(counts, s, z, K) |
|
237 |
+ p = cC.reDecomposeCounts(counts, s, z, z.previous, n.G.by.CP, K) |
|
232 | 238 |
m.CP.by.S = p$m.CP.by.S |
233 | 239 |
n.G.by.CP = p$n.G.by.CP |
234 | 240 |
n.CP = p$n.CP |
235 |
- n.by.C = p$n.by.C |
|
236 | 241 |
|
237 | 242 |
return(list(m.CP.by.S=m.CP.by.S, n.G.by.CP=n.G.by.CP, n.CP=n.CP, z=z, probs=probs)) |
238 | 243 |
} |
... | ... |
@@ -400,10 +405,21 @@ cC.decomposeCounts = function(counts, s, z, K) { |
400 | 405 |
n.G.by.CP = colSumByGroup(counts, group=z, K=K) |
401 | 406 |
n.CP = as.integer(colSums(n.G.by.CP)) |
402 | 407 |
n.by.C = as.integer(colSums(counts)) |
403 |
- |
|
408 |
+ |
|
404 | 409 |
return(list(m.CP.by.S=m.CP.by.S, n.G.by.CP=n.G.by.CP, n.CP=n.CP, n.by.C=n.by.C, nS=nS, nG=nG, nM=nM)) |
405 | 410 |
} |
406 | 411 |
|
412 |
+cC.reDecomposeCounts = function(counts, s, z, previous.z, n.G.by.CP, K) { |
|
413 |
+ |
|
414 |
+ ## Recalculate counts based on new label |
|
415 |
+ n.G.by.CP = colSumByGroupChange(counts, n.G.by.CP, z, previous.z, K) |
|
416 |
+ nS = length(unique(s)) |
|
417 |
+ m.CP.by.S = matrix(as.integer(table(factor(z, levels=1:K), s)), ncol=nS) |
|
418 |
+ n.CP = as.integer(colSums(n.G.by.CP)) |
|
419 |
+ |
|
420 |
+ return(list(m.CP.by.S=m.CP.by.S, n.G.by.CP=n.G.by.CP, n.CP=n.CP)) |
|
421 |
+} |
|
422 |
+ |
|
407 | 423 |
|
408 | 424 |
#' Calculates the conditional probability of each cell belonging to each cluster given all other cluster assignments |
409 | 425 |
#' |
... | ... |
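cC.reDecomposeCounts relies on the C-level colSumByGroupChange helper, which updates the per-cluster gene sums by moving only the cells whose label changed instead of re-summing all K clusters. A hypothetical pure-R reference for the invariant it maintains (assuming counts is genes x cells and n.G.by.CP is genes x K):

    colSumByGroupChange.R <- function(counts, n.G.by.CP, z, previous.z, K) {
      changed <- which(z != previous.z)
      for (i in changed) {
        # Remove the cell's counts from its old cluster, add them to its new one.
        n.G.by.CP[, previous.z[i]] <- n.G.by.CP[, previous.z[i]] - counts[, i]
        n.G.by.CP[, z[i]] <- n.G.by.CP[, z[i]] + counts[, i]
      }
      n.G.by.CP
    }

When few labels change between iterations, this is far cheaper than a full colSumByGroup recomputation, which is the speedup the EM and split functions exploit.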
@@ -439,22 +455,13 @@ clusterProbability.celda_C = function(celda.mod, counts, log=FALSE, ...) { |
439 | 455 |
#' @export |
440 | 456 |
calculatePerplexity.celda_C = function(counts, celda.mod, precision=128) { |
441 | 457 |
|
442 |
- # TODO Can try to turn into a single giant matrix multiplication by duplicating |
|
443 |
- # phi / theta / sl |
|
444 |
- # TODO Cast to sparse matrices? |
|
445 | 458 |
factorized = factorizeMatrix(counts = counts, celda.mod = celda.mod, "posterior") |
446 | 459 |
theta = log(factorized$posterior$sample.states) |
447 | 460 |
phi = log(factorized$posterior$gene.states) |
448 | 461 |
sl = celda.mod$sample.label |
449 | 462 |
|
450 | 463 |
inner.log.prob = (t(phi) %*% counts) + theta[, sl] |
451 |
- inner.log.prob = Rmpfr::mpfr(inner.log.prob, precision) |
|
452 |
- inner.log.prob.exp = exp(inner.log.prob) |
|
453 |
- |
|
454 |
- log.px = 0 |
|
455 |
- for(i in 1:ncol(inner.log.prob.exp)) { |
|
456 |
- log.px = log.px + Rmpfr::asNumeric(log(sum(inner.log.prob.exp[, i]))) |
|
457 |
- } |
|
464 |
+ log.px = sum(apply(inner.log.prob, 2, matrixStats::logSumExp)) |
|
458 | 465 |
|
459 | 466 |
perplexity = exp(-(log.px/sum(counts))) |
460 | 467 |
return(perplexity) |
... | ... |
@@ -84,18 +84,19 @@ celda_CG = function(counts, sample.label=NULL, K, L, |
84 | 84 |
p = cCG.decomposeCounts(counts, s, z, y, K, L) |
85 | 85 |
m.CP.by.S = p$m.CP.by.S |
86 | 86 |
n.TS.by.C = p$n.TS.by.C |
87 |
- n.CP.by.TS = p$n.CP.by.TS |
|
87 |
+ n.TS.by.CP = p$n.TS.by.CP |
|
88 | 88 |
n.CP = p$n.CP |
89 | 89 |
n.by.G = p$n.by.G |
90 | 90 |
n.by.C = p$n.by.C |
91 | 91 |
n.by.TS = p$n.by.TS |
92 | 92 |
nG.by.TS = p$nG.by.TS |
93 |
+ n.G.by.CP = p$n.G.by.CP |
|
93 | 94 |
nM = p$nM |
94 | 95 |
nG = p$nG |
95 | 96 |
nS = p$nS |
96 | 97 |
rm(p) |
97 | 98 |
|
98 |
- ll = cCG.calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.CP.by.TS=n.CP.by.TS, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma) |
|
99 |
+ ll = cCG.calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.TS.by.CP=n.TS.by.CP, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma) |
|
99 | 100 |
|
100 | 101 |
set.seed(seed) |
101 | 102 |
logMessages(date(), "... Starting celda_CG to cluster cells and genes.", logfile=logfile, append=FALSE) |
... | ... |
@@ -106,29 +107,27 @@ celda_CG = function(counts, sample.label=NULL, K, L, |
106 | 107 |
do.gene.split = TRUE |
107 | 108 |
while(iter <= max.iter & num.iter.without.improvement <= stop.iter) { |
108 | 109 |
|
109 |
- ## Gibbs sampling for each cell |
|
110 |
- n.TS.by.C = rowSumByGroup(counts, group=y, L=L) |
|
111 |
- n.TS.by.CP = t(n.CP.by.TS) |
|
112 |
- #next.z = cC.calcGibbsProbZ(counts=n.TS.by.C, m.CP.by.S=m.CP.by.S, n.G.by.CP=n.TS.by.CP, n.CP=n.CP, n.by.C=n.by.C, z=z, s=s, K=K, nG=L, nM=nM, alpha=alpha, beta=beta) |
|
110 |
+ ## Gibbs or EM sampling for each cell |
|
113 | 111 |
next.z = do.call(algorithm.fun, list(counts=n.TS.by.C, m.CP.by.S=m.CP.by.S, n.G.by.CP=n.TS.by.CP, n.CP=n.CP, n.by.C=n.by.C, z=z, s=s, K=K, nG=L, nM=nM, alpha=alpha, beta=beta)) |
114 | 112 |
m.CP.by.S = next.z$m.CP.by.S |
115 | 113 |
n.TS.by.CP = next.z$n.G.by.CP |
116 | 114 |
n.CP = next.z$n.CP |
115 |
+ n.G.by.CP = colSumByGroupChange(counts, n.G.by.CP, next.z$z, z, K) |
|
117 | 116 |
z = next.z$z |
118 |
- |
|
117 |
+ |
|
119 | 118 |
## Gibbs sampling for each gene |
120 |
- n.CP.by.G = t(colSumByGroup(counts, group=z, K=K)) |
|
121 |
- n.CP.by.TS = t(n.TS.by.CP) |
|
122 |
- next.y = cG.calcGibbsProbY(counts.t=n.CP.by.G, n.C.by.TS=n.CP.by.TS, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, n.by.G=n.by.G, y=y, L=L, nG=nG, beta=beta, delta=delta, gamma=gamma) |
|
123 |
- n.CP.by.TS = next.y$n.C.by.TS |
|
119 |
+ next.y = cG.calcGibbsProbY(counts=n.G.by.CP, n.TS.by.C=n.TS.by.CP, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, n.by.G=n.by.G, y=y, L=L, nG=nG, beta=beta, delta=delta, gamma=gamma) |
|
120 |
+ n.TS.by.CP = next.y$n.TS.by.C |
|
124 | 121 |
nG.by.TS = next.y$nG.by.TS |
125 | 122 |
n.by.TS = next.y$n.by.TS |
123 |
+ n.TS.by.C = rowSumByGroupChange(counts, n.TS.by.C, next.y$y, y, L) |
|
126 | 124 |
y = next.y$y |
127 | 125 |
|
126 |
+ |
|
128 | 127 |
## Perform split on i-th iteration defined by split.on.iter |
129 | 128 |
if(K > 2 & (((iter == max.iter | num.iter.without.improvement == stop.iter) & isTRUE(split.on.last)) | (split.on.iter > 0 & iter %% split.on.iter == 0 & isTRUE(do.cell.split)))) { |
130 | 129 |
logMessages(date(), " ... Determining if any cell clusters should be split.", logfile=logfile, append=TRUE, sep="") |
131 |
- res = split.each.z(counts=counts, z=z, y=y, z.prob=t(next.z$probs), K=K, L=L, alpha=alpha, delta=delta, beta=beta, gamma=gamma, s=s, LLFunction="calculateLoglikFromVariables.celda_CG") |
|
130 |
+ res = cCG.splitZ(counts, m.CP.by.S, n.TS.by.C, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, s, z, K, L, nS, nG, alpha, beta, delta, gamma, z.prob=t(next.z$probs), max.clusters.to.try=10, min.cell=3) |
|
132 | 131 |
logMessages(res$message, logfile=logfile, append=TRUE) |
133 | 132 |
|
134 | 133 |
# Reset convergence counter if a split occurred |
... | ... |
@@ -141,15 +140,14 @@ celda_CG = function(counts, sample.label=NULL, K, L, |
141 | 140 |
|
142 | 141 |
## Re-calculate variables |
143 | 142 |
z = res$z |
144 |
- m.CP.by.S = matrix(as.integer(table(factor(z, levels=1:K), s)), ncol=nS) |
|
145 |
- n.TS.by.C = rowSumByGroup(counts, group=y, L=L) |
|
146 |
- n.CP.by.TS = t(colSumByGroup(n.TS.by.C, group=z, K=K)) |
|
147 |
- n.CP = as.integer(rowSums(n.CP.by.TS)) |
|
148 |
- n.CP.by.G = t(colSumByGroup(counts, group=z, K=K)) |
|
143 |
+ m.CP.by.S = res$m.CP.by.S |
|
144 |
+ n.TS.by.CP = res$n.TS.by.CP |
|
145 |
+ n.CP = res$n.CP |
|
146 |
+ n.G.by.CP = colSumByGroup(counts, group=z, K=K) |
|
149 | 147 |
} |
150 | 148 |
if(L > 2 & (((iter == max.iter | num.iter.without.improvement == stop.iter) & isTRUE(split.on.last)) | (split.on.iter > 0 & iter %% split.on.iter == 0 & isTRUE(do.gene.split)))) { |
151 | 149 |
logMessages(date(), " ... Determining if any gene clusters should be split.", logfile=logfile, append=TRUE, sep="") |
152 |
- res = split.each.y(counts=counts, z=z, y=y, y.prob=t(next.y$probs), K=K, L=L, alpha=alpha, beta=beta, delta=delta, gamma=gamma, s=s, LLFunction="calculateLoglikFromVariables.celda_CG") |
|
150 |
+ res = cCG.splitY(counts, y, m.CP.by.S, n.G.by.CP, n.TS.by.C, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, n.CP, s, z, K, L, nS, nG, alpha, beta, delta, gamma, y.prob=t(next.y$probs), max.clusters.to.try=10, min.cell=3) |
|
153 | 151 |
logMessages(res$message, logfile=logfile, append=TRUE) |
154 | 152 |
|
155 | 153 |
# Reset convergence counter if a split occurred |
... | ... |
@@ -162,15 +160,14 @@ celda_CG = function(counts, sample.label=NULL, K, L, |
162 | 160 |
|
163 | 161 |
## Re-calculate variables |
164 | 162 |
y = res$y |
165 |
- n.TS.by.C = rowSumByGroup(counts, group=y, L=L) |
|
166 |
- n.CP.by.TS = t(colSumByGroup(n.TS.by.C, group=z, K=K)) |
|
167 |
- n.CP = as.integer(rowSums(n.CP.by.TS)) |
|
168 |
- n.by.TS = as.integer(rowSumByGroup(matrix(n.by.G,ncol=1), group=y, L=L)) |
|
169 |
- nG.by.TS = as.integer(table(factor(y, levels=1:L))) |
|
163 |
+ n.TS.by.CP = res$n.TS.by.CP |
|
164 |
+ n.by.TS = res$n.by.TS |
|
165 |
+ nG.by.TS = res$nG.by.TS |
|
166 |
+ n.TS.by.C = rowSumByGroup(counts, group=y, L=L) |
|
170 | 167 |
} |
171 | 168 |
|
172 | 169 |
## Calculate complete likelihood |
173 |
- temp.ll = cCG.calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.CP.by.TS=n.CP.by.TS, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma) |
|
170 |
+ temp.ll = cCG.calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.TS.by.CP=n.TS.by.CP, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma) |
|
174 | 171 |
if((all(temp.ll > ll)) | iter == 1) { |
175 | 172 |
z.best = z |
176 | 173 |
y.best = y |
... | ... |
@@ -223,7 +220,7 @@ celda_CG = function(counts, sample.label=NULL, K, L, |
223 | 220 |
#' @param ... Other arguments |
224 | 221 |
#' @export |
225 | 222 |
simulateCells.celda_CG = function(model, S=10, C.Range=c(50,100), N.Range=c(500,5000), |
226 |
- G=1000, K=3, L=10, alpha=1, beta=1, gamma=1, |
|
223 |
+ G=1000, K=3, L=10, alpha=1, beta=1, gamma=5, |
|
227 | 224 |
delta=1, seed=12345, ...) { |
228 | 225 |
|
229 | 226 |
set.seed(seed) |
... | ... |
@@ -327,7 +324,7 @@ factorizeMatrix.celda_CG = function(counts, celda.mod, type=c("counts", "proport |
327 | 324 |
nM = p$nM |
328 | 325 |
m.CP.by.S = p$m.CP.by.S |
329 | 326 |
n.TS.by.C = p$n.TS.by.C |
330 |
- n.CP.by.TS = p$n.CP.by.TS |
|
327 |
+ n.TS.by.CP = p$n.TS.by.CP |
|
331 | 328 |
n.by.G = p$n.by.G |
332 | 329 |
n.by.TS = p$n.by.TS |
333 | 330 |
nG.by.TS = p$nG.by.TS |
... | ... |
@@ -345,8 +342,8 @@ factorizeMatrix.celda_CG = function(counts, celda.mod, type=c("counts", "proport |
345 | 342 |
rownames(n.G.by.TS) = celda.mod$names$row |
346 | 343 |
rownames(m.CP.by.S) = K.names |
347 | 344 |
colnames(m.CP.by.S) = celda.mod$names$sample |
348 |
- colnames(n.CP.by.TS) = L.names |
|
349 |
- rownames(n.CP.by.TS) = K.names |
|
345 |
+ colnames(n.TS.by.CP) = K.names |
|
346 |
+ rownames(n.TS.by.CP) = L.names |
|
350 | 347 |
|
351 | 348 |
counts.list = c() |
352 | 349 |
prop.list = c() |
... | ... |
@@ -355,7 +352,7 @@ factorizeMatrix.celda_CG = function(counts, celda.mod, type=c("counts", "proport |
355 | 352 |
|
356 | 353 |
if(any("counts" %in% type)) { |
357 | 354 |
counts.list = list(sample.states = m.CP.by.S, |
358 |
- population.states = t(n.CP.by.TS), |
|
355 |
+ population.states = n.TS.by.CP, |
|
359 | 356 |
cell.states = n.TS.by.C, |
360 | 357 |
gene.states = n.G.by.TS, |
361 | 358 |
gene.distribution = nG.by.TS) |
... | ... |
@@ -365,8 +362,8 @@ factorizeMatrix.celda_CG = function(counts, celda.mod, type=c("counts", "proport |
365 | 362 |
|
366 | 363 |
## Need to avoid normalizing cell/gene states with zero cells/genes |
367 | 364 |
unique.z = sort(unique(z)) |
368 |
- temp.n.CP.by.TS = t(n.CP.by.TS) |
|
369 |
- temp.n.CP.by.TS[,unique.z] = normalizeCounts(temp.n.CP.by.TS[,unique.z], scale.factor=1) |
|
365 |
+ temp.n.TS.by.CP = n.TS.by.CP |
|
366 |
+ temp.n.TS.by.CP[,unique.z] = normalizeCounts(temp.n.TS.by.CP[,unique.z], scale.factor=1) |
|
370 | 367 |
|
371 | 368 |
unique.y = sort(unique(y)) |
372 | 369 |
temp.n.G.by.TS = n.G.by.TS |
... | ... |
@@ -374,7 +371,7 @@ factorizeMatrix.celda_CG = function(counts, celda.mod, type=c("counts", "proport |
374 | 371 |
temp.nG.by.TS = nG.by.TS/sum(nG.by.TS) |
375 | 372 |
|
376 | 373 |
prop.list = list(sample.states = normalizeCounts(m.CP.by.S, scale.factor=1), |
377 |
- population.states = temp.n.CP.by.TS, |
|
374 |
+ population.states = temp.n.TS.by.CP, |
|
378 | 375 |
cell.states = normalizeCounts(n.TS.by.C, scale.factor=1), |
379 | 376 |
gene.states = temp.n.G.by.TS, |
380 | 377 |
gene.distribution = temp.nG.by.TS) |
... | ... |
@@ -388,7 +385,7 @@ factorizeMatrix.celda_CG = function(counts, celda.mod, type=c("counts", "proport |
388 | 385 |
temp.nG.by.TS = (nG.by.TS + gamma)/sum(nG.by.TS + gamma) |
389 | 386 |
|
390 | 387 |
post.list = list(sample.states = normalizeCounts(m.CP.by.S + alpha, scale.factor=1), |
391 |
- population.states = normalizeCounts(t(n.CP.by.TS + beta), scale.factor=1), |
|
388 |
+ population.states = normalizeCounts(n.TS.by.CP + beta, scale.factor=1), |
|
392 | 389 |
gene.states = gs, |
393 | 390 |
gene.distribution = temp.nG.by.TS) |
394 | 391 |
res = c(res, posterior = list(post.list)) |
... | ... |
@@ -398,7 +395,7 @@ factorizeMatrix.celda_CG = function(counts, celda.mod, type=c("counts", "proport |
398 | 395 |
} |
399 | 396 |
|
400 | 397 |
# Calculate the loglikelihood for the celda_CG model |
401 |
-cCG.calcLL = function(K, L, m.CP.by.S, n.CP.by.TS, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) { |
|
398 |
+cCG.calcLL = function(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) { |
|
402 | 399 |
nG.by.TS[nG.by.TS == 0] = 1 |
403 | 400 |
nG = sum(nG.by.TS) |
404 | 401 |
|
... | ... |
@@ -413,9 +410,9 @@ cCG.calcLL = function(K, L, m.CP.by.S, n.CP.by.TS, n.by.G, n.by.TS, nG.by.TS, nS |
413 | 410 |
|
414 | 411 |
## Calculate for "Phi" component |
415 | 412 |
a = K * lgamma(L * beta) |
416 |
- b = sum(lgamma(n.CP.by.TS + beta)) |
|
413 |
+ b = sum(lgamma(n.TS.by.CP + beta)) |
|
417 | 414 |
c = -K * L * lgamma(beta) |
418 |
- d = -sum(lgamma(rowSums(n.CP.by.TS + beta))) |
|
415 |
+ d = -sum(lgamma(colSums(n.TS.by.CP + beta))) |
|
419 | 416 |
|
420 | 417 |
phi.ll = a + b + c + d |
421 | 418 |
|
... | ... |
@@ -457,7 +454,7 @@ cCG.calcLL = function(K, L, m.CP.by.S, n.CP.by.TS, n.by.G, n.by.TS, nG.by.TS, nS |
457 | 454 |
calculateLoglikFromVariables.celda_CG = function(counts, sample.label, z, y, K, L, alpha, beta, delta, gamma) { |
458 | 455 |
s = processSampleLabels(sample.label, ncol(counts)) |
459 | 456 |
p = cCG.decomposeCounts(counts, s, z, y, K, L) |
460 |
- final = cCG.calcLL(K=K, L=L, m.CP.by.S=p$m.CP.by.S, n.CP.by.TS=p$n.CP.by.TS, n.by.G=p$n.by.G, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, nS=p$nS, nG=p$nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma) |
|
457 |
+ final = cCG.calcLL(K=K, L=L, m.CP.by.S=p$m.CP.by.S, n.TS.by.CP=p$n.TS.by.CP, n.by.G=p$n.by.G, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, nS=p$nS, nG=p$nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma) |
|
461 | 458 |
return(final) |
462 | 459 |
} |
463 | 460 |
|
... | ... |
@@ -472,18 +469,18 @@ calculateLoglikFromVariables.celda_CG = function(counts, sample.label, z, y, K, |
472 | 469 |
cCG.decomposeCounts = function(counts, s, z, y, K, L) { |
473 | 470 |
nS = length(unique(s)) |
474 | 471 |
m.CP.by.S = matrix(as.integer(table(factor(z, levels=1:K), s)), ncol=nS) |
475 |
- n.TS.by.C = rowSumByGroup(counts, group=y, L=L) |
|
476 |
- n.CP.by.TS = t(colSumByGroup(n.TS.by.C, group=z, K=K)) # TODO Refactor celda_CG |
|
477 |
- n.CP = as.integer(rowSums(n.CP.by.TS)) |
|
472 |
+ n.TS.by.C = rowSumByGroup(counts, group=y, L=L) |
|
473 |
+ n.TS.by.CP = colSumByGroup(n.TS.by.C, group=z, K=K) |
|
474 |
+ n.CP = as.integer(colSums(n.TS.by.CP)) |
|
478 | 475 |
n.by.G = as.integer(rowSums(counts)) |
479 | 476 |
n.by.C = as.integer(colSums(counts)) |
480 | 477 |
n.by.TS = as.integer(rowSumByGroup(matrix(n.by.G,ncol=1), group=y, L=L)) |
481 |
- nG.by.TS = as.integer(table(factor(y, 1:L))) |
|
482 |
- n.CP.by.G = colSumByGroup(counts, group=z, K=K) |
|
478 |
+ nG.by.TS = tabulate(y, L) |
|
479 |
+ n.G.by.CP = colSumByGroup(counts, group=z, K=K) |
|
483 | 480 |
|
484 | 481 |
nG = nrow(counts) |
485 | 482 |
nM = ncol(counts) |
486 |
- return(list(m.CP.by.S=m.CP.by.S, n.TS.by.C=n.TS.by.C, n.CP.by.TS=n.CP.by.TS, n.CP=n.CP, n.by.G=n.by.G, n.by.C=n.by.C, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, n.CP.by.G=n.CP.by.G, nM=nM, nG=nG, nS=nS)) |
|
483 |
+ return(list(m.CP.by.S=m.CP.by.S, n.TS.by.C=n.TS.by.C, n.TS.by.CP=n.TS.by.CP, n.CP=n.CP, n.by.G=n.by.G, n.by.C=n.by.C, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, n.G.by.CP=n.G.by.CP, nM=nM, nG=nG, nS=nS)) |
|
487 | 484 |
} |
488 | 485 |
|
489 | 486 |
|
... | ... |
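A toy illustration of the orientation convention enforced here, using base rowsum() in place of the package's C-level rowSumByGroup/colSumByGroup helpers (the 4x3 matrix and labels are made up):

    counts <- matrix(1:12, nrow = 4)          # genes (G) x cells (C)
    y <- c(1, 1, 2, 2)                        # gene -> TS labels, L = 2
    z <- c(1, 2, 2)                           # cell -> CP labels, K = 2
    n.TS.by.C  <- rowsum(counts, y)           # TS x C: TS are always rows
    n.TS.by.CP <- t(rowsum(t(n.TS.by.C), z))  # TS x CP: CP are always columns

Keeping G/TS on the rows and C/CP on the columns everywhere is what removes the t() round-trips that the old n.CP.by.TS code needed.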
@@ -511,11 +508,11 @@ clusterProbability.celda_CG = function(celda.mod, counts, log=FALSE, ...) { |
511 | 508 |
p = cCG.decomposeCounts(counts, s, z, y, K, L) |
512 | 509 |
|
513 | 510 |
## Gibbs sampling for each cell |
514 |
- next.z = cC.calcGibbsProbZ(counts=p$n.TS.by.C, m.CP.by.S=p$m.CP.by.S, n.G.by.CP=t(p$n.CP.by.TS), n.CP=p$n.CP, n.by.C=p$n.by.C, z=z, s=s, K=K, nG=L, nM=p$nM, alpha=alpha, beta=beta, do.sample=FALSE) |
|
511 |
+ next.z = cC.calcGibbsProbZ(counts=p$n.TS.by.C, m.CP.by.S=p$m.CP.by.S, n.G.by.CP=p$n.TS.by.CP, n.CP=p$n.CP, n.by.C=p$n.by.C, z=z, s=s, K=K, nG=L, nM=p$nM, alpha=alpha, beta=beta, do.sample=FALSE) |
|
515 | 512 |
z.prob = t(next.z$probs) |
516 | 513 |
|
517 | 514 |
## Gibbs sampling for each gene |
518 |
- next.y = cG.calcGibbsProbY(counts.t=p$n.CP.by.G, n.C.by.TS=p$n.CP.by.TS, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, n.by.G=p$n.by.G, y=y, L=L, nG=nG, beta=beta, delta=delta, gamma=gamma, do.sample=FALSE) |
|
515 |
+ next.y = cG.calcGibbsProbY(counts=p$n.CP.by.G, n.TS.by.C=p$n.TS.by.CP, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, n.by.G=p$n.by.G, y=y, L=L, nG=nG, beta=beta, delta=delta, gamma=gamma, do.sample=FALSE) |
|
519 | 516 |
y.prob = t(next.y$probs) |
520 | 517 |
|
521 | 518 |
if(!isTRUE(log)) { |
... | ... |
@@ -528,7 +525,7 @@ clusterProbability.celda_CG = function(celda.mod, counts, log=FALSE, ...) { |
528 | 525 |
|
529 | 526 |
|
530 | 527 |
#' @export |
531 |
-calculatePerplexity.celda_CG = function(counts, celda.mod, precision=128) { |
|
528 |
+calculatePerplexity.celda_CG = function(counts, celda.mod) { |
|
532 | 529 |
|
533 | 530 |
factorized = factorizeMatrix(counts = counts, celda.mod = celda.mod, "posterior") |
534 | 531 |
theta = log(factorized$posterior$sample.states) |
... | ... |
@@ -541,14 +538,8 @@ calculatePerplexity.celda_CG = function(counts, celda.mod, precision=128) { |
541 | 538 |
eta.prob = log(eta) * nG.by.TS |
542 | 539 |
gene.by.pop.prob = log(psi %*% phi) |
543 | 540 |
inner.log.prob = (t(gene.by.pop.prob) %*% counts) + theta[, sl] |
544 |
- inner.log.prob = Rmpfr::mpfr(inner.log.prob, precision) |
|
545 |
- inner.log.prob.exp = exp(inner.log.prob) |
|
546 |
- |
|
547 |
- log.px = sum(eta.prob) |
|
548 |
- for(i in 1:ncol(inner.log.prob.exp)) { |
|
549 |
- log.px = log.px + Rmpfr::asNumeric(log(sum(inner.log.prob.exp[, i]))) |
|
550 |
- } |
|
551 | 541 |
|
542 |
+ log.px = sum(apply(inner.log.prob, 2, matrixStats::logSumExp)) + sum(eta.prob) |
|
552 | 543 |
perplexity = exp(-(log.px/sum(counts))) |
553 | 544 |
return(perplexity) |
554 | 545 |
} |
... | ... |
@@ -58,7 +58,6 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, |
58 | 58 |
|
59 | 59 |
## Error checking and variable processing |
60 | 60 |
counts = processCounts(counts) |
61 |
- counts.t = t(counts) |
|
62 | 61 |
|
63 | 62 |
if(is.null(count.checksum)) { |
64 | 63 |
count.checksum = digest::digest(counts, algo="md5") |
... | ... |
@@ -70,7 +69,7 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, |
70 | 69 |
|
71 | 70 |
## Calculate counts one time up front |
72 | 71 |
p = cG.decomposeCounts(counts=counts, y=y, L=L) |
73 |
- n.C.by.TS = p$n.C.by.TS |
|
72 |
+ n.TS.by.C = p$n.TS.by.C |
|
74 | 73 |
n.by.G = p$n.by.G |
75 | 74 |
n.by.TS = p$n.by.TS |
76 | 75 |
nG.by.TS = p$nG.by.TS |
... | ... |
@@ -82,15 +81,15 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, |
82 | 81 |
logMessages(date(), "... Starting celda_G to cluster genes.", logfile=logfile, append=FALSE) |
83 | 82 |
|
84 | 83 |
## Calculate initial log likelihood |
85 |
- ll <- cG.calcLL(n.C.by.TS=n.C.by.TS, n.by.TS=n.by.TS, n.by.G=n.by.G, nG.by.TS=nG.by.TS, nM=nM, nG=nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
84 |
+ ll <- cG.calcLL(n.TS.by.C=n.TS.by.C, n.by.TS=n.by.TS, n.by.G=n.by.G, nG.by.TS=nG.by.TS, nM=nM, nG=nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
86 | 85 |
|
87 | 86 |
iter <- 1L |
88 | 87 |
num.iter.without.improvement = 0L |
89 | 88 |
do.gene.split = TRUE |
90 | 89 |
while(iter <= max.iter & num.iter.without.improvement <= stop.iter) { |
91 | 90 |
|
92 |
- next.y = cG.calcGibbsProbY(counts.t=counts.t, n.C.by.TS=n.C.by.TS, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, n.by.G=n.by.G, y=y, nG=nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
93 |
- n.C.by.TS = next.y$n.C.by.TS |
|
91 |
+ next.y = cG.calcGibbsProbY(counts=counts, n.TS.by.C=n.TS.by.C, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, n.by.G=n.by.G, y=y, nG=nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
92 |
+ n.TS.by.C = next.y$n.TS.by.C |
|
94 | 93 |
nG.by.TS = next.y$nG.by.TS |
95 | 94 |
n.by.TS = next.y$n.by.TS |
96 | 95 |
y = next.y$y |
... | ... |
@@ -98,7 +97,7 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, |
98 | 97 |
## Perform split on i-th iteration of no improvement in log likelihood |
99 | 98 |
if(L > 2 & (((iter == max.iter | num.iter.without.improvement == stop.iter) & isTRUE(split.on.last)) | (split.on.iter > 0 & iter %% split.on.iter == 0 & isTRUE(do.gene.split)))) { |
100 | 99 |
logMessages(date(), " ... Determining if any gene clusters should be split.", logfile=logfile, append=TRUE, sep="") |
101 |
- res = split.each.y(counts=counts, y=y, L=L, y.prob=t(next.y$probs), beta=beta, delta=delta, gamma=gamma, LLFunction="calculateLoglikFromVariables.celda_G") |
|
100 |
+ res = cG.splitY(counts, y, n.TS.by.C, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delta, gamma, y.prob=t(next.y$probs), min=3, max.clusters.to.try=10) |
|
102 | 101 |
logMessages(res$message, logfile=logfile, append=TRUE) |
103 | 102 |
|
104 | 103 |
# Reset convergence counter if a split occurred |
... | ... |
@@ -111,13 +110,13 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, |
111 | 110 |
|
112 | 111 |
## Re-calculate variables |
113 | 112 |
y = res$y |
114 |
- n.C.by.TS = t(rowSumByGroup(counts, group=y, L=L)) |
|
115 |
- n.by.TS = as.integer(rowSumByGroup(matrix(n.by.G,ncol=1), group=y, L=L)) |
|
116 |
- nG.by.TS = as.integer(table(factor(y, 1:L))) |
|
113 |
+ n.TS.by.C = res$n.TS.by.C |
|
114 |
+ n.by.TS = res$n.by.TS |
|
115 |
+ nG.by.TS = res$nG.by.TS |
|
117 | 116 |
} |
118 | 117 |
|
119 | 118 |
## Calculate complete likelihood |
120 |
- temp.ll <- cG.calcLL(n.C.by.TS=n.C.by.TS, n.by.TS=n.by.TS, n.by.G=n.by.G, nG.by.TS=nG.by.TS, nM=nM, nG=nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
119 |
+ temp.ll <- cG.calcLL(n.TS.by.C=n.TS.by.C, n.by.TS=n.by.TS, n.by.G=n.by.G, nG.by.TS=nG.by.TS, nM=nM, nG=nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
121 | 120 |
if((all(temp.ll > ll)) | iter == 1) { |
122 | 121 |
y.best = y |
123 | 122 |
ll.best = temp.ll |
... | ... |
@@ -158,13 +157,13 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, |
158 | 157 |
# @param delta The Dirichlet distribution parameter for Eta; adds a gene pseudocount to the number of genes in each state. |
159 | 158 |
# @param beta Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution |
160 | 159 |
# @keywords log likelihood |
161 |
-cG.calcGibbsProbY = function(counts.t, n.C.by.TS, n.by.TS, nG.by.TS, n.by.G, y, L, nG, beta, delta, gamma, do.sample=TRUE) { |
|
160 |
+cG.calcGibbsProbY = function(counts, n.TS.by.C, n.by.TS, nG.by.TS, n.by.G, y, L, nG, beta, delta, gamma, do.sample=TRUE) { |
|
162 | 161 |
|
163 | 162 |
## Set variables up front outside of loop |
164 | 163 |
probs = matrix(NA, ncol=nG, nrow=L) |
165 | 164 |
temp.nG.by.TS = nG.by.TS |
166 | 165 |
temp.n.by.TS = n.by.TS |
167 |
-# temp.n.C.by.TS = n.C.by.TS |
|
166 |
+# temp.n.TS.by.C = n.TS.by.C |
|
168 | 167 |
|
169 | 168 |
ix <- sample(1:nG) |
170 | 169 |
for(i in ix) { |
... | ... |
@@ -173,25 +172,25 @@ cG.calcGibbsProbY = function(counts.t, n.C.by.TS, n.by.TS, nG.by.TS, n.by.G, y, |
173 | 172 |
nG.by.TS[y[i]] = nG.by.TS[y[i]] - 1L |
174 | 173 |
n.by.TS[y[i]] = n.by.TS[y[i]] - n.by.G[i] |
175 | 174 |
|
176 |
- n.C.by.TS_1 = n.C.by.TS |
|
177 |
- n.C.by.TS_1[,y[i]] = n.C.by.TS_1[,y[i]] - counts.t[,i] |
|
178 |
- n.C.by.TS_1 = .colSums(lgamma(n.C.by.TS_1 + beta), nrow(n.C.by.TS), ncol(n.C.by.TS)) |
|
175 |
+ n.TS.by.C_1 = n.TS.by.C |
|
176 |
+ n.TS.by.C_1[y[i],] = n.TS.by.C_1[y[i],] - counts[i,] |
|
177 |
+ n.TS.by.C_1 = .rowSums(lgamma(n.TS.by.C_1 + beta), nrow(n.TS.by.C), ncol(n.TS.by.C)) |
|
179 | 178 |
|
180 |
- n.C.by.TS_2 = n.C.by.TS |
|
179 |
+ n.TS.by.C_2 = n.TS.by.C |
|
181 | 180 |
other.ix = (1:L)[-y[i]] |
182 |
- n.C.by.TS_2[,other.ix] = n.C.by.TS_2[,other.ix] + counts.t[,i] |
|
183 |
- n.C.by.TS_2 = .colSums(lgamma(n.C.by.TS_2 + beta), nrow(n.C.by.TS), ncol(n.C.by.TS)) |
|
184 |
- |
|
181 |
+ n.TS.by.C_2[other.ix,] = n.TS.by.C_2[other.ix,] + counts[rep(i, length(other.ix)),] |
|
182 |
+ n.TS.by.C_2 = .rowSums(lgamma(n.TS.by.C_2 + beta), nrow(n.TS.by.C), ncol(n.TS.by.C)) |
|
183 |
+ |
|
185 | 184 |
## Calculate probabilities for each state |
186 | 185 |
for(j in 1:L) { |
187 | 186 |
|
188 | 187 |
temp.nG.by.TS = nG.by.TS |
189 | 188 |
temp.n.by.TS = n.by.TS |
190 |
-# temp.n.C.by.TS = n.C.by.TS |
|
189 |
+# temp.n.TS.by.C = n.TS.by.C |
|
191 | 190 |
|
192 | 191 |
temp.nG.by.TS[j] = temp.nG.by.TS[j] + 1L |
193 | 192 |
temp.n.by.TS[j] = temp.n.by.TS[j] + n.by.G[i] |
194 |
-# temp.n.C.by.TS[,j] = temp.n.C.by.TS[,j] + counts.t[,i] |
|
193 |
+# temp.n.TS.by.C[j,] = temp.n.TS.by.C[j,] + counts[i,] |
|
195 | 194 |
|
196 | 195 |
|
197 | 196 |
pseudo.nG.by.TS = temp.nG.by.TS |
... | ... |
@@ -201,8 +200,8 @@ cG.calcGibbsProbY = function(counts.t, n.C.by.TS, n.by.TS, nG.by.TS, n.by.G, y, |
201 | 200 |
other.ix = (1:L)[-j] |
202 | 201 |
probs[j,i] <- sum(lgamma(pseudo.nG.by.TS + gamma)) - |
203 | 202 |
sum(lgamma(sum(pseudo.nG.by.TS + gamma))) + |
204 |
- sum(n.C.by.TS_1[other.ix]) + n.C.by.TS_2[j] + |
|
205 |
-# sum(lgamma(temp.n.C.by.TS + beta)) + |
|
203 |
+ sum(n.TS.by.C_1[other.ix]) + n.TS.by.C_2[j] + |
|
204 |
+# sum(lgamma(temp.n.TS.by.C + beta)) + |
|
206 | 205 |
sum(lgamma(pseudo.nG.by.TS * delta)) - |
207 | 206 |
(pseudo.nG * lgamma(delta)) - |
208 | 207 |
sum(lgamma(temp.n.by.TS + (pseudo.nG.by.TS * delta))) |
... | ... |
@@ -214,16 +213,16 @@ cG.calcGibbsProbY = function(counts.t, n.C.by.TS, n.by.TS, nG.by.TS, n.by.G, y, |
214 | 213 |
if(isTRUE(do.sample)) y[i] = sample.ll(probs[,i]) |
215 | 214 |
|
216 | 215 |
if(prev.y != y[i]) { |
217 |
- n.C.by.TS[,prev.y] = n.C.by.TS[,prev.y] - counts.t[,i] |
|
218 |
- n.C.by.TS[,y[i]] = n.C.by.TS[,y[i]] + counts.t[,i] |
|
216 |
+ n.TS.by.C[prev.y,] = n.TS.by.C[prev.y,] - counts[i,] |
|
217 |
+ n.TS.by.C[y[i],] = n.TS.by.C[y[i],] + counts[i,] |
|
219 | 218 |
} |
220 | 219 |
|
221 | 220 |
nG.by.TS[y[i]] = nG.by.TS[y[i]] + 1L |
222 | 221 |
n.by.TS[y[i]] = n.by.TS[y[i]] + n.by.G[i] |
223 |
- #n.C.by.TS[,y[i]] = n.C.by.TS[,y[i]] + counts.t[,i] |
|
222 |
+ #n.TS.by.C[y[i],] = n.TS.by.C[y[i],] + counts[i,] |
|
224 | 223 |
} |
225 | 224 |
|
226 |
- return(list(n.C.by.TS=n.C.by.TS, nG.by.TS=nG.by.TS, n.by.TS=n.by.TS, y=y, probs=probs)) |
|
225 |
+ return(list(n.TS.by.C=n.TS.by.C, nG.by.TS=nG.by.TS, n.by.TS=n.by.TS, y=y, probs=probs)) |
|
227 | 226 |
} |
228 | 227 |
|
229 | 228 |
|
... | ... |
@@ -244,7 +243,7 @@ cG.calcGibbsProbY = function(counts.t, n.C.by.TS, n.by.TS, nG.by.TS, n.by.G, y, |
244 | 243 |
#' @param ... Other arguments |
245 | 244 |
#' @export |
246 | 245 |
simulateCells.celda_G = function(model, C=100, N.Range=c(500,5000), G=1000, |
247 |
- L=5, beta=1, gamma=1, delta=1, seed=12345, ...) { |
|
246 |
+ L=5, beta=1, gamma=5, delta=1, seed=12345, ...) { |
|
248 | 247 |
set.seed(seed) |
249 | 248 |
eta = rdirichlet(1, rep(gamma, L)) |
250 | 249 |
|
... | ... |
@@ -314,7 +313,7 @@ factorizeMatrix.celda_G = function(counts, celda.mod, type=c("counts", "proporti |
314 | 313 |
gamma = celda.mod$gamma |
315 | 314 |
|
316 | 315 |
p = cG.decomposeCounts(counts=counts, y=y, L=L) |
317 |
- n.TS.by.C = t(p$n.C.by.TS) |
|
316 |
+ n.TS.by.C = p$n.TS.by.C |
|
318 | 317 |
n.by.G = p$n.by.G |
319 | 318 |
n.by.TS = p$n.by.TS |
320 | 319 |
nG.by.TS = p$nG.by.TS |
... | ... |
@@ -370,16 +369,16 @@ factorizeMatrix.celda_G = function(counts, celda.mod, type=c("counts", "proporti |
370 | 369 |
|
371 | 370 |
|
372 | 371 |
# Calculate log-likelihood of celda_G model |
373 |
-cG.calcLL = function(n.C.by.TS, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delta, gamma) { |
|
372 |
+cG.calcLL = function(n.TS.by.C, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delta, gamma) { |
|
374 | 373 |
|
375 | 374 |
nG.by.TS[nG.by.TS == 0] = 1 |
376 | 375 |
nG = sum(nG.by.TS) |
377 | 376 |
|
378 | 377 |
## Calculate for "Phi" component |
379 | 378 |
a <- nM * lgamma(L * beta) |
380 |
- b <- sum(lgamma(n.C.by.TS + beta)) |
|
379 |
+ b <- sum(lgamma(n.TS.by.C + beta)) |
|
381 | 380 |
c <- -nM * L * lgamma(beta) |
382 |
- d <- -sum(lgamma(rowSums(n.C.by.TS + beta))) |
|
381 |
+ d <- -sum(lgamma(colSums(n.TS.by.C + beta))) |
|
383 | 382 |
|
384 | 383 |
phi.ll <- a + b + c + d |
385 | 384 |
|
... | ... |
@@ -422,7 +421,7 @@ cG.calcLL = function(n.C.by.TS, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delt |
422 | 421 |
calculateLoglikFromVariables.celda_G = function(counts, y, L, beta, delta, gamma) { |
423 | 422 |
|
424 | 423 |
p = cG.decomposeCounts(counts=counts, y=y, L=L) |
425 |
- final <- cG.calcLL(n.C.by.TS=p$n.C.by.TS, n.by.TS=p$n.by.TS, n.by.G=p$n.by.G, nG.by.TS=p$nG.by.TS, nM=p$nM, nG=p$nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
424 |
+ final <- cG.calcLL(n.TS.by.C=p$n.TS.by.C, n.by.TS=p$n.by.TS, n.by.G=p$n.by.G, nG.by.TS=p$nG.by.TS, nM=p$nM, nG=p$nG, L=L, beta=beta, delta=delta, gamma=gamma) |
|
426 | 425 |
|
427 | 426 |
return(final) |
428 | 427 |
} |
... | ... |
@@ -434,14 +433,24 @@ calculateLoglikFromVariables.celda_G = function(counts, y, L, beta, delta, gamma |
434 | 433 |
#' @param L The number of clusters being considered |
435 | 434 |
cG.decomposeCounts = function(counts, y, L) { |
436 | 435 |
|
437 |
- n.C.by.TS = t(rowSumByGroup(counts, group=y, L=L)) |
|
438 |
- n.by.G = as.integer(rowSums(counts)) |
|
436 |
+ n.TS.by.C = rowSumByGroup(counts, group=y, L=L) |
|
437 |
+ n.by.G = as.integer(.rowSums(counts, nrow(counts), ncol(counts))) |
|
439 | 438 |
n.by.TS = as.integer(rowSumByGroup(matrix(n.by.G,ncol=1), group=y, L=L)) |
440 |
- nG.by.TS = as.integer(table(factor(y, 1:L))) |
|
439 |
+ nG.by.TS = tabulate(y, L) |
|
441 | 440 |
nM = ncol(counts) |
442 | 441 |
nG = nrow(counts) |
443 | 442 |
|
444 |
- return(list(n.C.by.TS=n.C.by.TS, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nM=nM, nG=nG)) |
|
443 |
+ return(list(n.TS.by.C=n.TS.by.C, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nM=nM, nG=nG)) |
|
444 |
+} |
|
445 |
+ |
|
446 |
+ |
|
447 |
+cG.reDecomposeCounts = function(counts, y, previous.y, n.TS.by.C, n.by.G, L) { |
|
448 |
+ ## Recalculate counts based on new label |
|
449 |
+ n.TS.by.C = rowSumByGroupChange(counts, n.TS.by.C, y, previous.y, L) |
|
450 |
+ n.by.TS = as.integer(rowSumByGroup(matrix(n.by.G,ncol=1), group=y, L=L)) |
|
451 |
+ nG.by.TS = tabulate(y, L) |
|
452 |
+ |
|
453 |
+ return(list(n.TS.by.C=n.TS.by.C, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS)) |
|
445 | 454 |
} |
446 | 455 |
|
447 | 456 |
|
... | ... |
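tabulate() is a faster drop-in for the table(factor(...)) idiom replaced above when the labels are integers in 1..L; a quick equivalence check with made-up labels:

    y <- c(1, 1, 3, 2, 3, 3)
    L <- 4
    identical(tabulate(y, L),
              as.integer(table(factor(y, levels = 1:L))))  # TRUE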
@@ -463,7 +472,7 @@ clusterProbability.celda_G = function(celda.mod, counts, log=FALSE, ...) { |
463 | 472 |
|
464 | 473 |
## Calculate counts one time up front |
465 | 474 |
p = cG.decomposeCounts(counts=counts, y=y, L=L) |
466 |
- next.y = cG.calcGibbsProbY(counts.t=t(counts), n.C.by.TS=p$n.C.by.TS, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, n.by.G=p$n.by.G, y=y, nG=p$nG, L=L, beta=beta, delta=delta, gamma=gamma, do.sample=FALSE) |
|
475 |
+ next.y = cG.calcGibbsProbY(counts=counts, n.TS.by.C=p$n.TS.by.C, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, n.by.G=p$n.by.G, y=y, nG=p$nG, L=L, beta=beta, delta=delta, gamma=gamma, do.sample=FALSE) |
|
467 | 476 |
y.prob = t(next.y$probs) |
468 | 477 |
|
469 | 478 |
if(!isTRUE(log)) { |
... | ... |
@@ -475,7 +484,7 @@ clusterProbability.celda_G = function(celda.mod, counts, log=FALSE, ...) { |
475 | 484 |
|
476 | 485 |
|
477 | 486 |
#' @export |
478 |
-calculatePerplexity.celda_G = function(counts, celda.mod, precision=128) { |
|
487 |
+calculatePerplexity.celda_G = function(counts, celda.mod) { |
|
479 | 488 |
|
480 | 489 |
factorized = factorizeMatrix(counts = counts, celda.mod = celda.mod, "posterior") |
481 | 490 |
phi <- factorized$posterior$gene.states |
... | ... |
@@ -252,8 +252,8 @@ logMessages = function(..., sep = " ", logfile = NULL, append = FALSE) { |
252 | 252 |
#' @export |
253 | 253 |
distinct_colors = function(n, |
254 | 254 |
hues = c("red", "cyan", "orange", "blue", "yellow", "purple", "green", "magenta"), |
255 |
- saturation.range = c(0.4, 1), |
|
256 |
- value.range = c(0.4, 1)) { |
|
255 |
+ saturation.range = c(0.7, 1), |
|
256 |
+ value.range = c(0.7, 1)) { |
|
257 | 257 |
|
258 | 258 |
if(!(all(hues %in% grDevices::colors()))) { |
259 | 259 |
stop("Only color names listed in the 'color' function can be used in 'hues'") |
... | ... |
@@ -11,10 +11,9 @@ |
11 | 11 |
#' |
12 | 12 |
#' @param counts The count matrix modeled in the celdaRun parameter |
13 | 13 |
#' @param celda.mod A single celda run (usually from the _res.list_ property of a celda_list) |
14 |
-#' @param precision The amount of bits of precision to pass to Rmpfr |
|
15 |
-#' @return The perplexity for the provided chain as an mpfr number |
|
14 |
+#' @return The perplexity for the provided data and model |
|
16 | 15 |
#' @export |
17 |
-calculatePerplexity = function(counts, celda.mod, precision=128) { |
|
16 |
+calculatePerplexity = function(counts, celda.mod) { |
|
18 | 17 |
UseMethod("calculatePerplexity", celda.mod) |
19 | 18 |
} |
20 | 19 |
|
... | ... |
@@ -115,7 +115,7 @@ plotDrCluster <- function(dim1, dim2, cluster, size = 1, xlab = "Dimension_1", y |
115 | 115 |
#' @param max.iter Numeric vector; determines iterations for tsne. Default 1000. |
116 | 116 |
#' @param distance Character vector; determines which distance metric to use for tsne. Options: cosine, hellinger, spearman. |
117 | 117 |
#' @export |
118 |
-celdaTsne = function(counts, celda.mod, states=NULL, perplexity=20, max.iter=2500, distance="hellinger") { |
|
118 |
+celdaTsne = function(counts, celda.mod, states=NULL, perplexity=20, max.iter=2500, distance="hellinger", seed=12345) { |
|
119 | 119 |
if (!isTRUE(class(celda.mod) %in% c("celda_CG","celda_C","celda_G"))) { |
120 | 120 |
stop("celda.mod argument is not of class celda_C, celda_G or celda_CG") |
121 | 121 |
} |
... | ... |
@@ -148,6 +148,7 @@ celdaTsne = function(counts, celda.mod, states=NULL, perplexity=20, max.iter=250 |
148 | 148 |
} |
149 | 149 |
|
150 | 150 |
do.pca = class(celda.mod) == "celda_C" |
151 |
+ set.seed(seed) |
|
151 | 152 |
res = Rtsne::Rtsne(d, pca=do.pca, max_iter=max.iter, perplexity = perplexity, |
152 | 153 |
check_duplicates = FALSE, is_distance = TRUE)$Y |
153 | 154 |
colnames(res) = c("tsne_1", "tsne_2") |
... | ... |
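With the new seed argument, repeated celdaTsne calls are reproducible. A hypothetical usage sketch (counts and celda.mod stand for real inputs):

    tsne1 <- celdaTsne(counts, celda.mod, seed = 12345)
    tsne2 <- celdaTsne(counts, celda.mod, seed = 12345)
    all.equal(tsne1, tsne2)  # TRUE, since set.seed(seed) now precedes Rtsne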
@@ -1,50 +1,146 @@ |
1 |
-split.each.z = function(counts, z, K, LLFunction, z.prob, min.cell=3, max.clusters.to.try = 10, ...) { |
|
1 |
+# cC.calcLL = function(m.CP.by.S, n.G.by.CP, s, z, K, nS, nG, alpha, beta) |
|
2 |
+cC.splitZ = function(counts, m.CP.by.S, n.G.by.CP, s, z, K, nS, nG, alpha, beta, z.prob, max.clusters.to.try=10, min.cell=3) { |
|
2 | 3 |
|
3 |
- dot.args = list(...) |
|
4 |
- |
|
5 | 4 |
## Identify clusters to split |
6 |
- z.ta = table(factor(z, levels=1:K)) |
|
5 |
+ z.ta = tabulate(z, K) |
|
7 | 6 |
z.to.split = which(z.ta >= min.cell) |
8 | 7 |
z.non.empty = which(z.ta > 0) |
9 | 8 |
|
9 |
+ if(length(z.to.split) == 0) { |
|
10 |
+ m = paste0(date(), " ... Cluster sizes too small. No additional splitting was performed.") |
|
11 |
+ return(list(z=z, m.CP.by.S, n.G.by.CP, n.CP=n.CP, message=m)) |
|
12 |
+ } |
|
13 |
+ |
|
10 | 14 |
## Loop through each split-able Z and perform split |
11 |
- clust.split = list() |
|
12 |
- for(i in 1:K) { |
|
13 |
- if(i %in% z.to.split) { |
|
14 |
- clustLabel = suppressMessages(celda_C(counts[,z == i], K=2, max.iter=5, split.on.iter=-1, split.on.last=FALSE)) |
|
15 |
- clust.split = c(clust.split, list(clustLabel$z)) |
|
16 |
- } else { |
|
17 |
- clust.split = c(clust.split, list(NA)) |
|
18 |
- } |
|
15 |
+ clust.split = vector("list", K) |
|
16 |
+ for(i in z.to.split) { |
|
17 |
+ clustLabel = suppressMessages(celda_C(counts[,z == i], K=2, max.iter=5, split.on.iter=-1, split.on.last=FALSE)) |
|
18 |
+ clust.split[[i]] = clustLabel$z |
|
19 | 19 |
} |
20 | 20 |
|
21 | 21 |
## Find second best assignment give current assignments for each cell |
22 | 22 |
z.prob[cbind(1:nrow(z.prob), z)] = NA |
23 | 23 |
z.second = apply(z.prob, 1, which.max) |
24 | 24 |
|
25 |
+ ## Set up initial variables |
|
26 |
+ z.split = matrix(NA, nrow=length(z), ncol=length(z.to.split) * max.clusters.to.try) |
|
27 |
+ z.split.ll = rep(NA, ncol=length(z.to.split) * max.clusters.to.try) |
|
28 |
+ z.split.ll[1] = cC.calcLL(m.CP.by.S, n.G.by.CP, s, z, K, nS, nG, alpha, beta) |
|
29 |
+ z.split[,1] = z |
|
30 |
+ |
|
31 |
+ ## Select worst clusters to test for reshuffling |
|
32 |
+ previous.z = z |
|
25 | 33 |
ll.shuffle = rep(NA, K) |
26 | 34 |
for(i in z.non.empty) { |
27 | 35 |
ix = z == i |
28 | 36 |
new.z = z |
29 | 37 |
new.z[ix] = z.second[ix] |
30 |
- params = c(list(counts=counts, z=new.z, K=K), dot.args) |
|
31 |
- ll.shuffle[i] = do.call(LLFunction, params) |
|
32 |
- } |
|
38 |
+ |
|
39 |
+ p = cC.reDecomposeCounts(counts, s, new.z, previous.z, n.G.by.CP, K) |
|
40 |
+ n.G.by.CP = p$n.G.by.CP |
|
41 |
+ m.CP.by.S = p$m.CP.by.S |
|
42 |
+ ll.shuffle[i] = cC.calcLL(m.CP.by.S, n.G.by.CP, s, z, K, nS, nG, alpha, beta) |
|
43 |
+ previous.z = new.z |
|
44 |
+ } |
|
33 | 45 |
z.to.shuffle = head(order(ll.shuffle, decreasing = TRUE, na.last=NA), n = max.clusters.to.try) |
34 | 46 |
|
35 |
- if(length(z.to.split) == 0 | length(z.to.shuffle) == 0) { |
|
47 |
+ |
|
48 |
+ pairs = c(NA, NA) |
|
49 |
+ split.ix = 2 |
|
50 |
+ for(i in z.to.shuffle) { |
|
51 |
+ |
|
52 |
+ other.clusters = setdiff(z.to.split, i) |
|
53 |
+ |
|
54 |
+ for(j in other.clusters) { |
|
55 |
+ new.z = z |
|
56 |
+ |
|
57 |
+ ## Assign cluster i to the next most similar cluster (excluding cluster j) |
|
58 |
+ ## as defined above by the correlation |
|
59 |
+ ix.to.move = z == i |
|
60 |
+ new.z[ix.to.move] = z.second[ix.to.move] |
|
61 |
+ |
|
62 |
+ ## Split cluster j according to the clustering defined above |
|
63 |
+ ix.to.split = z == j |
|
64 |
+ new.z[ix.to.split] = ifelse(clust.split[[j]] == 1, j, i) |
|
65 |
+ |
|
66 |
+ p = cC.reDecomposeCounts(counts, s, new.z, previous.z, n.G.by.CP, K) |
|
67 |
+ n.G.by.CP = p$n.G.by.CP |
|
68 |
+ m.CP.by.S = p$m.CP.by.S |
|
69 |
+ |
|
70 |
+ ## Calculate likelihood of split |
|
71 |
+ z.split.ll[split.ix] = cC.calcLL(m.CP.by.S, n.G.by.CP, s, z, K, nS, nG, alpha, beta) |
|
72 |
+ z.split[,split.ix] = new.z |
|
73 |
+ split.ix = split.ix + 1L |
|
74 |
+ previous.z = new.z |
|
75 |
+ |
|
76 |
+ pairs = rbind(pairs, c(i, j)) |
|
77 |
+ } |
|
78 |
+ } |
|
79 |
+ |
|
80 |
+ select = which.max(z.split.ll) |
|
81 |
+ |
|
82 |
+ if(select == 1) { |
|
83 |
+ m = paste0(date(), " ... No additional splitting was performed.") |
|
84 |
+ } else { |
|
85 |
+ m = paste0(date(), " ... Cluster ", pairs[select,1], " was reassigned and cluster ", pairs[select,2], " was split in two.") |
|
86 |
+ } |
|
87 |
+ |
|
88 |
+ p = cC.reDecomposeCounts(counts, s, z.split[,select], previous.z, n.G.by.CP, K) |
|
89 |
+ return(list(z=z.split[,select], m.CP.by.S=p$m.CP.by.S, n.G.by.CP=p$n.G.by.CP, n.CP=p$n.CP, message=m)) |
|
90 |
+} |
|
91 |
+ |
|
92 |
+ |
|
93 |
+# cCG.calcLL = function(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) |
|
94 |
+cCG.splitZ = function(counts, m.CP.by.S, n.TS.by.C, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, s, z, K, L, nS, nG, alpha, beta, delta, gamma, z.prob, max.clusters.to.try=10, min.cell=3) { |
|
95 |
+ |
|
96 |
+ ## Identify clusters to split |
|
97 |
+ z.ta = tabulate(z, K) |
|
98 |
+ z.to.split = which(z.ta >= min.cell) |
|
99 |
+ z.non.empty = which(z.ta > 0) |
|
100 |
+ |
|
101 |
+ if(length(z.to.split) == 0) { |
|
36 | 102 |
m = paste0(date(), " ... Cluster sizes too small. No additional splitting was performed.") |
37 |
- return(list(z=z,message=m)) |
|
103 |
+ return(list(z=z, m.CP.by.S=m.CP.by.S, n.TS.by.CP=n.TS.by.CP, n.CP=n.CP, message=m)) |
|
38 | 104 |
} |
39 | 105 |
|
40 |
- ## Set up variables for holding results with current iteration of z |
|
41 |
- params = c(list(counts=counts, z=z, K=K), dot.args) |
|
42 |
- z.split.ll = do.call(LLFunction, params) |
|
43 |
- z.split = z |
|
106 |
+ ## Loop through each split-able Z and perform split |
|
107 |
+ clust.split = vector("list", K) |
|
108 |
+ for(i in z.to.split) { |
|
109 |
+ clustLabel = suppressMessages(celda_C(counts[,z == i], K=2, max.iter=5, split.on.iter=-1, split.on.last=FALSE)) |
|
110 |
+ clust.split[[i]] = clustLabel$z |
|
111 |
+ } |
|
44 | 112 |
|
113 |
+ ## Find second best assignment give current assignments for each cell |
|
114 |
+ z.prob[cbind(1:nrow(z.prob), z)] = NA |
|
115 |
+ z.second = apply(z.prob, 1, which.max) |
|
116 |
+ |
|
117 |
+ ## Set up initial variables |
|
118 |
+ z.split = matrix(NA, nrow=length(z), ncol=length(z.to.split) * max.clusters.to.try) |
|
119 |
+ z.split.ll = rep(NA, ncol=length(z.to.split) * max.clusters.to.try) |
|
120 |
+ z.split.ll[1] = cCG.calcLL(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) |
|
121 |
+ z.split[,1] = z |
|
122 |
+ |
|
123 |
+ ## Select worst clusters to test for reshuffling |
|
124 |
+ previous.z = z |
|
125 |
+ ll.shuffle = rep(NA, K) |
|
126 |
+ for(i in z.non.empty) { |
|
127 |
+ ix = z == i |
|
128 |
+ new.z = z |
|
129 |
+ new.z[ix] = z.second[ix] |
|
130 |
+ |
|
131 |
+ p = cC.reDecomposeCounts(n.TS.by.C, s, new.z, previous.z, n.TS.by.CP, K) |
|
132 |
+ n.TS.by.CP = p$n.G.by.CP |
|
133 |
+ m.CP.by.S = p$m.CP.by.S |
|
134 |
+ ll.shuffle[i] = cCG.calcLL(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) |
|
135 |
+ previous.z = new.z |
|
136 |
+ } |
|
137 |
+ z.to.shuffle = head(order(ll.shuffle, decreasing = TRUE, na.last=NA), n = max.clusters.to.try) |
|
138 |
+ |
|
139 |
+ |
|
45 | 140 |
pairs = c(NA, NA) |
141 |
+ split.ix = 2 |
|
46 | 142 |
for(i in z.to.shuffle) { |
47 |
- |
|
143 |
+ |
|
48 | 144 |
other.clusters = setdiff(z.to.split, i) |
49 | 145 |
|
50 | 146 |
for(j in other.clusters) { |
... | ... |
@@ -59,18 +155,116 @@ split.each.z = function(counts, z, K, LLFunction, z.prob, min.cell=3, max.cluste |
59 | 155 |
ix.to.split = z == j |
60 | 156 |
new.z[ix.to.split] = ifelse(clust.split[[j]] == 1, j, i) |
61 | 157 |
|
158 |
+ p = cC.reDecomposeCounts(n.TS.by.C, s, new.z, previous.z, n.TS.by.CP, K) |
|
159 |
+ n.TS.by.CP = p$n.G.by.CP |
|
160 |
+ m.CP.by.S = p$m.CP.by.S |
|
161 |
+ |
|
62 | 162 |
## Calculate likelihood of split |
63 |
- params = c(list(counts=counts, z=new.z, K=K), dot.args) |
|
64 |
- new.ll = do.call(LLFunction, params) |
|
163 |
+ z.split.ll[split.ix] = cCG.calcLL(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) |
|
164 |
+ z.split[,split.ix] = new.z |
|
165 |
+ split.ix = split.ix + 1L |
|
166 |
+ previous.z = new.z |
|
167 |
+ |
|
168 |
+ pairs = rbind(pairs, c(i, j)) |
|
169 |
+ } |
|
170 |
+ } |
|
171 |
+ |
|
172 |
+ select = which.max(z.split.ll) |
|
65 | 173 |
|
66 |
- z.split.ll = c(z.split.ll, new.ll) |
|
67 |
- z.split = cbind(z.split, new.z) |
|
174 |
+ if(select == 1) { |
|
175 |
+ m = paste0(date(), " ... No additional splitting was performed.") |
|
176 |
+ } else { |
|
177 |
+ m = paste0(date(), " ... Cluster ", pairs[select,1], " was reassigned and cluster ", pairs[select,2], " was split in two.") |
|
178 |
+ } |
|
179 |
+ |
|
180 |
+ p = cC.reDecomposeCounts(n.TS.by.C, s, z.split[,select], previous.z, n.TS.by.CP, K) |
|
181 |
+ return(list(z=z.split[,select], m.CP.by.S=p$m.CP.by.S, n.TS.by.CP=p$n.G.by.CP, n.CP=p$n.CP, message=m)) |
|
182 |
+} |
|
183 |
+ |
|
184 |
+ |
|
185 |
+ |
|
186 |
+cCG.splitY = function(counts, y, m.CP.by.S, n.G.by.CP, n.TS.by.C, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, n.CP, s, z, K, L, nS, nG, alpha, beta, delta, gamma, y.prob, max.clusters.to.try=10, min.cell=3) { |
|
187 |
+ |
|
188 |
+ ## Identify clusters to split |
|
189 |
+ y.ta = tabulate(y, L) |
|
190 |
+ y.to.split = which(y.ta >= min.cell) |
|
191 |
+ y.non.empty = which(y.ta > 0) |
|
192 |
+ |
|
193 |
+ if(length(y.to.split) == 0) { |
|
194 |
+ m = paste0(date(), " ... Cluster sizes too small. No additional splitting was performed.") |
|
195 |
+ return(list(y=y, m.CP.by.S=m.CP.by.S, n.TS.by.CP=n.TS.by.CP, n.CP=n.CP, message=m)) |
|
196 |
+ } |
|
197 |
+ |
|
198 |
+ ## Loop through each split-able Z and perform split |
|
199 |
+ clust.split = vector("list", L) |
|
200 |
+ for(i in y.to.split) { |
|
201 |
+ clustLabel = suppressMessages(celda_G(counts[y == i,], L=2, max.iter=5, split.on.iter=-1, split.on.last=FALSE)) |
|
202 |
+ clust.split[[i]] = clustLabel$y |
|
203 |
+ } |
|
204 |
+ |
|
205 |
+ ## Find second best assignment give current assignments for each cell |
|
206 |
+ y.prob[cbind(1:nrow(y.prob), y)] = NA |
|
207 |
+ y.second = apply(y.prob, 1, which.max) |
|
208 |
+ |
|
209 |
+ ## Set up initial variables |
|
210 |
+ y.split = matrix(NA, nrow=length(y), ncol=length(y.to.split) * max.clusters.to.try) |
|
211 |
+ y.split.ll = rep(NA, ncol=length(y.to.split) * max.clusters.to.try) |
|
212 |
+ y.split.ll[1] = cCG.calcLL(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) |
|
213 |
+ y.split[,1] = y |
|
214 |
+ |
|
215 |
+ ## Select worst clusters to test for reshuffling |
|
216 |
+ previous.y = y |
|
217 |
+ ll.shuffle = rep(NA, L) |
|
218 |
+ for(i in y.non.empty) { |
|
219 |
+ ix = y == i |
|
220 |
+ new.y = y |
|
221 |
+ new.y[ix] = y.second[ix] |
|
222 |
+ |
|
223 |
+ p = cG.reDecomposeCounts(n.G.by.CP, new.y, previous.y, n.TS.by.CP, n.by.G, L) |
|
224 |
+ n.TS.by.CP = p$n.TS.by.C |
|
225 |
+ n.by.TS = p$n.by.TS |
|
226 |
+ nG.by.TS = p$nG.by.TS |
|
227 |
+ |
|
228 |
+ ll.shuffle[i] = cCG.calcLL(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) |
|
229 |
+ previous.y = new.y |
|
230 |
+ } |
|
231 |
+ y.to.shuffle = head(order(ll.shuffle, decreasing = TRUE, na.last=NA), n = max.clusters.to.try) |
|
232 |
+ |
|
233 |
+ |
|
234 |
+ pairs = c(NA, NA) |
|
235 |
+ split.ix = 2 |
|
236 |
+ for(i in y.to.shuffle) { |
|
237 |
+ |
|
238 |
+ other.clusters = setdiff(y.to.split, i) |
|
68 | 239 |
|
240 |
+ for(j in other.clusters) { |
|
241 |
+ new.y = y |
|
242 |
+ |
|
243 |
+ ## Assign cluster i to the next most similar cluster (excluding cluster j) |
|
244 |
+ ## as defined above by the correlation |
|
245 |
+ ix.to.move = y == i |
|
246 |
+ new.y[ix.to.move] = y.second[ix.to.move] |
|
247 |
+ |
|
248 |
+ ## Split cluster j according to the clustering defined above |
|
249 |
+ ix.to.split = y == j |
|
250 |
+ new.y[ix.to.split] = ifelse(clust.split[[j]] == 1, j, i) |
|
251 |
+ |
|
252 |
+ p = cG.reDecomposeCounts(n.G.by.CP, new.y, previous.y, n.TS.by.CP, n.by.G, L) |
|
253 |
+ n.TS.by.CP = p$n.TS.by.C |
|
254 |
+ n.by.TS = p$n.by.TS |
|
255 |
+ nG.by.TS = p$nG.by.TS |
|
256 |
+ |
|
257 |
+ ## Calculate likelihood of split |
|
258 |
+ y.split.ll[split.ix] = cCG.calcLL(K, L, m.CP.by.S, n.TS.by.CP, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) |
|
259 |
+ y.split[,split.ix] = new.y |
|
260 |
+ split.ix = split.ix + 1L |
|
261 |
+ previous.y = new.y |
|
262 |
+ |
|
69 | 263 |
pairs = rbind(pairs, c(i, j)) |
70 | 264 |
} |
71 | 265 |
} |
72 | 266 |
|
73 |
- select = which.max(z.split.ll) |
|
267 |
+ select = which.max(y.split.ll) |
|
74 | 268 |
|
75 | 269 |
if(select == 1) { |
76 | 270 |
m = paste0(date(), " ... No additional splitting was performed.") |
... | ... |
@@ -78,55 +272,59 @@ split.each.z = function(counts, z, K, LLFunction, z.prob, min.cell=3, max.cluste |
78 | 272 |
m = paste0(date(), " ... Cluster ", pairs[select,1], " was reassigned and cluster ", pairs[select,2], " was split in two.") |
79 | 273 |
} |
80 | 274 |
|
81 |
- return(list(z=z.split[,select], message=m)) |
|
275 |
+ p = cG.reDecomposeCounts(n.G.by.CP, y.split[,select], previous.y, n.TS.by.CP, n.by.G, L) |
|
276 |
+ return(list(y=y.split[,select], n.TS.by.CP=p$n.TS.by.C, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, message=m)) |
|
82 | 277 |
} |
83 | 278 |
|
84 | 279 |
|
85 | 280 |
|
86 |
-split.each.y = function(counts, y, L, LLFunction, y.prob, min=3, max.clusters.to.try=10, ...) { |
|
87 | 281 |
|
88 |
- dot.args = list(...) |
|
282 |
+ |
|
283 |
+#cG.calcLL = function(n.TS.by.C, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delta, gamma) { |
|
284 |
+cG.splitY = function(counts, y, n.TS.by.C, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delta, gamma, y.prob, min=3, max.clusters.to.try=10) { |
|
89 | 285 |
|
90 | 286 |
## Identify clusters to split |
91 | 287 |
y.ta = table(factor(y, levels=1:L)) |
92 | 288 |
y.to.split = which(y.ta >= min) |
93 | 289 |
y.non.empty = which(y.ta > 0) |
94 | 290 |
|
291 |
+ if(length(y.to.split) == 0) { |
|
292 |
+ m = paste0(date(), " ... Cluster sizes too small. No additional splitting was performed.") |
|
293 |
+ return(list(y=y, n.TS.by.C=n.TS.by.C, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, message=m)) |
|
294 |
+ } |
|
295 |
+ |
|
95 | 296 |
## Loop through each split-able y and find best split |
96 |
- clust.split = list() |
|
97 |
- for(i in 1:L) { |
|
98 |
- if(i %in% y.to.split) { |
|
99 |
- clustLabel = suppressMessages(celda_G(counts[y == i,], L=2, max.iter=5, split.on.iter=-1, split.on.last=FALSE)) |
|
100 |
- clust.split = c(clust.split, list(clustLabel$y)) |
|
101 |
- } else { |
|
102 |
- clust.split = c(clust.split, list(NA)) |
|
103 |
- } |
|
297 |
+ clust.split = vector("list", L) |
|
298 |
+ for(i in y.to.split) { |
|
299 |
+ clustLabel = suppressMessages(celda_G(counts[y == i,], L=2, max.iter=5, split.on.iter=-1, split.on.last=FALSE)) |
|
300 |
+ clust.split[[i]] = clustLabel$y |
|
104 | 301 |
} |
105 |
- |
|
302 |
+ |
|
106 | 303 |
## Find second best assignment given current assignments for each cell |
107 | 304 |
y.prob[cbind(1:nrow(y.prob), y)] = NA |
108 | 305 |
y.second = apply(y.prob, 1, which.max) |
306 |
+ |
|
307 |
+ ## Set up initial variables |
|
308 |
+ y.split = matrix(NA, nrow=length(y), ncol=length(y.to.split) * max.clusters.to.try) |
|
309 |
+ y.split.ll = rep(NA, length(y.to.split) * max.clusters.to.try) |
|
310 |
+ y.split.ll[1] = cG.calcLL(n.TS.by.C, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delta, gamma) |
|
311 |
+ y.split[,1] = y |
|
312 |
+ |
|
313 |
+ ## Select worst clusters to test for reshuffling |
|
109 | 314 |
ll.shuffle = rep(NA, L) |
315 |
+ previous.y = y |
|
110 | 316 |
for(i in y.non.empty) { |
111 | 317 |
ix = y == i |
112 | 318 |
new.y = y |
113 | 319 |
new.y[ix] = y.second[ix] |
114 |
- params = c(list(counts=counts, y=new.y, L=L), dot.args) |
|
115 |
- ll.shuffle[i] = do.call(LLFunction, params) |
|
320 |
+ p = cG.reDecomposeCounts(counts, new.y, previous.y, n.TS.by.C, n.by.G, L) |
|
321 |
+ ll.shuffle[i] = cG.calcLL(p$n.TS.by.C, p$n.by.TS, n.by.G, p$nG.by.TS, nM, nG, L, beta, delta, gamma) |
|
322 |
+ previous.y = new.y |
|
116 | 323 |
} |
117 | 324 |
y.to.shuffle = head(order(ll.shuffle, decreasing = TRUE, na.last=NA), n = max.clusters.to.try) |
118 | 325 |
|
119 |
- if(length(y.to.split) == 0 | length(y.to.shuffle) == 0) { |
|
120 |
- m = paste0(date(), " ... Cluster sizes too small. No additional splitting was performed.") |
|
121 |
- return(list(y=y, message=m)) |
|
122 |
- } |
|
123 |
- |
|
124 |
- ## Set up variables for holding results with current iteration of y |
|
125 |
- params = c(list(counts=counts, y=y, L=L), dot.args) |
|
126 |
- y.split.ll = do.call(LLFunction, params) |
|
127 |
- y.split = y |
|
128 |
- |
|
129 | 326 |
pairs = c(NA, NA) |
327 |
+ split.ix = 2 |
|
130 | 328 |
for(i in y.to.shuffle) { |
131 | 329 |
|
132 | 330 |
other.clusters = setdiff(y.to.split, i) |
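The y.prob[cbind(1:nrow(y.prob), y)] = NA / which.max idiom in the hunk above computes each row's runner-up cluster: masking the probability of the current label makes which.max return the second most likely assignment. A minimal sketch with toy values (not package data):

    y <- c(1, 2, 1)
    y.prob <- matrix(c(0.6, 0.3, 0.1,
                       0.2, 0.5, 0.3,
                       0.4, 0.5, 0.1), nrow = 3, byrow = TRUE)
    y.prob[cbind(seq_along(y), y)] <- NA  # blank out each row's current label
    y.second <- apply(y.prob, 1, which.max)
    ## y.second == c(2, 3, 2); which.max skips the NA entries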
... | ... |
@@ -144,11 +342,11 @@ split.each.y = function(counts, y, L, LLFunction, y.prob, min=3, max.clusters.to |
144 | 342 |
new.y[ix.to.split] = ifelse(clust.split[[j]] == 1, j, i) |
145 | 343 |
|
146 | 344 |
## Calculate likelihood of split |
147 |
- params = c(list(counts=counts, y=new.y, L=L), dot.args) |
|
148 |
- new.ll = do.call(LLFunction, params) |
|
149 |
- |
|
150 |
- y.split.ll = c(y.split.ll, new.ll) |
|
151 |
- y.split = cbind(y.split, new.y) |
|
345 |
+ p = cG.reDecomposeCounts(counts, new.y, previous.y, n.TS.by.C, n.by.G, L) |
|
346 |
+ y.split.ll[split.ix] = cG.calcLL(p$n.TS.by.C, p$n.by.TS, n.by.G, p$nG.by.TS, nM, nG, L, beta, delta, gamma) |
|
347 |
+ y.split[,split.ix] = new.y |
|
348 |
+ split.ix = split.ix + 1L |
|
349 |
+ previous.y = new.y |
|
152 | 350 |
|
153 | 351 |
pairs = rbind(pairs, c(i, j)) |
154 | 352 |
} |
... | ... |
@@ -162,32 +360,11 @@ split.each.y = function(counts, y, L, LLFunction, y.prob, min=3, max.clusters.to |
162 | 360 |
m = paste0(date(), " ... Cluster ", pairs[select,1], " was reassigned and cluster ", pairs[select,2], " was split in two.") |
163 | 361 |
} |
164 | 362 |
|
165 |
- return(list(y=y.split[,select], message=m)) |
|
363 |
+ p = cG.reDecomposeCounts(counts, y.split[,select], previous.y, n.TS.by.C, n.by.G, L) |
|
364 |
+ return(list(y=y.split[,select], n.TS.by.C=p$n.TS.by.C, n.by.TS=p$n.by.TS, nG.by.TS=p$nG.by.TS, message=m)) |
|
166 | 365 |
} |
167 | 366 |
|
168 | 367 |
|
169 | 368 |
|
170 | 369 |
|
171 | 370 |
|
172 |
-clusterByHC = function(d, min=3, method="ward.D") { |
|
173 |
- label = cluster::pam(x = d, k=2)$clustering |
|
174 |
- |
|
175 |
- ## If PAM split is too small, perform secondary hclust procedure to split into roughly equal groups |
|
176 |
- if(min(table(label)) < min) { |
|
177 |
- d.hclust = stats::hclust(d, method=method) |
|
178 |
- |
|
179 |
- ## Get maximum sample size of each subcluster |
|
180 |
- d.hclust.size = sapply(1:length(d.hclust$height), function(i) max(table(stats::cutree(d.hclust, h=d.hclust$height[i])))) |
|
181 |
- |
|
182 |
- ## Find the height of the dendrogram that best splits the samples in "roughly" half |
|
183 |
- sample.size = round(length(d.hclust$order)/ 2) |
|
184 |
- d.hclust.select = which.min(abs(d.hclust.size - sample.size)) |
|
185 |
- temp = stats::cutree(d.hclust, h=d.hclust$height[d.hclust.select]) |
|
186 |
- |
|
187 |
- ## Set half of the samples as the largest cluster and the other samples to the other cluster |
|
188 |
- label.max.cluster = which.max(table(temp)) |
|
189 |
- label = ifelse(temp == label.max.cluster, 1, 2) |
|
190 |
- } |
|
191 |
- |
|
192 |
- return(label) |
|
193 |
-} |
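Throughout cG.splitY, candidate labelings are scored without rebuilding the decomposition from scratch: cG.reDecomposeCounts only shifts the rows whose label differs between previous.y and the candidate, so each cG.calcLL call costs time proportional to the number of changed rows rather than the whole counts matrix. A rough pure-R sketch of that shift step (a hypothetical stand-in; the package's own helper also returns n.by.TS and nG.by.TS):

    ## Move only the changed rows of `counts` between transcriptional-state sums.
    ## n.TS.by.C is the L x nM matrix of per-state counts under prev.y.
    reDecomposeSketch <- function(counts, new.y, prev.y, n.TS.by.C) {
      for (i in which(new.y != prev.y)) {
        n.TS.by.C[prev.y[i], ] <- n.TS.by.C[prev.y[i], ] - counts[i, ]
        n.TS.by.C[new.y[i], ]  <- n.TS.by.C[new.y[i], ] + counts[i, ]
      }
      n.TS.by.C
    }

Carrying previous.y forward between loop iterations keeps the running decomposition aligned with whichever candidate was scored last.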
... | ... |
@@ -4,17 +4,15 @@ |
4 | 4 |
\alias{calculatePerplexity} |
5 | 5 |
\title{Calculate the perplexity from a single celda run} |
6 | 6 |
\usage{ |
7 |
-calculatePerplexity(counts, celda.mod, precision = 128) |
|
7 |
+calculatePerplexity(counts, celda.mod) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{counts}{The count matrix modeled in the celdaRun parameter} |
11 | 11 |
|
12 | 12 |
\item{celda.mod}{A single celda run (usually from the _res.list_ property of a celda_list)} |
13 |
- |
|
14 |
-\item{precision}{The amount of bits of precision to pass to Rmpfr} |
|
15 | 13 |
} |
16 | 14 |
\value{ |
17 |
-The perplexity for the provided chain as an mpfr number |
|
15 |
+The perplexity for the provided data and model |
|
18 | 16 |
} |
19 | 17 |
\description{ |
20 | 18 |
Perplexity can be seen as a measure of how well a provided set of |
... | ... |
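The precision argument dropped from calculatePerplexity above reflects that arbitrary-precision arithmetic is unnecessary once the computation stays in log space; the log-sum-exp identity keeps ordinary doubles from underflowing. A sketch of the identity (an assumed form of the fix, not the package's exact code):

    log.probs <- c(-1050, -1052, -1060)          # exp() of these underflows to 0
    m <- max(log.probs)
    log.sum <- m + log(sum(exp(log.probs - m)))  # finite, about -1049.87
    ## matrixStats::logSumExp(log.probs) computes the same quantity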
@@ -5,8 +5,8 @@ |
5 | 5 |
\title{Generate a distinct palette for coloring different clusters} |
6 | 6 |
\usage{ |
7 | 7 |
distinct_colors(n, hues = c("red", "cyan", "orange", "blue", "yellow", |
8 |
- "purple", "green", "magenta"), saturation.range = c(0.4, 1), |
|
9 |
- value.range = c(0.4, 1)) |
|
8 |
+ "purple", "green", "magenta"), saturation.range = c(0.7, 1), |
|
9 |
+ value.range = c(0.7, 1)) |
|
10 | 10 |
} |
11 | 11 |
\arguments{ |
12 | 12 |
\item{n}{Integer; Number of colors to generate} |
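Raising the lower bounds of saturation.range and value.range from 0.4 to 0.7 removes the palest colors from the generated palette. Assuming the palette is built with grDevices::hsv, the first red shifts accordingly, which matches the updated testthat expectations further below:

    hsv(0, 0.4, 1)  # "#FF9999", the old first color at minimum saturation 0.4
    hsv(0, 0.7, 1)  # "#FF4D4D", the new first color at minimum saturation 0.7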
... | ... |
@@ -115,35 +115,16 @@ SEXP rowSumByGroupChange(SEXP R_x, SEXP R_px, SEXP R_group, SEXP R_pgroup) |
115 | 115 |
error("group label and previous group label must be the same length as the number of rows in x."); |
116 | 116 |
} |
117 | 117 |
|
118 |
- // Create vector containing indices where group is different than pgroup |
|
119 |
- // First, calculate the number of differences in group labels |
|
120 |
- int ndiff = 0; |
|
121 |
- for(i = 0; i < nr; i++) { |
|
122 |
- |
|
118 |
+ int g_ix; |
|
119 |
+ int pg_ix; |
|
120 |
+ for (i = 0; i < nr; i++) { |
|
123 | 121 |
if(pgroup[i] != group[i]) { |
124 |
- ndiff++; |
|
125 |
- } |
|
126 |
- } |
|
127 |
- // Second, add the index of the differences to a vector |
|
128 |
- int group_diff_ix[ndiff-1]; |
|
129 |
- j = 0; |
|
130 |
- for(i = 0; i < nr; i++) { |
|
131 |
- if(group[i] != pgroup[i]) { |
|
132 |
- group_diff_ix[j] = i; |
|
133 |
- j++; |
|
134 |
- } |
|
135 |
- } |
|
136 |
- |
|
137 |
- // Sum the totals for each element of the 'group' variable |
|
138 |
- // Note: columns are iterated over before rows because the compiler appears to store expressions like |
|
139 |
- // 'j * nr' in a temporary variable (as they do not change within inner loop); |
|
140 |
- // swapping the order of the outer and inner loops slows down the code ~10X |
|
141 |
- int row_ix; |
|
142 |
- for (j = 0; j < nc; j++) { |
|
143 |
- for (i = 0; i < ndiff; i++) { |
|
144 |
- row_ix = group_diff_ix[i]; |
|
145 |
- px[j * nl + (pgroup[row_ix] - 1)] -= x[j * nr + row_ix]; |
|
146 |
- px[j * nl + (group[row_ix] - 1)] += x[j * nr + row_ix]; |
|
122 |
+ for (j = 0; j < nc; j++) { |
|
123 |
+ pg_ix = (j * nl) + (pgroup[i] - 1); |
|
124 |
+ g_ix = (j * nl) + (group[i] - 1); |
|
125 |
+ px[pg_ix] -= x[j * nr + i]; |
|
126 |
+ px[g_ix] += x[j * nr + i]; |
|
127 |
+ } |
|
147 | 128 |
} |
148 | 129 |
} |
149 | 130 |
|
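The rewritten loop above walks every row once and shifts a row's counts between group totals only when its label changed, instead of first collecting the changed indices into a separate array. A pure-R reference with the same semantics, useful for unit-testing the C routine (hypothetical helper, not part of the package):

    ## px holds per-group row sums of x computed under pgroup (one row per group).
    rowSumByGroupChangeR <- function(x, px, group, pgroup) {
      for (i in which(group != pgroup)) {
        px[pgroup[i], ] <- px[pgroup[i], ] - x[i, ]
        px[group[i], ]  <- px[group[i], ] + x[i, ]
      }
      px  # now equals the per-group row sums of x under `group`
    }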
... | ... |
@@ -152,6 +133,7 @@ SEXP rowSumByGroupChange(SEXP R_x, SEXP R_px, SEXP R_group, SEXP R_pgroup) |
152 | 133 |
|
153 | 134 |
|
154 | 135 |
|
136 |
+ |
|
155 | 137 |
SEXP colSumByGroupChange(SEXP R_x, SEXP R_px, SEXP R_group, SEXP R_pgroup) |
156 | 138 |
{ |
157 | 139 |
int i, j; |
... | ... |
@@ -187,8 +169,8 @@ SEXP colSumByGroupChange(SEXP R_x, SEXP R_px, SEXP R_group, SEXP R_pgroup) |
187 | 169 |
for (j = 0; j < nc; j++) { |
188 | 170 |
if(group[j] != pgroup[j]) { |
189 | 171 |
for (i = 0; i < nr; i++) { |
190 |
- px[group[j] * nr + i - 1] += x[j * nr + i]; |
|
191 |
- px[pgroup[j] * nr + i - 1] -= x[j * nr + i]; |
|
172 |
+ px[(group[j]-1) * nr + i] += x[j * nr + i]; |
|
173 |
+ px[(pgroup[j]-1) * nr + i] -= x[j * nr + i]; |
|
192 | 174 |
} |
193 | 175 |
} |
194 | 176 |
} |
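The corrected indices above follow the usual column-major formula: with zero-based row i and one-based group label g, element (i, g) of an nr-row matrix sits at flat offset (g - 1) * nr + i. The old expressions subtracted 1 from the whole offset instead of from the label, writing nr - 1 elements past the intended slot and crossing a column boundary. The same layout can be checked from R (illustrative only):

    m <- matrix(1:12, nrow = 3)               # column-major, like the C arrays
    g <- 2; i <- 2                            # 1-based here, matching R
    m[i, g] == as.vector(m)[(g - 1) * 3 + i]  # TRUE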
... | ... |
@@ -1,70 +1,16 @@ |
1 |
-# celda_C.R |
|
1 |
+#celda_C |
|
2 | 2 |
library(celda) |
3 |
-library(Rtsne) |
|
4 |
-library(SummarizedExperiment) |
|
5 | 3 |
context("Testing celda_C") |
6 | 4 |
|
7 | 5 |
load("../celdaCsim.rda") |
8 | 6 |
load("../celdaC.rda") |
9 |
-model_C = getModel(celdaC.res, K = 5)[[1]] |
|
10 |
-factorized <- factorizeMatrix(celda.mod = model_C, counts = celdaC.sim$counts) |
|
11 | 7 |
counts.matrix <- celdaC.sim$counts |
8 |
+model_C = getModel(celdaC.res, K = 5)[[1]] |
|
9 |
+factorized = factorizeMatrix(counts=counts.matrix, celda.mod=model_C) |
|
12 | 10 |
|
13 |
-#Making sure getModel if functioning correctly |
|
14 |
-test_that(desc = "Sanity checking getModel",{ |
|
15 |
- expect_equal(celdaC.res$content.type, class(model_C)) |
|
16 |
-}) |
|
17 |
- |
|
18 |
-#Making sure relationship of counts vs proportions is correct in factorize matrix |
|
19 |
-test_that(desc = "Checking factorize matrix, counts vs proportions",{ |
|
20 |
- expect_equal(TRUE,all(factorized$counts$sample.states/sum(factorized$counts$sample.states) |
|
21 |
- == factorized$proportions$sample.states)) |
|
22 |
-}) |
|
23 |
- |
|
24 |
-#Checking dimension of factorize matrix |
|
25 |
-test_that(desc = "Checking factorize matrix dimension size",{ |
|
26 |
- expect_equal(5, nrow(factorized$proportions$sample.states)) |
|
27 |
-}) |
|
28 |
- |
|
29 |
- |
|
30 |
-# Ensure calculateLoglikFromVariables calculates the expected values |
|
31 |
-test_that(desc = "calculateLoglikFromVariables.celda_C returns correct output for various params", { |
|
32 |
- expect_lt(calculateLoglikFromVariables(z = celdaC.sim$z, |
|
33 |
- beta = 1, alpha = 1, K = 5, model="celda_C", |
|
34 |
- s = celdaC.sim$sample.label, |
|
35 |
- counts=celdaC.sim$counts), |
|
36 |
- 0) |
|
37 |
-}) |
|
38 |
- |
|
39 |
-test_that(desc = "simulateCells.celda_C returns correctly typed output", { |
|
40 |
- sim.res = simulateCells(model="celda_C") |
|
41 |
- expect_equal(typeof(sim.res$counts), "integer") |
|
42 |
-}) |
|
43 |
- |
|
44 |
-#normalizeCounts |
|
45 |
-test_that(desc = "Checking normalizeCounts doesn't change dimensions",{ |
|
46 |
- norm.counts <- normalizeCounts(counts.matrix) |
|
47 |
- expect_equal(dim(norm.counts),dim(counts.matrix)) |
|
48 |
- expect_equal(rownames(norm.counts),rownames(counts.matrix)) |
|
49 |
- expect_equal(colnames(norm.counts),colnames(counts.matrix)) |
|
50 |
-}) |
|
51 |
- |
|
52 |
-#recodeClusterZ |
|
53 |
-test_that(desc = "Checking recodeClusterZ gets correct order",{ |
|
54 |
- expect_error(recodeClusterZ(celda.mod = model_C, from = NULL, to = )) |
|
55 |
- expect_error(recodeClusterZ(celda.mod = model_C, from=c(1,2,3,4,5), to = c(1,2,3,4,6))) |
|
56 |
- new.recoded = recodeClusterZ(celda.mod = model_C, from=c(1,2,3,4,5), to = c(5,4,3,2,1)) |
|
57 |
- expect_equal(model_C$z == 1, new.recoded$z == 5) |
|
58 |
-}) |
|
59 |
- |
|
60 |
-#compareCountMatrix |
|
61 |
-test_that(desc = "Checking CompareCountMatrix",{ |
|
62 |
- expect_true(compareCountMatrix(count.matrix = celdaC.sim$counts, celda.obj = model_C)) |
|
63 |
-}) |
|
64 |
- |
|
65 |
-#distinct_colors |
|
11 |
+#distinct_colors |
|
66 | 12 |
test_that(desc = "Checking distinct_colors",{ |
67 |
- expect_equal(distinct_colors(2), c("#FF9999","#99FFFF")) |
|
13 |
+ expect_equal(distinct_colors(2), c("#FF4D4D", "#4DFFFF")) |
|
68 | 14 |
}) |
69 | 15 |
|
70 | 16 |
###renderCeldaHeatmap### |
... | ... |
@@ -85,8 +85,8 @@ test_that(desc = "Checking CompareCountMatrix",{ |
85 | 85 |
|
86 | 86 |
#distinct_colors |
87 | 87 |
test_that(desc = "Making sure distinct_colors gives expected output",{ |
88 |
- expect_equal(distinct_colors(2), c("#FF9999","#99FFFF")) |
|
89 |
- expect_equal(distinct_colors(4), c("#FF9999","#99FFFF","#FFDB99","#9999FF")) |
|
88 |
+ expect_equal(distinct_colors(2), c("#FF4D4D", "#4DFFFF")) |
|
89 |
+ expect_equal(distinct_colors(4), c("#FF4D4D", "#4DFFFF", "#FFC04D", "#4D4DFF")) |
|
90 | 90 |
}) |
91 | 91 |
|
92 | 92 |
|
... | ... |
@@ -63,7 +63,7 @@ test_that(desc = "Checking CompareCountMatrix",{ |
63 | 63 |
|
64 | 64 |
#distinct_colors |
65 | 65 |
test_that(desc = "Checking distinct_colors",{ |
66 |
- expect_equal(distinct_colors(2), c("#FF9999","#99FFFF")) |
|
66 |
+ expect_equal(distinct_colors(2), c("#FF4D4D", "#4DFFFF")) |
|
67 | 67 |
}) |
68 | 68 |
|
69 | 69 |
|