Browse code

Merge pull request #174 from zhewa/RELEASE_3_9

Release 3 9

Zhe Wang authored on 17/05/2019 19:49:17 • GitHub committed on 17/05/2019 19:49:17
Showing 6 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: celda
2 2
 Title: CEllular Latent Dirichlet Allocation
3
-Version: 1.0.1
3
+Version: 1.0.3
4 4
 Authors@R: c(person("Joshua", "Campbell", email = "camp@bu.edu", role = c("aut", "cre")),
5 5
              person("Sean", "Corbett", email = "scorbett@bu.edu", role = c("aut")),
6 6
 	     person("Yusuke", "Koga", email="ykoga07@bu.edu", role = c("aut")),
... ...
@@ -1,3 +1,5 @@
1
+
2
+
1 3
 #' @title Simulate contaminated count matrix
2 4
 #' @description This function generates a list containing two count matrices --
3 5
 #'  one for real expression, the other one for contamination, as well as other
... ...
@@ -59,19 +61,24 @@ simulateContaminatedMatrix <- function(C = 300,
59 61
     NRange = c(500, 1000),
60 62
     beta = 0.5,
61 63
     delta = c(1, 2)) {
62
-
63 64
     if (length(delta) == 1) {
64
-        cpByC <- stats::rbeta(n = C, shape1 = delta, shape2 = delta)
65
+        cpByC <- stats::rbeta(n = C,
66
+            shape1 = delta,
67
+            shape2 = delta)
65 68
     } else {
66
-        cpByC <- stats::rbeta(n = C, shape1 = delta[1], shape2 = delta[2])
69
+        cpByC <- stats::rbeta(n = C,
70
+            shape1 = delta[1],
71
+            shape2 = delta[2])
67 72
     }
68 73
 
69 74
     z <- sample(seq(K), size = C, replace = TRUE)
70 75
     if (length(unique(z)) < K) {
71
-        warning("Only ",
76
+        warning(
77
+            "Only ",
72 78
             length(unique(z)),
73 79
             " clusters are simulated. Try to increase numebr of cells 'C' if",
74
-            " more clusters are needed")
80
+            " more clusters are needed"
81
+        )
75 82
         K <- length(unique(z))
76 83
         z <- plyr::mapvalues(z, unique(z), seq(length(unique(z))))
77 84
     }
... ...
@@ -80,7 +87,9 @@ simulateContaminatedMatrix <- function(C = 300,
80 87
         size = C,
81 88
         replace = TRUE)
82 89
     cNbyC <- vapply(seq(C), function(i) {
83
-        stats::rbinom(n = 1, size = NbyC[i], p = cpByC[i])
90
+        stats::rbinom(n = 1,
91
+            size = NbyC[i],
92
+            p = cpByC[i])
84 93
     }, integer(1))
85 94
     rNbyC <- NbyC - cNbyC
86 95
 
... ...
@@ -95,7 +104,8 @@ simulateContaminatedMatrix <- function(C = 300,
95 104
     colnames(cellRmat) <- paste0("Cell_", seq(C))
96 105
 
97 106
     ## sample contamination count matrix
98
-    nGByK <- rowSums(cellRmat) - .colSumByGroup(cellRmat, group = z, K = K)
107
+    nGByK <-
108
+        rowSums(cellRmat) - .colSumByGroup(cellRmat, group = z, K = K)
99 109
     eta <- normalizeCounts(counts = nGByK, normalize = "proportion")
100 110
 
101 111
     cellCmat <- vapply(seq(C), function(i) {
... ...
@@ -106,12 +116,16 @@ simulateContaminatedMatrix <- function(C = 300,
106 116
     rownames(cellOmat) <- paste0("Gene_", seq(G))
107 117
     colnames(cellOmat) <- paste0("Cell_", seq(C))
108 118
 
109
-    return(list("nativeCounts" = cellRmat,
110
-        "observedCounts" = cellOmat,
111
-        "NByC" = NbyC,
112
-        "z" = z,
113
-        "eta" = eta,
114
-        "phi" = t(phi)))
119
+    return(
120
+        list(
121
+            "nativeCounts" = cellRmat,
122
+            "observedCounts" = cellOmat,
123
+            "NByC" = NbyC,
124
+            "z" = z,
125
+            "eta" = eta,
126
+            "phi" = t(phi)
127
+        )
128
+    )
115 129
 }
116 130
 
117 131
 
... ...
@@ -137,9 +151,11 @@ simulateContaminatedMatrix <- function(C = 300,
137 151
 # decontamination
138 152
 # bgDist Numeric matrix. Rows represent feature and columns are the times that
139 153
 # the background-distribution has been replicated.
140
-.bgCalcLL <- function(counts, cellDist, bgDist, theta) {
141
-    ll <- sum(t(counts) * log(theta * t(cellDist) +
142
-            (1 - theta) * t(bgDist) + 1e-20))
154
+.bgCalcLL <- function(counts, globalZ, cbZ, phi, eta, theta) {
155
+    # ll <- sum(t(counts) * log(theta * t(cellDist) +
156
+    #        (1 - theta) * t(bgDist) + 1e-20))
157
+    ll <- sum(t(counts) * log(theta * t(phi)[cbZ, ] +
158
+            (1 - theta) * t(eta)[globalZ, ] + 1e-20))
143 159
     return(ll)
144 160
 }
145 161
 
... ...
@@ -157,9 +173,7 @@ simulateContaminatedMatrix <- function(C = 300,
157 173
     theta,
158 174
     z,
159 175
     K,
160
-    beta,
161 176
     delta) {
162
-
163 177
     ## Notes: use fix-point iteration to update prior for theta, no need
164 178
     ## to feed delta anymore
165 179
     logPr <- log(t(phi)[z, ] + 1e-20) + log(theta + 1e-20)
... ...
@@ -167,53 +181,73 @@ simulateContaminatedMatrix <- function(C = 300,
167 181
 
168 182
     Pr <- exp(logPr) / (exp(logPr) + exp(logPc))
169 183
     Pc <- 1 - Pr
170
-    deltaV2 <- MCMCprecision::fit_dirichlet(matrix(c(Pr, Pc), ncol = 2))$alpha
184
+    deltaV2 <-
185
+        MCMCprecision::fit_dirichlet(matrix(c(Pr, Pc), ncol = 2))$alpha
171 186
 
172 187
     estRmat <- t(Pr) * counts
173 188
     rnGByK <- .colSumByGroupNumeric(estRmat, z, K)
174 189
     cnGByK <- rowSums(rnGByK) - rnGByK
175 190
 
176 191
     ## Update parameters
177
-    theta <- (colSums(estRmat) + deltaV2[1]) / (colSums(counts) + sum(deltaV2))
192
+    theta <-
193
+        (colSums(estRmat) + deltaV2[1]) / (colSums(counts) + sum(deltaV2))
178 194
     phi <- normalizeCounts(rnGByK,
179 195
         normalize = "proportion",
180
-        pseudocountNormalize = beta)
196
+        pseudocountNormalize = 1e-20)
181 197
     eta <- normalizeCounts(cnGByK,
182 198
         normalize = "proportion",
183
-        pseudocountNormalize = beta)
199
+        pseudocountNormalize = 1e-20)
184 200
 
185
-    return(list("estRmat" = estRmat,
201
+    return(list(
202
+        "estRmat" = estRmat,
186 203
         "theta" = theta,
187 204
         "phi" = phi,
188 205
         "eta" = eta,
189
-        "delta" = deltaV2))
206
+        "delta" = deltaV2
207
+    ))
190 208
 }
191 209
 
192 210
 
193 211
 # This function updates decontamination using background distribution
194
-.cDCalcEMbgDecontamination <- function(counts, cellDist, bgDist, theta, beta) {
195
-    # meanNByC <- apply(counts, 2, mean)
196
-    logPr <- log(t(cellDist) + 1e-20) + log(theta + 1e-20) # +
197
-    # log( t(counts) / meanNByC )   # better when without panelty
198
-    logPc <- log(t(bgDist) + 1e-20) + log(1 - theta + 2e-20)
199
-
200
-    Pr <- exp(logPr) / (exp(logPr) + exp(logPc))
201
-    Pc <- 1 - Pr
202
-    deltaV2 <- MCMCprecision::fit_dirichlet(matrix(c(Pr, Pc), ncol = 2))$alpha
203
-
204
-    estRmat <- t(Pr) * counts
205
-
206
-    ## Update paramters
207
-    theta <- (colSums(estRmat) + deltaV2[1]) / (colSums(counts) + sum(deltaV2))
208
-    cellDist <- normalizeCounts(estRmat,
209
-        normalize = "proportion",
210
-        pseudocountNormalize = beta)
212
+.cDCalcEMbgDecontamination <-
213
+    function(counts, globalZ, cbZ, trZ, phi, eta, theta) {
214
+        logPr <- log(t(phi)[cbZ, ] + 1e-20) + log(theta + 1e-20)
215
+        logPc <-
216
+            log(t(eta)[globalZ, ] + 1e-20) + log(1 - theta + 1e-20)
217
+
218
+        Pr <- exp(logPr) / (exp(logPr) + exp(logPc))
219
+        Pc <- 1 - Pr
220
+        deltaV2 <-
221
+            MCMCprecision::fit_dirichlet(matrix(c(Pr, Pc), ncol = 2))$alpha
222
+
223
+        estRmat <- t(Pr) * counts
224
+        phiUnnormalized <-
225
+            .colSumByGroupNumeric(estRmat, cbZ, max(cbZ))
226
+        etaUnnormalized <-
227
+            rowSums(phiUnnormalized) - .colSumByGroupNumeric(phiUnnormalized,
228
+                trZ, max(trZ))
229
+
230
+        ## Update paramters
231
+        theta <-
232
+            (colSums(estRmat) + deltaV2[1]) / (colSums(counts) + sum(deltaV2))
233
+        phi <-
234
+            normalizeCounts(phiUnnormalized,
235
+                normalize = "proportion",
236
+                pseudocountNormalize = 1e-20)
237
+        eta <-
238
+            normalizeCounts(etaUnnormalized,
239
+                normalize = "proportion",
240
+                pseudocountNormalize = 1e-20)
241
+
242
+        return(list(
243
+            "estRmat" = estRmat,
244
+            "theta" = theta,
245
+            "phi" = phi,
246
+            "eta" = eta,
247
+            "delta" = deltaV2
248
+        ))
249
+    }
211 250
 
212
-    return(list("estRmat" = estRmat,
213
-        "theta" = theta,
214
-        "cellDist" = cellDist,
215
-        "delta" = deltaV2))
216
-}
217 251
 
218 252
 #' @title Decontaminate count matrix
219 253
 #' @description This function updates decontamination on dataset with multiple
... ...
@@ -224,7 +258,6 @@ simulateContaminatedMatrix <- function(C = 300,
224 258
 #' @param batch Integer vector. Cell batch labels. Default NULL.
225 259
 #' @param maxIter Integer. Maximum iterations of EM algorithm. Default to be
226 260
 #'  200.
227
-#' @param beta Numeric. Concentration parameter for Phi. Default to be 1e-6.
228 261
 #' @param delta Numeric. Symmetric concentration parameter for Theta. Default
229 262
 #'  to be 10.
230 263
 #' @param logfile Character. Messages will be redirected to a file named
... ...
@@ -237,16 +270,19 @@ simulateContaminatedMatrix <- function(C = 300,
237 270
 #'  related parameters.
238 271
 #' @examples
239 272
 #' data(contaminationSim)
240
-#' deconC <- decontX(counts = contaminationSim$rmat + contaminationSim$cmat,
241
-#'  z = contaminationSim$z, maxIter = 3)
242
-#' deconBg <- decontX(counts = contaminationSim$rmat + contaminationSim$cmat,
243
-#'  maxIter = 3)
273
+#' deconC <- decontX(
274
+#'   counts = contaminationSim$rmat + contaminationSim$cmat,
275
+#'   z = contaminationSim$z, maxIter = 3
276
+#' )
277
+#' deconBg <- decontX(
278
+#'   counts = contaminationSim$rmat + contaminationSim$cmat,
279
+#'   maxIter = 3
280
+#' )
244 281
 #' @export
245 282
 decontX <- function(counts,
246 283
     z = NULL,
247 284
     batch = NULL,
248 285
     maxIter = 200,
249
-    beta = 1e-6,
250 286
     delta = 10,
251 287
     logfile = NULL,
252 288
     verbose = TRUE,
... ...
@@ -257,7 +293,6 @@ decontX <- function(counts,
257 293
             z = z,
258 294
             batch = batch,
259 295
             maxIter = maxIter,
260
-            beta = beta,
261 296
             delta = delta,
262 297
             logfile = logfile,
263 298
             verbose = verbose)
... ...
@@ -267,7 +302,6 @@ decontX <- function(counts,
267 302
                 z = z,
268 303
                 batch = batch,
269 304
                 maxIter = maxIter,
270
-                beta = beta,
271 305
                 delta = delta,
272 306
                 logfile = logfile,
273 307
                 verbose = verbose))
... ...
@@ -281,18 +315,18 @@ decontX <- function(counts,
281 315
     z = NULL,
282 316
     batch = NULL,
283 317
     maxIter = 200,
284
-    beta = 1e-6,
285 318
     delta = 10,
286 319
     logfile = NULL,
287 320
     verbose = TRUE) {
288
-
289 321
     if (!is.null(batch)) {
290 322
         ## Set result lists upfront for all cells from different batches
291 323
         logLikelihood <- c()
292
-        estRmat <- matrix(NA,
324
+        estRmat <- matrix(
325
+            NA,
293 326
             ncol = ncol(counts),
294 327
             nrow = nrow(counts),
295
-            dimnames = list(rownames(counts), colnames(counts)))
328
+            dimnames = list(rownames(counts), colnames(counts))
329
+        )
296 330
         theta <- rep(NA, ncol(counts))
297 331
         estConp <- rep(NA, ncol(counts))
298 332
 
... ...
@@ -305,16 +339,18 @@ decontX <- function(counts,
305 339
             } else {
306 340
                 zBat <- z
307 341
             }
308
-            resBat <- .decontXoneBatch(counts = countsBat,
342
+            resBat <- .decontXoneBatch(
343
+                counts = countsBat,
309 344
                 z = zBat,
310 345
                 batch = bat,
311 346
                 maxIter = maxIter,
312
-                beta = beta,
313 347
                 delta = delta,
314 348
                 logfile = logfile,
315
-                verbose = verbose)
349
+                verbose = verbose
350
+            )
316 351
 
317
-            estRmat[, batch == bat] <- resBat$resList$estNativeCounts
352
+            estRmat[, batch == bat] <-
353
+                resBat$resList$estNativeCounts
318 354
             estConp[batch == bat] <- resBat$resList$estConp
319 355
             theta[batch == bat] <- resBat$resList$theta
320 356
 
... ...
@@ -328,23 +364,30 @@ decontX <- function(counts,
328 364
 
329 365
         runParams <- resBat$runParams
330 366
         method <- resBat$method
331
-        resList <- list("logLikelihood" = logLikelihood,
367
+        resList <- list(
368
+            "logLikelihood" = logLikelihood,
332 369
             "estNativeCounts" = estRmat,
333 370
             "estConp" = estConp,
334
-            "theta" = theta)
371
+            "theta" = theta
372
+        )
335 373
 
336
-        return(list("runParams" = runParams,
374
+        return(list(
375
+            "runParams" = runParams,
337 376
             "resList" = resList,
338
-            "method" = method))
377
+            "method" = method
378
+        ))
339 379
     }
340 380
 
341
-    return(.decontXoneBatch(counts = counts,
342
-        z = z,
343
-        maxIter = maxIter,
344
-        beta = beta,
345
-        delta = delta,
346
-        logfile = logfile,
347
-        verbose = verbose))
381
+    return(
382
+        .decontXoneBatch(
383
+            counts = counts,
384
+            z = z,
385
+            maxIter = maxIter,
386
+            delta = delta,
387
+            logfile = logfile,
388
+            verbose = verbose
389
+        )
390
+    )
348 391
 }
349 392
 
350 393
 
... ...
@@ -353,13 +396,11 @@ decontX <- function(counts,
353 396
     z = NULL,
354 397
     batch = NULL,
355 398
     maxIter = 200,
356
-    beta = 1e-6,
357 399
     delta = 10,
358 400
     logfile = NULL,
359 401
     verbose = TRUE) {
360
-
361 402
     .checkCountsDecon(counts)
362
-    .checkParametersDecon(proportionPrior = delta, distributionPrior = beta)
403
+    .checkParametersDecon(proportionPrior = delta)
363 404
 
364 405
     # nG <- nrow(counts)
365 406
     nC <- ncol(counts)
... ...
@@ -376,27 +417,35 @@ decontX <- function(counts,
376 417
     numIterWithoutImprovement <- 0L
377 418
     stopIter <- 3L
378 419
 
379
-    .logMessages(paste(rep("-", 50), collapse = ""),
420
+    .logMessages(
421
+        paste(rep("-", 50), collapse = ""),
380 422
         logfile = logfile,
381 423
         append = TRUE,
382
-        verbose = verbose)
383
-    .logMessages("Start DecontX. Decontamination",
424
+        verbose = verbose
425
+    )
426
+    .logMessages(
427
+        "Start DecontX. Decontamination",
384 428
         logfile = logfile,
385 429
         append = TRUE,
386
-        verbose = verbose)
430
+        verbose = verbose
431
+    )
387 432
 
388 433
     if (!is.null(batch)) {
389
-        .logMessages("batch: ",
434
+        .logMessages(
435
+            "batch: ",
390 436
             batch,
391 437
             logfile = logfile,
392 438
             append = TRUE,
393
-            verbose = verbose)
439
+            verbose = verbose
440
+        )
394 441
     }
395 442
 
396
-    .logMessages(paste(rep("-", 50), collapse = ""),
443
+    .logMessages(
444
+        paste(rep("-", 50), collapse = ""),
397 445
         logfile = logfile,
398 446
         append = TRUE,
399
-        verbose = verbose)
447
+        verbose = verbose
448
+    )
400 449
     startTime <- Sys.time()
401 450
 
402 451
     if (deconMethod == "clustering") {
... ...
@@ -411,28 +460,32 @@ decontX <- function(counts,
411 460
         eta <- rowSums(phi) - phi
412 461
         phi <- normalizeCounts(phi,
413 462
             normalize = "proportion",
414
-            pseudocountNormalize = beta)
463
+            pseudocountNormalize = 1e-20)
415 464
         eta <- normalizeCounts(eta,
416 465
             normalize = "proportion",
417
-            pseudocountNormalize = beta)
466
+            pseudocountNormalize = 1e-20)
418 467
         ll <- c()
419 468
 
420
-        llRound <- .deconCalcLL(counts = counts,
469
+        llRound <- .deconCalcLL(
470
+            counts = counts,
421 471
             z = z,
422 472
             phi = phi,
423 473
             eta = eta,
424
-            theta = theta)
474
+            theta = theta
475
+        )
425 476
 
426 477
         ## EM updates
427
-        while (iter <= maxIter & numIterWithoutImprovement <= stopIter) {
428
-            nextDecon <- .cDCalcEMDecontamination(counts = counts,
478
+        while (iter <= maxIter &
479
+                numIterWithoutImprovement <= stopIter) {
480
+            nextDecon <- .cDCalcEMDecontamination(
481
+                counts = counts,
429 482
                 phi = phi,
430 483
                 eta = eta,
431 484
                 theta = theta,
432 485
                 z = z,
433 486
                 K = K,
434
-                beta = beta,
435
-                delta = delta)
487
+                delta = delta
488
+            )
436 489
 
437 490
             theta <- nextDecon$theta
438 491
             phi <- nextDecon$phi
... ...
@@ -440,11 +493,13 @@ decontX <- function(counts,
440 493
             delta <- nextDecon$delta
441 494
 
442 495
             ## Calculate log-likelihood
443
-            llTemp <- .deconCalcLL(counts = counts,
496
+            llTemp <- .deconCalcLL(
497
+                counts = counts,
444 498
                 z = z,
445 499
                 phi = phi,
446 500
                 eta = eta,
447
-                theta = theta)
501
+                theta = theta
502
+            )
448 503
             ll <- c(ll, llTemp)
449 504
             llRound <- c(llRound, round(llTemp, 2))
450 505
 
... ...
@@ -458,41 +513,71 @@ decontX <- function(counts,
458 513
     }
459 514
 
460 515
     if (deconMethod == "background") {
516
+        ## Initialize cell label
517
+        initialLabel <- .decontxInitializeZ(counts = counts)
518
+        globalZ <- initialLabel$globalZ
519
+        cbZ <- initialLabel$cbZ
520
+        trZ <- initialLabel$trZ
521
+
461 522
         ## Initialization
462 523
         deltaInit <- delta
463
-        theta <- stats::rbeta(n = nC,
464
-            shape1 = deltaInit,
465
-            shape2 = deltaInit)
524
+        theta <-
525
+            stats::rbeta(n = nC,
526
+                shape1 = deltaInit,
527
+                shape2 = deltaInit)
466 528
         estRmat <- t(t(counts) * theta)
467
-        bgDist <- rowSums(counts) / sum(counts)
468
-        bgDist <- matrix(rep(bgDist, nC), ncol = nC)
469
-        cellDist <- normalizeCounts(estRmat,
470
-            normalize = "proportion",
471
-            pseudocountNormalize = beta)
529
+
530
+        phi <- .colSumByGroupNumeric(estRmat, cbZ, max(cbZ))
531
+        eta <-
532
+            rowSums(phi) - .colSumByGroupNumeric(phi, trZ, max(trZ))
533
+        phi <-
534
+            normalizeCounts(phi,
535
+                normalize = "proportion",
536
+                pseudocountNormalize = 1e-20)
537
+        eta <-
538
+            normalizeCounts(eta,
539
+                normalize = "proportion",
540
+                pseudocountNormalize = 1e-20)
541
+
472 542
         ll <- c()
473 543
 
474
-        llRound <- .bgCalcLL(counts = counts,
475
-            cellDist = cellDist,
476
-            bgDist = bgDist,
477
-            theta = theta)
544
+        llRound <- .bgCalcLL(
545
+            counts = counts,
546
+            globalZ = globalZ,
547
+            cbZ = cbZ,
548
+            phi = phi,
549
+            eta = eta,
550
+            theta = theta
551
+        )
478 552
 
479 553
         ## EM updates
480
-        while (iter <= maxIter & numIterWithoutImprovement <= stopIter) {
481
-            nextDecon <- .cDCalcEMbgDecontamination(counts = counts,
482
-                cellDist = cellDist,
483
-                bgDist = bgDist,
484
-                theta = theta,
485
-                beta = beta)
554
+        while (iter <= maxIter &
555
+                numIterWithoutImprovement <= stopIter) {
556
+            nextDecon <- .cDCalcEMbgDecontamination(
557
+                counts = counts,
558
+                globalZ = globalZ,
559
+                cbZ = cbZ,
560
+                trZ = trZ,
561
+                phi = phi,
562
+                eta = eta,
563
+                theta = theta
564
+            )
486 565
 
487 566
             theta <- nextDecon$theta
488
-            cellDist <- nextDecon$cellDist
567
+            phi <- nextDecon$phi
568
+            eta <- nextDecon$eta
489 569
             delta <- nextDecon$delta
490 570
 
491 571
             ## Calculate log-likelihood
492
-            llTemp <- .bgCalcLL(counts = counts,
493
-                cellDist = cellDist,
494
-                bgDist = bgDist,
495
-                theta = theta)
572
+            llTemp <-
573
+                .bgCalcLL(
574
+                    counts = counts,
575
+                    globalZ = globalZ,
576
+                    cbZ = cbZ,
577
+                    phi = phi,
578
+                    eta = eta,
579
+                    theta = theta
580
+                )
496 581
             ll <- c(ll, llTemp)
497 582
             llRound <- c(llRound, round(llTemp, 2))
498 583
 
... ...
@@ -508,55 +593,63 @@ decontX <- function(counts,
508 593
     resConp <- 1 - colSums(nextDecon$estRmat) / colSums(counts)
509 594
 
510 595
     endTime <- Sys.time()
511
-    .logMessages(paste(rep("-", 50), collapse = ""),
596
+    .logMessages(
597
+        paste(rep("-", 50), collapse = ""),
512 598
         logfile = logfile,
513 599
         append = TRUE,
514
-        verbose = verbose)
515
-    .logMessages("Completed DecontX. Total time:",
600
+        verbose = verbose
601
+    )
602
+    .logMessages(
603
+        "Completed DecontX. Total time:",
516 604
         format(difftime(endTime, startTime)),
517 605
         logfile = logfile,
518 606
         append = TRUE,
519
-        verbose = verbose)
607
+        verbose = verbose
608
+    )
520 609
     if (!is.null(batch)) {
521
-        .logMessages("batch: ",
610
+        .logMessages(
611
+            "batch: ",
522 612
             batch,
523 613
             logfile = logfile,
524 614
             append = TRUE,
525
-            verbose = verbose)
615
+            verbose = verbose
616
+        )
526 617
     }
527
-    .logMessages(paste(rep("-", 50), collapse = ""),
618
+    .logMessages(
619
+        paste(rep("-", 50), collapse = ""),
528 620
         logfile = logfile,
529 621
         append = TRUE,
530
-        verbose = verbose)
622
+        verbose = verbose
623
+    )
531 624
 
532
-    runParams <- list("betaInit" = beta,
533
-        "deltaInit" = deltaInit,
625
+    runParams <- list("deltaInit" = deltaInit,
534 626
         "iteration" = iter - 1L)
535 627
 
536
-    resList <- list("logLikelihood" = ll,
628
+    resList <- list(
629
+        "logLikelihood" = ll,
537 630
         "estNativeCounts" = nextDecon$estRmat,
538 631
         "estConp" = resConp,
539 632
         "theta" = theta,
540
-        "delta" = delta)
633
+        "delta" = delta
634
+    )
541 635
     # if( deconMethod=="clustering" ) {
542 636
     #    posterior.params = list( "est.GeneDist"=phi,  "est.ConDist"=eta  )
543 637
     #    resList = append( resList , posterior.params )
544 638
     # }
545 639
 
546
-    return(list("runParams" = runParams,
640
+    return(list(
641
+        "runParams" = runParams,
547 642
         "resList" = resList,
548
-        "method" = deconMethod))
643
+        "method" = deconMethod
644
+    ))
549 645
 }
550 646
 
551 647
 
552 648
 ## Make sure provided parameters are the right type and value range
553
-.checkParametersDecon <- function(proportionPrior, distributionPrior) {
649
+.checkParametersDecon <- function(proportionPrior) {
554 650
     if (length(proportionPrior) > 1 | any(proportionPrior <= 0)) {
555 651
         stop("'delta' should be a single positive value.")
556 652
     }
557
-    if (length(distributionPrior) > 1 | any(distributionPrior <= 0)) {
558
-        stop("'beta' should be a single positive value.")
559
-    }
560 653
 }
561 654
 
562 655
 
... ...
@@ -603,3 +696,67 @@ addLogLikelihood <- function(llA, llB) {
603 696
 
604 697
     return(ll)
605 698
 }
699
+
700
+
701
+
702
+## Initialization of cell labels for DecontX when they are not given
703
+.decontxInitializeZ <-
704
+    function(counts,
705
+        K = 10,
706
+        minCell = 3,
707
+        seed = 428) {
708
+        nC <- ncol(counts)
709
+        if (nC < 100) {
710
+            K <- ceiling(sqrt(nC))
711
+        }
712
+
713
+        globalZ <-
714
+            .initializeSplitZ(
715
+                counts,
716
+                K = K,
717
+                KSubcluster = NULL,
718
+                alpha = 1,
719
+                beta = 1,
720
+                minCell = 3
721
+            )
722
+        globalK <- max(globalZ)
723
+
724
+        localZ <- rep(NA, nC)
725
+        for (k in 1:globalK) {
726
+            if (sum(globalZ == k) > 2) {
727
+                localCounts <- counts[, globalZ == k]
728
+                localK <- min(K, ceiling(sqrt(ncol(
729
+                    localCounts
730
+                ))))
731
+                localZ[globalZ == k] <- .initializeSplitZ(
732
+                    localCounts,
733
+                    K = localK,
734
+                    KSubcluster = NULL,
735
+                    alpha = 1,
736
+                    beta = 1,
737
+                    minCell = 3
738
+                )
739
+            } else {
740
+                localZ [globalZ == k] <- 1L
741
+            }
742
+        }
743
+
744
+
745
+        cbZ <-
746
+            interaction(globalZ, localZ, lex.order = TRUE, drop = TRUE)
747
+                # combined z label
748
+        trZ <-
749
+            as.integer(sub("\\..*", "", levels(cbZ), perl = TRUE))
750
+                # transitional z label
751
+        cbZ <-
752
+            as.integer(plyr::mapvalues(cbZ, from = levels(cbZ),
753
+                to = 1:length(levels(cbZ))))
754
+
755
+
756
+        return(list(
757
+            "globalZ" = globalZ,
758
+            "localZ" = localZ,
759
+            "trZ" = trZ,
760
+            "cbZ" = cbZ
761
+        ))
762
+    }
... ...
@@ -27,7 +27,7 @@ library(devtools)
27 27
 install_github("campbio/celda")
28 28
 ```
29 29
 
30
-For `R-3.5` users, please install from the `R_3_5` branch. This version of **celda** is identical to the most recent release of **celda** (`master` branch) except it also works on `R-3.5`.
30
+For `R-3.5` users, please install from the `R_3_5` branch. This version of **celda** is identical to the most recent release of **celda** (`master` branch) except it also works on `R-3.5`. **NOTE:** This branch is no longer updated. Please use `R-3.6` versions.
31 31
 ```
32 32
 library(devtools)
33 33
 install_github("campbio/celda@R_3_5")
... ...
@@ -1,3 +1,11 @@
1
+Changes in version 1.0.3 (2019-05-16):
2
+
3
+    o   Merge development branch with RELEASE_3_9
4
+
5
+Changes in version 1.0.2 (2019-05-14):
6
+
7
+    o   Fix a bug in celdaHeatmap
8
+
1 9
 Changes in version 1.0.1 (2019-05-09):
2 10
 
3 11
     o   Default seed setting to maintain reproducibility
... ...
@@ -20,4 +28,4 @@ Changes in version 0.99.8 (2019-03-11):
20 28
 
21 29
 Changes in version 0.99.0 (2018-05-15):
22 30
 
23
-    o   First submission to Bioconductor
24 31
\ No newline at end of file
32
+    o   First submission to Bioconductor
... ...
@@ -4,9 +4,8 @@
4 4
 \alias{decontX}
5 5
 \title{Decontaminate count matrix}
6 6
 \usage{
7
-decontX(counts, z = NULL, batch = NULL, maxIter = 200,
8
-  beta = 1e-06, delta = 10, logfile = NULL, verbose = TRUE,
9
-  seed = 12345)
7
+decontX(counts, z = NULL, batch = NULL, maxIter = 200, delta = 10,
8
+  logfile = NULL, verbose = TRUE, seed = 12345)
10 9
 }
11 10
 \arguments{
12 11
 \item{counts}{Numeric/Integer matrix. Observed count matrix, rows represent
... ...
@@ -19,8 +18,6 @@ features and columns represent cells.}
19 18
 \item{maxIter}{Integer. Maximum iterations of EM algorithm. Default to be
20 19
 200.}
21 20
 
22
-\item{beta}{Numeric. Concentration parameter for Phi. Default to be 1e-6.}
23
-
24 21
 \item{delta}{Numeric. Symmetric concentration parameter for Theta. Default
25 22
 to be 10.}
26 23
 
... ...
@@ -43,8 +40,12 @@ This function updates decontamination on dataset with multiple
43 40
 }
44 41
 \examples{
45 42
 data(contaminationSim)
46
-deconC <- decontX(counts = contaminationSim$rmat + contaminationSim$cmat,
47
- z = contaminationSim$z, maxIter = 3)
48
-deconBg <- decontX(counts = contaminationSim$rmat + contaminationSim$cmat,
49
- maxIter = 3)
43
+deconC <- decontX(
44
+  counts = contaminationSim$rmat + contaminationSim$cmat,
45
+  z = contaminationSim$z, maxIter = 3
46
+)
47
+deconBg <- decontX(
48
+  counts = contaminationSim$rmat + contaminationSim$cmat,
49
+  maxIter = 3
50
+)
50 51
 }
... ...
@@ -48,14 +48,6 @@ test_that(desc = "Testing .decontXoneBatch", {
48 48
     expect_equal(modelDecontXoneBatch$resList$estConp,
49 49
         1 - colSums(modelDecontXoneBatch$resList$estNativeCounts) /
50 50
             colSums(deconSim$observedCounts))
51
-    expect_error(.decontXoneBatch(counts = deconSim$observedCounts,
52
-        z = deconSim$z,
53
-        beta = -1),
54
-        "'beta' should be a single positive value.")
55
-    expect_error(.decontXoneBatch(counts = deconSim$observedCounts,
56
-        z = deconSim$z,
57
-        beta = c(1, 1)),
58
-        "'beta' should be a single positive value.")
59 51
     expect_error(.decontXoneBatch(counts = deconSim$observedCounts,
60 52
         z = deconSim$z,
61 53
         delta = -1),
... ...
@@ -86,7 +78,7 @@ test_that(desc = "Testing .decontXoneBatch using background distribution", {
86 78
 })
87 79
 
88 80
 ## logLikelihood
89
-test_that(desc = "Testing logLikelihood.DecontXoneBatch", {
81
+#test_that(desc = "Testing logLikelihood.DecontXoneBatch", {
90 82
     # z.process = processCellLabels(deconSim$z,
91 83
     # num.cells=ncol(deconSim$observedCounts) )
92 84
     # expect_equal( decon.calcLL(counts=deconSim$observedCounts, z=z.process  ,
... ...
@@ -96,20 +88,20 @@ test_that(desc = "Testing logLikelihood.DecontXoneBatch", {
96 88
     # modelDecontXoneBatch$resList$logLikelihood[
97 89
     # modelDecontXoneBatch$runParams$iteration  ] )
98 90
 
99
-    cellDistModelBg <- normalizeCounts(
100
-        modelDecontXoneBatchbg$resList$estNativeCounts,
101
-        normalize = "proportion",
102
-        pseudocountNormalize = modelDecontXoneBatchbg$runParams$beta)
103
-    bgDistModelBg <- rowSums(deconSim$observedCounts) / sum(deconSim$NByC)
104
-    bgDistModelBg <- matrix(rep(bgDistModelBg,
105
-        length(deconSim$NByC)), ncol = length(deconSim$NByC))
106
-    expect_equal(.bgCalcLL(counts = deconSim$observedCounts,
107
-        theta = modelDecontXoneBatchbg$resList$theta,
108
-        cellDist = cellDistModelBg,
109
-        bgDist = bgDistModelBg),
110
-        modelDecontXoneBatchbg$resList$logLikelihood[
111
-            modelDecontXoneBatchbg$runParams$iteration])
112
-})
91
+    #cellDistModelBg <- normalizeCounts(
92
+    #    modelDecontXoneBatchbg$resList$estNativeCounts,
93
+    #    normalize = "proportion",
94
+    #    pseudocountNormalize = 1e-20)
95
+    #bgDistModelBg <- rowSums(deconSim$observedCounts) / sum(deconSim$NByC)
96
+    #bgDistModelBg <- matrix(rep(bgDistModelBg,
97
+    #    length(deconSim$NByC)), ncol = length(deconSim$NByC))
98
+    #expect_equal(.bgCalcLL(counts = deconSim$observedCounts,
99
+    #    theta = modelDecontXoneBatchbg$resList$theta,
100
+    #    cellDist = cellDistModelBg,
101
+    #    bgDist = bgDistModelBg),
102
+    #    modelDecontXoneBatchbg$resList$logLikelihood[
103
+    #        modelDecontXoneBatchbg$runParams$iteration])
104
+#})
113 105
 
114 106
 ## decontamination EM updates
115 107
 # test_that( desc = "Testing decontamination EM updates", {