Browse code

function documentationssss

Irisapo authored on 22/05/2017 21:02:55
Showing 35 changed files

... ...
@@ -1,2 +1,3 @@
1 1
 ^.*\.Rproj$
2 2
 ^\.Rproj\.user$
3
+^CONDUCT\.md$
3 4
new file mode 100644
... ...
@@ -0,0 +1,25 @@
1
+# Contributor Code of Conduct
2
+
3
+As contributors and maintainers of this project, we pledge to respect all people who 
4
+contribute through reporting issues, posting feature requests, updating documentation,
5
+submitting pull requests or patches, and other activities.
6
+
7
+We are committed to making participation in this project a harassment-free experience for
8
+everyone, regardless of level of experience, gender, gender identity and expression,
9
+sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10
+
11
+Examples of unacceptable behavior by participants include the use of sexual language or
12
+imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13
+insults, or other unprofessional conduct.
14
+
15
+Project maintainers have the right and responsibility to remove, edit, or reject comments,
16
+commits, code, wiki edits, issues, and other contributions that are not aligned to this 
17
+Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
18
+from the project team.
19
+
20
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
21
+opening an issue or contacting one or more of the project maintainers.
22
+
23
+This Code of Conduct is adapted from the Contributor Covenant 
24
+(http:contributor-covenant.org), version 1.0.0, available at 
25
+http://contributor-covenant.org/version/1/0/0/
... ...
@@ -22,7 +22,8 @@ Imports:
22 22
     graphics,
23 23
     Rmpfr,
24 24
     gplots,
25
-    doParallel
25
+    doParallel,
26
+    cluster
26 27
 Suggests:
27 28
     testthat,
28 29
     knitr,
... ...
@@ -53,11 +53,11 @@ export(simulateCells.celda_CG)
53 53
 export(simulateCells.celda_G)
54 54
 export(topRank)
55 55
 export(visualize_model_performance)
56
+import(RColorBrewer)
56 57
 import(Rmpfr)
57
-import(foreach)  
58
-import(gtable)
58
+import(foreach)
59
+import(grDevices)
60
+import(graphics)
59 61
 import(grid)
62
+import(gtable)
60 63
 import(scales)
61
-import(RColorBrewer)
62
-import(grDevices)
63
-  # require(graphics)
... ...
@@ -1,3 +1,4 @@
1
+#' available models
1 2
 #' @export
2 3
 available_models = c("celda_C", "celda_G", "celda_CG")
3 4
 
... ...
@@ -8,11 +9,10 @@ available_models = c("celda_C", "celda_G", "celda_CG")
8 9
 #' @param model Which celda sub-model to run. Options include "celda_C" (cell clustering), "celda_G" (gene clustering), "celda_CG" (gene and cell clustering)
9 10
 #' @param sample.label A numeric vector indicating the sample for each cell (column) in the count matrix
10 11
 #' @param nchains The number of chains of Gibbs sampling to run for every combination of K/L parameters
11
-#' @param K An integer or range of integers indicating the desired number of cell clusters (for celda_C / celda_CG models)
12
-#' @param L An integer or range of integers indicating the desired number of gene clusters (for celda_G / celda_CG models)
13 12
 #' @param cores The number of cores to use to speed up Gibbs sampling
14 13
 #' @param seed The base seed for random number generation. Each chain celda runs with have a seed index off of this one.
15 14
 #' @param verbose Print messages during celda chain execution
15
+#' @param ... extra parameters passed onto expand.grid 
16 16
 #' @return Object of class "celda_list", which contains results for all model parameter combinations and summaries of the run parameters
17 17
 #' @import foreach
18 18
 #' @export
... ...
@@ -49,6 +49,12 @@ celda = function(counts, model, sample.label=NULL, nchains=1, cores=1, seed=1234
49 49
 
50 50
 
51 51
 #' Sanity check arguments to celda() to ensure a smooth run.
52
+#' @param counts A count matrix 
53
+#' @param model ...
54
+#' @param sample.label ...
55
+#' @param nchains ...
56
+#' @param cores ...
57
+#' @param seed ...
52 58
 validate_args = function(counts, model, sample.label, nchains, cores, seed) {
53 59
   validate_counts(counts)
54 60
   
... ...
@@ -67,6 +73,7 @@ validate_args = function(counts, model, sample.label, nchains, cores, seed) {
67 73
     
68 74
     
69 75
 #' Perform some simple checks on the counts matrix, to ensure celda won't choke.
76
+#' @param counts A count matrix
70 77
 validate_counts = function(counts) {
71 78
   # counts has to be a matrix...
72 79
   if (class(counts) != "matrix") stop("counts argument must be of class 'matrix'")
... ...
@@ -81,6 +81,7 @@ simulateCells.celda_C = function(S=10, C.Range=c(10, 100), N.Range=c(100,5000),
81 81
 #' @param thread The thread index, used for logging purposes
82 82
 #' @param save.history Logical; whether to return the history of cluster assignments. Defaults to FALSE
83 83
 #' @param save.prob Logical; whether to return the history of cluster assignment probabilities. Defaults to FALSE
84
+#' @param ... extra parameters passed onto the celda_C 
84 85
 #' @export
85 86
 celda_C = function(counts, sample.label=NULL, K, alpha=1, beta=1, max.iter=25, 
86 87
                    seed=12345, best=TRUE, z.split.on.iter=3, z.num.splits=3, 
... ...
@@ -371,6 +372,7 @@ getL.celda_C = function(celda.mod) { return(NA) }
371 372
 #' celda_heatmap for celda Cell clustering function 
372 373
 #' @param celda.mod A celda model object of class "celda_C"
373 374
 #' @param counts A numeric count matrix
375
+#' @param ... extra parameters passed onto the render_celda_heatmap
374 376
 #' @export
375 377
 celda_heatmap.celda_C = function(celda.mod, counts, ...) {
376 378
   render_celda_heatmap(counts, z=celda.mod$z, ...)
... ...
@@ -403,5 +405,5 @@ visualize_model_performance.celda_C = function(celda.list, method="perplexity",
403 405
   
404 406
   plot.df = data.frame(size=cluster.sizes,
405 407
                        metric=performance.metric)
406
-  return(celda::render_model_performance_plot(plot.df, "K", method, title))
408
+  return(render_model_performance_plot(plot.df, "K", method, title))
407 409
 }
... ...
@@ -199,6 +199,7 @@ cCG.calcGibbsProbY = function(n.CP.by.TS, n.by.TS, nG.by.TS, nG.in.Y, beta, delt
199 199
 #' @param gamma The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state.
200 200
 #' @param delta The Dirichlet distribution parameter for Eta; adds a gene pseudocount to the numbers of genes each state.
201 201
 #' @param seed starting point used for generating simulated data.
202
+#' @param ... extra parameters passed onto the simulateCells.celda_CG
202 203
 #' @export
203 204
 simulateCells.celda_CG = function(S=10, C.Range=c(50,100), N.Range=c(500,5000), 
204 205
                                   G=1000, K=3, L=10, alpha=1, beta=1, gamma=1, 
... ...
@@ -272,12 +273,13 @@ simulateCells.celda_CG = function(S=10, C.Range=c(50,100), N.Range=c(500,5000),
272 273
 #' @param seed Parameter to set.seed() for random number generation
273 274
 #' @param best Whether to return the cluster assignment with the highest log-likelihood. Defaults to TRUE. Returns last generated cluster assignment when FALSE.
274 275
 #' @param z.split.on.iter On z.split.on.iter-th iterations, a heuristic will be applied using hierarchical clustering to determine if a cell cluster should be merged with another cell cluster and a third cell cluster should be split into two clusters. This helps avoid local optimum during the initialization. Default to be 3. 
275
-#' @param z.num.split Maximum number of times to perform the heuristic described in z.split.on.iter.
276
+#' @param z.num.splits Maximum number of times to perform the heuristic described in z.split.on.iter.
276 277
 #' @param y.split.on.iter  On every y.split.on.iter iteration, a heuristic will be applied using hierarchical clustering to determine if a gene cluster should be merged with another gene cluster and a third gene cluster should be split into two clusters. This helps avoid local optimum during the initialization. Default to be 3. 
277 278
 #' @param y.num.splits Maximum number of times to perform the heuristic described in y.split.on.iter.
278 279
 #' @param thread The thread index, used for logging purposes.
279 280
 #' @param save.history Logical; whether to return the history of cluster assignments. Defaults to FALSE.
280 281
 #' @param save.prob Logical; whether to return the history of cluster assignment probabilities. Defaults to FALSE.
282
+#' @param ... extra parameters passed onto celda_CG
281 283
 #' @export
282 284
 celda_CG = function(counts, sample.label=NULL, K, L, alpha=1, beta=1, delta=1, gamma=1,
283 285
 			max.iter=25, seed=12345, best=TRUE, z.split.on.iter=3, z.num.splits=3,
... ...
@@ -623,6 +625,8 @@ getL.celda_CG = function(celda.mod) {
623 625
 
624 626
 #' celda_heatmap for celda Cell and Gene clustering function 
625 627
 #' @param celda.mod A celda model object of "Celda_CG"
628
+#' @param counts A count matrix
629
+#' @param ... extra parameters passed onto render_celda_heatmap
626 630
 #' @export
627 631
 celda_heatmap.celda_CG = function(celda.mod, counts, ...) {
628 632
   render_celda_heatmap(counts, z=celda.mod$z, y=celda.mod$y, ...)
... ...
@@ -630,7 +634,9 @@ celda_heatmap.celda_CG = function(celda.mod, counts, ...) {
630 634
 
631 635
 
632 636
 #' visualize_model_performance for Celda Cell and Gene clustering function 
633
-#' @param celda.mod A celda model object of "Celda_CG"
637
+#' @param celda.list A celda model object of "Celda_CG"
638
+#' @param title Title for the visualize_model_performance
639
+#' @param method One of “perplexity”, “harmonic”, or “loglik”
634 640
 #' @export
635 641
 #' @import Rmpfr
636 642
 visualize_model_performance.celda_CG = function(celda.list, method="perplexity", 
... ...
@@ -130,14 +130,15 @@ cG.calcLL = function(n.TS.by.C, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delt
130 130
 #' This function calculates the log-likelihood of a given set of cluster assigments for the samples
131 131
 #' represented in the provided count matrix.
132 132
 #' 
133
-#' @param ix The index of the cell being assigned a cluster during the current iteration of Gibbs sampling
134
-#' @param counts A numeric count matrix
135
-#' @param z A numeric vector of cluster assignments
136
-#' @param k The number of clusters being considered
137
-#' @param alpha Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution
133
+#' 
134
+#' @param n.TS.by.C Number of counts in each Transcriptional State per Cell 
135
+#' @param n.by.TS Number of counts per Transcriptional State
136
+#' @param nG.by.TS Number of genes in each Transcriptional State
137
+#' @param nG.in.Y  Number of genes in each of the cell cluster
138
+#' @param gamma The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state.
139
+#' @param delta The Dirichlet distribution parameter for Eta; adds a gene pseudocount to the numbers of genes each state.
138 140
 #' @param beta Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution
139 141
 #' @keywords log likelihood
140
-#' @examples TODO
141 142
 cG.calcGibbsProbY = function(n.TS.by.C, n.by.TS, nG.by.TS, nG.in.Y, beta, delta, gamma) {
142 143
  
143 144
   ## Calculate for "Eta" component
... ...
@@ -174,20 +175,20 @@ cG.calcGibbsProbY = function(n.TS.by.C, n.by.TS, nG.by.TS, nG.in.Y, beta, delta,
174 175
 #' sequencing count matrix, using the celda Bayesian hierarchical model.
175 176
 #' 
176 177
 #' @param counts A numeric count matrix
177
-#' @param k The number of clusters to generate
178
-#' @param a Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution
179
-#' @param b Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution
180
-#' @param g Number of cell states ("topics")
178
+#' @param L The number of clusters to generate
179
+#' @param beta The Dirichlet distribution parameter for Phi; adds a pseudocount to each transcriptional state within each cell.
180
+#' @param delta The Dirichlet distribution parameter for Eta; adds a gene pseudocount to the numbers of genes each state. Default to 1.
181
+#' @param gamma The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state.
181 182
 #' @param max.iter Maximum iterations of Gibbs sampling to perform. Defaults to 25.
182
-#' @param min.cell Desired minimum number of cells per cluster
183
-#' @param seed Parameter to set.seed() for random number generation
184
-#' @param best Whether to return the cluster assignment with the highest log-likelihood. Defaults to TRUE. Returns last generated cluster assignment when FALSE.
185
-#' @param kick Whether to randomize cluster assignments when a cluster has fewer than min.cell cells assigned to it during Gibbs sampling. (TODO param currently unused?)
183
+#' @param y.split.on.iter  On every y.split.on.iter iteration, a heuristic will be applied using hierarchical clustering to determine if a gene cluster should be merged with another gene cluster and a third gene cluster should be split into two clusters. This helps avoid local optimum during the initialization. Default to be 3. 
184
+#' @param y.num.splits Maximum number of times to perform the heuristic described in y.split.on.iter.
185
+#' @param seed Parameter to set.seed() for random number generation.
186
+#' @param best Whether to return the cluster assignment with the highest log-likelihood. Defaults to TRUE. Returns last generated cluster assignment when FALSE. Default to be TRUE. 
186 187
 #' @param save.history Logical; whether to return the history of cluster assignments. Defaults to FALSE
187 188
 #' @param save.prob Logical; whether to return the history of cluster assignment probabilities. Defaults to FALSE
188 189
 #' @param thread The thread index, used for logging purposes
190
+#' @param ...  extra parameters passed onto celda_G
189 191
 #' @keywords LDA gene clustering gibbs
190
-#' @examples TODO
191 192
 #' @export
192 193
 celda_G = function(counts, L, beta=1, delta=1, gamma=1, max.iter=25,
193 194
                    seed=12345, best=TRUE, y.split.on.iter=3, 
... ...
@@ -325,8 +326,8 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, max.iter=25,
325 326
 #' @param G The number of genes for which to simulate counts
326 327
 #' @param beta The Dirichlet distribution parameter for Phi; adds a pseudocount to each transcriptional state within each cell
327 328
 #' @param delta The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state
329
+#' @param gamma The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state.
328 330
 #' @param seed Parameter to set.seed() for random number generation
329
-#' @examples TODO
330 331
 #' @export
331 332
 simulateCells.celda_G = function(C=100, N.Range=c(500,5000),  G=1000, 
332 333
                                          L=5, beta=1, gamma=1, delta=1, seed=12345) {
... ...
@@ -464,6 +465,7 @@ getL.celda_G = function(celda.mod) {
464 465
 #' celda_heatmap for celda Gene clustering function 
465 466
 #' @param celda.mod A celda model object of class "celda_G"
466 467
 #' @param counts A numeric count matrix
468
+#' @param ... extra parameters passed onto render_celda_heatmap
467 469
 #' @export
468 470
 celda_heatmap.celda_G = function(celda.mod, counts, ...) {
469 471
   render_celda_heatmap(counts, y=celda.mod$y, ...)
... ...
@@ -497,5 +499,5 @@ visualize_model_performance.celda_G = function(celda.list, method="perplexity",
497 499
   
498 500
   plot.df = data.frame(size=cluster.sizes,
499 501
                        metric=performance.metric)
500
-  return(celda::render_model_performance_plot(plot.df, "L", method, title))
502
+  return(render_model_performance_plot(plot.df, "L", method, title))
501 503
 }
... ...
@@ -39,6 +39,9 @@ normalizeLogProbs = function(ll.probs) {
39 39
 }
40 40
 
41 41
 
42
+#' normalizeCounts function 
43
+#' @param counts A count matrix 
44
+#' @param scale.factor the scalor for the normalization 
42 45
 #' @export
43 46
 normalizeCounts = function(counts, scale.factor=1e6) {
44 47
   counts.norm = sweep(counts, 2, colSums(counts) / scale.factor, "/")
... ...
@@ -79,7 +79,7 @@ robust_scale <- function(x){
79 79
 #' @param scale.row Logical; psecifying if the z-score transformation is performed to the counts matrix. Defualt to be TRUE. 
80 80
 #' @param z.trim two element vector to specify the lower and upper cutoff of the z-score normalization result by default it is set to NULL so no trimming will be done. Default to be (-2,2)
81 81
 #' @param normalize specify the normalization type: "cpm" or "none". Defualt to be "none". 
82
-#' @param scale_fun specify the function for scaling. Defualt to be scale. 
82
+#' @param scale_function specify the function for scaling. Defualt to be scale. 
83 83
 #' @param cluster.row Logical; determining if rows should be clustered.
84 84
 #' @param cluster.column Logical; determining if columns should be clustered.
85 85
 #' @param annotation_cell a dataframe for the cell annotations (columns).
... ...
@@ -95,6 +95,12 @@ robust_scale <- function(x){
95 95
 #' @param annotation_names_cell Logical; showing if the names for cell annotation tracks should be drawn. Default to be TRUE. 
96 96
 #' @param show_genenames Logical; specifying if gene names should be shown. Default to be FALSE. 
97 97
 #' @param show_cellnames Logical; specifying if cell names should be shown. Default to be FALSE. 
98
+#' @import gtable
99
+#' @import grid
100
+#' @import scales
101
+#' @import RColorBrewer
102
+#' @import grDevices
103
+#' @import graphics
98 104
 #' @export 
99 105
 render_celda_heatmap <- function(counts, z=NULL, y=NULL, 
100 106
                                  scale.log=FALSE, scale.row=TRUE,
... ...
@@ -110,13 +116,6 @@ render_celda_heatmap <- function(counts, z=NULL, y=NULL,
110 116
                                  annotation_names_cell = TRUE,
111 117
                                  show_genenames = FALSE, 
112 118
                                  show_cellnames = FALSE) {
113
-  require(gtable)
114
-  require(grid)
115
-  require(scales)
116
-  require(stats)
117
-  require(RColorBrewer)
118
-  require(grDevices)
119
-  require(graphics)
120 119
   
121 120
   
122 121
   if(normalize =="cpm"){
... ...
@@ -220,13 +219,13 @@ render_celda_heatmap <- function(counts, z=NULL, y=NULL,
220 219
     }
221 220
   }else{  # only one side for the counts values (eihter positive or negative )
222 221
     if(is.null(col)){
223
-      col <- colorRampPalette(c("#FFFFFF", RColorBrewer::brewer.pal(n = 9, name = "Reds")))(100)
222
+      col <- colorRampPalette(c("#FFFFFF", brewer.pal(n = 9, name = "Reds")))(100)
224 223
     }
225 224
   }
226 225
 
227 226
   
228 227
   if(cluster.row & cluster.column){
229
-    celda::semi_pheatmap(mat = counts, 
228
+           semi_pheatmap(mat = counts, 
230 229
                          color = col, 
231 230
                          breaks = breaks, 
232 231
                          cutree_rows = L,
... ...
@@ -246,7 +245,7 @@ render_celda_heatmap <- function(counts, z=NULL, y=NULL,
246 245
   }
247 246
   
248 247
   if(cluster.row & (!cluster.column)){
249
-    celda::semi_pheatmap(mat = counts, 
248
+          semi_pheatmap(mat = counts, 
250 249
                          color = col,
251 250
                          breaks = breaks, 
252 251
                          cutree_rows = L,
... ...
@@ -266,7 +265,7 @@ render_celda_heatmap <- function(counts, z=NULL, y=NULL,
266 265
     
267 266
     
268 267
     if((!cluster.row) & cluster.column){
269
-      celda::semi_pheatmap(mat = counts, 
268
+            semi_pheatmap(mat = counts, 
270 269
                            color = col,
271 270
                            breaks = breaks, 
272 271
                            cluster_rows = FALSE,
... ...
@@ -285,7 +284,7 @@ render_celda_heatmap <- function(counts, z=NULL, y=NULL,
285 284
       }
286 285
     
287 286
     if((!cluster.row) & (!cluster.column) ){
288
-      celda::semi_pheatmap(mat = counts,
287
+            semi_pheatmap(mat = counts,
289 288
                            color = col,
290 289
                            breaks = breaks, 
291 290
                            cluster_rows = FALSE,
... ...
@@ -1,3 +1,9 @@
1
+#' TopRank function 
2
+#' @param fm factorized matrix.
3
+#' @param n Maximum number of items returned for each entry. 
4
+#' @param margin 1 for rows, 2 for columns.
5
+#' @param threshold only include entries in the matrix that is greader than the threshold.
6
+#' @param decreasing Logical; specifying if rank should be decreasing. Default to be TRUE. 
1 7
 #' @export
2 8
 topRank = function(fm, n=25, margin=2, threshold=0, decreasing=TRUE) {
3 9
   if(is.null(threshold) | is.na(threshold)) {
... ...
@@ -21,7 +21,7 @@
21 21
 
22 22
 #' Get run parameters for a celda run.
23 23
 #'
24
-#' @param celda.res A celda_list object, as returned from celda()
24
+#' @param celda.list A celda_list object, as returned from celda()
25 25
 #' @export
26 26
 runParams = function(celda.list) {
27 27
   return(celda.list$run.params)
... ...
@@ -107,14 +107,7 @@ getL = function(celda.mod) {
107 107
 #'
108 108
 #' @param celda.mod A celda model object (of class "celda_C", "celda_G" or "celda_CG")
109 109
 #' @param counts the counts matrix 
110
-#' @param z A numeric vector of cluster assignments for cell. Resolved automatically from celda object when available.
111
-#' @param y A numeric vector of cluster assignments for gene. Resolved automatically from celda object when available.
112
-#' @param scale.log specify the transformation type of the matrix for (semi-)heatmap, can be "log","row"(z-acore by row),"col"(z-score by column), etc. #To be completed
113
-#' @param scale.row specify the transformation type of the matrix for (semi-)heatmap, can be "log","row"(z-acore by row),"col"(z-score by column), etc. #To be completed
114
-#' @param z.trim two element vector to specify the lower and upper cutoff of the z-score normalization result by default it is set to NULL so no trimming will be done.
115
-#' @param scale_fun specify the function for scaling 
116
-#' @param cluster.row boolean values determining if rows should be clustered
117
-#' @param cluster.column boolean values determining if columns should be clustered
110
+#' @param ... extra parameters passed onto celda_heatmap
118 111
 #' @export 
119 112
 celda_heatmap <- function(celda.mod, counts, ...) {
120 113
   UseMethod("celda_heatmap", celda.mod)
... ...
@@ -124,10 +117,11 @@ celda_heatmap <- function(celda.mod, counts, ...) {
124 117
 #' Visualize various performance metrics as a function of K / L to aid parameter choice.
125 118
 #' 
126 119
 #' @param celda.list A celda_list object as returned from *celda()*
127
-#' @param metric Which performance metric to visualize. One of ("perplexity", "harmonic", "loglik"). "perplexity" calculates the inverse of the geometric mean of the log likelihoods from each iteration of Gibbs sampling. "harmonic" calculates the marginal likelihood has the harmonic mean of the likelihoods. "loglik" plots the highest log-likelihood during Gibbs iteration.
120
+#' @param method Which performance metric to visualize. One of ("perplexity", "harmonic", "loglik"). "perplexity" calculates the inverse of the geometric mean of the log likelihoods from each iteration of Gibbs sampling. "harmonic" calculates the marginal likelihood has the harmonic mean of the likelihoods. "loglik" plots the highest log-likelihood during Gibbs iteration.
121
+#' @param title Title for the visualize_model_performance
128 122
 #' @return A ggplot object containing the requested plot(s)
129 123
 #' @export
130
-visualize_model_performance <- function(celda.list, method, ...) {
124
+visualize_model_performance <- function(celda.list, method, title) {
131 125
   # Dispatch on the list's content type
132 126
   UseMethod("visualize_model_performance", celda.list$content.type)
133 127
 }
... ...
@@ -855,6 +855,8 @@ identity2 = function(x, ...){
855 855
 #' @param width manual option for determining the output file width in inches.
856 856
 #' @param height manual option for determining the output file height in inches.
857 857
 #' @param silent do not draw the plot (useful when using the gtable output)
858
+#' @param row_label row cluster labels for semi-clustering 
859
+#' @param col_label column cluster labels for semi-clustering 
858 860
 #' @param \dots graphical parameters for the text used in plot. Parameters passed to 
859 861
 #' \code{\link{grid.text}}, see \code{\link{gpar}}. 
860 862
 #' 
... ...
@@ -868,7 +870,7 @@ identity2 = function(x, ...){
868 870
 #' }
869 871
 #' 
870 872
 #' @author  Raivo Kolde <rkolde@@gmail.com>
871
-#' @examples
873
+#' #@examples
872 874
 #' # Create test matrix
873 875
 #' test = matrix(rnorm(200), 20, 10)
874 876
 #' test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3
... ...
@@ -951,7 +953,7 @@ identity2 = function(x, ...){
951 953
 #' 
952 954
 #' pheatmap(test, clustering_callback = callback)
953 955
 #' 
954
-#' \dontrun{
956
+#' dontrun{
955 957
 #' # Same using dendsort package
956 958
 #' library(dendsort)
957 959
 #' 
958 960
new file mode 100644
... ...
@@ -0,0 +1,14 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda.R
3
+\docType{data}
4
+\name{available_models}
5
+\alias{available_models}
6
+\title{available models}
7
+\format{An object of class \code{character} of length 3.}
8
+\usage{
9
+available_models
10
+}
11
+\description{
12
+available models
13
+}
14
+\keyword{datasets}
... ...
@@ -7,24 +7,23 @@
7 7
 cG.calcGibbsProbY(n.TS.by.C, n.by.TS, nG.by.TS, nG.in.Y, beta, delta, gamma)
8 8
 }
9 9
 \arguments{
10
-\item{beta}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
10
+\item{n.TS.by.C}{Number of counts in each Transcriptional State per Cell}
11
+
12
+\item{n.by.TS}{Number of counts per Transcriptional State}
11 13
 
12
-\item{ix}{The index of the cell being assigned a cluster during the current iteration of Gibbs sampling}
14
+\item{nG.by.TS}{Number of genes in each Transcriptional State}
13 15
 
14
-\item{counts}{A numeric count matrix}
16
+\item{nG.in.Y}{Number of genes in each of the cell cluster}
15 17
 
16
-\item{z}{A numeric vector of cluster assignments}
18
+\item{beta}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
17 19
 
18
-\item{k}{The number of clusters being considered}
20
+\item{delta}{The Dirichlet distribution parameter for Eta; adds a gene pseudocount to the numbers of genes each state.}
19 21
 
20
-\item{alpha}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
22
+\item{gamma}{The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state.}
21 23
 }
22 24
 \description{
23 25
 This function calculates the log-likelihood of a given set of cluster assigments for the samples
24 26
 represented in the provided count matrix.
25 27
 }
26
-\examples{
27
-TODO
28
-}
29 28
 \keyword{likelihood}
30 29
 \keyword{log}
... ...
@@ -22,9 +22,7 @@ celda(counts, model, sample.label = NULL, nchains = 1, cores = 1,
22 22
 
23 23
 \item{verbose}{Print messages during celda chain execution}
24 24
 
25
-\item{K}{An integer or range of integers indicating the desired number of cell clusters (for celda_C / celda_CG models)}
26
-
27
-\item{L}{An integer or range of integers indicating the desired number of gene clusters (for celda_G / celda_CG models)}
25
+\item{...}{extra parameters passed onto expand.grid}
28 26
 }
29 27
 \value{
30 28
 Object of class "celda_list", which contains results for all model parameter combinations and summaries of the run parameters
... ...
@@ -35,6 +35,8 @@ celda_C(counts, sample.label = NULL, K, alpha = 1, beta = 1,
35 35
 \item{save.history}{Logical; whether to return the history of cluster assignments. Defaults to FALSE}
36 36
 
37 37
 \item{save.prob}{Logical; whether to return the history of cluster assignment probabilities. Defaults to FALSE}
38
+
39
+\item{...}{extra parameters passed onto the celda_C}
38 40
 }
39 41
 \description{
40 42
 celda Cell clustering function
... ...
@@ -35,6 +35,8 @@ celda_CG(counts, sample.label = NULL, K, L, alpha = 1, beta = 1,
35 35
 
36 36
 \item{z.split.on.iter}{On z.split.on.iter-th iterations, a heuristic will be applied using hierarchical clustering to determine if a cell cluster should be merged with another cell cluster and a third cell cluster should be split into two clusters. This helps avoid local optimum during the initialization. Default to be 3.}
37 37
 
38
+\item{z.num.splits}{Maximum number of times to perform the heuristic described in z.split.on.iter.}
39
+
38 40
 \item{y.split.on.iter}{On every y.split.on.iter iteration, a heuristic will be applied using hierarchical clustering to determine if a gene cluster should be merged with another gene cluster and a third gene cluster should be split into two clusters. This helps avoid local optimum during the initialization. Default to be 3.}
39 41
 
40 42
 \item{y.num.splits}{Maximum number of times to perform the heuristic described in y.split.on.iter.}
... ...
@@ -45,7 +47,7 @@ celda_CG(counts, sample.label = NULL, K, L, alpha = 1, beta = 1,
45 47
 
46 48
 \item{save.prob}{Logical; whether to return the history of cluster assignment probabilities. Defaults to FALSE.}
47 49
 
48
-\item{z.num.split}{Maximum number of times to perform the heuristic described in z.split.on.iter.}
50
+\item{...}{extra parameters passed onto celda_CG}
49 51
 }
50 52
 \description{
51 53
 celda Cell and Gene clustering function
... ...
@@ -11,37 +11,36 @@ celda_G(counts, L, beta = 1, delta = 1, gamma = 1, max.iter = 25,
11 11
 \arguments{
12 12
 \item{counts}{A numeric count matrix}
13 13
 
14
-\item{max.iter}{Maximum iterations of Gibbs sampling to perform. Defaults to 25.}
14
+\item{L}{The number of clusters to generate}
15 15
 
16
-\item{seed}{Parameter to set.seed() for random number generation}
16
+\item{beta}{The Dirichlet distribution parameter for Phi; adds a pseudocount to each transcriptional state within each cell.}
17 17
 
18
-\item{best}{Whether to return the cluster assignment with the highest log-likelihood. Defaults to TRUE. Returns last generated cluster assignment when FALSE.}
18
+\item{delta}{The Dirichlet distribution parameter for Eta; adds a gene pseudocount to the numbers of genes each state. Default to 1.}
19 19
 
20
-\item{thread}{The thread index, used for logging purposes}
20
+\item{gamma}{The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state.}
21 21
 
22
-\item{save.prob}{Logical; whether to return the history of cluster assignment probabilities. Defaults to FALSE}
22
+\item{max.iter}{Maximum iterations of Gibbs sampling to perform. Defaults to 25.}
23 23
 
24
-\item{save.history}{Logical; whether to return the history of cluster assignments. Defaults to FALSE}
24
+\item{seed}{Parameter to set.seed() for random number generation.}
25 25
 
26
-\item{k}{The number of clusters to generate}
26
+\item{best}{Whether to return the cluster assignment with the highest log-likelihood. Defaults to TRUE. Returns last generated cluster assignment when FALSE. Default to be TRUE.}
27 27
 
28
-\item{a}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
28
+\item{y.split.on.iter}{On every y.split.on.iter iteration, a heuristic will be applied using hierarchical clustering to determine if a gene cluster should be merged with another gene cluster and a third gene cluster should be split into two clusters. This helps avoid local optimum during the initialization. Default to be 3.}
29 29
 
30
-\item{b}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
30
+\item{y.num.splits}{Maximum number of times to perform the heuristic described in y.split.on.iter.}
31 31
 
32
-\item{g}{Number of cell states ("topics")}
32
+\item{thread}{The thread index, used for logging purposes}
33 33
 
34
-\item{min.cell}{Desired minimum number of cells per cluster}
34
+\item{save.prob}{Logical; whether to return the history of cluster assignment probabilities. Defaults to FALSE}
35 35
 
36
-\item{kick}{Whether to randomize cluster assignments when a cluster has fewer than min.cell cells assigned to it during Gibbs sampling. (TODO param currently unused?)}
36
+\item{save.history}{Logical; whether to return the history of cluster assignments. Defaults to FALSE}
37
+
38
+\item{...}{extra parameters passed onto celda_G}
37 39
 }
38 40
 \description{
39 41
 geneCluster provides cluster assignments for all genes in a provided single-cell 
40 42
 sequencing count matrix, using the celda Bayesian hierarchical model.
41 43
 }
42
-\examples{
43
-TODO
44
-}
45 44
 \keyword{LDA}
46 45
 \keyword{clustering}
47 46
 \keyword{gene}
... ...
@@ -11,21 +11,7 @@ celda_heatmap(celda.mod, counts, ...)
11 11
 
12 12
 \item{counts}{the counts matrix}
13 13
 
14
-\item{z}{A numeric vector of cluster assignments for cell. Resolved automatically from celda object when available.}
15
-
16
-\item{y}{A numeric vector of cluster assignments for gene. Resolved automatically from celda object when available.}
17
-
18
-\item{scale.log}{specify the transformation type of the matrix for (semi-)heatmap, can be "log","row"(z-acore by row),"col"(z-score by column), etc. #To be completed}
19
-
20
-\item{scale.row}{specify the transformation type of the matrix for (semi-)heatmap, can be "log","row"(z-acore by row),"col"(z-score by column), etc. #To be completed}
21
-
22
-\item{z.trim}{two element vector to specify the lower and upper cutoff of the z-score normalization result by default it is set to NULL so no trimming will be done.}
23
-
24
-\item{scale_fun}{specify the function for scaling}
25
-
26
-\item{cluster.row}{boolean values determining if rows should be clustered}
27
-
28
-\item{cluster.column}{boolean values determining if columns should be clustered}
14
+\item{...}{extra parameters passed onto celda_heatmap}
29 15
 }
30 16
 \description{
31 17
 Render a stylable heatmap of count data based on celda clustering results.
... ...
@@ -10,6 +10,8 @@
10 10
 \item{celda.mod}{A celda model object of class "celda_C"}
11 11
 
12 12
 \item{counts}{A numeric count matrix}
13
+
14
+\item{...}{extra parameters passed onto the render_celda_heatmap}
13 15
 }
14 16
 \description{
15 17
 celda_heatmap for celda Cell clustering function
... ...
@@ -8,6 +8,10 @@
8 8
 }
9 9
 \arguments{
10 10
 \item{celda.mod}{A celda model object of "Celda_CG"}
11
+
12
+\item{counts}{A count matrix}
13
+
14
+\item{...}{extra parameters passed onto render_celda_heatmap}
11 15
 }
12 16
 \description{
13 17
 celda_heatmap for celda Cell and Gene clustering function
... ...
@@ -10,6 +10,8 @@
10 10
 \item{celda.mod}{A celda model object of class "celda_G"}
11 11
 
12 12
 \item{counts}{A numeric count matrix}
13
+
14
+\item{...}{extra parameters passed onto render_celda_heatmap}
13 15
 }
14 16
 \description{
15 17
 celda_heatmap for celda Gene clustering function
16 18
new file mode 100644
... ...
@@ -0,0 +1,16 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_functions.R
3
+\name{normalizeCounts}
4
+\alias{normalizeCounts}
5
+\title{normalizeCounts function}
6
+\usage{
7
+normalizeCounts(counts, scale.factor = 1e+06)
8
+}
9
+\arguments{
10
+\item{counts}{A count matrix}
11
+
12
+\item{scale.factor}{the scalor for the normalization}
13
+}
14
+\description{
15
+normalizeCounts function
16
+}
... ...
@@ -23,6 +23,8 @@ render_celda_heatmap(counts, z = NULL, y = NULL, scale.log = FALSE,
23 23
 
24 24
 \item{scale.row}{Logical; psecifying if the z-score transformation is performed to the counts matrix. Defualt to be TRUE.}
25 25
 
26
+\item{scale_function}{specify the function for scaling. Defualt to be scale.}
27
+
26 28
 \item{normalize}{specify the normalization type: "cpm" or "none". Defualt to be "none".}
27 29
 
28 30
 \item{z.trim}{two element vector to specify the lower and upper cutoff of the z-score normalization result by default it is set to NULL so no trimming will be done. Default to be (-2,2)}
... ...
@@ -53,8 +55,6 @@ breaks are calculated automatically.}
53 55
 \item{show_genenames}{Logical; specifying if gene names should be shown. Default to be FALSE.}
54 56
 
55 57
 \item{show_cellnames}{Logical; specifying if cell names should be shown. Default to be FALSE.}
56
-
57
-\item{scale_fun}{specify the function for scaling. Defualt to be scale.}
58 58
 }
59 59
 \description{
60 60
 render function for celda heatmap plot
... ...
@@ -7,7 +7,7 @@
7 7
 runParams(celda.list)
8 8
 }
9 9
 \arguments{
10
-\item{celda.res}{A celda_list object, as returned from celda()}
10
+\item{celda.list}{A celda_list object, as returned from celda()}
11 11
 }
12 12
 \description{
13 13
 Get run parameters for a celda run.
... ...
@@ -157,6 +157,10 @@ calculated so that the plot would fit there, unless specified otherwise.}
157 157
 
158 158
 \item{silent}{do not draw the plot (useful when using the gtable output)}
159 159
 
160
+\item{row_label}{row cluster labels for semi-clustering}
161
+
162
+\item{col_label}{column cluster labels for semi-clustering}
163
+
160 164
 \item{\dots}{graphical parameters for the text used in plot. Parameters passed to 
161 165
 \code{\link{grid.text}}, see \code{\link{gpar}}.}
162 166
 }
... ...
@@ -180,7 +184,9 @@ clustering anymore, roughly more than 1000. Instead of showing all the rows
180 184
 separately one can cluster the rows in advance and show only the cluster centers. 
181 185
 The number of clusters can be tuned with parameter kmeans_k.
182 186
 }
183
-\examples{
187
+\author{
188
+Raivo Kolde <rkolde@gmail.com>
189
+#@examples
184 190
 # Create test matrix
185 191
 test = matrix(rnorm(200), 20, 10)
186 192
 test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3
... ...
@@ -199,7 +205,7 @@ pheatmap(test, legend = FALSE)
199 205
 
200 206
 # Show text within cells
201 207
 pheatmap(test, display_numbers = TRUE)
202
-pheatmap(test, display_numbers = TRUE, number_format = "\\\%.1e")
208
+pheatmap(test, display_numbers = TRUE, number_format = "\%.1e")
203 209
 pheatmap(test, display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test)))
204 210
 pheatmap(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0",
205 211
 "1e-4", "1e-3", "1e-2", "1e-1", "1"))
... ...
@@ -263,15 +269,11 @@ callback = function(hc, mat){
263 269
 
264 270
 pheatmap(test, clustering_callback = callback)
265 271
 
266
-\dontrun{
272
+dontrun{
267 273
 # Same using dendsort package
268 274
 library(dendsort)
269 275
 
270 276
 callback = function(hc, ...){dendsort(hc)}
271 277
 pheatmap(test, clustering_callback = callback)
272 278
 }
273
-
274
-}
275
-\author{
276
-Raivo Kolde <rkolde@gmail.com>
277 279
 }
... ...
@@ -30,6 +30,8 @@ simulateCells.celda_CG(S = 10, C.Range = c(50, 100), N.Range = c(500,
30 30
 \item{delta}{The Dirichlet distribution parameter for Eta; adds a gene pseudocount to the numbers of genes each state.}
31 31
 
32 32
 \item{seed}{starting point used for generating simulated data.}
33
+
34
+\item{...}{extra parameters passed onto the simulateCells.celda_CG}
33 35
 }
34 36
 \description{
35 37
 simulateCells for the celda Cell and Gene clustering function
... ...
@@ -18,6 +18,8 @@ simulateCells.celda_G(C = 100, N.Range = c(500, 5000), G = 1000, L = 5,
18 18
 
19 19
 \item{beta}{The Dirichlet distribution parameter for Phi; adds a pseudocount to each transcriptional state within each cell}
20 20
 
21
+\item{gamma}{The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state.}
22
+
21 23
 \item{delta}{The Dirichlet distribution parameter for Psi; adds a pseudocount to each gene within each transcriptional state}
22 24
 
23 25
 \item{seed}{Parameter to set.seed() for random number generation}
... ...
@@ -26,6 +28,3 @@ simulateCells.celda_G(C = 100, N.Range = c(500, 5000), G = 1000, L = 5,
26 28
 Generate a simulated count matrix, based off a generative distribution whose 
27 29
 parameters can be provided by the user.
28 30
 }
29
-\examples{
30
-TODO
31
-}
32 31
new file mode 100644
... ...
@@ -0,0 +1,22 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/feature_selection.R
3
+\name{topRank}
4
+\alias{topRank}
5
+\title{TopRank function}
6
+\usage{
7
+topRank(fm, n = 25, margin = 2, threshold = 0, decreasing = TRUE)
8
+}
9
+\arguments{
10
+\item{fm}{factorized matrix.}
11
+
12
+\item{n}{Maximum number of items returned for each entry.}
13
+
14
+\item{margin}{1 for rows, 2 for columns.}
15
+
16
+\item{threshold}{only include entries in the matrix that is greader than the threshold.}
17
+
18
+\item{decreasing}{Logical; specifying if rank should be decreasing. Default to be TRUE.}
19
+}
20
+\description{
21
+TopRank function
22
+}
... ...
@@ -6,6 +6,19 @@
6 6
 \usage{
7 7
 validate_args(counts, model, sample.label, nchains, cores, seed)
8 8
 }
9
+\arguments{
10
+\item{counts}{A count matrix}
11
+
12
+\item{model}{...}
13
+
14
+\item{sample.label}{...}
15
+
16
+\item{nchains}{...}
17
+
18
+\item{cores}{...}
19
+
20
+\item{seed}{...}
21
+}
9 22
 \description{
10 23
 Sanity check arguments to celda() to ensure a smooth run.
11 24
 }
... ...
@@ -6,6 +6,9 @@
6 6
 \usage{
7 7
 validate_counts(counts)
8 8
 }
9
+\arguments{
10
+\item{counts}{A count matrix}
11
+}
9 12
 \description{
10 13
 Perform some simple checks on the counts matrix, to ensure celda won't choke.
11 14
 }
... ...
@@ -4,12 +4,14 @@
4 4
 \alias{visualize_model_performance}
5 5
 \title{Visualize various performance metrics as a function of K / L to aid parameter choice.}
6 6
 \usage{
7
-visualize_model_performance(celda.list, method, ...)
7
+visualize_model_performance(celda.list, method, title)
8 8
 }
9 9
 \arguments{
10 10
 \item{celda.list}{A celda_list object as returned from *celda()*}
11 11
 
12
-\item{metric}{Which performance metric to visualize. One of ("perplexity", "harmonic", "loglik"). "perplexity" calculates the inverse of the geometric mean of the log likelihoods from each iteration of Gibbs sampling. "harmonic" calculates the marginal likelihood has the harmonic mean of the likelihoods. "loglik" plots the highest log-likelihood during Gibbs iteration.}
12
+\item{method}{Which performance metric to visualize. One of ("perplexity", "harmonic", "loglik"). "perplexity" calculates the inverse of the geometric mean of the log likelihoods from each iteration of Gibbs sampling. "harmonic" calculates the marginal likelihood has the harmonic mean of the likelihoods. "loglik" plots the highest log-likelihood during Gibbs iteration.}
13
+
14
+\item{title}{Title for the visualize_model_performance}
13 15
 }
14 16
 \value{
15 17
 A ggplot object containing the requested plot(s)
... ...
@@ -8,7 +8,11 @@
8 8
   method = "perplexity", title = "Model Performance (All Chains)")
9 9
 }
10 10
 \arguments{
11
-\item{celda.mod}{A celda model object of "Celda_CG"}
11
+\item{celda.list}{A celda model object of "Celda_CG"}
12
+
13
+\item{method}{One of “perplexity”, “harmonic”, or “loglik”}
14
+
15
+\item{title}{Title for the visualize_model_performance}
12 16
 }
13 17
 \description{
14 18
 visualize_model_performance for Celda Cell and Gene clustering function
... ...
@@ -3,10 +3,10 @@ library(celda)
3 3
 context("Testing celda_C")
4 4
 
5 5
 celdac <- simulateCells.celda_C(K=10)
6
-celdaC.res <- celda(counts=celdac$counts,model="celda_C",nchains=1,cores=4,K=10)
6
+#celdaC.res <- celda(counts=celdac$counts,model="celda_C",nchains=1,cores=4,K=10)
7 7
 
8
-test_that("finalClusterAssignment.celda_C",{
9
-  expect_equal(celdaC.res$res.list[[1]]$z, finalClusterAssignment(celdaC.res$res.list[[1]]))
10
-})
8
+#test_that("finalClusterAssignment.celda_C",{
9
+#  expect_equal(celdaC.res$res.list[[1]]$z, finalClusterAssignment(celdaC.res$res.list[[1]]))
10
+#})
11 11
 
12 12