Browse code

add celda_heatmap

irisapo authored on 24/03/2017 16:15:00
Showing 11 changed files

... ...
@@ -25,3 +25,5 @@ vignettes/*.pdf
25 25
 *.knit.md
26 26
 .Rproj.user
27 27
 inst/doc
28
+celda.Rproj
29
+.DS_Store
... ...
@@ -9,7 +9,9 @@ Depends:
9 9
     R (>= 3.2.2)
10 10
 Imports:
11 11
     gtools,
12
-    entropy
12
+    entropy,
13
+    RColorBrewer,
14
+    pheatmap
13 15
 Suggests:
14 16
 	testthat,
15 17
 	knitr,
... ...
@@ -20,6 +22,6 @@ VignetteBuilder:
20 22
 License: MIT
21 23
 Encoding: UTF-8
22 24
 LazyData: true
23
-RoxygenNote: 5.0.1
25
+RoxygenNote: 6.0.1
24 26
 BugReports:
25
-	https://github.com/definitelysean/celda/issues
26 27
\ No newline at end of file
28
+	https://github.com/definitelysean/celda/issues
... ...
@@ -1,9 +1,10 @@
1
-# Generated by roxygen2: do not edit by hand
2
-
3
-export(cCG.calcLLFromVariables)
4
-export(cCG.generateCells)
5
-export(celda)
6
-export(celda_CG)
7
-export(geneCluster)
8
-export(generateCells_gene_clustering)
9
-import(foreach)
1
+# Generated by roxygen2: do not edit by hand
2
+
3
+export(cCG.calcLLFromVariables)
4
+export(cCG.generateCells)
5
+export(celda)
6
+export(celda_CG)
7
+export(celda_heatmap)
8
+export(geneCluster)
9
+export(generateCells_gene_clustering)
10
+import(foreach)
10 11
new file mode 100644
... ...
@@ -0,0 +1,42 @@
1
## TODO: (1) scale the row height to the number of genes; (2) pick a more
##       contrasting color palette; (3) rework the annotation handling.

#' Plot a heatmap of the counts data
#'
#' Draws a heatmap of a count matrix with \code{pheatmap::pheatmap}, using a
#' 100-step color gradient interpolated from an RColorBrewer palette.
#'
#' @param counts The counts matrix (genes in rows, cells in columns). Missing
#'   row or column names are replaced by the row / column indices.
#' @param K Total number of cell populations. Used as the number of groups
#'   the cell (column) dendrogram is cut into; if omitted, \code{L} is used
#'   instead.
#' @param z A numeric vector of cluster assignments (currently unused).
#' @param L Total number of transcriptional states. Used as the number of
#'   groups the gene (row) dendrogram is cut into.
#' @param col Name of the RColorBrewer palette from which the heatmap color
#'   gradient is interpolated.
#' @param cluster_gene Logical; should genes (rows) be clustered?
#' @param cluster_cell Logical; should cells (columns) be clustered?
#' @param annotation_gene Data frame that specifies the annotations for genes
#'   (rows). Optional; no gene annotation is drawn when omitted.
#' @param annotation_cell Data frame that specifies the annotations for cells
#'   (columns). Optional; no cell annotation is drawn when omitted.
#' @return The value returned by \code{pheatmap::pheatmap}.
#' @examples
#' \dontrun{
#' counts <- matrix(rpois(200, lambda = 5), nrow = 20)
#' celda_heatmap(counts, K = 2, z = rep(1:2, each = 5), L = 3)
#' }
#' @export
celda_heatmap <- function(counts, K, z, L, col = "YlOrBr",
                          cluster_gene = TRUE, cluster_cell = FALSE,
                          annotation_gene, annotation_cell) {
  # Row and column names are checked independently: a matrix lacking both
  # must receive both (an `else if` here would skip the column names).
  if (is.null(rownames(counts))) {
    rownames(counts) <- seq_len(nrow(counts))
  }
  if (is.null(colnames(counts))) {
    colnames(counts) <- seq_len(ncol(counts))
  }

  # The annotation arguments have no default in the signature; substitute NA
  # (no annotation) so that omitting them does not fail inside pheatmap.
  if (missing(annotation_gene)) {
    annotation_gene <- NA
  }
  if (missing(annotation_cell)) {
    annotation_cell <- NA
  }

  # Interpolate the 9-color Brewer palette to a smooth 100-step gradient.
  col_pal <- grDevices::colorRampPalette(
    RColorBrewer::brewer.pal(n = 9, name = col)
  )(100)

  pheatmap::pheatmap(
    counts,
    color = col_pal,
    cluster_rows = cluster_gene,
    cluster_cols = cluster_cell,
    annotation_row = annotation_gene,
    annotation_col = annotation_cell,
    # Genes (rows) are cut into L transcriptional states.
    cutree_rows = L,
    # Cells (columns) are cut into K populations; fall back to L when K is
    # not supplied so that calls which never passed K keep working.
    cutree_cols = if (missing(K)) L else K,
    fontsize = 6.5,
    fontsize_col = 5
  )
}
0 43
\ No newline at end of file
... ...
@@ -1,31 +1,30 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/celda_G.R
3
-\name{calcGibbsProb}
4
-\alias{calcGibbsProb}
5
-\title{Calculate Log Likelihood For Single Set of Cluster Assignments (Gene Clustering)}
6
-\usage{
7
-calcGibbsProb(ix, r, z, k, a, b, g)
8
-}
9
-\arguments{
10
-\item{ix}{The index in z corresponding to the cell currently being considered during Gibbs sampling}
11
-
12
-\item{r}{A numeric count matrix}
13
-
14
-\item{z}{A numeric vector of cluster assignments}
15
-
16
-\item{k}{The number of clusters being considered}
17
-
18
-\item{a}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
19
-
20
-\item{b}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
21
-
22
-\item{g}{The number of cell states ("topics")}
23
-}
24
-\description{
25
-This function calculates the log-likelihood of a cell's membership to each possible clusters,
26
-given the cluster assignment for all other cells.
27
-}
28
-\examples{
29
-TODO 
30
-}
31
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_G.R
3
+\name{calcGibbsProb}
4
+\alias{calcGibbsProb}
5
+\title{Calculate Log Likelihood For Single Set of Cluster Assignments (Gene Clustering)}
6
+\usage{
7
+calcGibbsProb(ix, r, z, k, a, b, g)
8
+}
9
+\arguments{
10
+\item{ix}{The index in z corresponding to the cell currently being considered during Gibbs sampling}
11
+
12
+\item{r}{A numeric count matrix}
13
+
14
+\item{z}{A numeric vector of cluster assignments}
15
+
16
+\item{k}{The number of clusters being considered}
17
+
18
+\item{a}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
19
+
20
+\item{b}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
21
+
22
+\item{g}{The number of cell states ("topics")}
23
+}
24
+\description{
25
+This function calculates the log-likelihood of a cell's membership in each possible cluster,
26
+given the cluster assignment for all other cells.
27
+}
28
+\examples{
29
+TODO 
30
+}
... ...
@@ -1,31 +1,30 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/celda_G.R
3
-\name{calcLL_gene_clustering}
4
-\alias{calcLL_gene_clustering}
5
-\title{Calculate Log Likelihood For A Set of Gene Clusterings (Gene Clustering)}
6
-\usage{
7
-calcLL_gene_clustering(counts, z, k, alpha, beta, gamma)
8
-}
9
-\arguments{
10
-\item{counts}{A numeric count matrix}
11
-
12
-\item{z}{A numeric vector of cluster assignments}
13
-
14
-\item{k}{The number of clusters being considered}
15
-
16
-\item{alpha}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
17
-
18
-\item{beta}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
19
-
20
-\item{gamma}{The number of cell states ("topics")}
21
-}
22
-\description{
23
-This function calculates the log likelihood of each clustering of genes generated
24
-over multiple iterations of Gibbs sampling.
25
-}
26
-\examples{
27
-TODO
28
-}
29
-\keyword{likelihood}
30
-\keyword{log}
31
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_G.R
3
+\name{calcLL_gene_clustering}
4
+\alias{calcLL_gene_clustering}
5
+\title{Calculate Log Likelihood For A Set of Gene Clusterings (Gene Clustering)}
6
+\usage{
7
+calcLL_gene_clustering(counts, z, k, alpha, beta, gamma)
8
+}
9
+\arguments{
10
+\item{counts}{A numeric count matrix}
11
+
12
+\item{z}{A numeric vector of cluster assignments}
13
+
14
+\item{k}{The number of clusters being considered}
15
+
16
+\item{alpha}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
17
+
18
+\item{beta}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
19
+
20
+\item{gamma}{The number of cell states ("topics")}
21
+}
22
+\description{
23
+This function calculates the log likelihood of each clustering of genes generated
24
+over multiple iterations of Gibbs sampling.
25
+}
26
+\examples{
27
+TODO
28
+}
29
+\keyword{likelihood}
30
+\keyword{log}
... ...
@@ -1,33 +1,32 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/celda_G.R
3
-\name{calcLLlite_gene_clustering}
4
-\alias{calcLLlite_gene_clustering}
5
-\title{Calculate Log Likelihood For Single Set of Cluster Assignments (Gene Clustering)}
6
-\usage{
7
-calcLLlite_gene_clustering(ix, counts, z, k, alpha, beta, gamma)
8
-}
9
-\arguments{
10
-\item{ix}{The index of the cell being assigned a cluster during the current iteration of Gibbs sampling}
11
-
12
-\item{counts}{A numeric count matrix}
13
-
14
-\item{z}{A numeric vector of cluster assignments}
15
-
16
-\item{k}{The number of clusters being considered}
17
-
18
-\item{alpha}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
19
-
20
-\item{beta}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
21
-
22
-\item{gamma}{The number of cell states ("topics")}
23
-}
24
-\description{
25
-This function calculates the log-likelihood of a given set of cluster assigments for the samples
26
-represented in the provided count matrix.
27
-}
28
-\examples{
29
-TODO
30
-}
31
-\keyword{likelihood}
32
-\keyword{log}
33
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_G.R
3
+\name{calcLLlite_gene_clustering}
4
+\alias{calcLLlite_gene_clustering}
5
+\title{Calculate Log Likelihood For Single Set of Cluster Assignments (Gene Clustering)}
6
+\usage{
7
+calcLLlite_gene_clustering(ix, counts, z, k, alpha, beta, gamma)
8
+}
9
+\arguments{
10
+\item{ix}{The index of the cell being assigned a cluster during the current iteration of Gibbs sampling}
11
+
12
+\item{counts}{A numeric count matrix}
13
+
14
+\item{z}{A numeric vector of cluster assignments}
15
+
16
+\item{k}{The number of clusters being considered}
17
+
18
+\item{alpha}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
19
+
20
+\item{beta}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
21
+
22
+\item{gamma}{The number of cell states ("topics")}
23
+}
24
+\description{
25
+This function calculates the log-likelihood of a given set of cluster assignments for the samples
26
+represented in the provided count matrix.
27
+}
28
+\examples{
29
+TODO
30
+}
31
+\keyword{likelihood}
32
+\keyword{log}
34 33
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_heatmap.R
3
+\name{celda_heatmap}
4
+\alias{celda_heatmap}
5
+\title{plot the heatmap of the counts data}
6
+\usage{
7
+celda_heatmap(counts, K, z, L, col = "YlOrBr", cluster_gene = TRUE,
8
+  cluster_cell = FALSE, annotation_gene, annotation_cell)
9
+}
10
+\arguments{
11
+\item{counts}{the counts matrix}
12
+
13
+\item{K}{The number of clusters being considered  (Question1)or: Total number of cell populations??}
14
+
15
+\item{z}{A numeric vector of cluster assignments}
16
+
17
+\item{L}{Total number of transcriptional states}
18
+
19
+\item{col}{name of the RColorBrewer palette used to build the heatmap color gradient}
20
+
21
+\item{cluster_gene}{boolean values determining if genes should be clustered}
22
+
23
+\item{cluster_cell}{boolean values determining if cells should be clustered}
24
+
25
+\item{annotation_gene}{data frame that specifies the annotations for genes}
26
+
27
+\item{annotation_cell}{data frame that specifies the annotations for cells}
28
+}
29
+\description{
30
+plot the heatmap of the counts data
31
+}
... ...
@@ -1,45 +1,44 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/celda_G.R
3
-\name{geneCluster}
4
-\alias{geneCluster}
5
-\title{Cluster Genes from Single Cell Sequencing Data}
6
-\usage{
7
-geneCluster(counts, k, a = 1, b = 1, g = 1, max.iter = 25,
8
-  min.cell = 5, seed = 12345, best = TRUE, kick = TRUE,
9
-  converge = 1e-05)
10
-}
11
-\arguments{
12
-\item{counts}{A numeric count matrix}
13
-
14
-\item{k}{The number of clusters to generate}
15
-
16
-\item{a}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
17
-
18
-\item{b}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
19
-
20
-\item{g}{Number of cell states ("topics")}
21
-
22
-\item{max.iter}{Maximum iterations of Gibbs sampling to perform. Defaults to 25.}
23
-
24
-\item{min.cell}{Desired minimum number of cells per cluster}
25
-
26
-\item{seed}{Parameter to set.seed() for random number generation}
27
-
28
-\item{best}{Whether to return the cluster assignment with the highest log-likelihood. Defaults to TRUE. Returns last generated cluster assignment when FALSE.}
29
-
30
-\item{kick}{Whether to randomize cluster assignments when a cluster has fewer than min.cell cells assigned to it during Gibbs sampling. (TODO param currently unused?)}
31
-
32
-\item{converge}{Threshold at which to consider the Markov chain converged}
33
-}
34
-\description{
35
-geneCluster provides cluster assignments for all genes in a provided single-cell 
36
-sequencing count matrix, using the celda Bayesian hierarchical model.
37
-}
38
-\examples{
39
-TODO
40
-}
41
-\keyword{LDA}
42
-\keyword{clustering}
43
-\keyword{gene}
44
-\keyword{gibbs}
45
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_G.R
3
+\name{geneCluster}
4
+\alias{geneCluster}
5
+\title{Cluster Genes from Single Cell Sequencing Data}
6
+\usage{
7
+geneCluster(counts, k, a = 1, b = 1, g = 1, max.iter = 25,
8
+  min.cell = 5, seed = 12345, best = TRUE, kick = TRUE,
9
+  converge = 1e-05)
10
+}
11
+\arguments{
12
+\item{counts}{A numeric count matrix}
13
+
14
+\item{k}{The number of clusters to generate}
15
+
16
+\item{a}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
17
+
18
+\item{b}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
19
+
20
+\item{g}{Number of cell states ("topics")}
21
+
22
+\item{max.iter}{Maximum iterations of Gibbs sampling to perform. Defaults to 25.}
23
+
24
+\item{min.cell}{Desired minimum number of cells per cluster}
25
+
26
+\item{seed}{Parameter to set.seed() for random number generation}
27
+
28
+\item{best}{Whether to return the cluster assignment with the highest log-likelihood. Defaults to TRUE. Returns last generated cluster assignment when FALSE.}
29
+
30
+\item{kick}{Whether to randomize cluster assignments when a cluster has fewer than min.cell cells assigned to it during Gibbs sampling. (TODO param currently unused?)}
31
+
32
+\item{converge}{Threshold at which to consider the Markov chain converged}
33
+}
34
+\description{
35
+geneCluster provides cluster assignments for all genes in a provided single-cell 
36
+sequencing count matrix, using the celda Bayesian hierarchical model.
37
+}
38
+\examples{
39
+TODO
40
+}
41
+\keyword{LDA}
42
+\keyword{clustering}
43
+\keyword{gene}
44
+\keyword{gibbs}
... ...
@@ -1,32 +1,31 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/celda_G.R
3
-\name{generateCells_gene_clustering}
4
-\alias{generateCells_gene_clustering}
5
-\title{Generate Count Data}
6
-\usage{
7
-generateCells_gene_clustering(C = 100, N.Range = c(500, 5000), G = 1000,
8
-  k = 5, a = 1, b = 1, g = 1, seed = 12345)
9
-}
10
-\arguments{
11
-\item{C}{The number of cells}
12
-
13
-\item{N.Range}{The range of counts each gene should have}
14
-
15
-\item{G}{The number of genes for which to simulate counts}
16
-
17
-\item{a}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
18
-
19
-\item{b}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
20
-
21
-\item{g}{The number of cell states ("topics")' @param k The number of gene clusters to simulate from}
22
-
23
-\item{seed}{Parameter to set.seed() for random number generation}
24
-}
25
-\description{
26
-Generate a simulated count matrix, based off a generative distribution whose 
27
-parameters can be provided by the user.
28
-}
29
-\examples{
30
-TODO
31
-}
32
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_G.R
3
+\name{generateCells_gene_clustering}
4
+\alias{generateCells_gene_clustering}
5
+\title{Generate Count Data}
6
+\usage{
7
+generateCells_gene_clustering(C = 100, N.Range = c(500, 5000), G = 1000,
8
+  k = 5, a = 1, b = 1, g = 1, seed = 12345)
9
+}
10
+\arguments{
11
+\item{C}{The number of cells}
12
+
13
+\item{N.Range}{The range of counts each gene should have}
14
+
15
+\item{G}{The number of genes for which to simulate counts}
16
+
17
+\item{a}{Vector of non-zero concentration parameters for sample <-> cluster assignment Dirichlet distribution}
18
+
19
+\item{b}{Vector of non-zero concentration parameters for cluster <-> gene assignment Dirichlet distribution}
20
+
21
+\item{g}{The number of cell states ("topics")}
+
+\item{k}{The number of gene clusters to simulate from}
22
+
23
+\item{seed}{Parameter to set.seed() for random number generation}
24
+}
25
+\description{
26
+Generate a simulated count matrix, based off a generative distribution whose 
27
+parameters can be provided by the user.
28
+}
29
+\examples{
30
+TODO
31
+}
... ...
@@ -1,24 +1,23 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/celda_G.R
3
-\name{sample.ll}
4
-\alias{sample.ll}
5
-\title{Sample log-likelihood probabilities}
6
-\usage{
7
-sample.ll(ll.probs)
8
-}
9
-\arguments{
10
-\item{counts}{A numeric count matrix}
11
-}
12
-\value{
13
-A single integer in 1:k corresponding to a cluster assignment
14
-}
15
-\description{
16
-Given a set of log-likelihoods for cluster membership, return a single cluster assignment.
17
-}
18
-\examples{
19
-TODO
20
-}
21
-\keyword{likelihood}
22
-\keyword{log}
23
-\keyword{sample}
24
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/celda_G.R
3
+\name{sample.ll}
4
+\alias{sample.ll}
5
+\title{Sample log-likelihood probabilities}
6
+\usage{
7
+sample.ll(ll.probs)
8
+}
9
+\arguments{
10
+\item{ll.probs}{A set of log-likelihoods for cluster membership}
11
+}
12
+\value{
13
+A single integer in 1:k corresponding to a cluster assignment
14
+}
15
+\description{
16
+Given a set of log-likelihoods for cluster membership, return a single cluster assignment.
17
+}
18
+\examples{
19
+TODO
20
+}
21
+\keyword{likelihood}
22
+\keyword{log}
23
+\keyword{sample}