... | ... |
@@ -1,48 +1,34 @@ |
1 |
-#' Create Gene Sets for scCoGAPS |
|
1 |
+#' createGWCoGAPSSets |
|
2 | 2 |
#' |
3 |
-#' @details factors whole genome data into randomly generated sets for indexing |
|
3 |
+#'\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing; |
|
4 |
+#' |
|
5 |
+#'@param data data matrix with unique rownames |
|
6 |
+#'@param nSets number of sets for parallelization |
|
7 |
+#'@param outRDA name of output file |
|
8 |
+#'@param keep logical indicating whether or not to save gene set list. Default is TRUE. |
|
9 |
+#'@export |
|
10 |
+#'@return list with randomly generated sets of genes from whole genome data |
|
11 |
+#'@examples \dontrun{ |
|
12 |
+#'createGWCoGAPSSet(D,nSets=nSets) |
|
13 |
+#'} |
|
4 | 14 |
#' |
5 |
-#' @param D data matrix |
|
6 |
-#' @param S uncertainty matrix |
|
7 |
-#' @param nSets number of sets to partition the data into |
|
8 |
-#' @param simulationName name used to identify files created by this simulation |
|
9 |
-#' @param samplingRatio vector of relative quantities to use for sampling celltypes |
|
10 |
-#' @param annotionObj vector of same length as number of columns of D |
|
11 |
-#' @return simulationName used to identify saved files |
|
12 |
-#' @examples |
|
13 |
-#' data(SimpSim) |
|
14 |
-#' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example") |
|
15 |
-#' @export |
|
16 |
-createscCoGAPSSets <- function(D, S, nSets, simulationName,samplingRatio=NULL) |
|
17 |
-{ |
|
18 |
- # check gene names |
|
19 |
- if (length(unique(colnames(D))) != length(colnames(D))) |
|
20 |
- { |
|
21 |
- warning("Cell identifiers not unique!") |
|
22 |
- } |
|
23 |
- |
|
24 |
- # partition data by sampling random sets of cells |
|
25 |
- cells <- 1:ncol(D) |
|
26 |
- setSize <- floor(length(cells) / nSets) |
|
27 |
- for (set in 1:nSets) |
|
28 |
- { |
|
29 |
- |
|
30 |
- if(is.null(samplingRatio)){ |
|
31 |
- # sample cell names |
|
32 |
- sampleSize <- ifelse(set == nSets, length(cells), setSize) |
|
33 |
- cellset <- sample(cells, sampleSize, replace=FALSE) |
|
34 |
- cells <- cells[!(cells %in% cellset)] |
|
35 |
- } else { |
|
36 |
- if(length(unique(annotionObj))!=length(samplingRatio)){warning("Not all celltypes will be sampled from.")} |
|
37 |
- ct.indx<-lapply(unique(annotionObj),function(x) which(annotionObj == x)) |
|
38 |
- cellset<-sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE) |
|
39 |
- } |
|
40 | 15 |
|
41 |
- # partition data |
|
42 |
- sampleD <- D[,cellset] |
|
43 |
- sampleS <- S[,cellset] |
|
44 |
- save(sampleD, sampleS, file=paste(simulationName, "_partition_", set, |
|
45 |
- ".RData", sep="")); |
|
46 |
- } |
|
47 |
- return(simulationName) |
|
16 |
+createGWCoGAPSSets<-function(data=D, #data matrix with unique rownames |
|
17 |
+ nSets=nSets, #number of sets for parallelization |
|
18 |
+ outRDA="GenesInCoGAPSSets.Rda", #name of output file |
|
19 |
+ keep=TRUE #logical indicating whether or not to save gene set list. Default is TRUE. |
|
20 |
+ ){ |
|
21 |
+genes=rownames(data) |
|
22 |
+setSize=floor(length(genes)/nSets) |
|
23 |
+genesInSets <- list() |
|
24 |
+for (set in 1:nSets) { |
|
25 |
+ if(set!=nSets){genesInSets[[set]] <- sample(genes,setSize)} |
|
26 |
+ if(set==nSets){genesInSets[[set]] <- genes} |
|
27 |
+ genes=genes[!genes%in%genesInSets[[set]]] |
|
28 |
+} |
|
29 |
+if(!identical(sort(unlist(genesInSets)),sort(rownames(data)))){print("Gene identifiers not unique!")} |
|
30 |
+if(keep==TRUE){save(list=c('genesInSets'),file=outRDA)} |
|
31 |
+return(genesInSets) |
|
48 | 32 |
} |
33 |
+ |
|
34 |
+ |
49 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,48 @@ |
1 |
+#' Create Gene Sets for scCoGAPS |
|
2 |
+#' |
|
3 |
+#' @details factors whole genome data into randomly generated sets for indexing |
|
4 |
+#' |
|
5 |
+#' @param D data matrix |
|
6 |
+#' @param S uncertainty matrix |
|
7 |
+#' @param nSets number of sets to partition the data into |
|
8 |
+#' @param simulationName name used to identify files created by this simulation |
|
9 |
+#' @param samplingRatio vector of relative quantities to use for sampling celltypes |
|
10 |
+#' @param annotionObj vector of same length as number of columns of D |
|
11 |
+#' @return simulationName used to identify saved files |
|
12 |
+#' @examples |
|
13 |
+#' data(SimpSim) |
|
14 |
+#' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example") |
|
15 |
+#' @export |
|
16 |
+createscCoGAPSSets <- function(D, S, nSets, simulationName,samplingRatio=NULL) |
|
17 |
+{ |
|
18 |
+ # check gene names |
|
19 |
+ if (length(unique(colnames(D))) != length(colnames(D))) |
|
20 |
+ { |
|
21 |
+ warning("Cell identifiers not unique!") |
|
22 |
+ } |
|
23 |
+ |
|
24 |
+ # partition data by sampling random sets of cells |
|
25 |
+ cells <- 1:ncol(D) |
|
26 |
+ setSize <- floor(length(cells) / nSets) |
|
27 |
+ for (set in 1:nSets) |
|
28 |
+ { |
|
29 |
+ |
|
30 |
+ if(is.null(samplingRatio)){ |
|
31 |
+ # sample cell names |
|
32 |
+ sampleSize <- ifelse(set == nSets, length(cells), setSize) |
|
33 |
+ cellset <- sample(cells, sampleSize, replace=FALSE) |
|
34 |
+ cells <- cells[!(cells %in% cellset)] |
|
35 |
+ } else { |
|
36 |
+ if(length(unique(annotionObj))!=length(samplingRatio)){warning("Not all celltypes will be sampled from.")} |
|
37 |
+ ct.indx<-lapply(unique(annotionObj),function(x) which(annotionObj == x)) |
|
38 |
+ cellset<-sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE) |
|
39 |
+ } |
|
40 |
+ |
|
41 |
+ # partition data |
|
42 |
+ sampleD <- D[,cellset] |
|
43 |
+ sampleS <- S[,cellset] |
|
44 |
+ save(sampleD, sampleS, file=paste(simulationName, "_partition_", set, |
|
45 |
+ ".RData", sep="")); |
|
46 |
+ } |
|
47 |
+ return(simulationName) |
|
48 |
+} |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#' patternMatch4Parallel |
|
1 |
+#' patternMatch4singleCell |
|
2 | 2 |
#' |
3 | 3 |
#' @param Ptot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet} |
4 | 4 |
#' @param nSets number of parallel sets used to generate \code{Ptot} |
... | ... |
@@ -12,7 +12,10 @@ |
12 | 12 |
#' concensus pattern is also returned. |
13 | 13 |
#' @seealso \code{\link{agnes}} |
14 | 14 |
#' @export |
15 |
-patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS, |
|
15 |
+#' |
|
16 |
+#' |
|
17 |
+ |
|
18 |
+patternMatch4singleCell <- function(Ptot, nSets, cnt, minNS, |
|
16 | 19 |
cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...) |
17 | 20 |
{ |
18 | 21 |
if (!is.null(minNS)) |
... | ... |
@@ -24,7 +27,7 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...) |
24 | 27 |
Ptot <- Ptot[complete.cases(Ptot),] |
25 | 28 |
|
26 | 29 |
# corr dist |
27 |
- corr.dist=cor(t(Ptot)) |
|
30 |
+ corr.dist=cor(Ptot) |
|
28 | 31 |
corr.dist=1-corr.dist |
29 | 32 |
# cluster |
30 | 33 |
#library(cluster) |