Browse code

createscCoGAPSsets

Genevieve Stein-O'Brien authored on 27/02/2018 20:42:09
Showing 3 changed files

... ...
@@ -1,48 +1,34 @@
1
-#' Create Gene Sets for scCoGAPS
1
+#' createGWCoGAPSSets
2 2
 #'
3
-#' @details factors whole genome data into randomly generated sets for indexing
3
+#'\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing;
4
+#'
5
+#'@param data data matrix with unique rownames
6
+#'@param nSets number of sets for parallelization
7
+#'@param outRDA name of output file
8
+#'@param keep logical indicating whether or not to save gene set list. Default is TRUE.
9
+#'@export
10
+#'@return list with randomly generated sets of genes from whole genome data
11
+#'@examples \dontrun{
12
+#'createGWCoGAPSSet(D,nSets=nSets)
13
+#'}
4 14
 #'
5
-#' @param D data matrix
6
-#' @param S uncertainty matrix
7
-#' @param nSets number of sets to partition the data into
8
-#' @param simulationName name used to identify files created by this simulation
9
-#' @param samplingRatio vector of relative quantities to use for sampling celltypes
10
-#' @param annotionObj vector of same length as number of columns of D 
11
-#' @return simulationName used to identify saved files
12
-#' @examples
13
-#' data(SimpSim)
14
-#' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
15
-#' @export
16
-createscCoGAPSSets <- function(D, S, nSets, simulationName,samplingRatio=NULL)
17
-{
18
-    # check gene names
19
-    if (length(unique(colnames(D))) != length(colnames(D)))
20
-    {
21
-        warning("Cell identifiers not unique!")
22
-    }
23
-
24
-    # partition data by sampling random sets of cells
25
-    cells <- 1:ncol(D)
26
-    setSize <- floor(length(cells) / nSets)
27
-    for (set in 1:nSets)
28
-    {
29
-        
30
-        if(is.null(samplingRatio)){
31
-        # sample cell names
32
-            sampleSize <- ifelse(set == nSets, length(cells), setSize)
33
-            cellset <- sample(cells, sampleSize, replace=FALSE)
34
-            cells <- cells[!(cells %in% cellset)]
35
-        } else {
36
-        if(length(unique(annotionObj))!=length(samplingRatio)){warning("Not all celltypes will be sampled from.")}
37
-        ct.indx<-lapply(unique(annotionObj),function(x) which(annotionObj == x))
38
-        cellset<-sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE)
39
-        }
40 15
 
41
-        # partition data
42
-        sampleD <- D[,cellset]
43
-        sampleS <- S[,cellset]
44
-        save(sampleD, sampleS, file=paste(simulationName, "_partition_", set,
45
-            ".RData", sep=""));
46
-    }
47
-    return(simulationName)
16
+createGWCoGAPSSets<-function(data=D, #data matrix with unique rownames
17
+	nSets=nSets, #number of sets for parallelization
18
+	outRDA="GenesInCoGAPSSets.Rda", #name of output file
19
+	keep=TRUE #logical indicating whether or not to save gene set list. Default is TRUE.
20
+	){
21
+genes=rownames(data)
22
+setSize=floor(length(genes)/nSets)
23
+genesInSets <- list()
24
+for (set in 1:nSets) {
25
+  if(set!=nSets){genesInSets[[set]] <- sample(genes,setSize)}
26
+  if(set==nSets){genesInSets[[set]] <- genes}
27
+  genes=genes[!genes%in%genesInSets[[set]]]
28
+}
29
+if(!identical(sort(unlist(genesInSets)),sort(rownames(data)))){print("Gene identifiers not unique!")}
30
+if(keep==TRUE){save(list=c('genesInSets'),file=outRDA)}
31
+return(genesInSets)
48 32
 }
33
+
34
+
49 35
new file mode 100644
... ...
@@ -0,0 +1,48 @@
1
+#' Create Gene Sets for scCoGAPS
2
+#'
3
+#' @details factors whole genome data into randomly generated sets for indexing
4
+#'
5
+#' @param D data matrix
6
+#' @param S uncertainty matrix
7
+#' @param nSets number of sets to partition the data into
8
+#' @param simulationName name used to identify files created by this simulation
9
+#' @param samplingRatio vector of relative quantities to use for sampling celltypes
10
+#' @param annotionObj vector of same length as number of columns of D 
11
+#' @return simulationName used to identify saved files
12
+#' @examples
13
+#' data(SimpSim)
14
+#' createscCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, "example")
15
+#' @export
16
+createscCoGAPSSets <- function(D, S, nSets, simulationName,samplingRatio=NULL)
17
+{
18
+    # check gene names
19
+    if (length(unique(colnames(D))) != length(colnames(D)))
20
+    {
21
+        warning("Cell identifiers not unique!")
22
+    }
23
+
24
+    # partition data by sampling random sets of cells
25
+    cells <- 1:ncol(D)
26
+    setSize <- floor(length(cells) / nSets)
27
+    for (set in 1:nSets)
28
+    {
29
+        
30
+        if(is.null(samplingRatio)){
31
+        # sample cell names
32
+            sampleSize <- ifelse(set == nSets, length(cells), setSize)
33
+            cellset <- sample(cells, sampleSize, replace=FALSE)
34
+            cells <- cells[!(cells %in% cellset)]
35
+        } else {
36
+        if(length(unique(annotionObj))!=length(samplingRatio)){warning("Not all celltypes will be sampled from.")}
37
+        ct.indx<-lapply(unique(annotionObj),function(x) which(annotionObj == x))
38
+        cellset<-sample(colnames(D)[ct.indx[[x]]], samplingRatio[x],replace=TRUE)
39
+        }
40
+
41
+        # partition data
42
+        sampleD <- D[,cellset]
43
+        sampleS <- S[,cellset]
44
+        save(sampleD, sampleS, file=paste(simulationName, "_partition_", set,
45
+            ".RData", sep=""));
46
+    }
47
+    return(simulationName)
48
+}
... ...
@@ -1,4 +1,4 @@
1
-#' patternMatch4Parallel
1
+#' patternMatch4singleCell
2 2
 #'
3 3
 #' @param Ptot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
4 4
 #' @param nSets number of parallel sets used to generate \code{Ptot}
... ...
@@ -12,7 +12,10 @@
12 12
 #' concensus pattern is also returned.
13 13
 #' @seealso \code{\link{agnes}}
14 14
 #' @export
15
-patternMatch4Parallel <- function(Ptot, nSets, cnt, minNS, 
15
+#' 
16
+#' 
17
+
18
+patternMatch4singleCell <- function(Ptot, nSets, cnt, minNS, 
16 19
 cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
17 20
 {
18 21
     if (!is.null(minNS))
... ...
@@ -24,7 +27,7 @@ cluster.method="complete", ignore.NA=FALSE, bySet=FALSE, ...)
24 27
         Ptot <- Ptot[complete.cases(Ptot),]
25 28
 
26 29
     # corr dist
27
-    corr.dist=cor(t(Ptot))
30
+    corr.dist=cor(Ptot)
28 31
     corr.dist=1-corr.dist
29 32
     # cluster
30 33
     #library(cluster)