... | ... |
@@ -27,7 +27,7 @@ |
27 | 27 |
column_name = coln, |
28 | 28 |
sample = sample, |
29 | 29 |
row.names = coln) |
30 |
- |
|
30 |
+ |
|
31 | 31 |
return(sce) |
32 | 32 |
} |
33 | 33 |
|
... | ... |
@@ -41,7 +41,8 @@ |
41 | 41 |
barcodesFileNames, |
42 | 42 |
gzipped, |
43 | 43 |
class, |
44 |
- delayedArray) { |
|
44 |
+ delayedArray, |
|
45 |
+ rowNamesDedup) { |
|
45 | 46 |
|
46 | 47 |
if (length(BUStoolsDirs) != length(samples)) { |
47 | 48 |
stop("'BUStoolsDirs' and 'samples' have unequal lengths!") |
... | ... |
@@ -68,6 +69,15 @@ |
68 | 69 |
} |
69 | 70 |
|
70 | 71 |
sce <- do.call(SingleCellExperiment::cbind, res) |
72 |
+ |
|
73 |
+ if (isTRUE(rowNamesDedup)) { |
|
74 |
+ if (any(duplicated(rownames(sce)))) { |
|
75 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
76 |
+ "... suffix to them.") |
|
77 |
+ } |
|
78 |
+ sce <- dedupRowNames(sce) |
|
79 |
+ } |
|
80 |
+ |
|
71 | 81 |
return(sce) |
72 | 82 |
} |
73 | 83 |
|
... | ... |
@@ -103,6 +113,8 @@ |
103 | 113 |
#' \link[base]{matrix} function). Default "Matrix". |
104 | 114 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
105 | 115 |
#' \link[DelayedArray]{DelayedArray-class} object or not. Default \code{FALSE}. |
116 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
117 |
+#' \code{TRUE}. |
|
106 | 118 |
#' @return A \code{SingleCellExperiment} object containing the count |
107 | 119 |
#' matrix, the gene annotation, and the cell annotation. |
108 | 120 |
#' @examples |
... | ... |
@@ -140,7 +152,8 @@ importBUStools <- function( |
140 | 152 |
barcodesFileNames = "genes.barcodes.txt", |
141 | 153 |
gzipped = "auto", |
142 | 154 |
class = c("Matrix", "matrix"), |
143 |
- delayedArray = FALSE) { |
|
155 |
+ delayedArray = FALSE, |
|
156 |
+ rowNamesDedup = TRUE) { |
|
144 | 157 |
|
145 | 158 |
class <- match.arg(class) |
146 | 159 |
|
... | ... |
@@ -152,5 +165,6 @@ importBUStools <- function( |
152 | 165 |
barcodesFileNames = barcodesFileNames, |
153 | 166 |
gzipped = gzipped, |
154 | 167 |
class = class, |
155 |
- delayedArray = delayedArray) |
|
168 |
+ delayedArray = delayedArray, |
|
169 |
+ rowNamesDedup = rowNamesDedup) |
|
156 | 170 |
} |
... | ... |
@@ -102,7 +102,7 @@ |
102 | 102 |
#' \link{readMM} function), or "matrix" (as returned by |
103 | 103 |
#' \link[base]{matrix} function). Default "Matrix". |
104 | 104 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
105 |
-#' \link[DelayedArray]{DelayedArray-class} object or not. Default \code{TRUE}. |
|
105 |
+#' \link[DelayedArray]{DelayedArray-class} object or not. Default \code{FALSE}. |
|
106 | 106 |
#' @return A \code{SingleCellExperiment} object containing the count |
107 | 107 |
#' matrix, the gene annotation, and the cell annotation. |
108 | 108 |
#' @examples |
... | ... |
@@ -140,7 +140,7 @@ importBUStools <- function( |
140 | 140 |
barcodesFileNames = "genes.barcodes.txt", |
141 | 141 |
gzipped = "auto", |
142 | 142 |
class = c("Matrix", "matrix"), |
143 |
- delayedArray = TRUE) { |
|
143 |
+ delayedArray = FALSE) { |
|
144 | 144 |
|
145 | 145 |
class <- match.arg(class) |
146 | 146 |
|
... | ... |
@@ -99,10 +99,10 @@ |
99 | 99 |
#' \code{samples}. |
100 | 100 |
#' @param class Character. The class of the expression matrix stored in the SCE |
101 | 101 |
#' object. Can be one of "Matrix" (as returned by |
102 |
-#' \link[Matrix]{readMM} function), or "matrix" (as returned by |
|
102 |
+#' \link{readMM} function), or "matrix" (as returned by |
|
103 | 103 |
#' \link[base]{matrix} function). Default "Matrix". |
104 | 104 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
105 |
-#' \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}. |
|
105 |
+#' \link[DelayedArray]{DelayedArray-class} object or not. Default \code{TRUE}. |
|
106 | 106 |
#' @return A \code{SingleCellExperiment} object containing the count |
107 | 107 |
#' matrix, the gene annotation, and the cell annotation. |
108 | 108 |
#' @examples |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,156 @@ |
1 |
+ |
|
2 |
+# dir <- "genecount" |
|
3 |
+.constructSCEFromBUStoolsOutputs <- function(dir, |
|
4 |
+ sample, |
|
5 |
+ matrixFileName, |
|
6 |
+ featuresFileName, |
|
7 |
+ barcodesFileName, |
|
8 |
+ gzipped, |
|
9 |
+ class, |
|
10 |
+ delayedArray) { |
|
11 |
+ |
|
12 |
+ cb <- .readBarcodes(file.path(dir, barcodesFileName)) |
|
13 |
+ fe <- .readFeatures(file.path(dir, featuresFileName)) |
|
14 |
+ ma <- .readMatrixMM(file.path(dir, matrixFileName), |
|
15 |
+ gzipped = gzipped, |
|
16 |
+ class = class, |
|
17 |
+ delayedArray = delayedArray) |
|
18 |
+ ma <- t(ma) |
|
19 |
+ |
|
20 |
+ coln <- paste(sample, cb[[1]], sep = "_") |
|
21 |
+ rownames(ma) <- fe[[1]] |
|
22 |
+ |
|
23 |
+ sce <- SingleCellExperiment::SingleCellExperiment( |
|
24 |
+ assays = list(counts = ma)) |
|
25 |
+ SummarizedExperiment::rowData(sce) <- fe |
|
26 |
+ SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cb, |
|
27 |
+ column_name = coln, |
|
28 |
+ sample = sample, |
|
29 |
+ row.names = coln) |
|
30 |
+ |
|
31 |
+ return(sce) |
|
32 |
+} |
|
33 |
+ |
|
34 |
+ |
|
35 |
+# main function |
|
36 |
+.importBUStools <- function( |
|
37 |
+ BUStoolsDirs, |
|
38 |
+ samples, |
|
39 |
+ matrixFileNames, |
|
40 |
+ featuresFileNames, |
|
41 |
+ barcodesFileNames, |
|
42 |
+ gzipped, |
|
43 |
+ class, |
|
44 |
+ delayedArray) { |
|
45 |
+ |
|
46 |
+ if (length(BUStoolsDirs) != length(samples)) { |
|
47 |
+ stop("'BUStoolsDirs' and 'samples' have unequal lengths!") |
|
48 |
+ } |
|
49 |
+ |
|
50 |
+ res <- vector("list", length = length(samples)) |
|
51 |
+ |
|
52 |
+ matrixFileNames <- .getVectorized(matrixFileNames, length(samples)) |
|
53 |
+ featuresFileNames <- .getVectorized(featuresFileNames, length(samples)) |
|
54 |
+ barcodesFileNames <- .getVectorized(barcodesFileNames, length(samples)) |
|
55 |
+ gzipped <- .getVectorized(gzipped, length(samples)) |
|
56 |
+ |
|
57 |
+ for (i in seq_along(samples)) { |
|
58 |
+ dir <- file.path(BUStoolsDirs[i]) |
|
59 |
+ scei <- .constructSCEFromBUStoolsOutputs(dir, |
|
60 |
+ sample = samples[i], |
|
61 |
+ matrixFileName = matrixFileNames[i], |
|
62 |
+ featuresFileName = featuresFileNames[i], |
|
63 |
+ barcodesFileName = barcodesFileNames[i], |
|
64 |
+ gzipped = gzipped[i], |
|
65 |
+ class = class, |
|
66 |
+ delayedArray = delayedArray) |
|
67 |
+ res[[i]] <- scei |
|
68 |
+ } |
|
69 |
+ |
|
70 |
+ sce <- do.call(SingleCellExperiment::cbind, res) |
|
71 |
+ return(sce) |
|
72 |
+} |
|
73 |
+ |
|
74 |
+ |
|
75 |
+#' @name importBUStools |
|
76 |
+#' @rdname importBUStools |
|
77 |
+#' @title Construct SCE object from BUStools output |
|
78 |
+#' @description Read the barcodes, features (genes), and matrix from BUStools |
|
79 |
+#' output. Import them |
|
80 |
+#' as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the |
|
81 |
+#' cells in the output files for BUStools 0.39.4 are not filtered. |
|
82 |
+#' @param BUStoolsDirs A vector of paths to BUStools output files. Each sample |
|
83 |
+#' should have its own path. For example: \code{./genecount}. |
|
84 |
+#' Must have the same length as \code{samples}. |
|
85 |
+#' @param samples A vector of user-defined sample names for the samples to be |
|
86 |
+#' imported. Must have the same length as \code{BUStoolsDirs}. |
|
87 |
+#' @param matrixFileNames Filenames for the Market Exchange Format (MEX) sparse |
|
88 |
+#' matrix files (.mtx files). Must have length 1 or the same |
|
89 |
+#' length as \code{samples}. |
|
90 |
+#' @param featuresFileNames Filenames for the feature annotation files. |
|
91 |
+#' Must have length 1 or the same length as \code{samples}. |
|
92 |
+#' @param barcodesFileNames Filenames for the cell barcode list file. |
|
93 |
+#' Must have length 1 or the same length as \code{samples}. |
|
94 |
+#' @param gzipped Boolean. \code{TRUE} if the BUStools output files |
|
95 |
+#' (barcodes.txt, genes.txt, and genes.mtx) were |
|
96 |
+#' gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools |
|
97 |
+#' 0.39.4. Default \code{"auto"} which automatically detects if the |
|
98 |
+#' files are gzip compressed. Must have length 1 or the same length as |
|
99 |
+#' \code{samples}. |
|
100 |
+#' @param class Character. The class of the expression matrix stored in the SCE |
|
101 |
+#' object. Can be one of "Matrix" (as returned by |
|
102 |
+#' \link[Matrix]{readMM} function), or "matrix" (as returned by |
|
103 |
+#' \link[base]{matrix} function). Default "Matrix". |
|
104 |
+#' @param delayedArray Boolean. Whether to read the expression matrix as |
|
105 |
+#' \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}. |
|
106 |
+#' @return A \code{SingleCellExperiment} object containing the count |
|
107 |
+#' matrix, the gene annotation, and the cell annotation. |
|
108 |
+#' @examples |
|
109 |
+#' # Example #1 |
|
110 |
+#' # FASTQ files were downloaded from |
|
111 |
+#' # https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0 |
|
112 |
+#' # /pbmc_1k_v3 |
|
113 |
+#' # They were concatenated as follows: |
|
114 |
+#' # cat pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_S1_L002_R1_001.fastq.gz > |
|
115 |
+#' # pbmc_1k_v3_R1.fastq.gz |
|
116 |
+#' # cat pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_S1_L002_R2_001.fastq.gz > |
|
117 |
+#' # pbmc_1k_v3_R2.fastq.gz |
|
118 |
+#' # The following BUStools command generates the gene, cell, and |
|
119 |
+#' # matrix files |
|
120 |
+#' |
|
121 |
+#' # bustools correct -w ./3M-february-2018.txt -p output.bus | \ |
|
122 |
+#' # bustools sort -T tmp/ -t 4 -p - | \ |
|
123 |
+#' # bustools count -o genecount/genes \ |
|
124 |
+#' # -g ./transcripts_to_genes.txt \ |
|
125 |
+#' # -e matrix.ec \ |
|
126 |
+#' # -t transcripts.txt \ |
|
127 |
+#' # --genecounts - |
|
128 |
+#' |
|
129 |
+#' # The top 20 genes and the first 20 cells are included in this example. |
|
130 |
+#' sce <- importBUStools( |
|
131 |
+#' BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/", |
|
132 |
+#' package = "singleCellTK"), |
|
133 |
+#' samples = "PBMC_1k_v3_20x20") |
|
134 |
+#' @export |
|
135 |
+importBUStools <- function( |
|
136 |
+ BUStoolsDirs, |
|
137 |
+ samples, |
|
138 |
+ matrixFileNames = "genes.mtx", |
|
139 |
+ featuresFileNames = "genes.genes.txt", |
|
140 |
+ barcodesFileNames = "genes.barcodes.txt", |
|
141 |
+ gzipped = "auto", |
|
142 |
+ class = c("Matrix", "matrix"), |
|
143 |
+ delayedArray = TRUE) { |
|
144 |
+ |
|
145 |
+ class <- match.arg(class) |
|
146 |
+ |
|
147 |
+ .importBUStools( |
|
148 |
+ BUStoolsDirs = BUStoolsDirs, |
|
149 |
+ samples = samples, |
|
150 |
+ matrixFileNames = matrixFileNames, |
|
151 |
+ featuresFileNames = featuresFileNames, |
|
152 |
+ barcodesFileNames = barcodesFileNames, |
|
153 |
+ gzipped = gzipped, |
|
154 |
+ class = class, |
|
155 |
+ delayedArray = delayedArray) |
|
156 |
+} |
1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,156 +0,0 @@ |
1 |
- |
|
2 |
-# dir <- "genecount" |
|
3 |
-.constructSCEFromBUStoolsOutputs <- function(dir, |
|
4 |
- sample, |
|
5 |
- matrixFileName, |
|
6 |
- featuresFileName, |
|
7 |
- barcodesFileName, |
|
8 |
- gzipped, |
|
9 |
- class, |
|
10 |
- delayedArray) { |
|
11 |
- |
|
12 |
- cb <- .readBarcodes(file.path(dir, barcodesFileName)) |
|
13 |
- fe <- .readFeatures(file.path(dir, featuresFileName)) |
|
14 |
- ma <- .readMatrixMM(file.path(dir, matrixFileName), |
|
15 |
- gzipped = gzipped, |
|
16 |
- class = class, |
|
17 |
- delayedArray = delayedArray) |
|
18 |
- ma <- t(ma) |
|
19 |
- |
|
20 |
- coln <- paste(sample, cb[[1]], sep = "_") |
|
21 |
- rownames(ma) <- fe[[1]] |
|
22 |
- |
|
23 |
- sce <- SingleCellExperiment::SingleCellExperiment( |
|
24 |
- assays = list(counts = ma)) |
|
25 |
- SummarizedExperiment::rowData(sce) <- fe |
|
26 |
- SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cb, |
|
27 |
- column_name = coln, |
|
28 |
- sample = sample, |
|
29 |
- row.names = coln) |
|
30 |
- |
|
31 |
- return(sce) |
|
32 |
-} |
|
33 |
- |
|
34 |
- |
|
35 |
-# main function |
|
36 |
-.importBUStools <- function( |
|
37 |
- BUStoolsDirs, |
|
38 |
- samples, |
|
39 |
- matrixFileNames, |
|
40 |
- featuresFileNames, |
|
41 |
- barcodesFileNames, |
|
42 |
- gzipped, |
|
43 |
- class, |
|
44 |
- delayedArray) { |
|
45 |
- |
|
46 |
- if (length(BUStoolsDirs) != length(samples)) { |
|
47 |
- stop("'BUStoolsDirs' and 'samples' have unequal lengths!") |
|
48 |
- } |
|
49 |
- |
|
50 |
- res <- vector("list", length = length(samples)) |
|
51 |
- |
|
52 |
- matrixFileNames <- .getVectorized(matrixFileNames, length(samples)) |
|
53 |
- featuresFileNames <- .getVectorized(featuresFileNames, length(samples)) |
|
54 |
- barcodesFileNames <- .getVectorized(barcodesFileNames, length(samples)) |
|
55 |
- gzipped <- .getVectorized(gzipped, length(samples)) |
|
56 |
- |
|
57 |
- for (i in seq_along(samples)) { |
|
58 |
- dir <- file.path(BUStoolsDirs[i]) |
|
59 |
- scei <- .constructSCEFromBUStoolsOutputs(dir, |
|
60 |
- sample = samples[i], |
|
61 |
- matrixFileName = matrixFileNames[i], |
|
62 |
- featuresFileName = featuresFileNames[i], |
|
63 |
- barcodesFileName = barcodesFileNames[i], |
|
64 |
- gzipped = gzipped[i], |
|
65 |
- class = class, |
|
66 |
- delayedArray = delayedArray) |
|
67 |
- res[[i]] <- scei |
|
68 |
- } |
|
69 |
- |
|
70 |
- sce <- do.call(SingleCellExperiment::cbind, res) |
|
71 |
- return(sce) |
|
72 |
-} |
|
73 |
- |
|
74 |
- |
|
75 |
-#' @name importBUStools |
|
76 |
-#' @rdname importBUStools |
|
77 |
-#' @title Construct SCE object from BUStools output |
|
78 |
-#' @description Read the barcodes, features (genes), and matrix from BUStools |
|
79 |
-#' output. Import them |
|
80 |
-#' as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the |
|
81 |
-#' cells in the output files for BUStools 0.39.4 are not filtered. |
|
82 |
-#' @param BUStoolsDirs A vector of paths to BUStools output files. Each sample |
|
83 |
-#' should have its own path. For example: \code{./genecount}. |
|
84 |
-#' Must have the same length as \code{samples}. |
|
85 |
-#' @param samples A vector of user-defined sample names for the samples to be |
|
86 |
-#' imported. Must have the same length as \code{BUStoolsDirs}. |
|
87 |
-#' @param matrixFileNames Filenames for the Market Exchange Format (MEX) sparse |
|
88 |
-#' matrix files (.mtx files). Must have length 1 or the same |
|
89 |
-#' length as \code{samples}. |
|
90 |
-#' @param featuresFileNames Filenames for the feature annotation files. |
|
91 |
-#' Must have length 1 or the same length as \code{samples}. |
|
92 |
-#' @param barcodesFileNames Filenames for the cell barcode list file. |
|
93 |
-#' Must have length 1 or the same length as \code{samples}. |
|
94 |
-#' @param gzipped Boolean. \code{TRUE} if the BUStools output files |
|
95 |
-#' (barcodes.txt, genes.txt, and genes.mtx) were |
|
96 |
-#' gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools |
|
97 |
-#' 0.39.4. Default \code{"auto"} which automatically detects if the |
|
98 |
-#' files are gzip compressed. Must have length 1 or the same length as |
|
99 |
-#' \code{samples}. |
|
100 |
-#' @param class Character. The class of the expression matrix stored in the SCE |
|
101 |
-#' object. Can be one of "Matrix" (as returned by |
|
102 |
-#' \link[Matrix]{readMM} function), or "matrix" (as returned by |
|
103 |
-#' \link[base]{matrix} function). Default "Matrix". |
|
104 |
-#' @param delayedArray Boolean. Whether to read the expression matrix as |
|
105 |
-#' \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}. |
|
106 |
-#' @return A \code{SingleCellExperiment} object containing the count |
|
107 |
-#' matrix, the gene annotation, and the cell annotation. |
|
108 |
-#' @examples |
|
109 |
-#' # Example #1 |
|
110 |
-#' # FASTQ files were downloaded from |
|
111 |
-#' # https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0 |
|
112 |
-#' # /pbmc_1k_v3 |
|
113 |
-#' # They were concatenated as follows: |
|
114 |
-#' # cat pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_S1_L002_R1_001.fastq.gz > |
|
115 |
-#' # pbmc_1k_v3_R1.fastq.gz |
|
116 |
-#' # cat pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_S1_L002_R2_001.fastq.gz > |
|
117 |
-#' # pbmc_1k_v3_R2.fastq.gz |
|
118 |
-#' # The following BUStools command generates the gene, cell, and |
|
119 |
-#' # matrix files |
|
120 |
-#' |
|
121 |
-#' # bustools correct -w ./3M-february-2018.txt -p output.bus | \ |
|
122 |
-#' # bustools sort -T tmp/ -t 4 -p - | \ |
|
123 |
-#' # bustools count -o genecount/genes \ |
|
124 |
-#' # -g ./transcripts_to_genes.txt \ |
|
125 |
-#' # -e matrix.ec \ |
|
126 |
-#' # -t transcripts.txt \ |
|
127 |
-#' # --genecounts - |
|
128 |
-#' |
|
129 |
-#' # The top 20 genes and the first 20 cells are included in this example. |
|
130 |
-#' sce <- importBUStools( |
|
131 |
-#' BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/", |
|
132 |
-#' package = "singleCellTK"), |
|
133 |
-#' samples = "PBMC_1k_v3_20x20") |
|
134 |
-#' @export |
|
135 |
-importBUStools <- function( |
|
136 |
- BUStoolsDirs, |
|
137 |
- samples, |
|
138 |
- matrixFileNames = "genes.mtx", |
|
139 |
- featuresFileNames = "genes.genes.txt", |
|
140 |
- barcodesFileNames = "genes.barcodes.txt", |
|
141 |
- gzipped = "auto", |
|
142 |
- class = c("Matrix", "matrix"), |
|
143 |
- delayedArray = TRUE) { |
|
144 |
- |
|
145 |
- class <- match.arg(class) |
|
146 |
- |
|
147 |
- .importBUStools( |
|
148 |
- BUStoolsDirs = BUStoolsDirs, |
|
149 |
- samples = samples, |
|
150 |
- matrixFileNames = matrixFileNames, |
|
151 |
- featuresFileNames = featuresFileNames, |
|
152 |
- barcodesFileNames = barcodesFileNames, |
|
153 |
- gzipped = gzipped, |
|
154 |
- class = class, |
|
155 |
- delayedArray = delayedArray) |
|
156 |
-} |
... | ... |
@@ -36,9 +36,9 @@ |
36 | 36 |
.importBUStools <- function( |
37 | 37 |
BUStoolsDirs, |
38 | 38 |
samples, |
39 |
- matrixFileName, |
|
40 |
- featuresFileName, |
|
41 |
- barcodesFileName, |
|
39 |
+ matrixFileNames, |
|
40 |
+ featuresFileNames, |
|
41 |
+ barcodesFileNames, |
|
42 | 42 |
gzipped, |
43 | 43 |
class, |
44 | 44 |
delayedArray) { |
... | ... |
@@ -49,14 +49,19 @@ |
49 | 49 |
|
50 | 50 |
res <- vector("list", length = length(samples)) |
51 | 51 |
|
52 |
+ matrixFileNames <- .getVectorized(matrixFileNames, length(samples)) |
|
53 |
+ featuresFileNames <- .getVectorized(featuresFileNames, length(samples)) |
|
54 |
+ barcodesFileNames <- .getVectorized(barcodesFileNames, length(samples)) |
|
55 |
+ gzipped <- .getVectorized(gzipped, length(samples)) |
|
56 |
+ |
|
52 | 57 |
for (i in seq_along(samples)) { |
53 | 58 |
dir <- file.path(BUStoolsDirs[i]) |
54 | 59 |
scei <- .constructSCEFromBUStoolsOutputs(dir, |
55 | 60 |
sample = samples[i], |
56 |
- matrixFileName = matrixFileName, |
|
57 |
- featuresFileName = featuresFileName, |
|
58 |
- barcodesFileName = barcodesFileName, |
|
59 |
- gzipped = gzipped, |
|
61 |
+ matrixFileName = matrixFileNames[i], |
|
62 |
+ featuresFileName = featuresFileNames[i], |
|
63 |
+ barcodesFileName = barcodesFileNames[i], |
|
64 |
+ gzipped = gzipped[i], |
|
60 | 65 |
class = class, |
61 | 66 |
delayedArray = delayedArray) |
62 | 67 |
res[[i]] <- scei |
... | ... |
@@ -79,14 +84,19 @@ |
79 | 84 |
#' Must have the same length as \code{samples}. |
80 | 85 |
#' @param samples A vector of user-defined sample names for the samples to be |
81 | 86 |
#' imported. Must have the same length as \code{BUStoolsDirs}. |
82 |
-#' @param matrixFileName Filename for the Market Exchange Format (MEX) sparse |
|
83 |
-#' matrix file (.mtx file). |
|
84 |
-#' @param featuresFileName Filename for the feature annotation file. |
|
85 |
-#' @param barcodesFileName Filename for the cell barcode list file. |
|
87 |
+#' @param matrixFileNames Filenames for the Market Exchange Format (MEX) sparse |
|
88 |
+#' matrix files (.mtx files). Must have length 1 or the same |
|
89 |
+#' length as \code{samples}. |
|
90 |
+#' @param featuresFileNames Filenames for the feature annotation files. |
|
91 |
+#' Must have length 1 or the same length as \code{samples}. |
|
92 |
+#' @param barcodesFileNames Filenames for the cell barcode list file. |
|
93 |
+#' Must have length 1 or the same length as \code{samples}. |
|
86 | 94 |
#' @param gzipped Boolean. \code{TRUE} if the BUStools output files |
87 | 95 |
#' (barcodes.txt, genes.txt, and genes.mtx) were |
88 | 96 |
#' gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools |
89 |
-#' 0.39.4. Default \code{FALSE}. |
|
97 |
+#' 0.39.4. Default \code{"auto"} which automatically detects if the |
|
98 |
+#' files are gzip compressed. Must have length 1 or the same length as |
|
99 |
+#' \code{samples}. |
|
90 | 100 |
#' @param class Character. The class of the expression matrix stored in the SCE |
91 | 101 |
#' object. Can be one of "Matrix" (as returned by |
92 | 102 |
#' \link[Matrix]{readMM} function), or "matrix" (as returned by |
... | ... |
@@ -125,10 +135,10 @@ |
125 | 135 |
importBUStools <- function( |
126 | 136 |
BUStoolsDirs, |
127 | 137 |
samples, |
128 |
- matrixFileName = "genes.mtx", |
|
129 |
- featuresFileName = "genes.genes.txt", |
|
130 |
- barcodesFileName = "genes.barcodes.txt", |
|
131 |
- gzipped = FALSE, |
|
138 |
+ matrixFileNames = "genes.mtx", |
|
139 |
+ featuresFileNames = "genes.genes.txt", |
|
140 |
+ barcodesFileNames = "genes.barcodes.txt", |
|
141 |
+ gzipped = "auto", |
|
132 | 142 |
class = c("Matrix", "matrix"), |
133 | 143 |
delayedArray = TRUE) { |
134 | 144 |
|
... | ... |
@@ -137,9 +147,9 @@ importBUStools <- function( |
137 | 147 |
.importBUStools( |
138 | 148 |
BUStoolsDirs = BUStoolsDirs, |
139 | 149 |
samples = samples, |
140 |
- matrixFileName = matrixFileName, |
|
141 |
- featuresFileName = featuresFileName, |
|
142 |
- barcodesFileName = barcodesFileName, |
|
150 |
+ matrixFileNames = matrixFileNames, |
|
151 |
+ featuresFileNames = featuresFileNames, |
|
152 |
+ barcodesFileNames = barcodesFileNames, |
|
143 | 153 |
gzipped = gzipped, |
144 | 154 |
class = class, |
145 | 155 |
delayedArray = delayedArray) |
... | ... |
@@ -43,8 +43,6 @@ |
43 | 43 |
class, |
44 | 44 |
delayedArray) { |
45 | 45 |
|
46 |
- class <- match.arg(class) |
|
47 |
- |
|
48 | 46 |
if (length(BUStoolsDirs) != length(samples)) { |
49 | 47 |
stop("'BUStoolsDirs' and 'samples' have unequal lengths!") |
50 | 48 |
} |
... | ... |
@@ -131,9 +129,11 @@ importBUStools <- function( |
131 | 129 |
featuresFileName = "genes.genes.txt", |
132 | 130 |
barcodesFileName = "genes.barcodes.txt", |
133 | 131 |
gzipped = FALSE, |
134 |
- class = "Matrix", |
|
132 |
+ class = c("Matrix", "matrix"), |
|
135 | 133 |
delayedArray = TRUE) { |
136 | 134 |
|
135 |
+ class <- match.arg(class) |
|
136 |
+ |
|
137 | 137 |
.importBUStools( |
138 | 138 |
BUStoolsDirs = BUStoolsDirs, |
139 | 139 |
samples = samples, |
add dataType = c("raw", "filtered") for importCellRangerV2 & V3
delayedArray = TRUE
add importSingleCellMatrix
... | ... |
@@ -6,13 +6,15 @@ |
6 | 6 |
featuresFileName, |
7 | 7 |
barcodesFileName, |
8 | 8 |
gzipped, |
9 |
- class) { |
|
9 |
+ class, |
|
10 |
+ delayedArray) { |
|
10 | 11 |
|
11 | 12 |
cb <- .readBarcodes(file.path(dir, barcodesFileName)) |
12 | 13 |
fe <- .readFeatures(file.path(dir, featuresFileName)) |
13 | 14 |
ma <- .readMatrixMM(file.path(dir, matrixFileName), |
14 | 15 |
gzipped = gzipped, |
15 |
- class = class) |
|
16 |
+ class = class, |
|
17 |
+ delayedArray = delayedArray) |
|
16 | 18 |
ma <- t(ma) |
17 | 19 |
|
18 | 20 |
coln <- paste(sample, cb[[1]], sep = "_") |
... | ... |
@@ -38,7 +40,10 @@ |
38 | 40 |
featuresFileName, |
39 | 41 |
barcodesFileName, |
40 | 42 |
gzipped, |
41 |
- class) { |
|
43 |
+ class, |
|
44 |
+ delayedArray) { |
|
45 |
+ |
|
46 |
+ class <- match.arg(class) |
|
42 | 47 |
|
43 | 48 |
if (length(BUStoolsDirs) != length(samples)) { |
44 | 49 |
stop("'BUStoolsDirs' and 'samples' have unequal lengths!") |
... | ... |
@@ -54,7 +59,8 @@ |
54 | 59 |
featuresFileName = featuresFileName, |
55 | 60 |
barcodesFileName = barcodesFileName, |
56 | 61 |
gzipped = gzipped, |
57 |
- class = class) |
|
62 |
+ class = class, |
|
63 |
+ delayedArray = delayedArray) |
|
58 | 64 |
res[[i]] <- scei |
59 | 65 |
} |
60 | 66 |
|
... | ... |
@@ -84,10 +90,11 @@ |
84 | 90 |
#' gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools |
85 | 91 |
#' 0.39.4. Default \code{FALSE}. |
86 | 92 |
#' @param class Character. The class of the expression matrix stored in the SCE |
87 |
-#' object. Can be one of "DelayedArray" (as returned by |
|
88 |
-#' \link[DelayedArray]{DelayedArray} function), "Matrix" (as returned by |
|
93 |
+#' object. Can be one of "Matrix" (as returned by |
|
89 | 94 |
#' \link[Matrix]{readMM} function), or "matrix" (as returned by |
90 | 95 |
#' \link[base]{matrix} function). Default "Matrix". |
96 |
+#' @param delayedArray Boolean. Whether to read the expression matrix as |
|
97 |
+#' \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}. |
|
91 | 98 |
#' @return A \code{SingleCellExperiment} object containing the count |
92 | 99 |
#' matrix, the gene annotation, and the cell annotation. |
93 | 100 |
#' @examples |
... | ... |
@@ -124,7 +131,8 @@ importBUStools <- function( |
124 | 131 |
featuresFileName = "genes.genes.txt", |
125 | 132 |
barcodesFileName = "genes.barcodes.txt", |
126 | 133 |
gzipped = FALSE, |
127 |
- class = "Matrix") { |
|
134 |
+ class = "Matrix", |
|
135 |
+ delayedArray = TRUE) { |
|
128 | 136 |
|
129 | 137 |
.importBUStools( |
130 | 138 |
BUStoolsDirs = BUStoolsDirs, |
... | ... |
@@ -133,5 +141,6 @@ importBUStools <- function( |
133 | 141 |
featuresFileName = featuresFileName, |
134 | 142 |
barcodesFileName = barcodesFileName, |
135 | 143 |
gzipped = gzipped, |
136 |
- class = class) |
|
144 |
+ class = class, |
|
145 |
+ delayedArray = delayedArray) |
|
137 | 146 |
} |
... | ... |
@@ -32,23 +32,33 @@ |
32 | 32 |
|
33 | 33 |
# main function |
34 | 34 |
.importBUStools <- function( |
35 |
- BUStoolsDir, |
|
36 |
- sample, |
|
35 |
+ BUStoolsDirs, |
|
36 |
+ samples, |
|
37 | 37 |
matrixFileName, |
38 | 38 |
featuresFileName, |
39 | 39 |
barcodesFileName, |
40 | 40 |
gzipped, |
41 | 41 |
class) { |
42 | 42 |
|
43 |
- dir <- file.path(BUStoolsDir) |
|
44 |
- sce <- .constructSCEFromBUStoolsOutputs(dir, |
|
45 |
- sample = sample, |
|
46 |
- matrixFileName = matrixFileName, |
|
47 |
- featuresFileName = featuresFileName, |
|
48 |
- barcodesFileName = barcodesFileName, |
|
49 |
- gzipped = gzipped, |
|
50 |
- class = class) |
|
43 |
+ if (length(BUStoolsDirs) != length(samples)) { |
|
44 |
+ stop("'BUStoolsDirs' and 'samples' have unequal lengths!") |
|
45 |
+ } |
|
46 |
+ |
|
47 |
+ res <- vector("list", length = length(samples)) |
|
48 |
+ |
|
49 |
+ for (i in seq_along(samples)) { |
|
50 |
+ dir <- file.path(BUStoolsDirs[i]) |
|
51 |
+ scei <- .constructSCEFromBUStoolsOutputs(dir, |
|
52 |
+ sample = samples[i], |
|
53 |
+ matrixFileName = matrixFileName, |
|
54 |
+ featuresFileName = featuresFileName, |
|
55 |
+ barcodesFileName = barcodesFileName, |
|
56 |
+ gzipped = gzipped, |
|
57 |
+ class = class) |
|
58 |
+ res[[i]] <- scei |
|
59 |
+ } |
|
51 | 60 |
|
61 |
+ sce <- do.call(BiocGenerics::cbind, res) |
|
52 | 62 |
return(sce) |
53 | 63 |
} |
54 | 64 |
|
... | ... |
@@ -60,9 +70,11 @@ |
60 | 70 |
#' output. Import them |
61 | 71 |
#' as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the |
62 | 72 |
#' cells in the output files for BUStools 0.39.4 are not filtered. |
63 |
-#' @param BUStoolsDir The path to BUStools output files. For |
|
64 |
-#' example: \code{./genecount}. |
|
65 |
-#' @param sample User-defined sample name for the sample to be imported. |
|
73 |
+#' @param BUStoolsDirs A vector of paths to BUStools output files. Each sample |
|
74 |
+#' should have its own path. For example: \code{./genecount}. |
|
75 |
+#' Must have the same length as \code{samples}. |
|
76 |
+#' @param samples A vector of user-defined sample names for the samples to be |
|
77 |
+#' imported. Must have the same length as \code{BUStoolsDirs}. |
|
66 | 78 |
#' @param matrixFileName Filename for the Market Exchange Format (MEX) sparse |
67 | 79 |
#' matrix file (.mtx file). |
68 | 80 |
#' @param featuresFileName Filename for the feature annotation file. |
... | ... |
@@ -101,13 +113,13 @@ |
101 | 113 |
#' |
102 | 114 |
#' # The top 20 genes and the first 20 cells are included in this example. |
103 | 115 |
#' sce <- importBUStools( |
104 |
-#' BUStoolsDir = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/", |
|
116 |
+#' BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/", |
|
105 | 117 |
#' package = "singleCellTK"), |
106 |
-#' sample = "PBMC_1k_v3_20x20") |
|
118 |
+#' samples = "PBMC_1k_v3_20x20") |
|
107 | 119 |
#' @export |
108 | 120 |
importBUStools <- function( |
109 |
- BUStoolsDir, |
|
110 |
- sample, |
|
121 |
+ BUStoolsDirs, |
|
122 |
+ samples, |
|
111 | 123 |
matrixFileName = "genes.mtx", |
112 | 124 |
featuresFileName = "genes.genes.txt", |
113 | 125 |
barcodesFileName = "genes.barcodes.txt", |
... | ... |
@@ -115,8 +127,8 @@ importBUStools <- function( |
115 | 127 |
class = "Matrix") { |
116 | 128 |
|
117 | 129 |
.importBUStools( |
118 |
- BUStoolsDir = BUStoolsDir, |
|
119 |
- sample = sample, |
|
130 |
+ BUStoolsDirs = BUStoolsDirs, |
|
131 |
+ samples = samples, |
|
120 | 132 |
matrixFileName = matrixFileName, |
121 | 133 |
featuresFileName = featuresFileName, |
122 | 134 |
barcodesFileName = barcodesFileName, |
... | ... |
@@ -75,7 +75,7 @@ |
75 | 75 |
#' object. Can be one of "DelayedArray" (as returned by |
76 | 76 |
#' \link[DelayedArray]{DelayedArray} function), "Matrix" (as returned by |
77 | 77 |
#' \link[Matrix]{readMM} function), or "matrix" (as returned by |
78 |
-#' \link[base]{matrix} function). Default "DelayedArray". |
|
78 |
+#' \link[base]{matrix} function). Default "Matrix". |
|
79 | 79 |
#' @return A \code{SingleCellExperiment} object containing the count |
80 | 80 |
#' matrix, the gene annotation, and the cell annotation. |
81 | 81 |
#' @examples |
... | ... |
@@ -112,7 +112,7 @@ importBUStools <- function( |
112 | 112 |
featuresFileName = "genes.genes.txt", |
113 | 113 |
barcodesFileName = "genes.barcodes.txt", |
114 | 114 |
gzipped = FALSE, |
115 |
- class = "DelayedArray") { |
|
115 |
+ class = "Matrix") { |
|
116 | 116 |
|
117 | 117 |
.importBUStools( |
118 | 118 |
BUStoolsDir = BUStoolsDir, |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,125 @@ |
1 |
+ |
|
2 |
+# dir <- "genecount" |
|
3 |
+.constructSCEFromBUStoolsOutputs <- function(dir, |
|
4 |
+ sample, |
|
5 |
+ matrixFileName, |
|
6 |
+ featuresFileName, |
|
7 |
+ barcodesFileName, |
|
8 |
+ gzipped, |
|
9 |
+ class) { |
|
10 |
+ |
|
11 |
+ cb <- .readBarcodes(file.path(dir, barcodesFileName)) |
|
12 |
+ fe <- .readFeatures(file.path(dir, featuresFileName)) |
|
13 |
+ ma <- .readMatrixMM(file.path(dir, matrixFileName), |
|
14 |
+ gzipped = gzipped, |
|
15 |
+ class = class) |
|
16 |
+ ma <- t(ma) |
|
17 |
+ |
|
18 |
+ coln <- paste(sample, cb[[1]], sep = "_") |
|
19 |
+ rownames(ma) <- fe[[1]] |
|
20 |
+ |
|
21 |
+ sce <- SingleCellExperiment::SingleCellExperiment( |
|
22 |
+ assays = list(counts = ma)) |
|
23 |
+ SummarizedExperiment::rowData(sce) <- fe |
|
24 |
+ SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cb, |
|
25 |
+ column_name = coln, |
|
26 |
+ sample = sample, |
|
27 |
+ row.names = coln) |
|
28 |
+ |
|
29 |
+ return(sce) |
|
30 |
+} |
|
31 |
+ |
|
32 |
+ |
|
33 |
+# main function |
|
34 |
+.importBUStools <- function( |
|
35 |
+ BUStoolsDir, |
|
36 |
+ sample, |
|
37 |
+ matrixFileName, |
|
38 |
+ featuresFileName, |
|
39 |
+ barcodesFileName, |
|
40 |
+ gzipped, |
|
41 |
+ class) { |
|
42 |
+ |
|
43 |
+ dir <- file.path(BUStoolsDir) |
|
44 |
+ sce <- .constructSCEFromBUStoolsOutputs(dir, |
|
45 |
+ sample = sample, |
|
46 |
+ matrixFileName = matrixFileName, |
|
47 |
+ featuresFileName = featuresFileName, |
|
48 |
+ barcodesFileName = barcodesFileName, |
|
49 |
+ gzipped = gzipped, |
|
50 |
+ class = class) |
|
51 |
+ |
|
52 |
+ return(sce) |
|
53 |
+} |
|
54 |
+ |
|
55 |
+ |
|
56 |
+#' @name importBUStools |
|
57 |
+#' @rdname importBUStools |
|
58 |
+#' @title Construct SCE object from BUStools output |
|
59 |
+#' @description Read the barcodes, features (genes), and matrix from BUStools |
|
60 |
+#' output. Import them |
|
61 |
+#' as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the |
|
62 |
+#' cells in the output files for BUStools 0.39.4 are not filtered. |
|
63 |
+#' @param BUStoolsDir The path to BUStools output files. For |
|
64 |
+#' example: \code{./genecount}. |
|
65 |
+#' @param sample User-defined sample name for the sample to be imported. |
|
66 |
+#' @param matrixFileName Filename for the Market Exchange Format (MEX) sparse |
|
67 |
+#' matrix file (.mtx file). |
|
68 |
+#' @param featuresFileName Filename for the feature annotation file. |
|
69 |
+#' @param barcodesFileName Filename for the cell barcode list file. |
|
70 |
+#' @param gzipped Boolean. \code{TRUE} if the BUStools output files |
|
71 |
+#' (barcodes.txt, genes.txt, and genes.mtx) were |
|
72 |
+#' gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools |
|
73 |
+#' 0.39.4. Default \code{FALSE}. |
|
74 |
+#' @param class Character. The class of the expression matrix stored in the SCE |
|
75 |
+#' object. Can be one of "DelayedArray" (as returned by |
|
76 |
+#' \link[DelayedArray]{DelayedArray} function), "Matrix" (as returned by |
|
77 |
+#' \link[Matrix]{readMM} function), or "matrix" (as returned by |
|
78 |
+#' \link[base]{matrix} function). Default "DelayedArray". |
|
79 |
+#' @return A \code{SingleCellExperiment} object containing the count |
|
80 |
+#' matrix, the gene annotation, and the cell annotation. |
|
81 |
+#' @examples |
|
82 |
+#' # Example #1 |
|
83 |
+#' # FASTQ files were downloaded from |
|
84 |
+#' # https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0 |
|
85 |
+#' # /pbmc_1k_v3 |
|
86 |
+#' # They were concatenated as follows: |
|
87 |
+#' # cat pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_S1_L002_R1_001.fastq.gz > |
|
88 |
+#' # pbmc_1k_v3_R1.fastq.gz |
|
89 |
+#' # cat pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_S1_L002_R2_001.fastq.gz > |
|
90 |
+#' # pbmc_1k_v3_R2.fastq.gz |
|
91 |
+#' # The following BUStools command generates the gene, cell, and |
|
92 |
+#' # matrix files |
|
93 |
+#' |
|
94 |
+#' # bustools correct -w ./3M-february-2018.txt -p output.bus | \ |
|
95 |
+#' # bustools sort -T tmp/ -t 4 -p - | \ |
|
96 |
+#' # bustools count -o genecount/genes \ |
|
97 |
+#' # -g ./transcripts_to_genes.txt \ |
|
98 |
+#' # -e matrix.ec \ |
|
99 |
+#' # -t transcripts.txt \ |
|
100 |
+#' # --genecounts - |
|
101 |
+#' |
|
102 |
+#' # The top 20 genes and the first 20 cells are included in this example. |
|
103 |
+#' sce <- importBUStools( |
|
104 |
+#' BUStoolsDir = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/", |
|
105 |
+#' package = "singleCellTK"), |
|
106 |
+#' sample = "PBMC_1k_v3_20x20") |
|
107 |
+#' @export |
|
108 |
+importBUStools <- function( |
|
109 |
+ BUStoolsDir, |
|
110 |
+ sample, |
|
111 |
+ matrixFileName = "genes.mtx", |
|
112 |
+ featuresFileName = "genes.genes.txt", |
|
113 |
+ barcodesFileName = "genes.barcodes.txt", |
|
114 |
+ gzipped = FALSE, |
|
115 |
+ class = "DelayedArray") { |
|
116 |
+ |
|
117 |
+ .importBUStools( |
|
118 |
+ BUStoolsDir = BUStoolsDir, |
|
119 |
+ sample = sample, |
|
120 |
+ matrixFileName = matrixFileName, |
|
121 |
+ featuresFileName = featuresFileName, |
|
122 |
+ barcodesFileName = barcodesFileName, |
|
123 |
+ gzipped = gzipped, |
|
124 |
+ class = class) |
|
125 |
+} |