Browse code

Update dedup functionality for import data functions & update tutorial with VAM

Yichen Wang authored on 22/11/2021 17:57:17
Showing 1 changed file
... ...
@@ -27,7 +27,7 @@
27 27
         column_name = coln,
28 28
         sample = sample,
29 29
         row.names = coln)
30
-
30
+    
31 31
     return(sce)
32 32
 }
33 33
 
... ...
@@ -41,7 +41,8 @@
41 41
     barcodesFileNames,
42 42
     gzipped,
43 43
     class,
44
-    delayedArray) {
44
+    delayedArray,
45
+    rowNamesDedup) {
45 46
 
46 47
     if (length(BUStoolsDirs) != length(samples)) {
47 48
         stop("'BUStoolsDirs' and 'samples' have unequal lengths!")
... ...
@@ -68,6 +69,15 @@
68 69
     }
69 70
 
70 71
     sce <- do.call(SingleCellExperiment::cbind, res)
72
+    
73
+    if (isTRUE(rowNamesDedup)) {
74
+        if (any(duplicated(rownames(sce)))) {
75
+            message("Duplicated gene names found, adding '-1', '-2', ",
76
+                    "... suffix to them.")
77
+        }
78
+        sce <- dedupRowNames(sce)
79
+    }
80
+    
71 81
     return(sce)
72 82
 }
73 83
 
... ...
@@ -103,6 +113,8 @@
103 113
 #'  \link[base]{matrix} function). Default "Matrix".
104 114
 #' @param delayedArray Boolean. Whether to read the expression matrix as
105 115
 #'  \link[DelayedArray]{DelayedArray-class} object or not. Default \code{FALSE}.
116
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default 
117
+#'  \code{TRUE}.
106 118
 #' @return A \code{SingleCellExperiment} object containing the count
107 119
 #'  matrix, the gene annotation, and the cell annotation.
108 120
 #' @examples
... ...
@@ -140,7 +152,8 @@ importBUStools <- function(
140 152
     barcodesFileNames = "genes.barcodes.txt",
141 153
     gzipped = "auto",
142 154
     class = c("Matrix", "matrix"),
143
-    delayedArray = FALSE) {
155
+    delayedArray = FALSE,
156
+    rowNamesDedup = TRUE) {
144 157
 
145 158
     class <- match.arg(class)
146 159
 
... ...
@@ -152,5 +165,6 @@ importBUStools <- function(
152 165
         barcodesFileNames = barcodesFileNames,
153 166
         gzipped = gzipped,
154 167
         class = class,
155
-        delayedArray = delayedArray)
168
+        delayedArray = delayedArray,
169
+        rowNamesDedup = rowNamesDedup)
156 170
 }
Browse code

Update importMitoGeneSet function. Change delayedArray = FALSE for all import functions. Minor fixes in QC HTML report

rz2333 authored on 30/12/2020 17:40:20
Showing 1 changed file
... ...
@@ -102,7 +102,7 @@
102 102
 #'  \link{readMM} function), or "matrix" (as returned by
103 103
 #'  \link[base]{matrix} function). Default "Matrix".
104 104
 #' @param delayedArray Boolean. Whether to read the expression matrix as
105
-#'  \link[DelayedArray]{DelayedArray-class} object or not. Default \code{TRUE}.
105
+#'  \link[DelayedArray]{DelayedArray-class} object or not. Default \code{FALSE}.
106 106
 #' @return A \code{SingleCellExperiment} object containing the count
107 107
 #'  matrix, the gene annotation, and the cell annotation.
108 108
 #' @examples
... ...
@@ -140,7 +140,7 @@ importBUStools <- function(
140 140
     barcodesFileNames = "genes.barcodes.txt",
141 141
     gzipped = "auto",
142 142
     class = c("Matrix", "matrix"),
143
-    delayedArray = TRUE) {
143
+    delayedArray = FALSE) {
144 144
 
145 145
     class <- match.arg(class)
146 146
 
Browse code

Edit links to documentation

unknown authored on 22/10/2020 03:39:09
Showing 1 changed file
... ...
@@ -99,10 +99,10 @@
99 99
 #'  \code{samples}.
100 100
 #' @param class Character. The class of the expression matrix stored in the SCE
101 101
 #'  object. Can be one of "Matrix" (as returned by
102
-#'  \link[Matrix]{readMM} function), or "matrix" (as returned by
102
+#'  \link{readMM} function), or "matrix" (as returned by
103 103
 #'  \link[base]{matrix} function). Default "Matrix".
104 104
 #' @param delayedArray Boolean. Whether to read the expression matrix as
105
-#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
105
+#'  \link[DelayedArray]{DelayedArray-class} object or not. Default \code{TRUE}.
106 106
 #' @return A \code{SingleCellExperiment} object containing the count
107 107
 #'  matrix, the gene annotation, and the cell annotation.
108 108
 #' @examples
Browse code

Merge devel branch (Oct 5) into master branch

Yusuke Koga authored on 09/10/2020 17:57:06
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,156 @@
1
+
2
+# dir <- "genecount"
3
+.constructSCEFromBUStoolsOutputs <- function(dir,
4
+    sample,
5
+    matrixFileName,
6
+    featuresFileName,
7
+    barcodesFileName,
8
+    gzipped,
9
+    class,
10
+    delayedArray) {
11
+
12
+    cb <- .readBarcodes(file.path(dir, barcodesFileName))
13
+    fe <- .readFeatures(file.path(dir, featuresFileName))
14
+    ma <- .readMatrixMM(file.path(dir, matrixFileName),
15
+        gzipped = gzipped,
16
+        class = class,
17
+        delayedArray = delayedArray)
18
+    ma <- t(ma)
19
+
20
+    coln <- paste(sample, cb[[1]], sep = "_")
21
+    rownames(ma) <- fe[[1]]
22
+
23
+    sce <- SingleCellExperiment::SingleCellExperiment(
24
+        assays = list(counts = ma))
25
+    SummarizedExperiment::rowData(sce) <- fe
26
+    SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cb,
27
+        column_name = coln,
28
+        sample = sample,
29
+        row.names = coln)
30
+
31
+    return(sce)
32
+}
33
+
34
+
35
+# main function
36
+.importBUStools <- function(
37
+    BUStoolsDirs,
38
+    samples,
39
+    matrixFileNames,
40
+    featuresFileNames,
41
+    barcodesFileNames,
42
+    gzipped,
43
+    class,
44
+    delayedArray) {
45
+
46
+    if (length(BUStoolsDirs) != length(samples)) {
47
+        stop("'BUStoolsDirs' and 'samples' have unequal lengths!")
48
+    }
49
+
50
+    res <- vector("list", length = length(samples))
51
+
52
+    matrixFileNames <- .getVectorized(matrixFileNames, length(samples))
53
+    featuresFileNames <- .getVectorized(featuresFileNames, length(samples))
54
+    barcodesFileNames <- .getVectorized(barcodesFileNames, length(samples))
55
+    gzipped <- .getVectorized(gzipped, length(samples))
56
+
57
+    for (i in seq_along(samples)) {
58
+        dir <- file.path(BUStoolsDirs[i])
59
+        scei <- .constructSCEFromBUStoolsOutputs(dir,
60
+            sample = samples[i],
61
+            matrixFileName = matrixFileNames[i],
62
+            featuresFileName = featuresFileNames[i],
63
+            barcodesFileName = barcodesFileNames[i],
64
+            gzipped = gzipped[i],
65
+            class = class,
66
+            delayedArray = delayedArray)
67
+        res[[i]] <- scei
68
+    }
69
+
70
+    sce <- do.call(SingleCellExperiment::cbind, res)
71
+    return(sce)
72
+}
73
+
74
+
75
+#' @name importBUStools
76
+#' @rdname importBUStools
77
+#' @title Construct SCE object from BUStools output
78
+#' @description Read the barcodes, features (genes), and matrix from BUStools
79
+#'  output. Import them
80
+#'  as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the
81
+#'  cells in the output files for BUStools 0.39.4 are not filtered.
82
+#' @param BUStoolsDirs A vector of paths to BUStools output files. Each sample
83
+#'  should have its own path. For example: \code{./genecount}.
84
+#'  Must have the same length as \code{samples}.
85
+#' @param samples A vector of user-defined sample names for the samples to be
86
+#'  imported. Must have the same length as \code{BUStoolsDirs}.
87
+#' @param matrixFileNames Filenames for the Market Exchange Format (MEX) sparse
88
+#'  matrix files (.mtx files). Must have length 1 or the same
89
+#'  length as \code{samples}.
90
+#' @param featuresFileNames Filenames for the feature annotation files.
91
+#'  Must have length 1 or the same length as \code{samples}.
92
+#' @param barcodesFileNames Filenames for the cell barcode list file.
93
+#'  Must have length 1 or the same length as \code{samples}.
94
+#' @param gzipped Boolean. \code{TRUE} if the BUStools output files
95
+#'  (barcodes.txt, genes.txt, and genes.mtx) were
96
+#'  gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools
97
+#'  0.39.4. Default \code{"auto"} which automatically detects if the
98
+#'  files are gzip compressed. Must have length 1 or the same length as
99
+#'  \code{samples}.
100
+#' @param class Character. The class of the expression matrix stored in the SCE
101
+#'  object. Can be one of "Matrix" (as returned by
102
+#'  \link[Matrix]{readMM} function), or "matrix" (as returned by
103
+#'  \link[base]{matrix} function). Default "Matrix".
104
+#' @param delayedArray Boolean. Whether to read the expression matrix as
105
+#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
106
+#' @return A \code{SingleCellExperiment} object containing the count
107
+#'  matrix, the gene annotation, and the cell annotation.
108
+#' @examples
109
+#' # Example #1
110
+#' # FASTQ files were downloaded from
111
+#' # https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0
112
+#' # /pbmc_1k_v3
113
+#' # They were concatenated as follows:
114
+#' # cat pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_S1_L002_R1_001.fastq.gz >
115
+#' # pbmc_1k_v3_R1.fastq.gz
116
+#' # cat pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_S1_L002_R2_001.fastq.gz >
117
+#' # pbmc_1k_v3_R2.fastq.gz
118
+#' # The following BUStools command generates the gene, cell, and
119
+#' # matrix files
120
+#'
121
+#' # bustools correct -w ./3M-february-2018.txt -p output.bus | \
122
+#' #   bustools sort -T tmp/ -t 4 -p - | \
123
+#' #   bustools count -o genecount/genes \
124
+#' #     -g ./transcripts_to_genes.txt \
125
+#' #     -e matrix.ec \
126
+#' #     -t transcripts.txt \
127
+#' #     --genecounts -
128
+#'
129
+#' # The top 20 genes and the first 20 cells are included in this example.
130
+#' sce <- importBUStools(
131
+#'   BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/",
132
+#'     package = "singleCellTK"),
133
+#'   samples = "PBMC_1k_v3_20x20")
134
+#' @export
135
+importBUStools <- function(
136
+    BUStoolsDirs,
137
+    samples,
138
+    matrixFileNames = "genes.mtx",
139
+    featuresFileNames = "genes.genes.txt",
140
+    barcodesFileNames = "genes.barcodes.txt",
141
+    gzipped = "auto",
142
+    class = c("Matrix", "matrix"),
143
+    delayedArray = TRUE) {
144
+
145
+    class <- match.arg(class)
146
+
147
+    .importBUStools(
148
+        BUStoolsDirs = BUStoolsDirs,
149
+        samples = samples,
150
+        matrixFileNames = matrixFileNames,
151
+        featuresFileNames = featuresFileNames,
152
+        barcodesFileNames = barcodesFileNames,
153
+        gzipped = gzipped,
154
+        class = class,
155
+        delayedArray = delayedArray)
156
+}
Browse code

Revert "Sctk documentation "

Joshua D. Campbell authored on 09/06/2020 23:22:05 • GitHub committed on 09/06/2020 23:22:05
Showing 1 changed file
1 1
deleted file mode 100644
... ...
@@ -1,156 +0,0 @@
1
-
2
-# dir <- "genecount"
3
-.constructSCEFromBUStoolsOutputs <- function(dir,
4
-    sample,
5
-    matrixFileName,
6
-    featuresFileName,
7
-    barcodesFileName,
8
-    gzipped,
9
-    class,
10
-    delayedArray) {
11
-
12
-    cb <- .readBarcodes(file.path(dir, barcodesFileName))
13
-    fe <- .readFeatures(file.path(dir, featuresFileName))
14
-    ma <- .readMatrixMM(file.path(dir, matrixFileName),
15
-        gzipped = gzipped,
16
-        class = class,
17
-        delayedArray = delayedArray)
18
-    ma <- t(ma)
19
-
20
-    coln <- paste(sample, cb[[1]], sep = "_")
21
-    rownames(ma) <- fe[[1]]
22
-
23
-    sce <- SingleCellExperiment::SingleCellExperiment(
24
-        assays = list(counts = ma))
25
-    SummarizedExperiment::rowData(sce) <- fe
26
-    SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cb,
27
-        column_name = coln,
28
-        sample = sample,
29
-        row.names = coln)
30
-
31
-    return(sce)
32
-}
33
-
34
-
35
-# main function
36
-.importBUStools <- function(
37
-    BUStoolsDirs,
38
-    samples,
39
-    matrixFileNames,
40
-    featuresFileNames,
41
-    barcodesFileNames,
42
-    gzipped,
43
-    class,
44
-    delayedArray) {
45
-
46
-    if (length(BUStoolsDirs) != length(samples)) {
47
-        stop("'BUStoolsDirs' and 'samples' have unequal lengths!")
48
-    }
49
-
50
-    res <- vector("list", length = length(samples))
51
-
52
-    matrixFileNames <- .getVectorized(matrixFileNames, length(samples))
53
-    featuresFileNames <- .getVectorized(featuresFileNames, length(samples))
54
-    barcodesFileNames <- .getVectorized(barcodesFileNames, length(samples))
55
-    gzipped <- .getVectorized(gzipped, length(samples))
56
-
57
-    for (i in seq_along(samples)) {
58
-        dir <- file.path(BUStoolsDirs[i])
59
-        scei <- .constructSCEFromBUStoolsOutputs(dir,
60
-            sample = samples[i],
61
-            matrixFileName = matrixFileNames[i],
62
-            featuresFileName = featuresFileNames[i],
63
-            barcodesFileName = barcodesFileNames[i],
64
-            gzipped = gzipped[i],
65
-            class = class,
66
-            delayedArray = delayedArray)
67
-        res[[i]] <- scei
68
-    }
69
-
70
-    sce <- do.call(SingleCellExperiment::cbind, res)
71
-    return(sce)
72
-}
73
-
74
-
75
-#' @name importBUStools
76
-#' @rdname importBUStools
77
-#' @title Construct SCE object from BUStools output
78
-#' @description Read the barcodes, features (genes), and matrix from BUStools
79
-#'  output. Import them
80
-#'  as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the
81
-#'  cells in the output files for BUStools 0.39.4 are not filtered.
82
-#' @param BUStoolsDirs A vector of paths to BUStools output files. Each sample
83
-#'  should have its own path. For example: \code{./genecount}.
84
-#'  Must have the same length as \code{samples}.
85
-#' @param samples A vector of user-defined sample names for the samples to be
86
-#'  imported. Must have the same length as \code{BUStoolsDirs}.
87
-#' @param matrixFileNames Filenames for the Market Exchange Format (MEX) sparse
88
-#'  matrix files (.mtx files). Must have length 1 or the same
89
-#'  length as \code{samples}.
90
-#' @param featuresFileNames Filenames for the feature annotation files.
91
-#'  Must have length 1 or the same length as \code{samples}.
92
-#' @param barcodesFileNames Filenames for the cell barcode list file.
93
-#'  Must have length 1 or the same length as \code{samples}.
94
-#' @param gzipped Boolean. \code{TRUE} if the BUStools output files
95
-#'  (barcodes.txt, genes.txt, and genes.mtx) were
96
-#'  gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools
97
-#'  0.39.4. Default \code{"auto"} which automatically detects if the
98
-#'  files are gzip compressed. Must have length 1 or the same length as
99
-#'  \code{samples}.
100
-#' @param class Character. The class of the expression matrix stored in the SCE
101
-#'  object. Can be one of "Matrix" (as returned by
102
-#'  \link[Matrix]{readMM} function), or "matrix" (as returned by
103
-#'  \link[base]{matrix} function). Default "Matrix".
104
-#' @param delayedArray Boolean. Whether to read the expression matrix as
105
-#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
106
-#' @return A \code{SingleCellExperiment} object containing the count
107
-#'  matrix, the gene annotation, and the cell annotation.
108
-#' @examples
109
-#' # Example #1
110
-#' # FASTQ files were downloaded from
111
-#' # https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0
112
-#' # /pbmc_1k_v3
113
-#' # They were concatenated as follows:
114
-#' # cat pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_S1_L002_R1_001.fastq.gz >
115
-#' # pbmc_1k_v3_R1.fastq.gz
116
-#' # cat pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_S1_L002_R2_001.fastq.gz >
117
-#' # pbmc_1k_v3_R2.fastq.gz
118
-#' # The following BUStools command generates the gene, cell, and
119
-#' # matrix files
120
-#'
121
-#' # bustools correct -w ./3M-february-2018.txt -p output.bus | \
122
-#' #   bustools sort -T tmp/ -t 4 -p - | \
123
-#' #   bustools count -o genecount/genes \
124
-#' #     -g ./transcripts_to_genes.txt \
125
-#' #     -e matrix.ec \
126
-#' #     -t transcripts.txt \
127
-#' #     --genecounts -
128
-#'
129
-#' # The top 20 genes and the first 20 cells are included in this example.
130
-#' sce <- importBUStools(
131
-#'   BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/",
132
-#'     package = "singleCellTK"),
133
-#'   samples = "PBMC_1k_v3_20x20")
134
-#' @export
135
-importBUStools <- function(
136
-    BUStoolsDirs,
137
-    samples,
138
-    matrixFileNames = "genes.mtx",
139
-    featuresFileNames = "genes.genes.txt",
140
-    barcodesFileNames = "genes.barcodes.txt",
141
-    gzipped = "auto",
142
-    class = c("Matrix", "matrix"),
143
-    delayedArray = TRUE) {
144
-
145
-    class <- match.arg(class)
146
-
147
-    .importBUStools(
148
-        BUStoolsDirs = BUStoolsDirs,
149
-        samples = samples,
150
-        matrixFileNames = matrixFileNames,
151
-        featuresFileNames = featuresFileNames,
152
-        barcodesFileNames = barcodesFileNames,
153
-        gzipped = gzipped,
154
-        class = class,
155
-        delayedArray = delayedArray)
156
-}
Browse code

Merge branch 'importQC' of github.com:joshua-d-campbell/singleCellTK into importQC

zhewa authored on 11/02/2020 01:16:21
Showing 0 changed files
Browse code

add vectorized inputs, gzipped = "auto"

zhewa authored on 07/02/2020 06:32:44
Showing 1 changed file
... ...
@@ -36,9 +36,9 @@
36 36
 .importBUStools <- function(
37 37
     BUStoolsDirs,
38 38
     samples,
39
-    matrixFileName,
40
-    featuresFileName,
41
-    barcodesFileName,
39
+    matrixFileNames,
40
+    featuresFileNames,
41
+    barcodesFileNames,
42 42
     gzipped,
43 43
     class,
44 44
     delayedArray) {
... ...
@@ -49,14 +49,19 @@
49 49
 
50 50
     res <- vector("list", length = length(samples))
51 51
 
52
+    matrixFileNames <- .getVectorized(matrixFileNames, length(samples))
53
+    featuresFileNames <- .getVectorized(featuresFileNames, length(samples))
54
+    barcodesFileNames <- .getVectorized(barcodesFileNames, length(samples))
55
+    gzipped <- .getVectorized(gzipped, length(samples))
56
+
52 57
     for (i in seq_along(samples)) {
53 58
         dir <- file.path(BUStoolsDirs[i])
54 59
         scei <- .constructSCEFromBUStoolsOutputs(dir,
55 60
             sample = samples[i],
56
-            matrixFileName = matrixFileName,
57
-            featuresFileName = featuresFileName,
58
-            barcodesFileName = barcodesFileName,
59
-            gzipped = gzipped,
61
+            matrixFileName = matrixFileNames[i],
62
+            featuresFileName = featuresFileNames[i],
63
+            barcodesFileName = barcodesFileNames[i],
64
+            gzipped = gzipped[i],
60 65
             class = class,
61 66
             delayedArray = delayedArray)
62 67
         res[[i]] <- scei
... ...
@@ -79,14 +84,19 @@
79 84
 #'  Must have the same length as \code{samples}.
80 85
 #' @param samples A vector of user-defined sample names for the samples to be
81 86
 #'  imported. Must have the same length as \code{BUStoolsDirs}.
82
-#' @param matrixFileName Filename for the Market Exchange Format (MEX) sparse
83
-#'  matrix file (.mtx file).
84
-#' @param featuresFileName Filename for the feature annotation file.
85
-#' @param barcodesFileName Filename for the cell barcode list file.
87
+#' @param matrixFileNames Filenames for the Market Exchange Format (MEX) sparse
88
+#'  matrix files (.mtx files). Must have length 1 or the same
89
+#'  length as \code{samples}.
90
+#' @param featuresFileNames Filenames for the feature annotation files.
91
+#'  Must have length 1 or the same length as \code{samples}.
92
+#' @param barcodesFileNames Filenames for the cell barcode list file.
93
+#'  Must have length 1 or the same length as \code{samples}.
86 94
 #' @param gzipped Boolean. \code{TRUE} if the BUStools output files
87 95
 #'  (barcodes.txt, genes.txt, and genes.mtx) were
88 96
 #'  gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools
89
-#'  0.39.4. Default \code{FALSE}.
97
+#'  0.39.4. Default \code{"auto"} which automatically detects if the
98
+#'  files are gzip compressed. Must have length 1 or the same length as
99
+#'  \code{samples}.
90 100
 #' @param class Character. The class of the expression matrix stored in the SCE
91 101
 #'  object. Can be one of "Matrix" (as returned by
92 102
 #'  \link[Matrix]{readMM} function), or "matrix" (as returned by
... ...
@@ -125,10 +135,10 @@
125 135
 importBUStools <- function(
126 136
     BUStoolsDirs,
127 137
     samples,
128
-    matrixFileName = "genes.mtx",
129
-    featuresFileName = "genes.genes.txt",
130
-    barcodesFileName = "genes.barcodes.txt",
131
-    gzipped = FALSE,
138
+    matrixFileNames = "genes.mtx",
139
+    featuresFileNames = "genes.genes.txt",
140
+    barcodesFileNames = "genes.barcodes.txt",
141
+    gzipped = "auto",
132 142
     class = c("Matrix", "matrix"),
133 143
     delayedArray = TRUE) {
134 144
 
... ...
@@ -137,9 +147,9 @@ importBUStools <- function(
137 147
     .importBUStools(
138 148
         BUStoolsDirs = BUStoolsDirs,
139 149
         samples = samples,
140
-        matrixFileName = matrixFileName,
141
-        featuresFileName = featuresFileName,
142
-        barcodesFileName = barcodesFileName,
150
+        matrixFileNames = matrixFileNames,
151
+        featuresFileNames = featuresFileNames,
152
+        barcodesFileNames = barcodesFileNames,
143 153
         gzipped = gzipped,
144 154
         class = class,
145 155
         delayedArray = delayedArray)
Browse code

fix bug

zhewa authored on 06/02/2020 04:32:28
Showing 1 changed file
... ...
@@ -43,8 +43,6 @@
43 43
     class,
44 44
     delayedArray) {
45 45
 
46
-    class <- match.arg(class)
47
-
48 46
     if (length(BUStoolsDirs) != length(samples)) {
49 47
         stop("'BUStoolsDirs' and 'samples' have unequal lengths!")
50 48
     }
... ...
@@ -131,9 +129,11 @@ importBUStools <- function(
131 129
     featuresFileName = "genes.genes.txt",
132 130
     barcodesFileName = "genes.barcodes.txt",
133 131
     gzipped = FALSE,
134
-    class = "Matrix",
132
+    class = c("Matrix", "matrix"),
135 133
     delayedArray = TRUE) {
136 134
 
135
+    class <- match.arg(class)
136
+
137 137
     .importBUStools(
138 138
         BUStoolsDirs = BUStoolsDirs,
139 139
         samples = samples,
Browse code

various updates

add dataType = c("raw", "filtered") for importCellRangerV2 & V3
delayedArray = TRUE
add importSingleCellMatrix

zhewa authored on 06/02/2020 04:08:26
Showing 1 changed file
... ...
@@ -6,13 +6,15 @@
6 6
     featuresFileName,
7 7
     barcodesFileName,
8 8
     gzipped,
9
-    class) {
9
+    class,
10
+    delayedArray) {
10 11
 
11 12
     cb <- .readBarcodes(file.path(dir, barcodesFileName))
12 13
     fe <- .readFeatures(file.path(dir, featuresFileName))
13 14
     ma <- .readMatrixMM(file.path(dir, matrixFileName),
14 15
         gzipped = gzipped,
15
-        class = class)
16
+        class = class,
17
+        delayedArray = delayedArray)
16 18
     ma <- t(ma)
17 19
 
18 20
     coln <- paste(sample, cb[[1]], sep = "_")
... ...
@@ -38,7 +40,10 @@
38 40
     featuresFileName,
39 41
     barcodesFileName,
40 42
     gzipped,
41
-    class) {
43
+    class,
44
+    delayedArray) {
45
+
46
+    class <- match.arg(class)
42 47
 
43 48
     if (length(BUStoolsDirs) != length(samples)) {
44 49
         stop("'BUStoolsDirs' and 'samples' have unequal lengths!")
... ...
@@ -54,7 +59,8 @@
54 59
             featuresFileName = featuresFileName,
55 60
             barcodesFileName = barcodesFileName,
56 61
             gzipped = gzipped,
57
-            class = class)
62
+            class = class,
63
+            delayedArray = delayedArray)
58 64
         res[[i]] <- scei
59 65
     }
60 66
 
... ...
@@ -84,10 +90,11 @@
84 90
 #'  gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools
85 91
 #'  0.39.4. Default \code{FALSE}.
86 92
 #' @param class Character. The class of the expression matrix stored in the SCE
87
-#'  object. Can be one of "DelayedArray" (as returned by
88
-#'  \link[DelayedArray]{DelayedArray} function), "Matrix" (as returned by
93
+#'  object. Can be one of "Matrix" (as returned by
89 94
 #'  \link[Matrix]{readMM} function), or "matrix" (as returned by
90 95
 #'  \link[base]{matrix} function). Default "Matrix".
96
+#' @param delayedArray Boolean. Whether to read the expression matrix as
97
+#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
91 98
 #' @return A \code{SingleCellExperiment} object containing the count
92 99
 #'  matrix, the gene annotation, and the cell annotation.
93 100
 #' @examples
... ...
@@ -124,7 +131,8 @@ importBUStools <- function(
124 131
     featuresFileName = "genes.genes.txt",
125 132
     barcodesFileName = "genes.barcodes.txt",
126 133
     gzipped = FALSE,
127
-    class = "Matrix") {
134
+    class = "Matrix",
135
+    delayedArray = TRUE) {
128 136
 
129 137
     .importBUStools(
130 138
         BUStoolsDirs = BUStoolsDirs,
... ...
@@ -133,5 +141,6 @@ importBUStools <- function(
133 141
         featuresFileName = featuresFileName,
134 142
         barcodesFileName = barcodesFileName,
135 143
         gzipped = gzipped,
136
-        class = class)
144
+        class = class,
145
+        delayedArray = delayedArray)
137 146
 }
Browse code

Fixed package/function references

Joshua D. Campbell authored on 03/02/2020 21:03:41
Showing 1 changed file
... ...
@@ -58,7 +58,7 @@
58 58
         res[[i]] <- scei
59 59
     }
60 60
 
61
-    sce <- do.call(BiocGenerics::cbind, res)
61
+    sce <- do.call(SingleCellExperiment::cbind, res)
62 62
     return(sce)
63 63
 }
64 64
 
Browse code

delayedArray as default runQCFilteredCells, runQCAllDroplets

zhewa authored on 20/12/2019 23:31:14
Showing 1 changed file
... ...
@@ -32,23 +32,33 @@
32 32
 
33 33
 # main function
34 34
 .importBUStools <- function(
35
-    BUStoolsDir,
36
-    sample,
35
+    BUStoolsDirs,
36
+    samples,
37 37
     matrixFileName,
38 38
     featuresFileName,
39 39
     barcodesFileName,
40 40
     gzipped,
41 41
     class) {
42 42
 
43
-    dir <- file.path(BUStoolsDir)
44
-    sce <- .constructSCEFromBUStoolsOutputs(dir,
45
-        sample = sample,
46
-        matrixFileName = matrixFileName,
47
-        featuresFileName = featuresFileName,
48
-        barcodesFileName = barcodesFileName,
49
-        gzipped = gzipped,
50
-        class = class)
43
+    if (length(BUStoolsDirs) != length(samples)) {
44
+        stop("'BUStoolsDirs' and 'samples' have unequal lengths!")
45
+    }
46
+
47
+    res <- vector("list", length = length(samples))
48
+
49
+    for (i in seq_along(samples)) {
50
+        dir <- file.path(BUStoolsDirs[i])
51
+        scei <- .constructSCEFromBUStoolsOutputs(dir,
52
+            sample = samples[i],
53
+            matrixFileName = matrixFileName,
54
+            featuresFileName = featuresFileName,
55
+            barcodesFileName = barcodesFileName,
56
+            gzipped = gzipped,
57
+            class = class)
58
+        res[[i]] <- scei
59
+    }
51 60
 
61
+    sce <- do.call(BiocGenerics::cbind, res)
52 62
     return(sce)
53 63
 }
54 64
 
... ...
@@ -60,9 +70,11 @@
60 70
 #'  output. Import them
61 71
 #'  as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the
62 72
 #'  cells in the output files for BUStools 0.39.4 are not filtered.
63
-#' @param BUStoolsDir The path to BUStools output files. For
64
-#'  example: \code{./genecount}.
65
-#' @param sample User-defined sample name for the sample to be imported.
73
+#' @param BUStoolsDirs A vector of paths to BUStools output files. Each sample
74
+#'  should have its own path. For example: \code{./genecount}.
75
+#'  Must have the same length as \code{samples}.
76
+#' @param samples A vector of user-defined sample names for the samples to be
77
+#'  imported. Must have the same length as \code{BUStoolsDirs}.
66 78
 #' @param matrixFileName Filename for the Market Exchange Format (MEX) sparse
67 79
 #'  matrix file (.mtx file).
68 80
 #' @param featuresFileName Filename for the feature annotation file.
... ...
@@ -101,13 +113,13 @@
101 113
 #'
102 114
 #' # The top 20 genes and the first 20 cells are included in this example.
103 115
 #' sce <- importBUStools(
104
-#'   BUStoolsDir = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/",
116
+#'   BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/",
105 117
 #'     package = "singleCellTK"),
106
-#'   sample = "PBMC_1k_v3_20x20")
118
+#'   samples = "PBMC_1k_v3_20x20")
107 119
 #' @export
108 120
 importBUStools <- function(
109
-    BUStoolsDir,
110
-    sample,
121
+    BUStoolsDirs,
122
+    samples,
111 123
     matrixFileName = "genes.mtx",
112 124
     featuresFileName = "genes.genes.txt",
113 125
     barcodesFileName = "genes.barcodes.txt",
... ...
@@ -115,8 +127,8 @@ importBUStools <- function(
115 127
     class = "Matrix") {
116 128
 
117 129
     .importBUStools(
118
-        BUStoolsDir = BUStoolsDir,
119
-        sample = sample,
130
+        BUStoolsDirs = BUStoolsDirs,
131
+        samples = samples,
120 132
         matrixFileName = matrixFileName,
121 133
         featuresFileName = featuresFileName,
122 134
         barcodesFileName = barcodesFileName,
Browse code

default dgTMatrix

87875172 authored on 19/12/2019 20:26:11
Showing 1 changed file
... ...
@@ -75,7 +75,7 @@
75 75
 #'  object. Can be one of "DelayedArray" (as returned by
76 76
 #'  \link[DelayedArray]{DelayedArray} function), "Matrix" (as returned by
77 77
 #'  \link[Matrix]{readMM} function), or "matrix" (as returned by
78
-#'  \link[base]{matrix} function). Default "DelayedArray".
78
+#'  \link[base]{matrix} function). Default "Matrix".
79 79
 #' @return A \code{SingleCellExperiment} object containing the count
80 80
 #'  matrix, the gene annotation, and the cell annotation.
81 81
 #' @examples
... ...
@@ -112,7 +112,7 @@ importBUStools <- function(
112 112
     featuresFileName = "genes.genes.txt",
113 113
     barcodesFileName = "genes.barcodes.txt",
114 114
     gzipped = FALSE,
115
-    class = "DelayedArray") {
115
+    class = "Matrix") {
116 116
 
117 117
     .importBUStools(
118 118
         BUStoolsDir = BUStoolsDir,
Browse code

add import and QC functions

87875172 authored on 18/12/2019 15:45:52
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,125 @@
1
+
2
+# dir <- "genecount"
3
+.constructSCEFromBUStoolsOutputs <- function(dir,
4
+    sample,
5
+    matrixFileName,
6
+    featuresFileName,
7
+    barcodesFileName,
8
+    gzipped,
9
+    class) {
10
+
11
+    cb <- .readBarcodes(file.path(dir, barcodesFileName))
12
+    fe <- .readFeatures(file.path(dir, featuresFileName))
13
+    ma <- .readMatrixMM(file.path(dir, matrixFileName),
14
+        gzipped = gzipped,
15
+        class = class)
16
+    ma <- t(ma)
17
+
18
+    coln <- paste(sample, cb[[1]], sep = "_")
19
+    rownames(ma) <- fe[[1]]
20
+
21
+    sce <- SingleCellExperiment::SingleCellExperiment(
22
+        assays = list(counts = ma))
23
+    SummarizedExperiment::rowData(sce) <- fe
24
+    SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cb,
25
+        column_name = coln,
26
+        sample = sample,
27
+        row.names = coln)
28
+
29
+    return(sce)
30
+}
31
+
32
+
33
+# main function
34
+.importBUStools <- function(
35
+    BUStoolsDir,
36
+    sample,
37
+    matrixFileName,
38
+    featuresFileName,
39
+    barcodesFileName,
40
+    gzipped,
41
+    class) {
42
+
43
+    dir <- file.path(BUStoolsDir)
44
+    sce <- .constructSCEFromBUStoolsOutputs(dir,
45
+        sample = sample,
46
+        matrixFileName = matrixFileName,
47
+        featuresFileName = featuresFileName,
48
+        barcodesFileName = barcodesFileName,
49
+        gzipped = gzipped,
50
+        class = class)
51
+
52
+    return(sce)
53
+}
54
+
55
+
56
+#' @name importBUStools
57
+#' @rdname importBUStools
58
+#' @title Construct SCE object from BUStools output
59
+#' @description Read the barcodes, features (genes), and matrix from BUStools
60
+#'  output. Import them
61
+#'  as one \link[SingleCellExperiment]{SingleCellExperiment} object. Note the
62
+#'  cells in the output files for BUStools 0.39.4 are not filtered.
63
+#' @param BUStoolsDir The path to BUStools output files. For
64
+#'  example: \code{./genecount}.
65
+#' @param sample User-defined sample name for the sample to be imported.
66
+#' @param matrixFileName Filename for the Market Exchange Format (MEX) sparse
67
+#'  matrix file (.mtx file).
68
+#' @param featuresFileName Filename for the feature annotation file.
69
+#' @param barcodesFileName Filename for the cell barcode list file.
70
+#' @param gzipped Boolean. \code{TRUE} if the BUStools output files
71
+#'  (barcodes.txt, genes.txt, and genes.mtx) were
72
+#'  gzip compressed. \code{FALSE} otherwise. This is \code{FALSE} in BUStools
73
+#'  0.39.4. Default \code{FALSE}.
74
+#' @param class Character. The class of the expression matrix stored in the SCE
75
+#'  object. Can be one of "DelayedArray" (as returned by
76
+#'  \link[DelayedArray]{DelayedArray} function), "Matrix" (as returned by
77
+#'  \link[Matrix]{readMM} function), or "matrix" (as returned by
78
+#'  \link[base]{matrix} function). Default "DelayedArray".
79
+#' @return A \code{SingleCellExperiment} object containing the count
80
+#'  matrix, the gene annotation, and the cell annotation.
81
+#' @examples
82
+#' # Example #1
83
+#' # FASTQ files were downloaded from
84
+#' # https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0
85
+#' # /pbmc_1k_v3
86
+#' # They were concatenated as follows:
87
+#' # cat pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_S1_L002_R1_001.fastq.gz >
88
+#' # pbmc_1k_v3_R1.fastq.gz
89
+#' # cat pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_S1_L002_R2_001.fastq.gz >
90
+#' # pbmc_1k_v3_R2.fastq.gz
91
+#' # The following BUStools command generates the gene, cell, and
92
+#' # matrix files
93
+#'
94
+#' # bustools correct -w ./3M-february-2018.txt -p output.bus | \
95
+#' #   bustools sort -T tmp/ -t 4 -p - | \
96
+#' #   bustools count -o genecount/genes \
97
+#' #     -g ./transcripts_to_genes.txt \
98
+#' #     -e matrix.ec \
99
+#' #     -t transcripts.txt \
100
+#' #     --genecounts -
101
+#'
102
+#' # The top 20 genes and the first 20 cells are included in this example.
103
+#' sce <- importBUStools(
104
+#'   BUStoolsDir = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/",
105
+#'     package = "singleCellTK"),
106
+#'   sample = "PBMC_1k_v3_20x20")
107
+#' @export
108
+importBUStools <- function(
109
+    BUStoolsDir,
110
+    sample,
111
+    matrixFileName = "genes.mtx",
112
+    featuresFileName = "genes.genes.txt",
113
+    barcodesFileName = "genes.barcodes.txt",
114
+    gzipped = FALSE,
115
+    class = "DelayedArray") {
116
+
117
+    .importBUStools(
118
+        BUStoolsDir = BUStoolsDir,
119
+        sample = sample,
120
+        matrixFileName = matrixFileName,
121
+        featuresFileName = featuresFileName,
122
+        barcodesFileName = barcodesFileName,
123
+        gzipped = gzipped,
124
+        class = class)
125
+}