Browse code

Fix as(., "dgCMatrix") issue, note that SoupX is still failing

Yichen Wang authored on 30/09/2022 04:09:54
Showing1 changed files
... ...
@@ -42,7 +42,7 @@
42 42
 
43 43
     ## Convert to "dgCMatrix"
44 44
     newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
45
-    newM <- methods::as(newM, "dgCMatrix")
45
+    newM <- methods::as(newM, "CsparseMatrix")
46 46
     breaks <- seq(2, ncol(mat), by=1000)
47 47
     if(length(breaks) > 2) {
48 48
       for(i in seq(2, length(breaks))) {
Browse code

Update dedup functionality for import data functions & update tutorial with VAM

Yichen Wang authored on 22/11/2021 17:57:17
Showing1 changed files
... ...
@@ -10,20 +10,26 @@
10 10
   #  sparse <- reticulate::import("scipy.sparse")
11 11
   #  numpy <- reticulate::import("numpy")
12 12
   if (!reticulate::py_module_available(module = "scipy.sparse")) {
13
-    stop("Error!", "Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda()
14
-         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
15
-         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
16
-         restarted since the module installation. Alternatively, scipy can be installed on the local machine
17
-         with pip (e.g. pip install scipy) and then the 'use_python()' function from the 'reticulate' package
18
-         can be used to select the correct Python environment.")
13
+    stop("Error!", "Cannot find python module 'scipy.sparse', please install 
14
+          Conda and run sctkPythonInstallConda() or run 
15
+          sctkPythonInstallVirtualEnv(). If one of these have been previously 
16
+          run to install the modules, make sure to run selectSCTKConda() or 
17
+          selectSCTKVirtualEnvironment(), respectively, if R has been restarted 
18
+          since the module installation. Alternatively, scipy can be installed 
19
+          on the local machine with pip (e.g. pip install scipy) and then the 
20
+          'use_python()' function from the 'reticulate' package can be used to 
21
+         select the correct Python environment.")
19 22
   }
20 23
   if (!reticulate::py_module_available(module = "numpy")) {
21
-    stop("Error!", "Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda()
22
-         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
23
-         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
24
-         restarted since the module installation. Alternatively, numpy can be installed on the local machine
25
-         with pip (e.g. pip install numpy) and then the 'use_python()' function from the 'reticulate' package
26
-         can be used to select the correct Python environment.")
24
+    stop("Error!", "Cannot find python module 'numpy', please install Conda and 
25
+          run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv(). If 
26
+          one of these have been previously run to install the modules, make 
27
+          sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), 
28
+          respectively, if R has been restarted since the module installation. 
29
+          Alternatively, numpy can be installed on the local machine with pip 
30
+          (e.g. pip install numpy) and then the 'use_python()' function from the
31
+          'reticulate' package can be used to select the correct Python 
32
+          environment.")
27 33
   }
28 34
 
29 35
   error <- try({
... ...
@@ -56,7 +62,9 @@
56 62
   }, silent = TRUE)
57 63
 
58 64
   if(inherits(error, "try-error")) {
59
-    stop(paste0("importOptimus did not complete successfully. SCE could not be generated. Error given during the import process: \n\n", error))
65
+    stop(paste0("importOptimus did not complete successfully. SCE could not be",
66
+                "generated. Error given during the import process: \n\n", 
67
+                error))
60 68
   }
61 69
 
62 70
   if (class == "matrix") {
... ...
@@ -180,7 +188,8 @@
180 188
   geneMetricsLocation,
181 189
   emptyDropsLocation,
182 190
   class,
183
-  delayedArray) {
191
+  delayedArray,
192
+  rowNamesDedup) {
184 193
 
185 194
   .checkArgsImportOptimus(OptimusDirs, samples)
186 195
 
... ...
@@ -201,6 +210,15 @@
201 210
   }
202 211
 
203 212
   sce <- do.call(SingleCellExperiment::cbind, res)
213
+  
214
+  if (isTRUE(rowNamesDedup)) {
215
+    if (any(duplicated(rownames(sce)))) {
216
+      message("Duplicated gene names found, adding '-1', '-2', ",
217
+              "... suffix to them.")
218
+    }
219
+    sce <- dedupRowNames(sce)
220
+  }
221
+  
204 222
   return(sce)
205 223
 }
206 224
 
... ...
@@ -247,6 +265,8 @@
247 265
 #'  \link[base]{matrix} function). Default "Matrix".
248 266
 #' @param delayedArray Boolean. Whether to read the expression matrix as
249 267
 #'  \link{DelayedArray} object or not. Default \code{FALSE}.
268
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default 
269
+#'  \code{TRUE}.
250 270
 #' @return A \link[SingleCellExperiment]{SingleCellExperiment} object
251 271
 #'  containing the count
252 272
 #'  matrix, the gene annotation, and the cell annotation.
... ...
@@ -267,7 +287,8 @@ importOptimus <- function(OptimusDirs,
267 287
   geneMetricsLocation = "call-MergeGeneMetrics/merged-gene-metrics.csv.gz",
268 288
   emptyDropsLocation = "call-RunEmptyDrops/empty_drops_result.csv",
269 289
   class = c("Matrix", "matrix"),
270
-  delayedArray = FALSE) {
290
+  delayedArray = FALSE,
291
+  rowNamesDedup = TRUE) {
271 292
 
272 293
   class <- match.arg(class)
273 294
 
... ...
@@ -280,6 +301,7 @@ importOptimus <- function(OptimusDirs,
280 301
     geneMetricsLocation = geneMetricsLocation,
281 302
     emptyDropsLocation = emptyDropsLocation,
282 303
     class = class,
283
-    delayedArray = delayedArray)
304
+    delayedArray = delayedArray,
305
+    rowNamesDedup = rowNamesDedup)
284 306
 
285 307
 }
Browse code

Update importMitoGeneSet function. Change delayedArray=False for all import function. Minor fixs in QC HTML report

rz2333 authored on 30/12/2020 17:40:20
Showing1 changed files
... ...
@@ -246,7 +246,7 @@
246 246
 #'  \link{readMM} function), or "matrix" (as returned by
247 247
 #'  \link[base]{matrix} function). Default "Matrix".
248 248
 #' @param delayedArray Boolean. Whether to read the expression matrix as
249
-#'  \link{DelayedArray} object or not. Default \code{TRUE}.
249
+#'  \link{DelayedArray} object or not. Default \code{FALSE}.
250 250
 #' @return A \link[SingleCellExperiment]{SingleCellExperiment} object
251 251
 #'  containing the count
252 252
 #'  matrix, the gene annotation, and the cell annotation.
... ...
@@ -267,7 +267,7 @@ importOptimus <- function(OptimusDirs,
267 267
   geneMetricsLocation = "call-MergeGeneMetrics/merged-gene-metrics.csv.gz",
268 268
   emptyDropsLocation = "call-RunEmptyDrops/empty_drops_result.csv",
269 269
   class = c("Matrix", "matrix"),
270
-  delayedArray = TRUE) {
270
+  delayedArray = FALSE) {
271 271
 
272 272
   class <- match.arg(class)
273 273
 
Browse code

Edit links to documentation

unknown authored on 22/10/2020 03:39:09
Showing1 changed files
... ...
@@ -243,10 +243,10 @@
243 243
 #'  optimus_v1.4.0.
244 244
 #' @param class Character. The class of the expression matrix stored in the SCE
245 245
 #'  object. Can be one of "Matrix" (as returned by
246
-#'  \link[Matrix]{readMM} function), or "matrix" (as returned by
246
+#'  \link{readMM} function), or "matrix" (as returned by
247 247
 #'  \link[base]{matrix} function). Default "Matrix".
248 248
 #' @param delayedArray Boolean. Whether to read the expression matrix as
249
-#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
249
+#'  \link{DelayedArray} object or not. Default \code{TRUE}.
250 250
 #' @return A \link[SingleCellExperiment]{SingleCellExperiment} object
251 251
 #'  containing the count
252 252
 #'  matrix, the gene annotation, and the cell annotation.
Browse code

Add Runnable functions

Yusuke Koga authored on 15/10/2020 16:44:51
Showing1 changed files
... ...
@@ -10,7 +10,7 @@
10 10
   #  sparse <- reticulate::import("scipy.sparse")
11 11
   #  numpy <- reticulate::import("numpy")
12 12
   if (!reticulate::py_module_available(module = "scipy.sparse")) {
13
-    stop("Error!", "Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() 
13
+    stop("Error!", "Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda()
14 14
          or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
15 15
          make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
16 16
          restarted since the module installation. Alternatively, scipy can be installed on the local machine
... ...
@@ -18,14 +18,14 @@
18 18
          can be used to select the correct Python environment.")
19 19
   }
20 20
   if (!reticulate::py_module_available(module = "numpy")) {
21
-    stop("Error!", "Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda() 
21
+    stop("Error!", "Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda()
22 22
          or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
23 23
          make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
24 24
          restarted since the module installation. Alternatively, numpy can be installed on the local machine
25 25
          with pip (e.g. pip install numpy) and then the 'use_python()' function from the 'reticulate' package
26 26
          can be used to select the correct Python environment.")
27 27
   }
28
-  
28
+
29 29
   error <- try({
30 30
     mat <- sparse$load_npz(matrixLocation)
31 31
     colIndex <- as.vector(numpy$load(colIndexLocation, allow_pickle = TRUE))
... ...
@@ -54,11 +54,11 @@
54 54
     rownames(newM) <- rownames(mat)
55 55
     mat <- newM
56 56
   }, silent = TRUE)
57
-  
57
+
58 58
   if(inherits(error, "try-error")) {
59 59
     stop(paste0("importOptimus did not complete successfully. SCE could not be generated. Error given during the import process: \n\n", error))
60 60
   }
61
-  
61
+
62 62
   if (class == "matrix") {
63 63
     mat <- as.matrix(mat)
64 64
   }
... ...
@@ -251,10 +251,10 @@
251 251
 #'  containing the count
252 252
 #'  matrix, the gene annotation, and the cell annotation.
253 253
 #' @examples
254
+#' file.path <- system.file("extdata/Optimus_20x1000",
255
+#'   package = "singleCellTK")
254 256
 #' \dontrun{
255
-#' sce <- importOptimus(OptimusDirs =
256
-#'   system.file("extdata/Optimus_20x1000",
257
-#'   package = "singleCellTK"),
257
+#' sce <- importOptimus(OptimusDirs = file.path,
258 258
 #'   samples = "Optimus_20x1000")
259 259
 #' }
260 260
 #' @export
Browse code

Merge devel branch (Oct 5) into master branch

Yusuke Koga authored on 09/10/2020 17:57:06
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,285 @@
1
+
2
+#' @importFrom reticulate import
3
+.readMatrixNpz <- function(matrixLocation,
4
+  colIndexLocation,
5
+  rowIndexLocation,
6
+  class,
7
+  delayedArray) {
8
+
9
+  ## Now importing these functions in 'reticulate_setup.R' file
10
+  #  sparse <- reticulate::import("scipy.sparse")
11
+  #  numpy <- reticulate::import("numpy")
12
+  if (!reticulate::py_module_available(module = "scipy.sparse")) {
13
+    stop("Error!", "Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() 
14
+         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
15
+         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
16
+         restarted since the module installation. Alternatively, scipy can be installed on the local machine
17
+         with pip (e.g. pip install scipy) and then the 'use_python()' function from the 'reticulate' package
18
+         can be used to select the correct Python environment.")
19
+  }
20
+  if (!reticulate::py_module_available(module = "numpy")) {
21
+    stop("Error!", "Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda() 
22
+         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
23
+         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
24
+         restarted since the module installation. Alternatively, numpy can be installed on the local machine
25
+         with pip (e.g. pip install numpy) and then the 'use_python()' function from the 'reticulate' package
26
+         can be used to select the correct Python environment.")
27
+  }
28
+  
29
+  error <- try({
30
+    mat <- sparse$load_npz(matrixLocation)
31
+    colIndex <- as.vector(numpy$load(colIndexLocation, allow_pickle = TRUE))
32
+    rowIndex <- as.vector(numpy$load(rowIndexLocation, allow_pickle = TRUE))
33
+    colnames(mat) <- colIndex
34
+    rownames(mat) <- rowIndex
35
+    mat <- t(mat)
36
+
37
+    ## Convert to "dgCMatrix"
38
+    newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
39
+    newM <- methods::as(newM, "dgCMatrix")
40
+    breaks <- seq(2, ncol(mat), by=1000)
41
+    if(length(breaks) > 2) {
42
+      for(i in seq(2, length(breaks))) {
43
+        ix <- seq(breaks[i-1], (breaks[i]-1))
44
+        newM <- cbind(newM, mat[,ix])
45
+      }
46
+      ix <- seq(utils::tail(breaks, n = 1), ncol(mat))
47
+      newM <- cbind(newM, mat[,ix])
48
+    } else {
49
+      ix <- seq(2, ncol(mat))
50
+      newM <- cbind(newM, mat[,ix])
51
+    }
52
+
53
+    colnames(newM) <- colnames(mat)
54
+    rownames(newM) <- rownames(mat)
55
+    mat <- newM
56
+  }, silent = TRUE)
57
+  
58
+  if(inherits(error, "try-error")) {
59
+    stop(paste0("importOptimus did not complete successfully. SCE could not be generated. Error given during the import process: \n\n", error))
60
+  }
61
+  
62
+  if (class == "matrix") {
63
+    mat <- as.matrix(mat)
64
+  }
65
+
66
+  if (isTRUE(delayedArray)) {
67
+    mat <- DelayedArray::DelayedArray(mat)
68
+  }
69
+  return(mat)
70
+}
71
+
72
+
73
+.readMetricsOptimus <- function(path) {
74
+  metrics <- data.table::fread(path)
75
+  return(metrics)
76
+}
77
+
78
+
79
+.readEmptyDrops <- function(path) {
80
+  emptyDrops <- data.table::fread(path)
81
+  colnames(emptyDrops) <- paste0("dropletUtils_emptyDrops_",
82
+    colnames(emptyDrops))
83
+  return(emptyDrops)
84
+}
85
+
86
+
87
+.combineColData <- function(colnames, cellMetrics, emptyDrops) {
88
+  cd <- data.table::data.table(CellId = colnames)
89
+  cd <- merge(cd,
90
+    cellMetrics,
91
+    by.x = "CellId",
92
+    by.y = "V1",
93
+    all.x = TRUE,
94
+    all.y = FALSE,
95
+    sort = FALSE)
96
+
97
+  if (!is.null(emptyDrops)) {
98
+    cd <- merge(cd,
99
+      emptyDrops,
100
+      by.x = "CellId",
101
+      by.y = "dropletUtils_emptyDrops_CellId",
102
+      all.x = TRUE,
103
+      all.y = FALSE,
104
+      sort = FALSE)
105
+  }
106
+
107
+  return(cd)
108
+}
109
+
110
+
111
+.combineRowData <- function(rownames, geneMetrics) {
112
+  rd <- data.table::data.table(feature_ID = rownames)
113
+  rd <- merge(rd,
114
+    geneMetrics,
115
+    by.x = "feature_ID",
116
+    by.y = "V1",
117
+    all.x = TRUE,
118
+    all.y = FALSE,
119
+    sort = FALSE)
120
+  return(rd)
121
+}
122
+
123
+
124
+.constructSCEFromOptimusOutputs <- function(dir,
125
+  sample,
126
+  matrixLocation,
127
+  colIndexLocation,
128
+  rowIndexLocation,
129
+  cellMetricsLocation,
130
+  geneMetricsLocation,
131
+  emptyDropsLocation,
132
+  class,
133
+  delayedArray) {
134
+
135
+  mat <- .readMatrixNpz(file.path(dir, matrixLocation),
136
+    file.path(dir, colIndexLocation),
137
+    file.path(dir, rowIndexLocation),
138
+    class,
139
+    delayedArray)
140
+
141
+  cellMetrics <- .readMetricsOptimus(file.path(dir, cellMetricsLocation))
142
+  geneMetrics <- .readMetricsOptimus(file.path(dir, geneMetricsLocation))
143
+
144
+  if (!is.null(geneMetricsLocation)) {
145
+    emptyDrops <- .readEmptyDrops(file.path(dir, emptyDropsLocation))
146
+  }
147
+
148
+  cd <- .combineColData(colnames(mat), cellMetrics, emptyDrops)
149
+  rd <- .combineRowData(rownames(mat), geneMetrics)
150
+
151
+  coln <- paste(sample, colnames(mat), sep = "_")
152
+
153
+  sce <- SingleCellExperiment::SingleCellExperiment(
154
+    assays = list(counts = mat))
155
+  SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cd,
156
+    column_name = coln,
157
+    sample = sample,
158
+    row.names = coln)
159
+  SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(rd)
160
+
161
+  return(sce)
162
+}
163
+
164
+
165
+.checkArgsImportOptimus <- function(OptimusDirs,
166
+  samples) {
167
+
168
+  if (length(OptimusDirs) != length(samples)) {
169
+    stop("'OptimusDirs' and 'samples' have unequal lengths!")
170
+  }
171
+}
172
+
173
+
174
+.importOptimus <- function(OptimusDirs,
175
+  samples,
176
+  matrixLocation,
177
+  colIndexLocation,
178
+  rowIndexLocation,
179
+  cellMetricsLocation,
180
+  geneMetricsLocation,
181
+  emptyDropsLocation,
182
+  class,
183
+  delayedArray) {
184
+
185
+  .checkArgsImportOptimus(OptimusDirs, samples)
186
+
187
+  res <- vector("list", length = length(samples))
188
+
189
+  for (i in seq_along(samples)) {
190
+    scei <- .constructSCEFromOptimusOutputs(OptimusDirs[i],
191
+      sample = samples[i],
192
+      matrixLocation = matrixLocation,
193
+      colIndexLocation = colIndexLocation,
194
+      rowIndexLocation = rowIndexLocation,
195
+      cellMetricsLocation = cellMetricsLocation,
196
+      geneMetricsLocation = geneMetricsLocation,
197
+      emptyDropsLocation = emptyDropsLocation,
198
+      class = class,
199
+      delayedArray = delayedArray)
200
+    res[[i]] <- scei
201
+  }
202
+
203
+  sce <- do.call(SingleCellExperiment::cbind, res)
204
+  return(sce)
205
+}
206
+
207
+
208
+#' @name importOptimus
209
+#' @rdname importOptimus
210
+#' @title Construct SCE object from Optimus output
211
+#' @description Read the barcodes, features (genes), and matrices from Optimus
212
+#'  outputs. Import them
213
+#'  as one \link[SingleCellExperiment]{SingleCellExperiment} object.
214
+#' @param OptimusDirs A vector of root directories of Optimus output files.
215
+#'  The paths should be something like this:
216
+#'  \code{/PATH/TO/bb4a2a5e-ff34-41b6-97d2-0c0c0c534530}.
217
+#'  Each entry in \code{OptimusDirs} is considered a sample and should have
218
+#'  its own path. Must have the same length as \code{samples}.
219
+#' @param samples A vector of user-defined sample names for the sample to be
220
+#'  imported. Must have the same length as \code{OptimusDirs}.
221
+#' @param matrixLocation Character. It is the intermediate
222
+#'  path to the filtered count maxtrix file saved in sparse matrix format
223
+#'  (\code{.npz}). Default
224
+#'  \code{call-MergeCountFiles/sparse_counts.npz} which works for
225
+#'  optimus_v1.4.0.
226
+#' @param colIndexLocation Character. The intermediate path to the barcode
227
+#'  index file. Default \code{call-MergeCountFiles/sparse_counts_col_index.npy}.
228
+#' @param rowIndexLocation Character. The intermediate path to the feature
229
+#'  (gene) index file. Default
230
+#'  \code{call-MergeCountFiles/sparse_counts_row_index.npy}.
231
+#' @param cellMetricsLocation Character. It is the intermediate
232
+#'  path to the cell metrics file (\code{merged-cell-metrics.csv.gz}). Default
233
+#'  \code{call-MergeCellMetrics/merged-cell-metrics.csv.gz} which works for
234
+#'  optimus_v1.4.0.
235
+#' @param geneMetricsLocation Character. It is the intermediate
236
+#'  path to the feature (gene) metrics file (\code{merged-gene-metrics.csv.gz}).
237
+#'  Default \code{call-MergeGeneMetrics/merged-gene-metrics.csv.gz} which works
238
+#'  for optimus_v1.4.0.
239
+#' @param emptyDropsLocation Character. It is the intermediate
240
+#'  path to \link[DropletUtils]{emptyDrops} metrics file
241
+#'  (\code{empty_drops_result.csv}).
242
+#'  Default \code{call-RunEmptyDrops/empty_drops_result.csv} which works for
243
+#'  optimus_v1.4.0.
244
+#' @param class Character. The class of the expression matrix stored in the SCE
245
+#'  object. Can be one of "Matrix" (as returned by
246
+#'  \link[Matrix]{readMM} function), or "matrix" (as returned by
247
+#'  \link[base]{matrix} function). Default "Matrix".
248
+#' @param delayedArray Boolean. Whether to read the expression matrix as
249
+#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
250
+#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object
251
+#'  containing the count
252
+#'  matrix, the gene annotation, and the cell annotation.
253
+#' @examples
254
+#' \dontrun{
255
+#' sce <- importOptimus(OptimusDirs =
256
+#'   system.file("extdata/Optimus_20x1000",
257
+#'   package = "singleCellTK"),
258
+#'   samples = "Optimus_20x1000")
259
+#' }
260
+#' @export
261
+importOptimus <- function(OptimusDirs,
262
+  samples,
263
+  matrixLocation = "call-MergeCountFiles/sparse_counts.npz",
264
+  colIndexLocation = "call-MergeCountFiles/sparse_counts_col_index.npy",
265
+  rowIndexLocation = "call-MergeCountFiles/sparse_counts_row_index.npy",
266
+  cellMetricsLocation = "call-MergeCellMetrics/merged-cell-metrics.csv.gz",
267
+  geneMetricsLocation = "call-MergeGeneMetrics/merged-gene-metrics.csv.gz",
268
+  emptyDropsLocation = "call-RunEmptyDrops/empty_drops_result.csv",
269
+  class = c("Matrix", "matrix"),
270
+  delayedArray = TRUE) {
271
+
272
+  class <- match.arg(class)
273
+
274
+  .importOptimus(OptimusDirs = OptimusDirs,
275
+    samples = samples,
276
+    matrixLocation = matrixLocation,
277
+    colIndexLocation = colIndexLocation,
278
+    rowIndexLocation = rowIndexLocation,
279
+    cellMetricsLocation = cellMetricsLocation,
280
+    geneMetricsLocation = geneMetricsLocation,
281
+    emptyDropsLocation = emptyDropsLocation,
282
+    class = class,
283
+    delayedArray = delayedArray)
284
+
285
+}
Browse code

Revert "Sctk documentation "

Joshua D. Campbell authored on 09/06/2020 23:22:05 • GitHub committed on 09/06/2020 23:22:05
Showing1 changed files
1 1
deleted file mode 100644
... ...
@@ -1,285 +0,0 @@
1
-
2
-#' @importFrom reticulate import
3
-.readMatrixNpz <- function(matrixLocation,
4
-  colIndexLocation,
5
-  rowIndexLocation,
6
-  class,
7
-  delayedArray) {
8
-
9
-  ## Now importing these functions in 'reticulate_setup.R' file
10
-  #  sparse <- reticulate::import("scipy.sparse")
11
-  #  numpy <- reticulate::import("numpy")
12
-  if (!reticulate::py_module_available(module = "scipy.sparse")) {
13
-    stop("Error!", "Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() 
14
-         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
15
-         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
16
-         restarted since the module installation. Alternatively, scipy can be installed on the local machine
17
-         with pip (e.g. pip install scipy) and then the 'use_python()' function from the 'reticulate' package
18
-         can be used to select the correct Python environment.")
19
-  }
20
-  if (!reticulate::py_module_available(module = "numpy")) {
21
-    stop("Error!", "Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda() 
22
-         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
23
-         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
24
-         restarted since the module installation. Alternatively, numpy can be installed on the local machine
25
-         with pip (e.g. pip install numpy) and then the 'use_python()' function from the 'reticulate' package
26
-         can be used to select the correct Python environment.")
27
-  }
28
-  
29
-  error <- try({
30
-    mat <- sparse$load_npz(matrixLocation)
31
-    colIndex <- as.vector(numpy$load(colIndexLocation, allow_pickle = TRUE))
32
-    rowIndex <- as.vector(numpy$load(rowIndexLocation, allow_pickle = TRUE))
33
-    colnames(mat) <- colIndex
34
-    rownames(mat) <- rowIndex
35
-    mat <- t(mat)
36
-
37
-    ## Convert to "dgCMatrix"
38
-    newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
39
-    newM <- methods::as(newM, "dgCMatrix")
40
-    breaks <- seq(2, ncol(mat), by=1000)
41
-    if(length(breaks) > 2) {
42
-      for(i in seq(2, length(breaks))) {
43
-        ix <- seq(breaks[i-1], (breaks[i]-1))
44
-        newM <- cbind(newM, mat[,ix])
45
-      }
46
-      ix <- seq(utils::tail(breaks, n = 1), ncol(mat))
47
-      newM <- cbind(newM, mat[,ix])
48
-    } else {
49
-      ix <- seq(2, ncol(mat))
50
-      newM <- cbind(newM, mat[,ix])
51
-    }
52
-
53
-    colnames(newM) <- colnames(mat)
54
-    rownames(newM) <- rownames(mat)
55
-    mat <- newM
56
-  }, silent = TRUE)
57
-  
58
-  if(inherits(error, "try-error")) {
59
-    stop(paste0("importOptimus did not complete successfully. SCE could not be generated. Error given during the import process: \n\n", error))
60
-  }
61
-  
62
-  if (class == "matrix") {
63
-    mat <- as.matrix(mat)
64
-  }
65
-
66
-  if (isTRUE(delayedArray)) {
67
-    mat <- DelayedArray::DelayedArray(mat)
68
-  }
69
-  return(mat)
70
-}
71
-
72
-
73
-.readMetricsOptimus <- function(path) {
74
-  metrics <- data.table::fread(path)
75
-  return(metrics)
76
-}
77
-
78
-
79
-.readEmptyDrops <- function(path) {
80
-  emptyDrops <- data.table::fread(path)
81
-  colnames(emptyDrops) <- paste0("dropletUtils_emptyDrops_",
82
-    colnames(emptyDrops))
83
-  return(emptyDrops)
84
-}
85
-
86
-
87
-.combineColData <- function(colnames, cellMetrics, emptyDrops) {
88
-  cd <- data.table::data.table(CellId = colnames)
89
-  cd <- merge(cd,
90
-    cellMetrics,
91
-    by.x = "CellId",
92
-    by.y = "V1",
93
-    all.x = TRUE,
94
-    all.y = FALSE,
95
-    sort = FALSE)
96
-
97
-  if (!is.null(emptyDrops)) {
98
-    cd <- merge(cd,
99
-      emptyDrops,
100
-      by.x = "CellId",
101
-      by.y = "dropletUtils_emptyDrops_CellId",
102
-      all.x = TRUE,
103
-      all.y = FALSE,
104
-      sort = FALSE)
105
-  }
106
-
107
-  return(cd)
108
-}
109
-
110
-
111
-.combineRowData <- function(rownames, geneMetrics) {
112
-  rd <- data.table::data.table(feature_ID = rownames)
113
-  rd <- merge(rd,
114
-    geneMetrics,
115
-    by.x = "feature_ID",
116
-    by.y = "V1",
117
-    all.x = TRUE,
118
-    all.y = FALSE,
119
-    sort = FALSE)
120
-  return(rd)
121
-}
122
-
123
-
124
-.constructSCEFromOptimusOutputs <- function(dir,
125
-  sample,
126
-  matrixLocation,
127
-  colIndexLocation,
128
-  rowIndexLocation,
129
-  cellMetricsLocation,
130
-  geneMetricsLocation,
131
-  emptyDropsLocation,
132
-  class,
133
-  delayedArray) {
134
-
135
-  mat <- .readMatrixNpz(file.path(dir, matrixLocation),
136
-    file.path(dir, colIndexLocation),
137
-    file.path(dir, rowIndexLocation),
138
-    class,
139
-    delayedArray)
140
-
141
-  cellMetrics <- .readMetricsOptimus(file.path(dir, cellMetricsLocation))
142
-  geneMetrics <- .readMetricsOptimus(file.path(dir, geneMetricsLocation))
143
-
144
-  if (!is.null(geneMetricsLocation)) {
145
-    emptyDrops <- .readEmptyDrops(file.path(dir, emptyDropsLocation))
146
-  }
147
-
148
-  cd <- .combineColData(colnames(mat), cellMetrics, emptyDrops)
149
-  rd <- .combineRowData(rownames(mat), geneMetrics)
150
-
151
-  coln <- paste(sample, colnames(mat), sep = "_")
152
-
153
-  sce <- SingleCellExperiment::SingleCellExperiment(
154
-    assays = list(counts = mat))
155
-  SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(cd,
156
-    column_name = coln,
157
-    sample = sample,
158
-    row.names = coln)
159
-  SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(rd)
160
-
161
-  return(sce)
162
-}
163
-
164
-
165
-.checkArgsImportOptimus <- function(OptimusDirs,
166
-  samples) {
167
-
168
-  if (length(OptimusDirs) != length(samples)) {
169
-    stop("'OptimusDirs' and 'samples' have unequal lengths!")
170
-  }
171
-}
172
-
173
-
174
-.importOptimus <- function(OptimusDirs,
175
-  samples,
176
-  matrixLocation,
177
-  colIndexLocation,
178
-  rowIndexLocation,
179
-  cellMetricsLocation,
180
-  geneMetricsLocation,
181
-  emptyDropsLocation,
182
-  class,
183
-  delayedArray) {
184
-
185
-  .checkArgsImportOptimus(OptimusDirs, samples)
186
-
187
-  res <- vector("list", length = length(samples))
188
-
189
-  for (i in seq_along(samples)) {
190
-    scei <- .constructSCEFromOptimusOutputs(OptimusDirs[i],
191
-      sample = samples[i],
192
-      matrixLocation = matrixLocation,
193
-      colIndexLocation = colIndexLocation,
194
-      rowIndexLocation = rowIndexLocation,
195
-      cellMetricsLocation = cellMetricsLocation,
196
-      geneMetricsLocation = geneMetricsLocation,
197
-      emptyDropsLocation = emptyDropsLocation,
198
-      class = class,
199
-      delayedArray = delayedArray)
200
-    res[[i]] <- scei
201
-  }
202
-
203
-  sce <- do.call(SingleCellExperiment::cbind, res)
204
-  return(sce)
205
-}
206
-
207
-
208
-#' @name importOptimus
209
-#' @rdname importOptimus
210
-#' @title Construct SCE object from Optimus output
211
-#' @description Read the barcodes, features (genes), and matrices from Optimus
212
-#'  outputs. Import them
213
-#'  as one \link[SingleCellExperiment]{SingleCellExperiment} object.
214
-#' @param OptimusDirs A vector of root directories of Optimus output files.
215
-#'  The paths should be something like this:
216
-#'  \code{/PATH/TO/bb4a2a5e-ff34-41b6-97d2-0c0c0c534530}.
217
-#'  Each entry in \code{OptimusDirs} is considered a sample and should have
218
-#'  its own path. Must have the same length as \code{samples}.
219
-#' @param samples A vector of user-defined sample names for the sample to be
220
-#'  imported. Must have the same length as \code{OptimusDirs}.
221
-#' @param matrixLocation Character. It is the intermediate
222
-#'  path to the filtered count maxtrix file saved in sparse matrix format
223
-#'  (\code{.npz}). Default
224
-#'  \code{call-MergeCountFiles/sparse_counts.npz} which works for
225
-#'  optimus_v1.4.0.
226
-#' @param colIndexLocation Character. The intermediate path to the barcode
227
-#'  index file. Default \code{call-MergeCountFiles/sparse_counts_col_index.npy}.
228
-#' @param rowIndexLocation Character. The intermediate path to the feature
229
-#'  (gene) index file. Default
230
-#'  \code{call-MergeCountFiles/sparse_counts_row_index.npy}.
231
-#' @param cellMetricsLocation Character. It is the intermediate
232
-#'  path to the cell metrics file (\code{merged-cell-metrics.csv.gz}). Default
233
-#'  \code{call-MergeCellMetrics/merged-cell-metrics.csv.gz} which works for
234
-#'  optimus_v1.4.0.
235
-#' @param geneMetricsLocation Character. It is the intermediate
236
-#'  path to the feature (gene) metrics file (\code{merged-gene-metrics.csv.gz}).
237
-#'  Default \code{call-MergeGeneMetrics/merged-gene-metrics.csv.gz} which works
238
-#'  for optimus_v1.4.0.
239
-#' @param emptyDropsLocation Character. It is the intermediate
240
-#'  path to \link[DropletUtils]{emptyDrops} metrics file
241
-#'  (\code{empty_drops_result.csv}).
242
-#'  Default \code{call-RunEmptyDrops/empty_drops_result.csv} which works for
243
-#'  optimus_v1.4.0.
244
-#' @param class Character. The class of the expression matrix stored in the SCE
245
-#'  object. Can be one of "Matrix" (as returned by
246
-#'  \link[Matrix]{readMM} function), or "matrix" (as returned by
247
-#'  \link[base]{matrix} function). Default "Matrix".
248
-#' @param delayedArray Boolean. Whether to read the expression matrix as
249
-#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
250
-#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object
251
-#'  containing the count
252
-#'  matrix, the gene annotation, and the cell annotation.
253
-#' @examples
254
-#' \dontrun{
255
-#' sce <- importOptimus(OptimusDirs =
256
-#'   system.file("extdata/Optimus_20x1000",
257
-#'   package = "singleCellTK"),
258
-#'   samples = "Optimus_20x1000")
259
-#' }
260
-#' @export
261
-importOptimus <- function(OptimusDirs,
262
-  samples,
263
-  matrixLocation = "call-MergeCountFiles/sparse_counts.npz",
264
-  colIndexLocation = "call-MergeCountFiles/sparse_counts_col_index.npy",
265
-  rowIndexLocation = "call-MergeCountFiles/sparse_counts_row_index.npy",
266
-  cellMetricsLocation = "call-MergeCellMetrics/merged-cell-metrics.csv.gz",
267
-  geneMetricsLocation = "call-MergeGeneMetrics/merged-gene-metrics.csv.gz",
268
-  emptyDropsLocation = "call-RunEmptyDrops/empty_drops_result.csv",
269
-  class = c("Matrix", "matrix"),
270
-  delayedArray = TRUE) {
271
-
272
-  class <- match.arg(class)
273
-
274
-  .importOptimus(OptimusDirs = OptimusDirs,
275
-    samples = samples,
276
-    matrixLocation = matrixLocation,
277
-    colIndexLocation = colIndexLocation,
278
-    rowIndexLocation = rowIndexLocation,
279
-    cellMetricsLocation = cellMetricsLocation,
280
-    geneMetricsLocation = geneMetricsLocation,
281
-    emptyDropsLocation = emptyDropsLocation,
282
-    class = class,
283
-    delayedArray = delayedArray)
284
-
285
-}
Browse code

Merge pull request #261 from shruthibandyadka/devel

test import & QC functions

Joshua D. Campbell authored on 11/05/2020 21:42:53 • GitHub committed on 11/05/2020 21:42:53
Showing0 changed files
Browse code

Adding test import functions

Shruthi Bandyadka authored on 07/05/2020 17:40:45
Showing1 changed files
... ...
@@ -8,7 +8,7 @@
8 8
 
9 9
   ## Now importing these functions in 'reticulate_setup.R' file
10 10
   #  sparse <- reticulate::import("scipy.sparse")
11
-  #  np <- reticulate::import("numpy")
11
+  #  numpy <- reticulate::import("numpy")
12 12
   if (!reticulate::py_module_available(module = "scipy.sparse")) {
13 13
     stop("Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() 
14 14
          or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
Browse code

Refine CellRanger import and import error messages

Vidya Akavoor authored on 04/05/2020 15:40:50
Showing1 changed files
... ...
@@ -10,7 +10,7 @@
10 10
   #  sparse <- reticulate::import("scipy.sparse")
11 11
   #  np <- reticulate::import("numpy")
12 12
   if (!reticulate::py_module_available(module = "scipy.sparse")) {
13
-    stop("Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() 
13
+    stop("Error!", "Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() 
14 14
          or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
15 15
          make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
16 16
          restarted since the module installation. Alternatively, scipy can be installed on the local machine
... ...
@@ -18,7 +18,7 @@
18 18
          can be used to select the correct Python environment.")
19 19
   }
20 20
   if (!reticulate::py_module_available(module = "numpy")) {
21
-    stop("Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda() 
21
+    stop("Error!", "Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda() 
22 22
          or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
23 23
          make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
24 24
          restarted since the module installation. Alternatively, numpy can be installed on the local machine
Browse code

Added 'try' statement and made the error messages more verbose

Joshua D. Campbell authored on 17/02/2020 04:44:30
Showing1 changed files
... ...
@@ -10,42 +10,55 @@
10 10
   #  sparse <- reticulate::import("scipy.sparse")
11 11
   #  np <- reticulate::import("numpy")
12 12
   if (!reticulate::py_module_available(module = "scipy.sparse")) {
13
-    stop("Cannot find python module 'scipy.sparse', please install through pip (e.g. pip install scipy.sparse)
14
-            or use 'use_python()' to select correct Python environment.")
13
+    stop("Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() 
14
+         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
15
+         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
16
+         restarted since the module installation. Alternatively, scipy can be installed on the local machine
17
+         with pip (e.g. pip install scipy) and then the 'use_python()' function from the 'reticulate' package
18
+         can be used to select the correct Python environment.")
15 19
   }
16 20
   if (!reticulate::py_module_available(module = "numpy")) {
17
-    stop("Cannot find python module 'numpy', please install through pip (e.g. pip install numpy)
18
-            or use 'use_python()' to select correct Python environment.")
21
+    stop("Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda() 
22
+         or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules,
23
+         make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been
24
+         restarted since the module installation. Alternatively, numpy can be installed on the local machine
25
+         with pip (e.g. pip install numpy) and then the 'use_python()' function from the 'reticulate' package
26
+         can be used to select the correct Python environment.")
19 27
   }
20 28
   
21
-
22
-  mat <- sparse$load_npz(matrixLocation)
23
-  colIndex <- as.vector(numpy$load(colIndexLocation, allow_pickle = TRUE))
24
-  rowIndex <- as.vector(numpy$load(rowIndexLocation, allow_pickle = TRUE))
25
-  colnames(mat) <- colIndex
26
-  rownames(mat) <- rowIndex
27
-  mat <- t(mat)
28
-
29
-  ## Convert to "dgCMatrix"
30
-  newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
31
-  newM <- methods::as(newM, "dgCMatrix")
32
-  breaks <- seq(2, ncol(mat), by=1000)
33
-  if(length(breaks) > 2) {
34
-    for(i in seq(2, length(breaks))) {
35
-      ix <- seq(breaks[i-1], (breaks[i]-1))
29
+  error <- try({
30
+    mat <- sparse$load_npz(matrixLocation)
31
+    colIndex <- as.vector(numpy$load(colIndexLocation, allow_pickle = TRUE))
32
+    rowIndex <- as.vector(numpy$load(rowIndexLocation, allow_pickle = TRUE))
33
+    colnames(mat) <- colIndex
34
+    rownames(mat) <- rowIndex
35
+    mat <- t(mat)
36
+
37
+    ## Convert to "dgCMatrix"
38
+    newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
39
+    newM <- methods::as(newM, "dgCMatrix")
40
+    breaks <- seq(2, ncol(mat), by=1000)
41
+    if(length(breaks) > 2) {
42
+      for(i in seq(2, length(breaks))) {
43
+        ix <- seq(breaks[i-1], (breaks[i]-1))
44
+        newM <- cbind(newM, mat[,ix])
45
+      }
46
+      ix <- seq(utils::tail(breaks, n = 1), ncol(mat))
47
+      newM <- cbind(newM, mat[,ix])
48
+    } else {
49
+      ix <- seq(2, ncol(mat))
36 50
       newM <- cbind(newM, mat[,ix])
37 51
     }
38
-    ix <- seq(utils::tail(breaks, n = 1), ncol(mat))
39
-    newM <- cbind(newM, mat[,ix])
40
-  } else {
41
-    ix <- seq(2, ncol(mat))
42
-    newM <- cbind(newM, mat[,ix])
43
-  }
44
-
45
-  colnames(newM) <- colnames(mat)
46
-  rownames(newM) <- rownames(mat)
47
-  mat <- newM
48 52
 
53
+    colnames(newM) <- colnames(mat)
54
+    rownames(newM) <- rownames(mat)
55
+    mat <- newM
56
+  }, silent = TRUE)
57
+  
58
+  if(inherits(error, "try-error")) {
59
+    stop(paste0("importOptimus did not complete successfully. SCE could not be generated. Error given during the import process: \n\n", error))
60
+  }
61
+  
49 62
   if (class == "matrix") {
50 63
     mat <- as.matrix(mat)
51 64
   }
Browse code

Updated reticulate import calls for importOptimus

Joshua D. Campbell authored on 15/02/2020 03:05:50
Showing1 changed files
... ...
@@ -6,8 +6,18 @@
6 6
   class,
7 7
   delayedArray) {
8 8
 
9
-#  sparse <- reticulate::import("scipy.sparse")
10
-#  np <- reticulate::import("numpy")
9
+  ## Now importing these functions in 'reticulate_setup.R' file
10
+  #  sparse <- reticulate::import("scipy.sparse")
11
+  #  np <- reticulate::import("numpy")
12
+  if (!reticulate::py_module_available(module = "scipy.sparse")) {
13
+    stop("Cannot find python module 'scipy.sparse', please install through pip (e.g. pip install scipy.sparse)
14
+            or use 'use_python()' to select correct Python environment.")
15
+  }
16
+  if (!reticulate::py_module_available(module = "numpy")) {
17
+    stop("Cannot find python module 'numpy', please install through pip (e.g. pip install numpy)
18
+            or use 'use_python()' to select correct Python environment.")
19
+  }
20
+  
11 21
 
12 22
   mat <- sparse$load_npz(matrixLocation)
13 23
   colIndex <- as.vector(numpy$load(colIndexLocation, allow_pickle = TRUE))
Browse code

Updated reticulate import calls for importOptimus

Joshua D. Campbell authored on 15/02/2020 01:58:48
Showing1 changed files
... ...
@@ -6,12 +6,12 @@
6 6
   class,
7 7
   delayedArray) {
8 8
 
9
-  sparse <- reticulate::import("scipy.sparse")
10
-  np <- reticulate::import("numpy")
9
+#  sparse <- reticulate::import("scipy.sparse")
10
+#  np <- reticulate::import("numpy")
11 11
 
12 12
   mat <- sparse$load_npz(matrixLocation)
13
-  colIndex <- as.vector(np$load(colIndexLocation, allow_pickle = TRUE))
14
-  rowIndex <- as.vector(np$load(rowIndexLocation, allow_pickle = TRUE))
13
+  colIndex <- as.vector(numpy$load(colIndexLocation, allow_pickle = TRUE))
14
+  rowIndex <- as.vector(numpy$load(rowIndexLocation, allow_pickle = TRUE))
15 15
   colnames(mat) <- colIndex
16 16
   rownames(mat) <- rowIndex
17 17
   mat <- t(mat)
Browse code

Fixed errors/warnings in DoubletFinder code. Updated docs. Added documentation for importCellRangerV2/V3Sample functions. Added install of DoubletFinder from github in DESCRIPTION and in .travis.yml

Joshua D. Campbell authored on 15/02/2020 00:05:10
Showing1 changed files
... ...
@@ -25,7 +25,7 @@
25 25
       ix <- seq(breaks[i-1], (breaks[i]-1))
26 26
       newM <- cbind(newM, mat[,ix])
27 27
     }
28
-    ix <- seq(tail(breaks, n = 1), ncol(mat))
28
+    ix <- seq(utils::tail(breaks, n = 1), ncol(mat))
29 29
     newM <- cbind(newM, mat[,ix])
30 30
   } else {
31 31
     ix <- seq(2, ncol(mat))
Browse code

Merge branch 'importQC' of github.com:joshua-d-campbell/singleCellTK into importQC

zhewa authored on 11/02/2020 01:16:21
Showing0 changed files
Browse code

fix bug

zhewa authored on 06/02/2020 04:32:28
Showing1 changed files
... ...
@@ -159,7 +159,6 @@
159 159
   class,
160 160
   delayedArray) {
161 161
 
162
-  class <- match.arg(class)
163 162
   .checkArgsImportOptimus(OptimusDirs, samples)
164 163
 
165 164
   res <- vector("list", length = length(samples))
... ...
@@ -247,6 +246,8 @@ importOptimus <- function(OptimusDirs,
247 246
   class = c("Matrix", "matrix"),
248 247
   delayedArray = TRUE) {
249 248
 
249
+  class <- match.arg(class)
250
+
250 251
   .importOptimus(OptimusDirs = OptimusDirs,
251 252
     samples = samples,
252 253
     matrixLocation = matrixLocation,
Browse code

various updates

add dataType = c("raw", "filtered") for importCellRangerV2 & V3
delayedArray = TRUE
add importSingleCellMatrix

zhewa authored on 06/02/2020 04:08:26
Showing1 changed files
... ...
@@ -3,7 +3,8 @@
3 3
 .readMatrixNpz <- function(matrixLocation,
4 4
   colIndexLocation,
5 5
   rowIndexLocation,
6
-  class) {
6
+  class,
7
+  delayedArray) {
7 8
 
8 9
   sparse <- reticulate::import("scipy.sparse")
9 10
   np <- reticulate::import("numpy")
... ...
@@ -12,7 +13,7 @@
12 13
   colIndex <- as.vector(np$load(colIndexLocation, allow_pickle = TRUE))
13 14
   rowIndex <- as.vector(np$load(rowIndexLocation, allow_pickle = TRUE))
14 15
   colnames(mat) <- colIndex
15
-  rownames(mat) <- rowIndex  
16
+  rownames(mat) <- rowIndex
16 17
   mat <- t(mat)
17 18
 
18 19
   ## Convert to "dgCMatrix"
... ...
@@ -20,30 +21,29 @@
20 21
   newM <- as(newM, "dgCMatrix")
21 22
   breaks <- seq(2, ncol(mat), by=1000)
22 23
   if(length(breaks) > 2) {
23
-	for(i in seq(2, length(breaks))) {
24
-	  ix <- seq(breaks[i-1], (breaks[i]-1))
25
-	  newM <- cbind(newM, mat[,ix])
26
-	}
27
-	ix <- seq(tail(breaks, n = 1), ncol(mat))
28
-	newM <- cbind(newM, mat[,ix])
24
+    for(i in seq(2, length(breaks))) {
25
+      ix <- seq(breaks[i-1], (breaks[i]-1))
26
+      newM <- cbind(newM, mat[,ix])
27
+    }
28
+    ix <- seq(tail(breaks, n = 1), ncol(mat))
29
+    newM <- cbind(newM, mat[,ix])
29 30
   } else {
30 31
     ix <- seq(2, ncol(mat))
31 32
     newM <- cbind(newM, mat[,ix])
32
-  }  
33
-  
33
+  }
34
+
34 35
   colnames(newM) <- colnames(mat)
35
-  rownames(newM) <- rownames(mat)  
36
+  rownames(newM) <- rownames(mat)
36 37
   mat <- newM
37
-  
38
-  if (class == "Matrix") {
39
-    return(mat)
40
-  } else if (class == "DelayedArray") {
41
-    mat <- DelayedArray::DelayedArray(mat)
42
-    return(mat)
43
-  } else if (class == "matrix") {
38
+
39
+  if (class == "matrix") {
44 40
     mat <- as.matrix(mat)
45
-    return(mat)
46 41
   }
42
+
43
+  if (isTRUE(delayedArray)) {
44
+    mat <- DelayedArray::DelayedArray(mat)
45
+  }
46
+  return(mat)
47 47
 }
48 48
 
49 49
 
... ...
@@ -106,12 +106,14 @@
106 106
   cellMetricsLocation,
107 107
   geneMetricsLocation,
108 108
   emptyDropsLocation,
109
-  class) {
109
+  class,
110
+  delayedArray) {
110 111
 
111 112
   mat <- .readMatrixNpz(file.path(dir, matrixLocation),
112 113
     file.path(dir, colIndexLocation),
113 114
     file.path(dir, rowIndexLocation),
114
-    class)
115
+    class,
116
+    delayedArray)
115 117
 
116 118
   cellMetrics <- .readMetricsOptimus(file.path(dir, cellMetricsLocation))
117 119
   geneMetrics <- .readMetricsOptimus(file.path(dir, geneMetricsLocation))
... ...
@@ -138,16 +140,11 @@
138 140
 
139 141
 
140 142
 .checkArgsImportOptimus <- function(OptimusDirs,
141
-  samples,
142
-  class) {
143
+  samples) {
143 144
 
144 145
   if (length(OptimusDirs) != length(samples)) {
145 146
     stop("'OptimusDirs' and 'samples' have unequal lengths!")
146 147
   }
147
-
148
-  if (!(class %in% c("DelayedArray", "Matrix", "matrix"))) {
149
-    stop("Invalid 'class' argument!")
150
-  }
151 148
 }
152 149
 
153 150
 
... ...
@@ -159,9 +156,11 @@
159 156
   cellMetricsLocation,
160 157
   geneMetricsLocation,
161 158
   emptyDropsLocation,
162
-  class) {
159
+  class,
160
+  delayedArray) {
163 161
 
164
-  .checkArgsImportOptimus(OptimusDirs, samples, class)
162
+  class <- match.arg(class)
163
+  .checkArgsImportOptimus(OptimusDirs, samples)
165 164
 
166 165
   res <- vector("list", length = length(samples))
167 166
 
... ...
@@ -174,7 +173,8 @@
174 173
       cellMetricsLocation = cellMetricsLocation,
175 174
       geneMetricsLocation = geneMetricsLocation,
176 175
       emptyDropsLocation = emptyDropsLocation,
177
-      class = class)
176
+      class = class,
177
+      delayedArray = delayedArray)
178 178
     res[[i]] <- scei
179 179
   }
180 180
 
... ...
@@ -219,12 +219,12 @@
219 219
 #'  (\code{empty_drops_result.csv}).
220 220
 #'  Default \code{call-RunEmptyDrops/empty_drops_result.csv} which works for
221 221
 #'  optimus_v1.4.0.
222
-#' @param class Character. The class of the expression matrix stored in the
223
-#'  \link[SingleCellExperiment]{SingleCellExperiment}
224
-#'  object. Can be one of "DelayedArray" (as returned by
225
-#'  \link[DelayedArray]{DelayedArray} function), "Matrix" (as returned by
222
+#' @param class Character. The class of the expression matrix stored in the SCE
223
+#'  object. Can be one of "Matrix" (as returned by
226 224
 #'  \link[Matrix]{readMM} function), or "matrix" (as returned by
227 225
 #'  \link[base]{matrix} function). Default "Matrix".
226
+#' @param delayedArray Boolean. Whether to read the expression matrix as
227
+#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
228 228
 #' @return A \link[SingleCellExperiment]{SingleCellExperiment} object
229 229
 #'  containing the count
230 230
 #'  matrix, the gene annotation, and the cell annotation.
... ...
@@ -244,7 +244,8 @@ importOptimus <- function(OptimusDirs,
244 244
   cellMetricsLocation = "call-MergeCellMetrics/merged-cell-metrics.csv.gz",
245 245
   geneMetricsLocation = "call-MergeGeneMetrics/merged-gene-metrics.csv.gz",
246 246
   emptyDropsLocation = "call-RunEmptyDrops/empty_drops_result.csv",
247
-  class = "Matrix") {
247
+  class = c("Matrix", "matrix"),
248
+  delayedArray = TRUE) {
248 249
 
249 250
   .importOptimus(OptimusDirs = OptimusDirs,
250 251
     samples = samples,
... ...
@@ -254,6 +255,7 @@ importOptimus <- function(OptimusDirs,
254 255
     cellMetricsLocation = cellMetricsLocation,
255 256
     geneMetricsLocation = geneMetricsLocation,
256 257
     emptyDropsLocation = emptyDropsLocation,
257
-    class = class)
258
+    class = class,
259
+    delayedArray = delayedArray)
258 260
 
259 261
 }
Browse code

Fixed package/function references

Joshua D. Campbell authored on 03/02/2020 21:03:41
Showing1 changed files
... ...
@@ -17,14 +17,14 @@
17 17
 
18 18
   ## Convert to "dgCMatrix"
19 19
   newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
20
-  newM <- as(newM, "dgCMatrix")
20
+  newM <- methods::as(newM, "dgCMatrix")
21 21
   breaks <- seq(2, ncol(mat), by=1000)
22 22
   if(length(breaks) > 2) {
23 23
 	for(i in seq(2, length(breaks))) {
24 24
 	  ix <- seq(breaks[i-1], (breaks[i]-1))
25 25
 	  newM <- cbind(newM, mat[,ix])
26 26
 	}
27
-	ix <- seq(tail(breaks, n = 1), ncol(mat))
27
+	ix <- seq(utils::tail(breaks, n = 1), ncol(mat))
28 28
 	newM <- cbind(newM, mat[,ix])
29 29
   } else {
30 30
     ix <- seq(2, ncol(mat))
... ...
@@ -178,7 +178,7 @@
178 178
     res[[i]] <- scei
179 179
   }
180 180
 
181
-  sce <- do.call(BiocGenerics::cbind, res)
181
+  sce <- do.call(SingleCellExperiment::cbind, res)
182 182
   return(sce)
183 183
 }
184 184
 
Browse code

Do not run examples requiring python

Joshua D. Campbell authored on 24/01/2020 20:22:57
Showing1 changed files
... ...
@@ -229,10 +229,12 @@
229 229
 #'  containing the count
230 230
 #'  matrix, the gene annotation, and the cell annotation.
231 231
 #' @examples
232
+#' \dontrun{
232 233
 #' sce <- importOptimus(OptimusDirs =
233 234
 #'   system.file("extdata/Optimus_20x1000",
234 235
 #'   package = "singleCellTK"),
235 236
 #'   samples = "Optimus_20x1000")
237
+#' }
236 238
 #' @export
237 239
 importOptimus <- function(OptimusDirs,
238 240
   samples,
Browse code

Fixed colnames of first cell after conversion

Joshua D. Campbell authored on 24/01/2020 18:40:18
Showing1 changed files
... ...
@@ -11,6 +11,8 @@
11 11
   mat <- sparse$load_npz(matrixLocation)
12 12
   colIndex <- as.vector(np$load(colIndexLocation, allow_pickle = TRUE))
13 13
   rowIndex <- as.vector(np$load(rowIndexLocation, allow_pickle = TRUE))
14
+  colnames(mat) <- colIndex
15
+  rownames(mat) <- rowIndex  
14 16
   mat <- t(mat)
15 17
 
16 18
   ## Convert to "dgCMatrix"
... ...
@@ -29,10 +31,10 @@
29 31
     newM <- cbind(newM, mat[,ix])
30 32
   }  
31 33
   
34
+  colnames(newM) <- colnames(mat)
35
+  rownames(newM) <- rownames(mat)  
32 36
   mat <- newM
33
-  colnames(mat) <- colIndex
34
-  rownames(mat) <- rowIndex
35
-
37
+  
36 38
   if (class == "Matrix") {
37 39
     return(mat)
38 40
   } else if (class == "DelayedArray") {
Browse code

Fixed colnames of first cell after conversion

Joshua D. Campbell authored on 24/01/2020 18:31:24
Showing1 changed files
... ...
@@ -11,8 +11,6 @@
11 11
   mat <- sparse$load_npz(matrixLocation)
12 12
   colIndex <- as.vector(np$load(colIndexLocation, allow_pickle = TRUE))
13 13
   rowIndex <- as.vector(np$load(rowIndexLocation, allow_pickle = TRUE))
14
-  colnames(mat) <- colIndex
15
-  rownames(mat) <- rowIndex
16 14
   mat <- t(mat)
17 15
 
18 16
   ## Convert to "dgCMatrix"
... ...
@@ -32,6 +30,9 @@
32 30
   }  
33 31
   
34 32
   mat <- newM
33
+  colnames(mat) <- colIndex
34
+  rownames(mat) <- rowIndex
35
+
35 36
   if (class == "Matrix") {
36 37
     return(mat)
37 38
   } else if (class == "DelayedArray") {
Browse code

Fixed conversion of dgRMatrix when matrix is small

Joshua D. Campbell authored on 24/01/2020 18:07:51
Showing1 changed files
... ...
@@ -19,7 +19,7 @@
19 19
   newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
20 20
   newM <- as(newM, "dgCMatrix")
21 21
   breaks <- seq(2, ncol(mat), by=1000)
22
-  if(length(breaks) > 1) {
22
+  if(length(breaks) > 2) {
23 23
 	for(i in seq(2, length(breaks))) {
24 24
 	  ix <- seq(breaks[i-1], (breaks[i]-1))
25 25
 	  newM <- cbind(newM, mat[,ix])
Browse code

Fixed conversion of dgRMatrix when matrix is small

Joshua D. Campbell authored on 24/01/2020 18:04:31
Showing1 changed files
... ...
@@ -19,14 +19,19 @@
19 19
   newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
20 20
   newM <- as(newM, "dgCMatrix")
21 21
   breaks <- seq(2, ncol(mat), by=1000)
22
-  for(i in seq(2, length(breaks))) {
23
-    ix <- seq(breaks[i-1], (breaks[i]-1))
22
+  if(length(breaks) > 1) {
23
+	for(i in seq(2, length(breaks))) {
24
+	  ix <- seq(breaks[i-1], (breaks[i]-1))
25
+	  newM <- cbind(newM, mat[,ix])
26
+	}
27
+	ix <- seq(tail(breaks, n = 1), ncol(mat))
28
+	newM <- cbind(newM, mat[,ix])
29
+  } else {
30
+    ix <- seq(2, ncol(mat))
24 31
     newM <- cbind(newM, mat[,ix])
25
-  }
26
-  ix <- seq(tail(breaks, n = 1), ncol(mat))
27
-  newM <- cbind(newM, mat[,ix])
28
-  mat <- newM
32
+  }  
29 33
   
34
+  mat <- newM
30 35
   if (class == "Matrix") {
31 36
     return(mat)
32 37
   } else if (class == "DelayedArray") {
Browse code

changed path to example data

Joshua D. Campbell authored on 24/01/2020 17:53:40
Showing1 changed files
... ...
@@ -222,7 +222,7 @@
222 222
 #'  matrix, the gene annotation, and the cell annotation.
223 223
 #' @examples
224 224
 #' sce <- importOptimus(OptimusDirs =
225
-#'   system.file("extdata/Optimus_20x1000/bb4a2a5e-ff34-41b6-97d2-0c0c0c534530",
225
+#'   system.file("extdata/Optimus_20x1000",
226 226
 #'   package = "singleCellTK"),
227 227
 #'   samples = "Optimus_20x1000")
228 228
 #' @export
Browse code

Fixed merge conflicts

Joshua D. Campbell authored on 24/01/2020 13:36:46
Showing0 changed files
Browse code

add reticulate to imports

zhewa authored on 24/01/2020 06:29:49
Showing1 changed files
... ...
@@ -1,4 +1,5 @@
1 1
 
2
+#' @importFrom reticulate import
2 3
 .readMatrixNpz <- function(matrixLocation,
3 4
   colIndexLocation,
4 5
   rowIndexLocation,
Browse code

Fixed importOptimus to return correct matrix

Joshua D. Campbell authored on 24/01/2020 01:08:42
Showing1 changed files
... ...
@@ -24,6 +24,7 @@
24 24
   }
25 25
   ix <- seq(tail(breaks, n = 1), ncol(mat))
26 26
   newM <- cbind(newM, mat[,ix])
27
+  mat <- newM
27 28
   
28 29
   if (class == "Matrix") {
29 30
     return(mat)
Browse code

Added package prefix

Joshua D. Campbell authored on 24/01/2020 00:22:48
Showing1 changed files
... ...
@@ -15,7 +15,7 @@
15 15
   mat <- t(mat)
16 16
 
17 17
   ## Convert to "dgCMatrix"
18
-  newM <- Matrix(mat[,1], nrow=nrow(mat))
18
+  newM <- Matrix::Matrix(mat[,1], nrow=nrow(mat))
19 19
   newM <- as(newM, "dgCMatrix")
20 20
   breaks <- seq(2, ncol(mat), by=1000)
21 21
   for(i in seq(2, length(breaks))) {
Browse code

convert dgRMatrix to dgCMatrix

Joshua D. Campbell authored on 23/01/2020 22:01:04
Showing1 changed files
... ...
@@ -14,6 +14,17 @@
14 14
   rownames(mat) <- rowIndex
15 15
   mat <- t(mat)
16 16
 
17
+  ## Convert to "dgCMatrix"
18
+  newM <- Matrix(mat[,1], nrow=nrow(mat))
19
+  newM <- as(newM, "dgCMatrix")
20
+  breaks <- seq(2, ncol(mat), by=1000)
21
+  for(i in seq(2, length(breaks))) {
22
+    ix <- seq(breaks[i-1], (breaks[i]-1))
23
+    newM <- cbind(newM, mat[,ix])
24
+  }
25
+  ix <- seq(tail(breaks, n = 1), ncol(mat))
26
+  newM <- cbind(newM, mat[,ix])
27
+  
17 28
   if (class == "Matrix") {
18 29
     return(mat)
19 30
   } else if (class == "DelayedArray") {
Browse code

add Optimus

zhewa authored on 18/01/2020 21:54:49
Showing1 changed files
... ...
@@ -1,145 +1,236 @@
1
-.readMatrix <- function(path) {
2 1
 
3
-  res <- readRDS(path)
4
-  matrix <- t(as.matrix(res))
2
+.readMatrixNpz <- function(matrixLocation,
3
+  colIndexLocation,
4
+  rowIndexLocation,
5
+  class) {
6
+
7
+  sparse <- reticulate::import("scipy.sparse")
8
+  np <- reticulate::import("numpy")
9
+
10
+  mat <- sparse$load_npz(matrixLocation)
11
+  colIndex <- as.vector(np$load(colIndexLocation, allow_pickle = TRUE))
12
+  rowIndex <- as.vector(np$load(rowIndexLocation, allow_pickle = TRUE))
13
+  colnames(mat) <- colIndex
14
+  rownames(mat) <- rowIndex
15
+  mat <- t(mat)
5 16
 
6 17
   if (class == "Matrix") {
7
-    return(matrix)
18
+    return(mat)
8 19
   } else if (class == "DelayedArray") {
9
-    res <- DelayedArray::DelayedArray(res)
10
-    return(matrix)
20
+    mat <- DelayedArray::DelayedArray(mat)
21
+    return(mat)
22
+  } else if (class == "matrix") {
23
+    mat <- as.matrix(mat)
24
+    return(mat)
11 25
   }
12 26
 }
13 27
 
14 28
 
15
-.readMetrics <- function(path) {
29
+.readMetricsOptimus <- function(path) {
30
+  metrics <- data.table::fread(path)
31
+  return(metrics)
32
+}
33
+
34
+
35
+.readEmptyDrops <- function(path) {
36
+  emptyDrops <- data.table::fread(path)
37
+  colnames(emptyDrops) <- paste0("dropletUtils_emptyDrops_",
38
+    colnames(emptyDrops))
39
+  return(emptyDrops)
40
+}
16 41
 
17
-  res <- fread(path)
18
-  res <- as.dataframe(res)
19
-  return(res)
20 42
 
21
-  if (ncol(res) == 1) {
22
-    stop("There are no Cell or Gene Metrics!")
43
+.combineColData <- function(colnames, cellMetrics, emptyDrops) {
44
+  cd <- data.table::data.table(CellId = colnames)
45
+  cd <- merge(cd,
46
+    cellMetrics,
47
+    by.x = "CellId",
48
+    by.y = "V1",
49
+    all.x = TRUE,
50
+    all.y = FALSE,
51
+    sort = FALSE)
52
+
53
+  if (!is.null(emptyDrops)) {
54
+    cd <- merge(cd,
55
+      emptyDrops,
56
+      by.x = "CellId",
57
+      by.y = "dropletUtils_emptyDrops_CellId",
58
+      all.x = TRUE,
59
+      all.y = FALSE,
60
+      sort = FALSE)
23 61
   }
24 62
 
63
+  return(cd)
25 64
 }
26 65
 
27
-.readEmptyDrops<-function(path){
28
-  EmptyDrops<-read.csv(path)
66
+
67
+.combineRowData <- function(rownames, geneMetrics) {
68
+  rd <- data.table::data.table(feature_ID = rownames)
69
+  rd <- merge(rd,
70
+    geneMetrics,
71
+    by.x = "feature_ID",
72
+    by.y = "V1",
73
+    all.x = TRUE,
74
+    all.y = FALSE,
75
+    sort = FALSE)
76
+  return(rd)