... | ... |
@@ -44,7 +44,7 @@ |
44 | 44 |
tryCatch({ |
45 | 45 |
SingleCellExperiment::reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name]) |
46 | 46 |
}, error = function(x){ |
47 |
- error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
|
47 |
+ error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE reducedDims. Skipping. ") |
|
48 | 48 |
message(error_message) |
49 | 49 |
}) |
50 | 50 |
} |
... | ... |
@@ -96,6 +96,8 @@ |
96 | 96 |
#' object. Can be one of "Matrix" (as returned by |
97 | 97 |
#' \link{readMM} function), or "matrix" (as returned by |
98 | 98 |
#' \link[base]{matrix} function). Default \code{"Matrix"}. |
99 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
100 |
+#' \code{TRUE}. |
|
99 | 101 |
#' @details |
100 | 102 |
#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the |
101 | 103 |
#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells |
... | ... |
@@ -120,7 +120,8 @@ |
120 | 120 |
importAnnData <- function(sampleDirs = NULL, |
121 | 121 |
sampleNames = NULL, |
122 | 122 |
delayedArray = FALSE, |
123 |
- class = c("Matrix", "matrix")) { |
|
123 |
+ class = c("Matrix", "matrix"), |
|
124 |
+ rowNamesDedup = TRUE) { |
|
124 | 125 |
|
125 | 126 |
if (length(sampleDirs)!=length(sampleNames)){ |
126 | 127 |
stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories") |
... | ... |
@@ -138,6 +139,15 @@ importAnnData <- function(sampleDirs = NULL, |
138 | 139 |
res[[i]] <- scei |
139 | 140 |
} |
140 | 141 |
sce <- do.call(SingleCellExperiment::cbind, res) |
142 |
+ |
|
143 |
+ if (isTRUE(rowNamesDedup)) { |
|
144 |
+ if (any(duplicated(rownames(sce)))) { |
|
145 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
146 |
+ "... suffix to them.") |
|
147 |
+ } |
|
148 |
+ sce <- dedupRowNames(sce) |
|
149 |
+ } |
|
150 |
+ |
|
141 | 151 |
return(sce) |
142 | 152 |
} |
143 | 153 |
|
... | ... |
@@ -1,6 +1,7 @@ |
1 | 1 |
.importAnnDataSample <- function(sampleDir = './', |
2 | 2 |
sampleName = 'sample', |
3 |
- delayedArray = FALSE){ |
|
3 |
+ delayedArray = FALSE, |
|
4 |
+ class){ |
|
4 | 5 |
|
5 | 6 |
anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep='')) |
6 | 7 |
if (!file.exists(anndata_file)){ |
... | ... |
@@ -9,6 +10,13 @@ |
9 | 10 |
anndata <- ad$read_h5ad(anndata_file) |
10 | 11 |
|
11 | 12 |
counts_matrix <- t((reticulate::py_to_r(anndata$X))) |
13 |
+ |
|
14 |
+ if (class == "Matrix") { |
|
15 |
+ counts_matrix <- .convertToMatrix(counts_matrix) |
|
16 |
+ } else if (class == "matrix") { |
|
17 |
+ counts_matrix <- base::as.matrix(counts_matrix) |
|
18 |
+ } |
|
19 |
+ |
|
12 | 20 |
if (isTRUE(delayedArray)) { |
13 | 21 |
counts_matrix <- DelayedArray::DelayedArray(counts_matrix) |
14 | 22 |
} |
... | ... |
@@ -84,6 +92,10 @@ |
84 | 92 |
#' } |
85 | 93 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
86 | 94 |
#' \link{DelayedArray} object. Default \code{FALSE}. |
95 |
+#' @param class Character. The class of the expression matrix stored in the SCE |
|
96 |
+#' object. Can be one of "Matrix" (as returned by |
|
97 |
+#' \link{readMM} function), or "matrix" (as returned by |
|
98 |
+#' \link[base]{matrix} function). Default \code{"Matrix"}. |
|
87 | 99 |
#' @details |
88 | 100 |
#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the |
89 | 101 |
#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells |
... | ... |
@@ -107,18 +119,22 @@ |
107 | 119 |
#' @export |
108 | 120 |
importAnnData <- function(sampleDirs = NULL, |
109 | 121 |
sampleNames = NULL, |
110 |
- delayedArray = FALSE) { |
|
122 |
+ delayedArray = FALSE, |
|
123 |
+ class = c("Matrix", "matrix")) { |
|
111 | 124 |
|
112 | 125 |
if (length(sampleDirs)!=length(sampleNames)){ |
113 | 126 |
stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories") |
114 | 127 |
} |
115 | 128 |
|
129 |
+ class <- match.arg(class) |
|
130 |
+ |
|
116 | 131 |
res <- vector("list", length = length(sampleDirs)) |
117 | 132 |
|
118 | 133 |
for (i in seq_along(sampleDirs)){ |
119 | 134 |
scei <- .importAnnDataSample(sampleDir = sampleDirs[[i]], |
120 | 135 |
sampleName = sampleNames[[i]], |
121 |
- delayedArray = delayedArray) |
|
136 |
+ delayedArray = delayedArray, |
|
137 |
+ class = class) |
|
122 | 138 |
res[[i]] <- scei |
123 | 139 |
} |
124 | 140 |
sce <- do.call(SingleCellExperiment::cbind, res) |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
.importAnnDataSample <- function(sampleDir = './', |
2 | 2 |
sampleName = 'sample', |
3 |
- delayedArray = TRUE){ |
|
3 |
+ delayedArray = FALSE){ |
|
4 | 4 |
|
5 | 5 |
anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep='')) |
6 | 6 |
if (!file.exists(anndata_file)){ |
... | ... |
@@ -83,7 +83,7 @@ |
83 | 83 |
#' with the sample name appended to each colname in colData |
84 | 84 |
#' } |
85 | 85 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
86 |
-#' \link{DelayedArray} object. Default \code{TRUE}. |
|
86 |
+#' \link{DelayedArray} object. Default \code{FALSE}. |
|
87 | 87 |
#' @details |
88 | 88 |
#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the |
89 | 89 |
#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells |
... | ... |
@@ -107,7 +107,7 @@ |
107 | 107 |
#' @export |
108 | 108 |
importAnnData <- function(sampleDirs = NULL, |
109 | 109 |
sampleNames = NULL, |
110 |
- delayedArray = TRUE) { |
|
110 |
+ delayedArray = FALSE) { |
|
111 | 111 |
|
112 | 112 |
if (length(sampleDirs)!=length(sampleNames)){ |
113 | 113 |
stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories") |
... | ... |
@@ -83,7 +83,7 @@ |
83 | 83 |
#' with the sample name appended to each colname in colData |
84 | 84 |
#' } |
85 | 85 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
86 |
-#' \link[DelayedArray]{DelayedArray} object. Default \code{TRUE}. |
|
86 |
+#' \link{DelayedArray} object. Default \code{TRUE}. |
|
87 | 87 |
#' @details |
88 | 88 |
#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the |
89 | 89 |
#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells |
... | ... |
@@ -1,25 +1,25 @@ |
1 | 1 |
.importAnnDataSample <- function(sampleDir = './', |
2 | 2 |
sampleName = 'sample', |
3 | 3 |
delayedArray = TRUE){ |
4 |
- |
|
4 |
+ |
|
5 | 5 |
anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep='')) |
6 | 6 |
if (!file.exists(anndata_file)){ |
7 | 7 |
stop("AnnData file not found at specified location. Please check path provided and/or filename.") |
8 | 8 |
} |
9 | 9 |
anndata <- ad$read_h5ad(anndata_file) |
10 |
- |
|
10 |
+ |
|
11 | 11 |
counts_matrix <- t((reticulate::py_to_r(anndata$X))) |
12 | 12 |
if (isTRUE(delayedArray)) { |
13 | 13 |
counts_matrix <- DelayedArray::DelayedArray(counts_matrix) |
14 | 14 |
} |
15 |
- |
|
15 |
+ |
|
16 | 16 |
sce_rowdata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$var)) |
17 | 17 |
sce_coldata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$obs)) |
18 | 18 |
sce <- SingleCellExperiment(list(counts = counts_matrix), |
19 | 19 |
rowData = sce_rowdata, |
20 | 20 |
colData = sce_coldata) |
21 | 21 |
colnames(sce) <- paste0(sampleName,"_",colnames(sce)) |
22 |
- |
|
22 |
+ |
|
23 | 23 |
multidim_observations <- reticulate::py_to_r(anndata$obsm_keys()) |
24 | 24 |
for(obsm_name in multidim_observations){ |
25 | 25 |
tryCatch({ |
... | ... |
@@ -28,9 +28,9 @@ |
28 | 28 |
error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
29 | 29 |
message(error_message) |
30 | 30 |
}) |
31 |
- |
|
31 |
+ |
|
32 | 32 |
} |
33 |
- |
|
33 |
+ |
|
34 | 34 |
unstructured_data <- reticulate::py_to_r(anndata$uns_keys()) |
35 | 35 |
for(uns_name in unstructured_data){ |
36 | 36 |
tryCatch({ |
... | ... |
@@ -39,71 +39,72 @@ |
39 | 39 |
error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ") |
40 | 40 |
message(error_message) |
41 | 41 |
}) |
42 |
- |
|
42 |
+ |
|
43 | 43 |
} |
44 |
- |
|
44 |
+ |
|
45 | 45 |
return(sce) |
46 |
- |
|
46 |
+ |
|
47 | 47 |
} |
48 | 48 |
|
49 | 49 |
#' @name importAnnData |
50 | 50 |
#' @rdname importAnnData |
51 | 51 |
#' @title Create a SingleCellExperiment Object from Python AnnData .h5ad files |
52 |
-#' @description This function reads in one or more Python AnnData files in the .h5ad format |
|
53 |
-#' and returns a single \link[SingleCellExperiment]{SingleCellExperiment} object containing all the |
|
52 |
+#' @description This function reads in one or more Python AnnData files in the .h5ad format |
|
53 |
+#' and returns a single \link[SingleCellExperiment]{SingleCellExperiment} object containing all the |
|
54 | 54 |
#' AnnData samples by concatenating their counts matrices and related information slots. |
55 |
-#' @param sampleDirs Folder containing the .h5ad file. Can be one of - |
|
55 |
+#' @param sampleDirs Folder containing the .h5ad file. Can be one of - |
|
56 | 56 |
#' \itemize{ |
57 | 57 |
#' \item Default \code{current working directory}. |
58 |
-#' \item Full path to the directory containing the .h5ad file. |
|
58 |
+#' \item Full path to the directory containing the .h5ad file. |
|
59 | 59 |
#' E.g \code{sampleDirs = '/path/to/sample'} |
60 |
-#' \item A vector of folder paths for the samples to import. |
|
60 |
+#' \item A vector of folder paths for the samples to import. |
|
61 | 61 |
#' E.g. \code{sampleDirs = c('/path/to/sample1', '/path/to/sample2','/path/to/sample3')} |
62 |
-#' importAnnData will return a single SCE object containing all the samples |
|
62 |
+#' importAnnData will return a single SCE object containing all the samples |
|
63 | 63 |
#' with the sample name appended to each colname in colData |
64 | 64 |
#' } |
65 |
-#' @param sampleNames The prefix/name of the .h5ad file without the .h5ad extension |
|
65 |
+#' @param sampleNames The prefix/name of the .h5ad file without the .h5ad extension |
|
66 | 66 |
#' e.g. if 'sample.h5ad' is the filename, pass \code{sampleNames = 'sample'}. |
67 |
-#' Can be one of - |
|
67 |
+#' Can be one of - |
|
68 | 68 |
#' \itemize{ |
69 | 69 |
#' \item Default \code{sample}. |
70 | 70 |
#' \item A vector of samples to import. Length of vector must be equal to length of sampleDirs vector |
71 | 71 |
#' E.g. \code{sampleDirs = c('sample1', 'sample2','sample3')} |
72 |
-#' importAnnData will return a single SCE object containing all the samples |
|
72 |
+#' importAnnData will return a single SCE object containing all the samples |
|
73 | 73 |
#' with the sample name appended to each colname in colData |
74 | 74 |
#' } |
75 | 75 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
76 | 76 |
#' \link[DelayedArray]{DelayedArray} object. Default \code{TRUE}. |
77 | 77 |
#' @details |
78 | 78 |
#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the |
79 |
-#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells |
|
79 |
+#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells |
|
80 | 80 |
#' format and becomes the 'counts' matrix in the assay slot. The .vars AnnData slot becomes the SCE rowData |
81 | 81 |
#' and the .obs AnnData slot becomes the SCE colData. Multidimensional data in the .obsm AnnData slot is |
82 |
-#' ported over to the SCE reducedDims slot. Additionally, unstructured data in the .uns AnnData slot is |
|
83 |
-#' available through the SCE metadata slot. |
|
84 |
-#' There are 2 currently known minor issues - |
|
82 |
+#' ported over to the SCE reducedDims slot. Additionally, unstructured data in the .uns AnnData slot is |
|
83 |
+#' available through the SCE metadata slot. |
|
84 |
+#' There are 2 currently known minor issues - |
|
85 | 85 |
#' Anndata python module depends on another python module h5pyto read hd5 format files. |
86 | 86 |
#' If there are errors reading the .h5ad files, such as "ValueError: invalid shape in fixed-type tuple." |
87 |
-#' the user will need to do downgrade h5py by running \code{pip3 install --user h5py==2.9.0} |
|
87 |
+#' the user will need to do downgrade h5py by running \code{pip3 install --user h5py==2.9.0} |
|
88 | 88 |
#' Additionally there might be errors in converting some python objects in the unstructured data slots. |
89 | 89 |
#' There are no known R solutions at present. Refer \url{https://github.com/rstudio/reticulate/issues/209} |
90 | 90 |
#' @return A \code{SingleCellExperiment} object. |
91 | 91 |
#' @examples |
92 |
+#' file.path <- system.file("extdata/annData_pbmc_3k", package = "singleCellTK") |
|
92 | 93 |
#' \dontrun{ |
93 |
-#' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"), |
|
94 |
+#' sce <- importAnnData(sampleDirs = file.path, |
|
94 | 95 |
#' sampleNames = 'pbmc3k_20by20') |
95 | 96 |
#' } |
96 | 97 |
#' @export |
97 | 98 |
importAnnData <- function(sampleDirs = NULL, |
98 | 99 |
sampleNames = NULL, |
99 | 100 |
delayedArray = TRUE) { |
100 |
- |
|
101 |
+ |
|
101 | 102 |
if (length(sampleDirs)!=length(sampleNames)){ |
102 | 103 |
stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories") |
103 | 104 |
} |
104 |
- |
|
105 |
+ |
|
105 | 106 |
res <- vector("list", length = length(sampleDirs)) |
106 |
- |
|
107 |
+ |
|
107 | 108 |
for (i in seq_along(sampleDirs)){ |
108 | 109 |
scei <- .importAnnDataSample(sampleDir = sampleDirs[[i]], |
109 | 110 |
sampleName = sampleNames[[i]], |
... | ... |
@@ -19,18 +19,28 @@ |
19 | 19 |
rowData = sce_rowdata, |
20 | 20 |
colData = sce_coldata) |
21 | 21 |
colnames(sce) <- paste0(sampleName,"_",colnames(sce)) |
22 |
+ |
|
23 |
+ multi_Assay <- reticulate::py_to_r(anndata$layers$as_dict()) |
|
24 |
+ for(assay_name in names(multi_Assay)){ |
|
25 |
+ tryCatch({ |
|
26 |
+ SummarizedExperiment::assay(sce, assay_name, withDimnames = FALSE) <- t(reticulate::py_to_r(multi_Assay[[assay_name]])) |
|
27 |
+ base::dimnames(SummarizedExperiment::assay(sce, assay_name)) <- base::dimnames(SummarizedExperiment::assay(sce, "counts")) |
|
28 |
+ }, error = function(x){ |
|
29 |
+ error_message <- paste0("Warning: unable to add '",assay_name,"' from .layers AnnData slot to SCE assay. Skipping. ") |
|
30 |
+ message(error_message) |
|
31 |
+ }) |
|
32 |
+ } |
|
22 | 33 |
|
23 | 34 |
multidim_observations <- reticulate::py_to_r(anndata$obsm_keys()) |
24 | 35 |
for(obsm_name in multidim_observations){ |
25 | 36 |
tryCatch({ |
26 |
- reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name]) |
|
37 |
+ SingleCellExperiment::reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name]) |
|
27 | 38 |
}, error = function(x){ |
28 |
- error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
|
39 |
+ error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
|
29 | 40 |
message(error_message) |
30 | 41 |
}) |
31 |
- |
|
32 | 42 |
} |
33 |
- |
|
43 |
+ |
|
34 | 44 |
unstructured_data <- reticulate::py_to_r(anndata$uns_keys()) |
35 | 45 |
for(uns_name in unstructured_data){ |
36 | 46 |
tryCatch({ |
... | ... |
@@ -89,9 +89,10 @@ |
89 | 89 |
#' There are no known R solutions at present. Refer \url{https://github.com/rstudio/reticulate/issues/209} |
90 | 90 |
#' @return A \code{SingleCellExperiment} object. |
91 | 91 |
#' @examples |
92 |
+#' \dontrun{ |
|
92 | 93 |
#' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"), |
93 | 94 |
#' sampleNames = 'pbmc3k_20by20') |
94 |
- |
|
95 |
+#' } |
|
95 | 96 |
#' @export |
96 | 97 |
importAnnData <- function(sampleDirs = NULL, |
97 | 98 |
sampleNames = NULL, |
... | ... |
@@ -34,7 +34,6 @@ |
34 | 34 |
unstructured_data <- reticulate::py_to_r(anndata$uns_keys()) |
35 | 35 |
for(uns_name in unstructured_data){ |
36 | 36 |
tryCatch({ |
37 |
- #metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name]) |
|
38 | 37 |
sce@metadata[[sampleName]]$annData[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name]) |
39 | 38 |
}, error = function(x){ |
40 | 39 |
error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ") |
... | ... |
@@ -34,7 +34,8 @@ |
34 | 34 |
unstructured_data <- reticulate::py_to_r(anndata$uns_keys()) |
35 | 35 |
for(uns_name in unstructured_data){ |
36 | 36 |
tryCatch({ |
37 |
- metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name]) |
|
37 |
+ #metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name]) |
|
38 |
+ sce@metadata[[sampleName]]$annData[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name]) |
|
38 | 39 |
}, error = function(x){ |
39 | 40 |
error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ") |
40 | 41 |
message(error_message) |
... | ... |
@@ -90,7 +90,7 @@ |
90 | 90 |
#' @return A \code{SingleCellExperiment} object. |
91 | 91 |
#' @examples |
92 | 92 |
#' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"), |
93 |
-#' sampleNames = 'pbmc3k') |
|
93 |
+#' sampleNames = 'pbmc3k_20by20') |
|
94 | 94 |
|
95 | 95 |
#' @export |
96 | 96 |
importAnnData <- function(sampleDirs = NULL, |
... | ... |
@@ -8,22 +8,22 @@ |
8 | 8 |
} |
9 | 9 |
anndata <- ad$read_h5ad(anndata_file) |
10 | 10 |
|
11 |
- counts_matrix <- t((py_to_r(anndata$X))) |
|
11 |
+ counts_matrix <- t((reticulate::py_to_r(anndata$X))) |
|
12 | 12 |
if (isTRUE(delayedArray)) { |
13 | 13 |
counts_matrix <- DelayedArray::DelayedArray(counts_matrix) |
14 | 14 |
} |
15 | 15 |
|
16 |
- sce_rowdata <- S4Vectors::DataFrame(py_to_r(anndata$var)) |
|
17 |
- sce_coldata <- S4Vectors::DataFrame(py_to_r(anndata$obs)) |
|
16 |
+ sce_rowdata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$var)) |
|
17 |
+ sce_coldata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$obs)) |
|
18 | 18 |
sce <- SingleCellExperiment(list(counts = counts_matrix), |
19 | 19 |
rowData = sce_rowdata, |
20 | 20 |
colData = sce_coldata) |
21 | 21 |
colnames(sce) <- paste0(sampleName,"_",colnames(sce)) |
22 | 22 |
|
23 |
- multidim_observations <- py_to_r(anndata$obsm_keys()) |
|
23 |
+ multidim_observations <- reticulate::py_to_r(anndata$obsm_keys()) |
|
24 | 24 |
for(obsm_name in multidim_observations){ |
25 | 25 |
tryCatch({ |
26 |
- reducedDims(sce)[[obsm_name]] <- py_to_r(anndata$obsm[obsm_name]) |
|
26 |
+ reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name]) |
|
27 | 27 |
}, error = function(x){ |
28 | 28 |
error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
29 | 29 |
message(error_message) |
... | ... |
@@ -31,10 +31,10 @@ |
31 | 31 |
|
32 | 32 |
} |
33 | 33 |
|
34 |
- unstructured_data <- py_to_r(anndata$uns_keys()) |
|
34 |
+ unstructured_data <- reticulate::py_to_r(anndata$uns_keys()) |
|
35 | 35 |
for(uns_name in unstructured_data){ |
36 | 36 |
tryCatch({ |
37 |
- metadata(sce)[[uns_name]] <- py_to_r(anndata$uns[uns_name]) |
|
37 |
+ metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name]) |
|
38 | 38 |
}, error = function(x){ |
39 | 39 |
error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ") |
40 | 40 |
message(error_message) |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,127 @@ |
1 |
+.importAnnDataSample <- function(sampleDir = './', |
|
2 |
+ sampleName = 'sample', |
|
3 |
+ delayedArray = TRUE){ |
|
4 |
+ |
|
5 |
+ anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep='')) |
|
6 |
+ if (!file.exists(anndata_file)){ |
|
7 |
+ stop("AnnData file not found at specified location. Please check path provided and/or filename.") |
|
8 |
+ } |
|
9 |
+ anndata <- ad$read_h5ad(anndata_file) |
|
10 |
+ |
|
11 |
+ counts_matrix <- t((py_to_r(anndata$X))) |
|
12 |
+ if (isTRUE(delayedArray)) { |
|
13 |
+ counts_matrix <- DelayedArray::DelayedArray(counts_matrix) |
|
14 |
+ } |
|
15 |
+ |
|
16 |
+ sce_rowdata <- S4Vectors::DataFrame(py_to_r(anndata$var)) |
|
17 |
+ sce_coldata <- S4Vectors::DataFrame(py_to_r(anndata$obs)) |
|
18 |
+ sce <- SingleCellExperiment(list(counts = counts_matrix), |
|
19 |
+ rowData = sce_rowdata, |
|
20 |
+ colData = sce_coldata) |
|
21 |
+ colnames(sce) <- paste0(sampleName,"_",colnames(sce)) |
|
22 |
+ |
|
23 |
+ multidim_observations <- py_to_r(anndata$obsm_keys()) |
|
24 |
+ for(obsm_name in multidim_observations){ |
|
25 |
+ tryCatch({ |
|
26 |
+ reducedDims(sce)[[obsm_name]] <- py_to_r(anndata$obsm[obsm_name]) |
|
27 |
+ }, error = function(x){ |
|
28 |
+ error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
|
29 |
+ message(error_message) |
|
30 |
+ }) |
|
31 |
+ |
|
32 |
+ } |
|
33 |
+ |
|
34 |
+ unstructured_data <- py_to_r(anndata$uns_keys()) |
|
35 |
+ for(uns_name in unstructured_data){ |
|
36 |
+ tryCatch({ |
|
37 |
+ metadata(sce)[[uns_name]] <- py_to_r(anndata$uns[uns_name]) |
|
38 |
+ }, error = function(x){ |
|
39 |
+ error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ") |
|
40 |
+ message(error_message) |
|
41 |
+ }) |
|
42 |
+ |
|
43 |
+ } |
|
44 |
+ |
|
45 |
+ return(sce) |
|
46 |
+ |
|
47 |
+} |
|
48 |
+ |
|
49 |
+#' @name importAnnData |
|
50 |
+#' @rdname importAnnData |
|
51 |
+#' @title Create a SingleCellExperiment Object from Python AnnData .h5ad files |
|
52 |
+#' @description This function reads in one or more Python AnnData files in the .h5ad format |
|
53 |
+#' and returns a single \link[SingleCellExperiment]{SingleCellExperiment} object containing all the |
|
54 |
+#' AnnData samples by concatenating their counts matrices and related information slots. |
|
55 |
+#' @param sampleDirs Folder containing the .h5ad file. Can be one of - |
|
56 |
+#' \itemize{ |
|
57 |
+#' \item Default \code{current working directory}. |
|
58 |
+#' \item Full path to the directory containing the .h5ad file. |
|
59 |
+#' E.g \code{sampleDirs = '/path/to/sample'} |
|
60 |
+#' \item A vector of folder paths for the samples to import. |
|
61 |
+#' E.g. \code{sampleDirs = c('/path/to/sample1', '/path/to/sample2','/path/to/sample3')} |
|
62 |
+#' importAnnData will return a single SCE object containing all the samples |
|
63 |
+#' with the sample name appended to each colname in colData |
|
64 |
+#' } |
|
65 |
+#' @param sampleNames The prefix/name of the .h5ad file without the .h5ad extension |
|
66 |
+#' e.g. if 'sample.h5ad' is the filename, pass \code{sampleNames = 'sample'}. |
|
67 |
+#' Can be one of - |
|
68 |
+#' \itemize{ |
|
69 |
+#' \item Default \code{sample}. |
|
70 |
+#' \item A vector of samples to import. Length of vector must be equal to length of sampleDirs vector |
|
71 |
+#' E.g. \code{sampleDirs = c('sample1', 'sample2','sample3')} |
|
72 |
+#' importAnnData will return a single SCE object containing all the samples |
|
73 |
+#' with the sample name appended to each colname in colData |
|
74 |
+#' } |
|
75 |
+#' @param delayedArray Boolean. Whether to read the expression matrix as |
|
76 |
+#' \link[DelayedArray]{DelayedArray} object. Default \code{TRUE}. |
|
77 |
+#' @details |
|
78 |
+#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the |
|
79 |
+#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells |
|
80 |
+#' format and becomes the 'counts' matrix in the assay slot. The .vars AnnData slot becomes the SCE rowData |
|
81 |
+#' and the .obs AnnData slot becomes the SCE colData. Multidimensional data in the .obsm AnnData slot is |
|
82 |
+#' ported over to the SCE reducedDims slot. Additionally, unstructured data in the .uns AnnData slot is |
|
83 |
+#' available through the SCE metadata slot. |
|
84 |
+#' There are 2 currently known minor issues - |
|
85 |
+#' Anndata python module depends on another python module h5pyto read hd5 format files. |
|
86 |
+#' If there are errors reading the .h5ad files, such as "ValueError: invalid shape in fixed-type tuple." |
|
87 |
+#' the user will need to do downgrade h5py by running \code{pip3 install --user h5py==2.9.0} |
|
88 |
+#' Additionally there might be errors in converting some python objects in the unstructured data slots. |
|
89 |
+#' There are no known R solutions at present. Refer \url{https://github.com/rstudio/reticulate/issues/209} |
|
90 |
+#' @return A \code{SingleCellExperiment} object. |
|
91 |
+#' @examples |
|
92 |
+#' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"), |
|
93 |
+#' sampleNames = 'pbmc3k') |
|
94 |
+ |
|
95 |
+#' @export |
|
96 |
+importAnnData <- function(sampleDirs = NULL, |
|
97 |
+ sampleNames = NULL, |
|
98 |
+ delayedArray = TRUE) { |
|
99 |
+ |
|
100 |
+ if (length(sampleDirs)!=length(sampleNames)){ |
|
101 |
+ stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories") |
|
102 |
+ } |
|
103 |
+ |
|
104 |
+ res <- vector("list", length = length(sampleDirs)) |
|
105 |
+ |
|
106 |
+ for (i in seq_along(sampleDirs)){ |
|
107 |
+ scei <- .importAnnDataSample(sampleDir = sampleDirs[[i]], |
|
108 |
+ sampleName = sampleNames[[i]], |
|
109 |
+ delayedArray = delayedArray) |
|
110 |
+ res[[i]] <- scei |
|
111 |
+ } |
|
112 |
+ sce <- do.call(SingleCellExperiment::cbind, res) |
|
113 |
+ return(sce) |
|
114 |
+} |
|
115 |
+ |
|
116 |
+ |
|
117 |
+ |
|
118 |
+ |
|
119 |
+ |
|
120 |
+ |
|
121 |
+ |
|
122 |
+ |
|
123 |
+ |
|
124 |
+ |
|
125 |
+ |
|
126 |
+ |
|
127 |
+ |