Browse code

Update SoupX into runCellQC function. Edit runDecontX metadata structure as metadata(sce)$sctk$runDecontX$sample. Also updated CellQC report to accommodate the changes in metadata

rz2333 authored on 17/02/2022 13:57:52
Showing1 changed files
... ...
@@ -44,7 +44,7 @@
44 44
     tryCatch({
45 45
       SingleCellExperiment::reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name])
46 46
     }, error = function(x){
47
-      error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ")
47
+      error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE reducedDims. Skipping. ")
48 48
       message(error_message)
49 49
     })
50 50
   }
Browse code

Add new feature: DEG volcano plot; optimized some other manual text

Yichen Wang authored on 23/11/2021 03:25:28
Showing1 changed files
... ...
@@ -96,6 +96,8 @@
96 96
 #'  object. Can be one of "Matrix" (as returned by
97 97
 #'  \link{readMM} function), or "matrix" (as returned by
98 98
 #'  \link[base]{matrix} function). Default \code{"Matrix"}.
99
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default 
100
+#'  \code{TRUE}.
99 101
 #' @details
100 102
 #' \code{importAnnData} converts scRNA-seq data in the AnnData format to the
101 103
 #' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells
Browse code

Update dedup functionality for import data functions & update tutorial with VAM

Yichen Wang authored on 22/11/2021 17:57:17
Showing1 changed files
... ...
@@ -120,7 +120,8 @@
120 120
 importAnnData <- function(sampleDirs = NULL,
121 121
                           sampleNames = NULL,
122 122
                           delayedArray = FALSE,
123
-                          class = c("Matrix", "matrix")) {
123
+                          class = c("Matrix", "matrix"),
124
+                          rowNamesDedup = TRUE) {
124 125
 
125 126
   if (length(sampleDirs)!=length(sampleNames)){
126 127
     stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories")
... ...
@@ -138,6 +139,15 @@ importAnnData <- function(sampleDirs = NULL,
138 139
     res[[i]] <- scei
139 140
   }
140 141
   sce <- do.call(SingleCellExperiment::cbind, res)
142
+  
143
+  if (isTRUE(rowNamesDedup)) {
144
+    if (any(duplicated(rownames(sce)))) {
145
+      message("Duplicated gene names found, adding '-1', '-2', ",
146
+              "... suffix to them.")
147
+    }
148
+    sce <- dedupRowNames(sce)
149
+  }
150
+  
141 151
   return(sce)
142 152
 }
143 153
 
Browse code

Fix dgTMatrix bugs in import functions.Change output name of row/colData in exportSCEToFlat function

rz2333 authored on 02/09/2021 17:03:47
Showing1 changed files
... ...
@@ -1,6 +1,7 @@
1 1
 .importAnnDataSample <- function(sampleDir = './',
2 2
                                  sampleName = 'sample',
3
-                                 delayedArray = FALSE){
3
+                                 delayedArray = FALSE,
4
+                                 class){
4 5
 
5 6
   anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep=''))
6 7
   if (!file.exists(anndata_file)){
... ...
@@ -9,6 +10,13 @@
9 10
   anndata <- ad$read_h5ad(anndata_file)
10 11
 
11 12
   counts_matrix <- t((reticulate::py_to_r(anndata$X)))
13
+
14
+  if (class == "Matrix") {
15
+    counts_matrix <- .convertToMatrix(counts_matrix)
16
+  } else if (class == "matrix") {
17
+    counts_matrix <- base::as.matrix(counts_matrix)
18
+  }
19
+
12 20
   if (isTRUE(delayedArray)) {
13 21
     counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
14 22
   }
... ...
@@ -84,6 +92,10 @@
84 92
 #' }
85 93
 #' @param delayedArray Boolean. Whether to read the expression matrix as
86 94
 #'  \link{DelayedArray} object. Default \code{FALSE}.
95
+#' @param class Character. The class of the expression matrix stored in the SCE
96
+#'  object. Can be one of "Matrix" (as returned by
97
+#'  \link{readMM} function), or "matrix" (as returned by
98
+#'  \link[base]{matrix} function). Default \code{"Matrix"}.
87 99
 #' @details
88 100
 #' \code{importAnnData} converts scRNA-seq data in the AnnData format to the
89 101
 #' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells
... ...
@@ -107,18 +119,22 @@
107 119
 #' @export
108 120
 importAnnData <- function(sampleDirs = NULL,
109 121
                           sampleNames = NULL,
110
-                          delayedArray = FALSE) {
122
+                          delayedArray = FALSE,
123
+                          class = c("Matrix", "matrix")) {
111 124
 
112 125
   if (length(sampleDirs)!=length(sampleNames)){
113 126
     stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories")
114 127
   }
115 128
 
129
+  class <- match.arg(class)
130
+
116 131
   res <- vector("list", length = length(sampleDirs))
117 132
 
118 133
   for (i in seq_along(sampleDirs)){
119 134
     scei <- .importAnnDataSample(sampleDir = sampleDirs[[i]],
120 135
                                  sampleName = sampleNames[[i]],
121
-                                 delayedArray = delayedArray)
136
+                                 delayedArray = delayedArray,
137
+                                 class = class)
122 138
     res[[i]] <- scei
123 139
   }
124 140
   sce <- do.call(SingleCellExperiment::cbind, res)
Browse code

Update importMitoGeneSet function. Change delayedArray=False for all import function. Minor fixs in QC HTML report

rz2333 authored on 30/12/2020 17:40:20
Showing1 changed files
... ...
@@ -1,6 +1,6 @@
1 1
 .importAnnDataSample <- function(sampleDir = './',
2 2
                                  sampleName = 'sample',
3
-                                 delayedArray = TRUE){
3
+                                 delayedArray = FALSE){
4 4
 
5 5
   anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep=''))
6 6
   if (!file.exists(anndata_file)){
... ...
@@ -83,7 +83,7 @@
83 83
 #'   with the sample name appended to each colname in colData
84 84
 #' }
85 85
 #' @param delayedArray Boolean. Whether to read the expression matrix as
86
-#'  \link{DelayedArray} object. Default \code{TRUE}.
86
+#'  \link{DelayedArray} object. Default \code{FALSE}.
87 87
 #' @details
88 88
 #' \code{importAnnData} converts scRNA-seq data in the AnnData format to the
89 89
 #' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells
... ...
@@ -107,7 +107,7 @@
107 107
 #' @export
108 108
 importAnnData <- function(sampleDirs = NULL,
109 109
                           sampleNames = NULL,
110
-                          delayedArray = TRUE) {
110
+                          delayedArray = FALSE) {
111 111
 
112 112
   if (length(sampleDirs)!=length(sampleNames)){
113 113
     stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories")
Browse code

Edit links to documentation

unknown authored on 22/10/2020 03:39:09
Showing1 changed files
... ...
@@ -83,7 +83,7 @@
83 83
 #'   with the sample name appended to each colname in colData
84 84
 #' }
85 85
 #' @param delayedArray Boolean. Whether to read the expression matrix as
86
-#'  \link[DelayedArray]{DelayedArray} object. Default \code{TRUE}.
86
+#'  \link{DelayedArray} object. Default \code{TRUE}.
87 87
 #' @details
88 88
 #' \code{importAnnData} converts scRNA-seq data in the AnnData format to the
89 89
 #' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells
Browse code

Merge from upstream

rz2333 authored on 16/10/2020 03:10:12
Showing0 changed files
Browse code

Add Runnable functions

Yusuke Koga authored on 15/10/2020 16:44:51
Showing1 changed files
... ...
@@ -1,25 +1,25 @@
1 1
 .importAnnDataSample <- function(sampleDir = './',
2 2
                                  sampleName = 'sample',
3 3
                                  delayedArray = TRUE){
4
-  
4
+
5 5
   anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep=''))
6 6
   if (!file.exists(anndata_file)){
7 7
     stop("AnnData file not found at specified location. Please check path provided and/or filename.")
8 8
   }
9 9
   anndata <- ad$read_h5ad(anndata_file)
10
-  
10
+
11 11
   counts_matrix <- t((reticulate::py_to_r(anndata$X)))
12 12
   if (isTRUE(delayedArray)) {
13 13
     counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
14 14
   }
15
-  
15
+
16 16
   sce_rowdata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$var))
17 17
   sce_coldata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$obs))
18 18
   sce <- SingleCellExperiment(list(counts = counts_matrix),
19 19
                               rowData = sce_rowdata,
20 20
                               colData = sce_coldata)
21 21
   colnames(sce) <- paste0(sampleName,"_",colnames(sce))
22
-  
22
+
23 23
   multidim_observations <- reticulate::py_to_r(anndata$obsm_keys())
24 24
   for(obsm_name in multidim_observations){
25 25
     tryCatch({
... ...
@@ -28,9 +28,9 @@
28 28
       error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ")
29 29
       message(error_message)
30 30
     })
31
-    
31
+
32 32
   }
33
-  
33
+
34 34
   unstructured_data <- reticulate::py_to_r(anndata$uns_keys())
35 35
   for(uns_name in unstructured_data){
36 36
     tryCatch({
... ...
@@ -39,71 +39,72 @@
39 39
       error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ")
40 40
       message(error_message)
41 41
     })
42
-    
42
+
43 43
   }
44
-  
44
+
45 45
   return(sce)
46
-  
46
+
47 47
 }
48 48
 
49 49
 #' @name importAnnData
50 50
 #' @rdname importAnnData
51 51
 #' @title Create a SingleCellExperiment Object from Python AnnData .h5ad files
52
-#' @description This function reads in one or more Python AnnData files in the .h5ad format 
53
-#' and returns a single \link[SingleCellExperiment]{SingleCellExperiment} object containing all the 
52
+#' @description This function reads in one or more Python AnnData files in the .h5ad format
53
+#' and returns a single \link[SingleCellExperiment]{SingleCellExperiment} object containing all the
54 54
 #' AnnData samples by concatenating their counts matrices and related information slots.
55
-#' @param sampleDirs Folder containing the .h5ad file. Can be one of - 
55
+#' @param sampleDirs Folder containing the .h5ad file. Can be one of -
56 56
 #' \itemize{
57 57
 #'   \item Default \code{current working directory}.
58
-#'   \item Full path to the directory containing the .h5ad file. 
58
+#'   \item Full path to the directory containing the .h5ad file.
59 59
 #'   E.g \code{sampleDirs = '/path/to/sample'}
60
-#'   \item A vector of folder paths for the samples to import. 
60
+#'   \item A vector of folder paths for the samples to import.
61 61
 #'   E.g. \code{sampleDirs = c('/path/to/sample1', '/path/to/sample2','/path/to/sample3')}
62
-#'   importAnnData will return a single SCE object containing all the samples 
62
+#'   importAnnData will return a single SCE object containing all the samples
63 63
 #'   with the sample name appended to each colname in colData
64 64
 #' }
65
-#' @param sampleNames The prefix/name of the .h5ad file without the .h5ad extension 
65
+#' @param sampleNames The prefix/name of the .h5ad file without the .h5ad extension
66 66
 #' e.g. if 'sample.h5ad' is the filename, pass \code{sampleNames = 'sample'}.
67
-#' Can be one of - 
67
+#' Can be one of -
68 68
 #' \itemize{
69 69
 #'   \item Default \code{sample}.
70 70
 #'   \item A vector of samples to import. Length of vector must be equal to length of sampleDirs vector
71 71
 #'   E.g. \code{sampleDirs = c('sample1', 'sample2','sample3')}
72
-#'   importAnnData will return a single SCE object containing all the samples 
72
+#'   importAnnData will return a single SCE object containing all the samples
73 73
 #'   with the sample name appended to each colname in colData
74 74
 #' }
75 75
 #' @param delayedArray Boolean. Whether to read the expression matrix as
76 76
 #'  \link[DelayedArray]{DelayedArray} object. Default \code{TRUE}.
77 77
 #' @details
78 78
 #' \code{importAnnData} converts scRNA-seq data in the AnnData format to the
79
-#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells 
79
+#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells
80 80
 #' format and becomes the 'counts' matrix in the assay slot. The .vars AnnData slot becomes the SCE rowData
81 81
 #' and the .obs AnnData slot becomes the SCE colData. Multidimensional data in the .obsm AnnData slot is
82
-#' ported over to the SCE reducedDims slot. Additionally, unstructured data in the .uns AnnData slot is 
83
-#' available through the SCE metadata slot. 
84
-#' There are 2 currently known minor issues - 
82
+#' ported over to the SCE reducedDims slot. Additionally, unstructured data in the .uns AnnData slot is
83
+#' available through the SCE metadata slot.
84
+#' There are 2 currently known minor issues -
85 85
 #' Anndata python module depends on another python module h5pyto read hd5 format files.
86 86
 #' If there are errors reading the .h5ad files, such as "ValueError: invalid shape in fixed-type tuple."
87
-#' the user will need to do downgrade h5py by running \code{pip3 install --user h5py==2.9.0} 
87
+#' the user will need to do downgrade h5py by running \code{pip3 install --user h5py==2.9.0}
88 88
 #' Additionally there might be errors in converting some python objects in the unstructured data slots.
89 89
 #' There are no known R solutions at present. Refer \url{https://github.com/rstudio/reticulate/issues/209}
90 90
 #' @return A \code{SingleCellExperiment} object.
91 91
 #' @examples
92
+#' file.path <- system.file("extdata/annData_pbmc_3k", package = "singleCellTK")
92 93
 #' \dontrun{
93
-#' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"),
94
+#' sce <- importAnnData(sampleDirs = file.path,
94 95
 #'                      sampleNames = 'pbmc3k_20by20')
95 96
 #' }
96 97
 #' @export
97 98
 importAnnData <- function(sampleDirs = NULL,
98 99
                           sampleNames = NULL,
99 100
                           delayedArray = TRUE) {
100
-  
101
+
101 102
   if (length(sampleDirs)!=length(sampleNames)){
102 103
     stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories")
103 104
   }
104
-  
105
+
105 106
   res <- vector("list", length = length(sampleDirs))
106
-  
107
+
107 108
   for (i in seq_along(sampleDirs)){
108 109
     scei <- .importAnnDataSample(sampleDir = sampleDirs[[i]],
109 110
                                  sampleName = sampleNames[[i]],
Browse code

Update SCTK_QC, importAnnData and sce2adata.R

rz2333 authored on 15/10/2020 16:02:58
Showing1 changed files
... ...
@@ -19,18 +19,28 @@
19 19
                               rowData = sce_rowdata,
20 20
                               colData = sce_coldata)
21 21
   colnames(sce) <- paste0(sampleName,"_",colnames(sce))
22
+
23
+  multi_Assay <- reticulate::py_to_r(anndata$layers$as_dict())
24
+  for(assay_name in names(multi_Assay)){
25
+    tryCatch({
26
+      SummarizedExperiment::assay(sce, assay_name, withDimnames = FALSE) <- t(reticulate::py_to_r(multi_Assay[[assay_name]]))
27
+      base::dimnames(SummarizedExperiment::assay(sce, assay_name)) <- base::dimnames(SummarizedExperiment::assay(sce, "counts"))
28
+    }, error = function(x){
29
+      error_message <- paste0("Warning: unable to add '",assay_name,"' from .layers AnnData slot to SCE assay. Skipping. ")
30
+      message(error_message)
31
+    })
32
+  }
22 33
   
23 34
   multidim_observations <- reticulate::py_to_r(anndata$obsm_keys())
24 35
   for(obsm_name in multidim_observations){
25 36
     tryCatch({
26
-      reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name])
37
+      SingleCellExperiment::reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name])
27 38
     }, error = function(x){
28
-      error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ")
39
+      error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ")
29 40
       message(error_message)
30 41
     })
31
-    
32 42
   }
33
-  
43
+
34 44
   unstructured_data <- reticulate::py_to_r(anndata$uns_keys())
35 45
   for(uns_name in unstructured_data){
36 46
     tryCatch({
Browse code

Updated description in DESCRIPTION. Moved importMultipleSources to its own file and removed some shiny code. Updated docs.

Joshua D. Campbell authored on 12/10/2020 18:25:27
Showing1 changed files
... ...
@@ -89,9 +89,10 @@
89 89
 #' There are no known R solutions at present. Refer \url{https://github.com/rstudio/reticulate/issues/209}
90 90
 #' @return A \code{SingleCellExperiment} object.
91 91
 #' @examples
92
+#' \dontrun{
92 93
 #' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"),
93 94
 #'                      sampleNames = 'pbmc3k_20by20')
94
-
95
+#' }
95 96
 #' @export
96 97
 importAnnData <- function(sampleDirs = NULL,
97 98
                           sampleNames = NULL,
Browse code

fixing anndata metadata slot

Shruthi Bandyadka authored on 08/07/2020 16:47:33
Showing1 changed files
... ...
@@ -34,7 +34,6 @@
34 34
   unstructured_data <- reticulate::py_to_r(anndata$uns_keys())
35 35
   for(uns_name in unstructured_data){
36 36
     tryCatch({
37
-      #metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name])
38 37
       sce@metadata[[sampleName]]$annData[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name])
39 38
     }, error = function(x){
40 39
       error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ")
Browse code

fixing metadata slot for anndata import

Shruthi Bandyadka authored on 08/07/2020 16:46:54
Showing1 changed files
... ...
@@ -34,7 +34,8 @@
34 34
   unstructured_data <- reticulate::py_to_r(anndata$uns_keys())
35 35
   for(uns_name in unstructured_data){
36 36
     tryCatch({
37
-      metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name])
37
+      #metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name])
38
+      sce@metadata[[sampleName]]$annData[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name])
38 39
     }, error = function(x){
39 40
       error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ")
40 41
       message(error_message)
Browse code

changing subsetted anndata filename

Shruthi Bandyadka authored on 17/06/2020 15:55:10
Showing1 changed files
... ...
@@ -90,7 +90,7 @@
90 90
 #' @return A \code{SingleCellExperiment} object.
91 91
 #' @examples
92 92
 #' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"),
93
-#'                      sampleNames = 'pbmc3k')
93
+#'                      sampleNames = 'pbmc3k_20by20')
94 94
 
95 95
 #' @export
96 96
 importAnnData <- function(sampleDirs = NULL,
Browse code

import anndata as SCE. updating reticulate dependency for py_to_r()

Shruthi Bandyadka authored on 12/06/2020 20:35:33
Showing1 changed files
... ...
@@ -8,22 +8,22 @@
8 8
   }
9 9
   anndata <- ad$read_h5ad(anndata_file)
10 10
   
11
-  counts_matrix <- t((py_to_r(anndata$X)))
11
+  counts_matrix <- t((reticulate::py_to_r(anndata$X)))
12 12
   if (isTRUE(delayedArray)) {
13 13
     counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
14 14
   }
15 15
   
16
-  sce_rowdata <- S4Vectors::DataFrame(py_to_r(anndata$var))
17
-  sce_coldata <- S4Vectors::DataFrame(py_to_r(anndata$obs))
16
+  sce_rowdata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$var))
17
+  sce_coldata <- S4Vectors::DataFrame(reticulate::py_to_r(anndata$obs))
18 18
   sce <- SingleCellExperiment(list(counts = counts_matrix),
19 19
                               rowData = sce_rowdata,
20 20
                               colData = sce_coldata)
21 21
   colnames(sce) <- paste0(sampleName,"_",colnames(sce))
22 22
   
23
-  multidim_observations <- py_to_r(anndata$obsm_keys())
23
+  multidim_observations <- reticulate::py_to_r(anndata$obsm_keys())
24 24
   for(obsm_name in multidim_observations){
25 25
     tryCatch({
26
-      reducedDims(sce)[[obsm_name]] <- py_to_r(anndata$obsm[obsm_name])
26
+      reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name])
27 27
     }, error = function(x){
28 28
       error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ")
29 29
       message(error_message)
... ...
@@ -31,10 +31,10 @@
31 31
     
32 32
   }
33 33
   
34
-  unstructured_data <- py_to_r(anndata$uns_keys())
34
+  unstructured_data <- reticulate::py_to_r(anndata$uns_keys())
35 35
   for(uns_name in unstructured_data){
36 36
     tryCatch({
37
-      metadata(sce)[[uns_name]] <- py_to_r(anndata$uns[uns_name])
37
+      metadata(sce)[[uns_name]] <- reticulate::py_to_r(anndata$uns[uns_name])
38 38
     }, error = function(x){
39 39
       error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ")
40 40
       message(error_message)
Browse code

import anndata as SCE.

Shruthi Bandyadka authored on 12/06/2020 20:12:54
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,127 @@
1
+.importAnnDataSample <- function(sampleDir = './',
2
+                                 sampleName = 'sample',
3
+                                 delayedArray = TRUE){
4
+  
5
+  anndata_file <- file.path(sampleDir, paste0(sampleName,'.h5ad',sep=''))
6
+  if (!file.exists(anndata_file)){
7
+    stop("AnnData file not found at specified location. Please check path provided and/or filename.")
8
+  }
9
+  anndata <- ad$read_h5ad(anndata_file)
10
+  
11
+  counts_matrix <- t((py_to_r(anndata$X)))
12
+  if (isTRUE(delayedArray)) {
13
+    counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
14
+  }
15
+  
16
+  sce_rowdata <- S4Vectors::DataFrame(py_to_r(anndata$var))
17
+  sce_coldata <- S4Vectors::DataFrame(py_to_r(anndata$obs))
18
+  sce <- SingleCellExperiment(list(counts = counts_matrix),
19
+                              rowData = sce_rowdata,
20
+                              colData = sce_coldata)
21
+  colnames(sce) <- paste0(sampleName,"_",colnames(sce))
22
+  
23
+  multidim_observations <- py_to_r(anndata$obsm_keys())
24
+  for(obsm_name in multidim_observations){
25
+    tryCatch({
26
+      reducedDims(sce)[[obsm_name]] <- py_to_r(anndata$obsm[obsm_name])
27
+    }, error = function(x){
28
+      error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ")
29
+      message(error_message)
30
+    })
31
+    
32
+  }
33
+  
34
+  unstructured_data <- py_to_r(anndata$uns_keys())
35
+  for(uns_name in unstructured_data){
36
+    tryCatch({
37
+      metadata(sce)[[uns_name]] <- py_to_r(anndata$uns[uns_name])
38
+    }, error = function(x){
39
+      error_message <- paste0("Warning: unable to add unstructured data (.uns slot): '",uns_name,"' to SCE metadata. Skipping. ")
40
+      message(error_message)
41
+    })
42
+    
43
+  }
44
+  
45
+  return(sce)
46
+  
47
+}
48
+
49
+#' @name importAnnData
50
+#' @rdname importAnnData
51
+#' @title Create a SingleCellExperiment Object from Python AnnData .h5ad files
52
+#' @description This function reads in one or more Python AnnData files in the .h5ad format 
53
+#' and returns a single \link[SingleCellExperiment]{SingleCellExperiment} object containing all the 
54
+#' AnnData samples by concatenating their counts matrices and related information slots.
55
+#' @param sampleDirs Folder containing the .h5ad file. Can be one of - 
56
+#' \itemize{
57
+#'   \item Default \code{current working directory}.
58
+#'   \item Full path to the directory containing the .h5ad file. 
59
+#'   E.g \code{sampleDirs = '/path/to/sample'}
60
+#'   \item A vector of folder paths for the samples to import. 
61
+#'   E.g. \code{sampleDirs = c('/path/to/sample1', '/path/to/sample2','/path/to/sample3')}
62
+#'   importAnnData will return a single SCE object containing all the samples 
63
+#'   with the sample name appended to each colname in colData
64
+#' }
65
+#' @param sampleNames The prefix/name of the .h5ad file without the .h5ad extension 
66
+#' e.g. if 'sample.h5ad' is the filename, pass \code{sampleNames = 'sample'}.
67
+#' Can be one of - 
68
+#' \itemize{
69
+#'   \item Default \code{sample}.
70
+#'   \item A vector of samples to import. Length of vector must be equal to length of sampleDirs vector
71
+#'   E.g. \code{sampleDirs = c('sample1', 'sample2','sample3')}
72
+#'   importAnnData will return a single SCE object containing all the samples 
73
+#'   with the sample name appended to each colname in colData
74
+#' }
75
+#' @param delayedArray Boolean. Whether to read the expression matrix as
76
+#'  \link[DelayedArray]{DelayedArray} object. Default \code{TRUE}.
77
+#' @details
78
+#' \code{importAnnData} converts scRNA-seq data in the AnnData format to the
79
+#' \code{SingleCellExperiment} object. The .X slot in AnnData is transposed to the features x cells 
80
+#' format and becomes the 'counts' matrix in the assay slot. The .vars AnnData slot becomes the SCE rowData
81
+#' and the .obs AnnData slot becomes the SCE colData. Multidimensional data in the .obsm AnnData slot is
82
+#' ported over to the SCE reducedDims slot. Additionally, unstructured data in the .uns AnnData slot is 
83
+#' available through the SCE metadata slot. 
84
+#' There are 2 currently known minor issues - 
85
+#' Anndata python module depends on another python module h5pyto read hd5 format files.
86
+#' If there are errors reading the .h5ad files, such as "ValueError: invalid shape in fixed-type tuple."
87
+#' the user will need to do downgrade h5py by running \code{pip3 install --user h5py==2.9.0} 
88
+#' Additionally there might be errors in converting some python objects in the unstructured data slots.
89
+#' There are no known R solutions at present. Refer \url{https://github.com/rstudio/reticulate/issues/209}
90
+#' @return A \code{SingleCellExperiment} object.
91
+#' @examples
92
+#' sce <- importAnnData(sampleDirs = system.file("extdata/annData_pbmc_3k", package = "singleCellTK"),
93
+#'                      sampleNames = 'pbmc3k')
94
+
95
+#' @export
96
+importAnnData <- function(sampleDirs = NULL,
97
+                          sampleNames = NULL,
98
+                          delayedArray = TRUE) {
99
+  
100
+  if (length(sampleDirs)!=length(sampleNames)){
101
+    stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories")
102
+  }
103
+  
104
+  res <- vector("list", length = length(sampleDirs))
105
+  
106
+  for (i in seq_along(sampleDirs)){
107
+    scei <- .importAnnDataSample(sampleDir = sampleDirs[[i]],
108
+                                 sampleName = sampleNames[[i]],
109
+                                 delayedArray = delayedArray)
110
+    res[[i]] <- scei
111
+  }
112
+  sce <- do.call(SingleCellExperiment::cbind, res)
113
+  return(sce)
114
+}
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+