Browse code

Update dedup functionality for import data functions & update tutorial with VAM

Yichen Wang authored on 22/11/2021 17:57:17
Showing 1 changed file
... ...
@@ -102,7 +102,8 @@
102 102
 #'  object. Can be one of "Matrix" (as returned by
103 103
 #'  \link{readMM} function), or "matrix" (as returned by
104 104
 #'  \link[base]{matrix} function). Default \code{"Matrix"}.
105
-
105
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default 
106
+#'  \code{TRUE}.
106 107
 #' @details
107 108
 #' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
108 109
 #' counts matrix stored as "cm" in the DropEst rds output.
... ...
@@ -125,7 +126,8 @@ importDropEst <- function(sampleDirs = NULL,
125 126
                           rdsFileName = 'cell.counts',
126 127
                           sampleNames = NULL,
127 128
                           delayedArray = FALSE,
128
-                          class = c("Matrix", "matrix")) {
129
+                          class = c("Matrix", "matrix"),
130
+                          rowNamesDedup = TRUE) {
129 131
   dataType <- match.arg(dataType)
130 132
   class <- match.arg(class)
131 133
 
... ...
@@ -145,6 +147,15 @@ importDropEst <- function(sampleDirs = NULL,
145 147
     res[[i]] <- scei
146 148
   }
147 149
   sce <- do.call(SingleCellExperiment::cbind, res)
150
+  
151
+  if (isTRUE(rowNamesDedup)) {
152
+    if (any(duplicated(rownames(sce)))) {
153
+      message("Duplicated gene names found, adding '-1', '-2', ",
154
+              "... suffix to them.")
155
+    }
156
+    sce <- dedupRowNames(sce)
157
+  }
158
+  
148 159
   return(sce)
149 160
 }
150 161
 
Browse code

Fix dgTMatrix bugs in import functions. Change output name of row/colData in exportSCEToFlat function

rz2333 authored on 02/09/2021 17:03:47
Showing 1 changed file
... ...
@@ -42,7 +42,8 @@
42 42
                                  dataType,
43 43
                                  rdsFileName,
44 44
                                  sampleName = 'sample',
45
-                                 delayedArray = FALSE){
45
+                                 delayedArray = FALSE,
46
+                                 class){
46 47
   ## Read DropEst RDS
47 48
   dropEst_rds <- .readDropEstFile(sampleDir,dataType,rdsFileName)
48 49
   if (dataType == 'filtered' && 'cm' %in% names(dropEst_rds)) {
... ...
@@ -53,9 +54,15 @@
53 54
     stop("No counts matrix found in the .rds provided! Exiting.")
54 55
   }
55 56
 
57
+  if (class == "Matrix") {
58
+    counts_matrix <- .convertToMatrix(counts_matrix)
59
+  } else if (class == "matrix") {
60
+    counts_matrix <- base::as.matrix(counts_matrix)
61
+  }
62
+
56 63
   if (isTRUE(delayedArray)) {
57 64
     counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
58
-    }
65
+  }
59 66
   ## Create SingleCellExperiment object
60 67
   ## Add SCE ColData. If using filtered counts matrix, colData is subset to include filtered cells.
61 68
   ## append sample name to cells in SCE
... ...
@@ -91,6 +98,11 @@
91 98
 #' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
92 99
 #' @param delayedArray Boolean. Whether to read the expression matrix as
93 100
 #'  \link{DelayedArray} object or not. Default \code{FALSE}.
101
+#' @param class Character. The class of the expression matrix stored in the SCE
102
+#'  object. Can be one of "Matrix" (as returned by
103
+#'  \link{readMM} function), or "matrix" (as returned by
104
+#'  \link[base]{matrix} function). Default \code{"Matrix"}.
105
+
94 106
 #' @details
95 107
 #' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
96 108
 #' counts matrix stored as "cm" in the DropEst rds output.
... ...
@@ -112,8 +124,10 @@ importDropEst <- function(sampleDirs = NULL,
112 124
                           dataType = c('filtered','raw'),
113 125
                           rdsFileName = 'cell.counts',
114 126
                           sampleNames = NULL,
115
-                          delayedArray = FALSE) {
127
+                          delayedArray = FALSE,
128
+                          class = c("Matrix", "matrix")) {
116 129
   dataType <- match.arg(dataType)
130
+  class <- match.arg(class)
117 131
 
118 132
   if (length(sampleDirs)!=length(sampleNames)){
119 133
     stop("Please provide sample names for all input directories")
... ...
@@ -126,7 +140,8 @@ importDropEst <- function(sampleDirs = NULL,
126 140
                          sampleName = sampleNames[[i]],
127 141
                          dataType = dataType,
128 142
                          rdsFileName = rdsFileName,
129
-                         delayedArray = delayedArray)
143
+                         delayedArray = delayedArray,
144
+                         class = class)
130 145
     res[[i]] <- scei
131 146
   }
132 147
   sce <- do.call(SingleCellExperiment::cbind, res)
Browse code

merge latest master

Yichen Wang authored on 08/02/2021 21:14:12
Showing 0 changed files
Browse code

Update importMitoGeneSet function. Change delayedArray=False for all import functions. Minor fixes in QC HTML report

rz2333 authored on 30/12/2020 17:40:20
Showing 1 changed file
... ...
@@ -91,7 +91,7 @@
91 91
 #' @param dataType can be "filtered" or "raw". Default \code{"filtered"}.
92 92
 #' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
93 93
 #' @param delayedArray Boolean. Whether to read the expression matrix as
94
-#'  \link{DelayedArray} object or not. Default \code{TRUE}.
94
+#'  \link{DelayedArray} object or not. Default \code{FALSE}.
95 95
 #' @details
96 96
 #' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
97 97
 #' counts matrix stored as "cm" in the DropEst rds output.
... ...
@@ -114,7 +114,7 @@ importDropEst <- function(sampleDirs = NULL,
114 114
                           dataType = c('filtered','raw'),
115 115
                           rdsFileName = 'cell.counts',
116 116
                           sampleNames = NULL,
117
-                          delayedArray = TRUE) {
117
+                          delayedArray = FALSE) {
118 118
   dataType <- match.arg(dataType)
119 119
 
120 120
   if (length(sampleDirs)!=length(sampleNames)){
Browse code

Merge master; fix conflict; fix bug

Yichen Wang authored on 12/11/2020 23:58:02
Showing 0 changed files
Browse code

Edit links to documentation

unknown authored on 22/10/2020 03:39:09
Showing 1 changed file
... ...
@@ -91,7 +91,7 @@
91 91
 #' @param dataType can be "filtered" or "raw". Default \code{"filtered"}.
92 92
 #' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
93 93
 #' @param delayedArray Boolean. Whether to read the expression matrix as
94
-#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
94
+#'  \link{DelayedArray} object or not. Default \code{TRUE}.
95 95
 #' @details
96 96
 #' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
97 97
 #' counts matrix stored as "cm" in the DropEst rds output.
Browse code

merge upstream and fix bug in DE related func

Yichen Wang authored on 03/09/2020 18:59:48
Showing 1 changed file
... ...
@@ -1,141 +1,141 @@
1
-
2
-.readDropEstFile <- function(sampleDir, dataType,rdsFileName){
3
-  dropEst_cell_counts <- file.path(sampleDir, paste(rdsFileName, '.rds', sep=''))
4
-  if (!file.exists(dropEst_cell_counts)){
5
-    stop("DropEst output not found at location specified. Please check path provided and/or filename.")
6
-  }
7
-  dropEst_rds <- readRDS(dropEst_cell_counts)
8
-
9
-  return(dropEst_rds)
10
-}
11
-
12
-.constructColdata <- function(dropEst_rds,counts_matrix, dataType){
13
-  coldata_fields <- c("mean_reads_per_umi","aligned_reads_per_cell","aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb")
14
-  coldata_df <-  list()
15
-  for (field in coldata_fields){
16
-    if (field %in% names(dropEst_rds)){
17
-      coldata_field_df <- data.frame(as.matrix(dropEst_rds[[field]]))
18
-      names(coldata_field_df)[1] <- field
19
-      coldata_field_df$cell <- row.names(coldata_field_df)
20
-
21
-      coldata_df[[field]] <- coldata_field_df
22
-    }}
23
-  coldata_df_merged <- Reduce(function(x, y) merge(x, y, all=TRUE,by="cell"), coldata_df)
24
-  row.names(coldata_df_merged) <- coldata_df_merged$cell
25
-  coldata_df_merged <- S4Vectors::DataFrame(as.matrix(coldata_df_merged))
26
-  if (dataType == 'filtered'){
27
-    coldata_df_merged <- coldata_df_merged[coldata_df_merged$cell %in% colnames(counts_matrix),]
28
-  }
29
-  return(coldata_df_merged)
30
-}
31
-
32
-.extractMetadata <- function(dropEst_rds){
33
-  metadata_fields <- c("saturation_info","merge_targets","reads_per_umi_per_cell")
34
-  metadata <- c()
35
-  for (md in metadata_fields){
36
-    if (md %in% names(dropEst_rds)){
37
-      metadata[[md]] <- dropEst_rds[[md]]
38
-    }}
39
-  return(metadata)
40
-}
41
-
42
-.importDropEstSample <- function(sampleDir = './',
43
-                                 dataType,
44
-                                 rdsFileName,
45
-                                 sampleName = 'sample',
46
-                                 delayedArray = FALSE){
47
-  ## Read DropEst RDS
48
-  dropEst_rds <- .readDropEstFile(sampleDir,dataType,rdsFileName)
49
-  if (dataType == 'filtered' && 'cm' %in% names(dropEst_rds)) {
50
-    counts_matrix <- dropEst_rds$cm
51
-  } else if (dataType == 'raw' && 'cm_raw' %in% names(dropEst_rds)) {
52
-    counts_matrix <- dropEst_rds$cm_raw
53
-  } else {
54
-    stop("No counts matrix found in the .rds provided! Exiting.")
55
-  }
56
-
57
-  if (isTRUE(delayedArray)) {
58
-    counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
59
-    }
60
-  ## Create SingleCellExperiment object
61
-  ## Add SCE ColData. If using filtered counts matrix, colData is subset to include filtered cells.
62
-  ## append sample name to cells in SCE
63
-  sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = counts_matrix))
64
-  colnames(sce) <- paste0(sampleName,"_",colnames(sce))
65
-  sce_coldata <- .constructColdata(dropEst_rds, counts_matrix, dataType)
66
-  row.names(sce_coldata) <- paste0(sampleName,"_",row.names(sce_coldata))
67
-
68
-  if (dim(counts_matrix)[2] == dim(sce_coldata)[1]){
69
-    SummarizedExperiment::colData(sce) <- sce_coldata
70
-  } else {
71
-    warning("Unable to add ColData to SCE. nCol of Counts Matrix not equal to nRow of ColData matrix.")
72
-  }
73
-
74
-  ## Add SCE metadata
75
-  sce_metadata <- .extractMetadata(dropEst_rds)
76
-  sce@metadata$dropEst <- sce_metadata
77
-  
78
-  return(sce)
79
-}
80
-
81
-#' @name importDropEst
82
-#' @rdname importDropEst
83
-#' @title Create a SingleCellExperiment Object from DropEst output
84
-#' @description imports the RDS file created by DropEst (https://github.com/hms-dbmi/dropEst) and
85
-#' create a SingleCellExperiment object from either the raw or filtered counts matrix.
86
-#' Additionally parse through the RDS to obtain appropriate feature annotations as
87
-#' SCE coldata, in addition to any metadata.
88
-#' @param sampleDirs  A path to the directory containing the data files. Default "./".
89
-#' @param sampleNames A User-defined sample name. This will be prepended to all cell barcode IDs.
90
-#'  Default "sample".
91
-#' @param dataType can be "filtered" or "raw". Default \code{"filtered"}.
92
-#' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
93
-#' @param delayedArray Boolean. Whether to read the expression matrix as
94
-#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
95
-#' @details
96
-#' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
97
-#' counts matrix stored as "cm" in the DropEst rds output.
98
-#' ColData is obtained from the DropEst corresponding to "mean_reads_per_umi","aligned_reads_per_cell",
99
-#' "aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb"
100
-#' If using filtered counts matrix, the colData dataframe is
101
-#' subset to contain features from the filtered counts matrix alone.
102
-#' If any annotations of ("saturation_info","merge_targets","reads_per_umi_per_cell") are
103
-#' found in the DropEst rds, they will be added to the SCE metadata field
104
-#' @return A \code{SingleCellExperiment} object containing the count matrix,
105
-#'  the feature annotations from DropEst as ColData, and any metadata from DropEst
106
-#' @examples
107
-#' # Example results were generated as per instructions from the developers of dropEst described in
108
-#' # https://github.com/hms-dbmi/dropEst/blob/master/examples/EXAMPLES.md
109
-#' sce <- importDropEst(sampleDirs = system.file("extdata/dropEst_scg71", package = "singleCellTK"),
110
-#'                      sampleNames = 'scg71')
111
-
112
-#' @export
113
-importDropEst <- function(sampleDirs = NULL,
114
-                          dataType = c('filtered','raw'),
115
-                          rdsFileName = 'cell.counts',
116
-                          sampleNames = NULL,
117
-                          delayedArray = TRUE) {
118
-  dataType <- match.arg(dataType)
119
-
120
-  if (length(sampleDirs)!=length(sampleNames)){
121
-    stop("Please provide sample names for all input directories")
122
-  }
123
-
124
-  res <- vector("list", length = length(sampleDirs))
125
-
126
-  for (i in seq_along(sampleDirs)){
127
-    scei <- .importDropEstSample(sampleDir = sampleDirs[[i]],
128
-                         sampleName = sampleNames[[i]],
129
-                         dataType = dataType,
130
-                         rdsFileName = rdsFileName,
131
-                         delayedArray = delayedArray)
132
-    res[[i]] <- scei
133
-  }
134
-  sce <- do.call(SingleCellExperiment::cbind, res)
135
-  return(sce)
136
-}
137
-
138
-
139
-
140
-
141
-
1
+
2
+.readDropEstFile <- function(sampleDir, dataType,rdsFileName){
3
+  dropEst_cell_counts <- file.path(sampleDir, paste(rdsFileName, '.rds', sep=''))
4
+  if (!file.exists(dropEst_cell_counts)){
5
+    stop("DropEst output not found at location specified. Please check path provided and/or filename.")
6
+  }
7
+  dropEst_rds <- readRDS(dropEst_cell_counts)
8
+
9
+  return(dropEst_rds)
10
+}
11
+
12
+.constructColdata <- function(dropEst_rds,counts_matrix, dataType){
13
+  coldata_fields <- c("mean_reads_per_umi","aligned_reads_per_cell","aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb")
14
+  coldata_df <-  list()
15
+  for (field in coldata_fields){
16
+    if (field %in% names(dropEst_rds)){
17
+      coldata_field_df <- data.frame(as.matrix(dropEst_rds[[field]]))
18
+      names(coldata_field_df)[1] <- field
19
+      coldata_field_df$cell <- row.names(coldata_field_df)
20
+
21
+      coldata_df[[field]] <- coldata_field_df
22
+    }}
23
+  coldata_df_merged <- Reduce(function(x, y) merge(x, y, all=TRUE,by="cell"), coldata_df)
24
+  row.names(coldata_df_merged) <- coldata_df_merged$cell
25
+  coldata_df_merged <- S4Vectors::DataFrame(as.matrix(coldata_df_merged))
26
+  if (dataType == 'filtered'){
27
+    coldata_df_merged <- coldata_df_merged[coldata_df_merged$cell %in% colnames(counts_matrix),]
28
+  }
29
+  return(coldata_df_merged)
30
+}
31
+
32
+.extractMetadata <- function(dropEst_rds){
33
+  metadata_fields <- c("saturation_info","merge_targets","reads_per_umi_per_cell")
34
+  metadata <- c()
35
+  for (md in metadata_fields){
36
+    if (md %in% names(dropEst_rds)){
37
+      metadata[[md]] <- dropEst_rds[[md]]
38
+    }}
39
+  return(metadata)
40
+}
41
+
42
+.importDropEstSample <- function(sampleDir = './',
43
+                                 dataType,
44
+                                 rdsFileName,
45
+                                 sampleName = 'sample',
46
+                                 delayedArray = FALSE){
47
+  ## Read DropEst RDS
48
+  dropEst_rds <- .readDropEstFile(sampleDir,dataType,rdsFileName)
49
+  if (dataType == 'filtered' && 'cm' %in% names(dropEst_rds)) {
50
+    counts_matrix <- dropEst_rds$cm
51
+  } else if (dataType == 'raw' && 'cm_raw' %in% names(dropEst_rds)) {
52
+    counts_matrix <- dropEst_rds$cm_raw
53
+  } else {
54
+    stop("No counts matrix found in the .rds provided! Exiting.")
55
+  }
56
+
57
+  if (isTRUE(delayedArray)) {
58
+    counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
59
+    }
60
+  ## Create SingleCellExperiment object
61
+  ## Add SCE ColData. If using filtered counts matrix, colData is subset to include filtered cells.
62
+  ## append sample name to cells in SCE
63
+  sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = counts_matrix))
64
+  colnames(sce) <- paste0(sampleName,"_",colnames(sce))
65
+  sce_coldata <- .constructColdata(dropEst_rds, counts_matrix, dataType)
66
+  row.names(sce_coldata) <- paste0(sampleName,"_",row.names(sce_coldata))
67
+
68
+  if (dim(counts_matrix)[2] == dim(sce_coldata)[1]){
69
+    SummarizedExperiment::colData(sce) <- sce_coldata
70
+  } else {
71
+    warning("Unable to add ColData to SCE. nCol of Counts Matrix not equal to nRow of ColData matrix.")
72
+  }
73
+
74
+  ## Add SCE metadata
75
+  sce_metadata <- .extractMetadata(dropEst_rds)
76
+  sce@metadata$dropEst <- sce_metadata
77
+  
78
+  return(sce)
79
+}
80
+
81
+#' @name importDropEst
82
+#' @rdname importDropEst
83
+#' @title Create a SingleCellExperiment Object from DropEst output
84
+#' @description imports the RDS file created by DropEst (https://github.com/hms-dbmi/dropEst) and
85
+#' create a SingleCellExperiment object from either the raw or filtered counts matrix.
86
+#' Additionally parse through the RDS to obtain appropriate feature annotations as
87
+#' SCE coldata, in addition to any metadata.
88
+#' @param sampleDirs  A path to the directory containing the data files. Default "./".
89
+#' @param sampleNames A User-defined sample name. This will be prepended to all cell barcode IDs.
90
+#'  Default "sample".
91
+#' @param dataType can be "filtered" or "raw". Default \code{"filtered"}.
92
+#' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
93
+#' @param delayedArray Boolean. Whether to read the expression matrix as
94
+#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
95
+#' @details
96
+#' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
97
+#' counts matrix stored as "cm" in the DropEst rds output.
98
+#' ColData is obtained from the DropEst corresponding to "mean_reads_per_umi","aligned_reads_per_cell",
99
+#' "aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb"
100
+#' If using filtered counts matrix, the colData dataframe is
101
+#' subset to contain features from the filtered counts matrix alone.
102
+#' If any annotations of ("saturation_info","merge_targets","reads_per_umi_per_cell") are
103
+#' found in the DropEst rds, they will be added to the SCE metadata field
104
+#' @return A \code{SingleCellExperiment} object containing the count matrix,
105
+#'  the feature annotations from DropEst as ColData, and any metadata from DropEst
106
+#' @examples
107
+#' # Example results were generated as per instructions from the developers of dropEst described in
108
+#' # https://github.com/hms-dbmi/dropEst/blob/master/examples/EXAMPLES.md
109
+#' sce <- importDropEst(sampleDirs = system.file("extdata/dropEst_scg71", package = "singleCellTK"),
110
+#'                      sampleNames = 'scg71')
111
+
112
+#' @export
113
+importDropEst <- function(sampleDirs = NULL,
114
+                          dataType = c('filtered','raw'),
115
+                          rdsFileName = 'cell.counts',
116
+                          sampleNames = NULL,
117
+                          delayedArray = TRUE) {
118
+  dataType <- match.arg(dataType)
119
+
120
+  if (length(sampleDirs)!=length(sampleNames)){
121
+    stop("Please provide sample names for all input directories")
122
+  }
123
+
124
+  res <- vector("list", length = length(sampleDirs))
125
+
126
+  for (i in seq_along(sampleDirs)){
127
+    scei <- .importDropEstSample(sampleDir = sampleDirs[[i]],
128
+                         sampleName = sampleNames[[i]],
129
+                         dataType = dataType,
130
+                         rdsFileName = rdsFileName,
131
+                         delayedArray = delayedArray)
132
+    res[[i]] <- scei
133
+  }
134
+  sce <- do.call(SingleCellExperiment::cbind, res)
135
+  return(sce)
136
+}
137
+
138
+
139
+
140
+
141
+
Browse code

updating documentation

Shruthi Bandyadka authored on 09/06/2020 15:18:43
Showing 1 changed file
... ...
@@ -83,9 +83,10 @@
83 83
 #' @title Create a SCE Object from DropEst output
84 84
 #' @description imports the RDS file created by DropEst and
85 85
 #' creates a \link[SingleCellExperiment]{SingleCellExperiment} object from either the raw or filtered counts matrix.
86
-#' @param sampleDirs  A path to the directory containing the data files. Default "./".
87
-#' @param sampleNames A User-defined sample name. This will be prepended to all cell barcode IDs.
88
-#'  Default "sample".
86
+#' @param sampleDirs  a vector of paths to the sample directories containing the cell.counts.rds file. 
87
+#' Default is current working directory. 
88
+#' @param sampleNames a vector of  sample names corresponding to the sample directories.
89
+#' Default "sample". The sample name will be prepended to cell barcode IDs. 
89 90
 #' @param dataType can be "filtered" or "raw". Default \code{"filtered"}.
90 91
 #' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
91 92
 #' @param delayedArray Boolean. Whether to read the expression matrix as
Browse code

initial documentation structure

Shruthi Bandyadka authored on 22/05/2020 18:20:08
Showing 1 changed file
... ...
@@ -80,11 +80,9 @@
80 80
 
81 81
 #' @name importDropEst
82 82
 #' @rdname importDropEst
83
-#' @title Create a SingleCellExperiment Object from DropEst output
84
-#' @description imports the RDS file created by DropEst (https://github.com/hms-dbmi/dropEst) and
85
-#' create a SingleCellExperiment object from either the raw or filtered counts matrix.
86
-#' Additionally parse through the RDS to obtain appropriate feature annotations as
87
-#' SCE coldata, in addition to any metadata.
83
+#' @title Create a SCE Object from DropEst output
84
+#' @description imports the RDS file created by DropEst and
85
+#' creates a \link[SingleCellExperiment]{SingleCellExperiment} object from either the raw or filtered counts matrix.
88 86
 #' @param sampleDirs  A path to the directory containing the data files. Default "./".
89 87
 #' @param sampleNames A User-defined sample name. This will be prepended to all cell barcode IDs.
90 88
 #'  Default "sample".
Browse code

Merge upstream

Yusuke Koga authored on 13/05/2020 10:50:05
Showing 0 changed files
Browse code

update dropEst: add example dropest result and fix metadata slot

Shruthi Bandyadka authored on 04/05/2020 18:56:54
Showing 1 changed file
... ...
@@ -73,7 +73,7 @@
73 73
   
74 74
   ## Add SCE metadata
75 75
   sce_metadata <- .extractMetadata(dropEst_rds)
76
-  metadata(sce) <- sce_metadata
76
+  sce@metadata$dropEst <- sce_metadata
77 77
   
78 78
   return(sce)
79 79
 }
... ...
@@ -103,6 +103,12 @@
103 103
 #' found in the DropEst rds, they will be added to the SCE metadata field
104 104
 #' @return A \code{SingleCellExperiment} object containing the count matrix,
105 105
 #'  the feature annotations from DropEst as ColData, and any metadata from DropEst
106
+#' @examples
107
+#' # Example results were generated as per instructions from the developers of dropEst described in
108
+#' # https://github.com/hms-dbmi/dropEst/blob/master/examples/EXAMPLES.md
109
+#' sce <- importDropEst(sampleDirs = system.file("extdata/dropEst_scg71", package = "singleCellTK"),
110
+#'                      sampleNames = 'scg71')
111
+
106 112
 #' @export
107 113
 importDropEst <- function(sampleDirs = NULL, 
108 114
                           dataType = c('filtered','raw'),
Browse code

Merge upstream

Yusuke Koga authored on 02/05/2020 19:46:11
Showing 0 changed files
Browse code

Fixed several document bugs as well as errors to scds, barcodeRank, and emptyDrops functions when passing parameters.

Joshua D. Campbell authored on 30/04/2020 18:30:25
Showing 1 changed file
... ...
@@ -43,7 +43,7 @@
43 43
                                  dataType, 
44 44
                                  rdsFileName, 
45 45
                                  sampleName = 'sample',
46
-                                 delayedArray = delayedArrary){
46
+                                 delayedArray = FALSE){
47 47
   ## Read DropEst RDS
48 48
   dropEst_rds <- .readDropEstFile(sampleDir,dataType,rdsFileName)
49 49
   if (dataType == 'filtered' && 'cm' %in% names(dropEst_rds)) {
... ...
@@ -85,10 +85,10 @@
85 85
 #' create a SingleCellExperiment object from either the raw or filtered counts matrix.
86 86
 #' Additionally parse through the RDS to obtain appropriate feature annotations as 
87 87
 #' SCE coldata, in addition to any metadata.
88
-#' @param sampleDir  A path to the directory containing the data files. Default "./".
89
-#' @param sampleName A User-defined sample name. This will be prepended to all cell barcode IDs.
88
+#' @param sampleDirs  A path to the directory containing the data files. Default "./".
89
+#' @param sampleNames A User-defined sample name. This will be prepended to all cell barcode IDs.
90 90
 #'  Default "sample".
91
-#'  @param dataType can be "filtered" or "raw". Default is "filtered"
91
+#' @param dataType can be "filtered" or "raw". Default \code{"filtered"}.
92 92
 #' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
93 93
 #' @param delayedArray Boolean. Whether to read the expression matrix as
94 94
 #'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
... ...
@@ -103,14 +103,6 @@
103 103
 #' found in the DropEst rds, they will be added to the SCE metadata field
104 104
 #' @return A \code{SingleCellExperiment} object containing the count matrix,
105 105
 #'  the feature annotations from DropEst as ColData, and any metadata from DropEst
106
-#' @examples
107
-#' # Example #1
108
-#' Example DropEst outputs were downloaded from the DropEst Github 
109
-#' (http://pklab.med.harvard.edu/viktor/dropest_paper/dropest_0.8.5.zip). 
110
-#' To run the dropest import function with the example dataset, 
111
-#' set the sampleDirs variable to the example dropEst provided in SCTK as follows-
112
-#' sce <- importDropEst(sampleDirs = c('path/to/dropest/folder/'), 
113
-#'                      dataType='filtered', sampleNames=c('sample'))
114 106
 #' @export
115 107
 importDropEst <- function(sampleDirs = NULL, 
116 108
                           dataType = c('filtered','raw'),
Browse code

Run CMD check, fix bugs

Yusuke Koga authored on 29/04/2020 21:41:54
Showing 1 changed file
... ...
@@ -4,8 +4,8 @@
4 4
   if (!file.exists(dropEst_cell_counts)){
5 5
     stop("DropEst output not found at location specified. Please check path provided and/or filename.")
6 6
   }
7
-  dropEst_rds <- readRDS(dropEst_cell_counts) 
8
-  
7
+  dropEst_rds <- readRDS(dropEst_cell_counts)
8
+
9 9
   return(dropEst_rds)
10 10
 }
11 11
 
... ...
@@ -17,7 +17,7 @@
17 17
       coldata_field_df <- data.frame(as.matrix(dropEst_rds[[field]]))
18 18
       names(coldata_field_df)[1] <- field
19 19
       coldata_field_df$cell <- row.names(coldata_field_df)
20
-      
20
+
21 21
       coldata_df[[field]] <- coldata_field_df
22 22
     }}
23 23
   coldata_df_merged <- Reduce(function(x, y) merge(x, y, all=TRUE,by="cell"), coldata_df)
... ...
@@ -39,9 +39,9 @@
39 39
   return(metadata)
40 40
 }
41 41
 
42
-.importDropEstSample <- function(sampleDir = './', 
43
-                                 dataType, 
44
-                                 rdsFileName, 
42
+.importDropEstSample <- function(sampleDir = './',
43
+                                 dataType,
44
+                                 rdsFileName,
45 45
                                  sampleName = 'sample',
46 46
                                  delayedArray = delayedArrary){
47 47
   ## Read DropEst RDS
... ...
@@ -53,37 +53,37 @@
53 53
   } else {
54 54
     stop("No counts matrix found in the .rds provided! Exiting.")
55 55
   }
56
-  
56
+
57 57
   if (isTRUE(delayedArray)) {
58 58
     counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
59 59
     }
60 60
   ## Create SingleCellExperiment object
61 61
   ## Add SCE ColData. If using filtered counts matrix, colData is subset to include filtered cells.
62
-  ## append sample name to cells in SCE 
62
+  ## append sample name to cells in SCE
63 63
   sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = counts_matrix))
64
-  colnames(sce) <- paste0(sampleName,"_",colnames(sce)) 
64
+  colnames(sce) <- paste0(sampleName,"_",colnames(sce))
65 65
   sce_coldata <- .constructColdata(dropEst_rds, counts_matrix, dataType)
66 66
   row.names(sce_coldata) <- paste0(sampleName,"_",row.names(sce_coldata))
67
-  
67
+
68 68
   if (dim(counts_matrix)[2] == dim(sce_coldata)[1]){
69 69
     SummarizedExperiment::colData(sce) <- sce_coldata
70 70
   } else {
71 71
     warning("Unable to add ColData to SCE. nCol of Counts Matrix not equal to nRow of ColData matrix.")
72 72
   }
73
-  
73
+
74 74
   ## Add SCE metadata
75 75
   sce_metadata <- .extractMetadata(dropEst_rds)
76 76
   metadata(sce) <- sce_metadata
77
-  
77
+
78 78
   return(sce)
79 79
 }
80 80
 
81 81
 #' @name importDropEst
82 82
 #' @rdname importDropEst
83
-#' @title Create a SingleCellExperiment Object from DropEst output 
83
+#' @title Create a SingleCellExperiment Object from DropEst output
84 84
 #' @description imports the RDS file created by DropEst (https://github.com/hms-dbmi/dropEst) and
85 85
 #' create a SingleCellExperiment object from either the raw or filtered counts matrix.
86
-#' Additionally parse through the RDS to obtain appropriate feature annotations as 
86
+#' Additionally parse through the RDS to obtain appropriate feature annotations as
87 87
 #' SCE coldata, in addition to any metadata.
88 88
 #' @param sampleDir  A path to the directory containing the data files. Default "./".
89 89
 #' @param sampleName A User-defined sample name. This will be prepended to all cell barcode IDs.
... ...
@@ -94,37 +94,38 @@
94 94
 #'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
95 95
 #' @details
96 96
 #' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
97
-#' counts matrix stored as "cm" in the DropEst rds output. 
97
+#' counts matrix stored as "cm" in the DropEst rds output.
98 98
 #' ColData is obtained from the DropEst corresponding to "mean_reads_per_umi","aligned_reads_per_cell",
99 99
 #' "aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb"
100
-#' If using filtered counts matrix, the colData dataframe is 
100
+#' If using filtered counts matrix, the colData dataframe is
101 101
 #' subset to contain features from the filtered counts matrix alone.
102
-#' If any annotations of ("saturation_info","merge_targets","reads_per_umi_per_cell") are 
102
+#' If any annotations of ("saturation_info","merge_targets","reads_per_umi_per_cell") are
103 103
 #' found in the DropEst rds, they will be added to the SCE metadata field
104 104
 #' @return A \code{SingleCellExperiment} object containing the count matrix,
105 105
 #'  the feature annotations from DropEst as ColData, and any metadata from DropEst
106 106
 #' @examples
107
-#' # Example #1
108
-#' Example DropEst outputs were downloaded from the DropEst Github 
109
-#' (http://pklab.med.harvard.edu/viktor/dropest_paper/dropest_0.8.5.zip). 
110
-#' To run the dropest import function with the example dataset, 
107
+#' \dontrun{
108
+#' example outputs were downloaded from the DropEst Github
109
+#' (http://pklab.med.harvard.edu/viktor/dropest_paper/dropest_0.8.5.zip).
110
+#' To run the dropest import function with the example dataset,
111 111
 #' set the sampleDirs variable to the example dropEst provided in SCTK as follows-
112
-#' sce <- importDropEst(sampleDirs = c('path/to/dropest/folder/'), 
112
+#' sce <- importDropEst(sampleDirs = c('path/to/dropest/folder/'),
113 113
 #'                      dataType='filtered', sampleNames=c('sample'))
114
+#' }
114 115
 #' @export
115
-importDropEst <- function(sampleDirs = NULL, 
116
+importDropEst <- function(sampleDirs = NULL,
116 117
                           dataType = c('filtered','raw'),
117 118
                           rdsFileName = 'cell.counts',
118 119
                           sampleNames = NULL,
119 120
                           delayedArray = TRUE) {
120 121
   dataType <- match.arg(dataType)
121
-  
122
+
122 123
   if (length(sampleDirs)!=length(sampleNames)){
123 124
     stop("Please provide sample names for all input directories")
124 125
   }
125
-  
126
+
126 127
   res <- vector("list", length = length(sampleDirs))
127
-  
128
+
128 129
   for (i in seq_along(sampleDirs)){
129 130
     scei <- .importDropEstSample(sampleDir = sampleDirs[[i]],
130 131
                          sampleName = sampleNames[[i]],
... ...
@@ -136,8 +137,8 @@ importDropEst <- function(sampleDirs = NULL,
136 137
   sce <- do.call(SingleCellExperiment::cbind, res)
137 138
   return(sce)
138 139
 }
139
-  
140
-  
140
+
141
+
141 142
 
142 143
 
143 144
 
Browse code

import dropEst function (includes delayedarray option)

Shruthi Bandyadka authored on 19/03/2020 20:30:54
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,143 @@
1
+
2
+.readDropEstFile <- function(sampleDir, dataType,rdsFileName){
3
+  dropEst_cell_counts <- file.path(sampleDir, paste(rdsFileName, '.rds', sep=''))
4
+  if (!file.exists(dropEst_cell_counts)){
5
+    stop("DropEst output not found at location specified. Please check path provided and/or filename.")
6
+  }
7
+  dropEst_rds <- readRDS(dropEst_cell_counts) 
8
+  
9
+  return(dropEst_rds)
10
+}
11
+
12
+.constructColdata <- function(dropEst_rds,counts_matrix, dataType){
13
+  coldata_fields <- c("mean_reads_per_umi","aligned_reads_per_cell","aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb")
14
+  coldata_df <-  list()
15
+  for (field in coldata_fields){
16
+    if (field %in% names(dropEst_rds)){
17
+      coldata_field_df <- data.frame(as.matrix(dropEst_rds[[field]]))
18
+      names(coldata_field_df)[1] <- field
19
+      coldata_field_df$cell <- row.names(coldata_field_df)
20
+      
21
+      coldata_df[[field]] <- coldata_field_df
22
+    }}
23
+  coldata_df_merged <- Reduce(function(x, y) merge(x, y, all=TRUE,by="cell"), coldata_df)
24
+  row.names(coldata_df_merged) <- coldata_df_merged$cell
25
+  coldata_df_merged <- S4Vectors::DataFrame(as.matrix(coldata_df_merged))
26
+  if (dataType == 'filtered'){
27
+    coldata_df_merged <- coldata_df_merged[coldata_df_merged$cell %in% colnames(counts_matrix),]
28
+  }
29
+  return(coldata_df_merged)
30
+}
31
+
32
+.extractMetadata <- function(dropEst_rds){
33
+  metadata_fields <- c("saturation_info","merge_targets","reads_per_umi_per_cell")
34
+  metadata <- c()
35
+  for (md in metadata_fields){
36
+    if (md %in% names(dropEst_rds)){
37
+      metadata[[md]] <- dropEst_rds[[md]]
38
+    }}
39
+  return(metadata)
40
+}
41
+
42
+.importDropEstSample <- function(sampleDir = './', 
43
+                                 dataType, 
44
+                                 rdsFileName, 
45
+                                 sampleName = 'sample',
46
+                                 delayedArray = delayedArrary){
47
+  ## Read DropEst RDS
48
+  dropEst_rds <- .readDropEstFile(sampleDir,dataType,rdsFileName)
49
+  if (dataType == 'filtered' && 'cm' %in% names(dropEst_rds)) {
50
+    counts_matrix <- dropEst_rds$cm
51
+  } else if (dataType == 'raw' && 'cm_raw' %in% names(dropEst_rds)) {
52
+    counts_matrix <- dropEst_rds$cm_raw
53
+  } else {
54
+    stop("No counts matrix found in the .rds provided! Exiting.")
55
+  }
56
+  
57
+  if (isTRUE(delayedArray)) {
58
+    counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
59
+    }
60
+  ## Create SingleCellExperiment object
61
+  ## Add SCE ColData. If using filtered counts matrix, colData is subset to include filtered cells.
62
+  ## append sample name to cells in SCE 
63
+  sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = counts_matrix))
64
+  colnames(sce) <- paste0(sampleName,"_",colnames(sce)) 
65
+  sce_coldata <- .constructColdata(dropEst_rds, counts_matrix, dataType)
66
+  row.names(sce_coldata) <- paste0(sampleName,"_",row.names(sce_coldata))
67
+  
68
+  if (dim(counts_matrix)[2] == dim(sce_coldata)[1]){
69
+    SummarizedExperiment::colData(sce) <- sce_coldata
70
+  } else {
71
+    warning("Unable to add ColData to SCE. nCol of Counts Matrix not equal to nRow of ColData matrix.")
72
+  }
73
+  
74
+  ## Add SCE metadata
75
+  sce_metadata <- .extractMetadata(dropEst_rds)
76
+  metadata(sce) <- sce_metadata
77
+  
78
+  return(sce)
79
+}
80
+
81
+#' @name importDropEst
82
+#' @rdname importDropEst
83
+#' @title Create a SingleCellExperiment Object from DropEst output 
84
+#' @description imports the RDS file created by DropEst (https://github.com/hms-dbmi/dropEst) and
85
+#' create a SingleCellExperiment object from either the raw or filtered counts matrix.
86
+#' Additionally parse through the RDS to obtain appropriate feature annotations as 
87
+#' SCE coldata, in addition to any metadata.
88
+#' @param sampleDir  A path to the directory containing the data files. Default "./".
89
+#' @param sampleName A User-defined sample name. This will be prepended to all cell barcode IDs.
90
+#'  Default "sample".
91
+#'  @param dataType can be "filtered" or "raw". Default is "filtered"
92
+#' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
93
+#' @param delayedArray Boolean. Whether to read the expression matrix as
94
+#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
95
+#' @details
96
+#' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
97
+#' counts matrix stored as "cm" in the DropEst rds output. 
98
+#' ColData is obtained from the DropEst corresponding to "mean_reads_per_umi","aligned_reads_per_cell",
99
+#' "aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb"
100
+#' If using filtered counts matrix, the colData dataframe is 
101
+#' subset to contain features from the filtered counts matrix alone.
102
+#' If any annotations of ("saturation_info","merge_targets","reads_per_umi_per_cell") are 
103
+#' found in the DropEst rds, they will be added to the SCE metadata field
104
+#' @return A \code{SingleCellExperiment} object containing the count matrix,
105
+#'  the feature annotations from DropEst as ColData, and any metadata from DropEst
106
+#' @examples
107
+#' # Example #1
108
+#' Example DropEst outputs were downloaded from the DropEst Github 
109
+#' (http://pklab.med.harvard.edu/viktor/dropest_paper/dropest_0.8.5.zip). 
110
+#' To run the dropest import function with the example dataset, 
111
+#' set the sampleDirs variable to the example dropEst provided in SCTK as follows-
112
+#' sce <- importDropEst(sampleDirs = c('path/to/dropest/folder/'), 
113
+#'                      dataType='filtered', sampleNames=c('sample'))
114
+#' @export
115
+importDropEst <- function(sampleDirs = NULL, 
116
+                          dataType = c('filtered','raw'),
117
+                          rdsFileName = 'cell.counts',
118
+                          sampleNames = NULL,
119
+                          delayedArray = TRUE) {
120
+  dataType <- match.arg(dataType)
121
+  
122
+  if (length(sampleDirs)!=length(sampleNames)){
123
+    stop("Please provide sample names for all input directories")
124
+  }
125
+  
126
+  res <- vector("list", length = length(sampleDirs))
127
+  
128
+  for (i in seq_along(sampleDirs)){
129
+    scei <- .importDropEstSample(sampleDir = sampleDirs[[i]],
130
+                         sampleName = sampleNames[[i]],
131
+                         dataType = dataType,
132
+                         rdsFileName = rdsFileName,
133
+                         delayedArray = delayedArray)
134
+    res[[i]] <- scei
135
+  }
136
+  sce <- do.call(SingleCellExperiment::cbind, res)
137
+  return(sce)
138
+}
139
+  
140
+  
141
+
142
+
143
+