... | ... |
@@ -150,7 +150,7 @@ export(sctkPythonInstallConda) |
150 | 150 |
export(sctkPythonInstallVirtualEnv) |
151 | 151 |
export(selectSCTKConda) |
152 | 152 |
export(selectSCTKVirtualEnvironment) |
153 |
-export(setSCERowNames) |
|
153 |
+export(setRowNames) |
|
154 | 154 |
export(setSCTKDisplayRow) |
155 | 155 |
export(seuratComputeHeatmap) |
156 | 156 |
export(seuratComputeJackStraw) |
... | ... |
@@ -14,6 +14,8 @@ |
14 | 14 |
#' object. Can be one of "Matrix" (as returned by |
15 | 15 |
#' \link{readMM} function), or "matrix" (as returned by |
16 | 16 |
#' \link[base]{matrix} function). Default "Matrix". |
17 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
18 |
+#' \code{TRUE}. |
|
17 | 19 |
#' @return A \code{SingleCellExperiment} object containing the count |
18 | 20 |
#' matrix, the feature annotations, and the cell annotation |
19 | 21 |
#' (which includes QC metrics stored in 'featureDump.txt'). |
... | ... |
@@ -24,7 +26,8 @@ importAlevin <- function( |
24 | 26 |
alevinDir = NULL, |
25 | 27 |
sampleName = 'sample', |
26 | 28 |
delayedArray = FALSE, |
27 |
- class = c("Matrix", "matrix")) { |
|
29 |
+ class = c("Matrix", "matrix"), |
|
30 |
+ rowNamesDedup = TRUE) { |
|
28 | 31 |
|
29 | 32 |
class <- match.arg(class) |
30 | 33 |
|
... | ... |
@@ -45,6 +48,15 @@ importAlevin <- function( |
45 | 48 |
if (delayedArray) { |
46 | 49 |
mat <- DelayedArray::DelayedArray(mat) |
47 | 50 |
} |
51 |
+ |
|
52 |
+ if (isTRUE(rowNamesDedup)) { |
|
53 |
+ if (any(duplicated(rownames(mat)))) { |
|
54 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
55 |
+ "... suffix to them.") |
|
56 |
+ } |
|
57 |
+ mat <- dedupRowNames(mat) |
|
58 |
+ } |
|
59 |
+ |
|
48 | 60 |
genes <- rownames(mat) |
49 | 61 |
cb <- .readBarcodes(file.path(alevinDir, 'alevin/featureDump.txt'), |
50 | 62 |
header = 'auto', |
... | ... |
@@ -62,6 +74,6 @@ importAlevin <- function( |
62 | 74 |
column_name = coln, |
63 | 75 |
sample = sampleName, |
64 | 76 |
row.names = coln) |
65 |
- |
|
77 |
+ |
|
66 | 78 |
return(sce) |
67 | 79 |
} |
... | ... |
@@ -120,7 +120,8 @@ |
120 | 120 |
importAnnData <- function(sampleDirs = NULL, |
121 | 121 |
sampleNames = NULL, |
122 | 122 |
delayedArray = FALSE, |
123 |
- class = c("Matrix", "matrix")) { |
|
123 |
+ class = c("Matrix", "matrix"), |
|
124 |
+ rowNamesDedup = TRUE) { |
|
124 | 125 |
|
125 | 126 |
if (length(sampleDirs)!=length(sampleNames)){ |
126 | 127 |
stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories") |
... | ... |
@@ -138,6 +139,15 @@ importAnnData <- function(sampleDirs = NULL, |
138 | 139 |
res[[i]] <- scei |
139 | 140 |
} |
140 | 141 |
sce <- do.call(SingleCellExperiment::cbind, res) |
142 |
+ |
|
143 |
+ if (isTRUE(rowNamesDedup)) { |
|
144 |
+ if (any(duplicated(rownames(sce)))) { |
|
145 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
146 |
+ "... suffix to them.") |
|
147 |
+ } |
|
148 |
+ sce <- dedupRowNames(sce) |
|
149 |
+ } |
|
150 |
+ |
|
141 | 151 |
return(sce) |
142 | 152 |
} |
143 | 153 |
|
... | ... |
@@ -27,7 +27,7 @@ |
27 | 27 |
column_name = coln, |
28 | 28 |
sample = sample, |
29 | 29 |
row.names = coln) |
30 |
- |
|
30 |
+ |
|
31 | 31 |
return(sce) |
32 | 32 |
} |
33 | 33 |
|
... | ... |
@@ -41,7 +41,8 @@ |
41 | 41 |
barcodesFileNames, |
42 | 42 |
gzipped, |
43 | 43 |
class, |
44 |
- delayedArray) { |
|
44 |
+ delayedArray, |
|
45 |
+ rowNamesDedup) { |
|
45 | 46 |
|
46 | 47 |
if (length(BUStoolsDirs) != length(samples)) { |
47 | 48 |
stop("'BUStoolsDirs' and 'samples' have unequal lengths!") |
... | ... |
@@ -68,6 +69,15 @@ |
68 | 69 |
} |
69 | 70 |
|
70 | 71 |
sce <- do.call(SingleCellExperiment::cbind, res) |
72 |
+ |
|
73 |
+ if (isTRUE(rowNamesDedup)) { |
|
74 |
+ if (any(duplicated(rownames(sce)))) { |
|
75 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
76 |
+ "... suffix to them.") |
|
77 |
+ } |
|
78 |
+ sce <- dedupRowNames(sce) |
|
79 |
+ } |
|
80 |
+ |
|
71 | 81 |
return(sce) |
72 | 82 |
} |
73 | 83 |
|
... | ... |
@@ -103,6 +113,8 @@ |
103 | 113 |
#' \link[base]{matrix} function). Default "Matrix". |
104 | 114 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
105 | 115 |
#' \link[DelayedArray]{DelayedArray-class} object or not. Default \code{FALSE}. |
116 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
117 |
+#' \code{TRUE}. |
|
106 | 118 |
#' @return A \code{SingleCellExperiment} object containing the count |
107 | 119 |
#' matrix, the gene annotation, and the cell annotation. |
108 | 120 |
#' @examples |
... | ... |
@@ -140,7 +152,8 @@ importBUStools <- function( |
140 | 152 |
barcodesFileNames = "genes.barcodes.txt", |
141 | 153 |
gzipped = "auto", |
142 | 154 |
class = c("Matrix", "matrix"), |
143 |
- delayedArray = FALSE) { |
|
155 |
+ delayedArray = FALSE, |
|
156 |
+ rowNamesDedup = TRUE) { |
|
144 | 157 |
|
145 | 158 |
class <- match.arg(class) |
146 | 159 |
|
... | ... |
@@ -152,5 +165,6 @@ importBUStools <- function( |
152 | 165 |
barcodesFileNames = barcodesFileNames, |
153 | 166 |
gzipped = gzipped, |
154 | 167 |
class = class, |
155 |
- delayedArray = delayedArray) |
|
168 |
+ delayedArray = delayedArray, |
|
169 |
+ rowNamesDedup = rowNamesDedup) |
|
156 | 170 |
} |
... | ... |
@@ -347,7 +347,8 @@ |
347 | 347 |
barcodesFileNames, |
348 | 348 |
gzipped, |
349 | 349 |
class, |
350 |
- delayedArray) { |
|
350 |
+ delayedArray, |
|
351 |
+ rowNamesDedup) { |
|
351 | 352 |
|
352 | 353 |
.checkArgsImportCellRanger(cellRangerDirs, |
353 | 354 |
sampleDirs, |
... | ... |
@@ -400,6 +401,14 @@ |
400 | 401 |
} |
401 | 402 |
|
402 | 403 |
sce <- do.call(SingleCellExperiment::cbind, res) |
404 |
+ |
|
405 |
+ if (isTRUE(rowNamesDedup)) { |
|
406 |
+ if (any(duplicated(rownames(sce)))) { |
|
407 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
408 |
+ "... suffix to them.") |
|
409 |
+ } |
|
410 |
+ sce <- dedupRowNames(sce) |
|
411 |
+ } |
|
403 | 412 |
return(sce) |
404 | 413 |
} |
405 | 414 |
|
... | ... |
@@ -531,6 +540,8 @@ |
531 | 540 |
#' \code{dataTypeV2} will be used to determine Cell Ranger output directory. If it has |
532 | 541 |
#' length 1, it assumes that all samples use the same genome reference and |
533 | 542 |
#' the function will load only filtered or raw data. |
543 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
544 |
+#' \code{TRUE}. |
|
534 | 545 |
#' @details |
535 | 546 |
#' \code{importCellRangerV2} imports output from Cell Ranger V2. |
536 | 547 |
#' \code{importCellRangerV2Sample} imports output from one sample from Cell |
... | ... |
@@ -571,7 +582,8 @@ importCellRanger <- function( |
571 | 582 |
barcodesFileNames = "barcodes.tsv.gz", |
572 | 583 |
gzipped = "auto", |
573 | 584 |
class = c("Matrix", "matrix"), |
574 |
- delayedArray = FALSE) { |
|
585 |
+ delayedArray = FALSE, |
|
586 |
+ rowNamesDedup = TRUE) { |
|
575 | 587 |
|
576 | 588 |
class <- match.arg(class) |
577 | 589 |
dataType <- match.arg(dataType) |
... | ... |
@@ -586,7 +598,8 @@ importCellRanger <- function( |
586 | 598 |
barcodesFileNames = barcodesFileNames, |
587 | 599 |
gzipped = gzipped, |
588 | 600 |
class = class, |
589 |
- delayedArray = delayedArray) |
|
601 |
+ delayedArray = delayedArray, |
|
602 |
+ rowNamesDedup = rowNamesDedup) |
|
590 | 603 |
} |
591 | 604 |
|
592 | 605 |
|
... | ... |
@@ -611,7 +624,8 @@ importCellRangerV2 <- function( |
611 | 624 |
class = c("Matrix", "matrix"), |
612 | 625 |
delayedArray = FALSE, |
613 | 626 |
reference = NULL, |
614 |
- cellRangerOutsV2 = NULL) { |
|
627 |
+ cellRangerOutsV2 = NULL, |
|
628 |
+ rowNamesDedup = TRUE) { |
|
615 | 629 |
|
616 | 630 |
class <- match.arg(class) |
617 | 631 |
dataTypeV2 <- match.arg(dataTypeV2) |
... | ... |
@@ -628,7 +642,6 @@ importCellRangerV2 <- function( |
628 | 642 |
cellRangerOutsV2 <- .getCellRangerOutV2(dataTypeV2, reference) |
629 | 643 |
} |
630 | 644 |
|
631 |
- |
|
632 | 645 |
.importCellRanger(cellRangerDirs = cellRangerDirs, |
633 | 646 |
sampleDirs = sampleDirs, |
634 | 647 |
sampleNames = sampleNames, |
... | ... |
@@ -639,8 +652,8 @@ importCellRangerV2 <- function( |
639 | 652 |
barcodesFileNames = "barcodes.tsv", |
640 | 653 |
gzipped = FALSE, |
641 | 654 |
class = class, |
642 |
- delayedArray = delayedArray) |
|
643 |
- |
|
655 |
+ delayedArray = delayedArray, |
|
656 |
+ rowNamesDedup = rowNamesDedup) |
|
644 | 657 |
} |
645 | 658 |
|
646 | 659 |
|
... | ... |
@@ -658,6 +671,8 @@ importCellRangerV2 <- function( |
658 | 671 |
#' \link[base]{matrix} function). Default "Matrix". |
659 | 672 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
660 | 673 |
#' \link{DelayedArray} object or not. Default \code{FALSE}. |
674 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
675 |
+#' \code{TRUE}. |
|
661 | 676 |
#' @return A \code{SingleCellExperiment} object containing the count |
662 | 677 |
#' matrix, the feature annotations, and the cell annotation for the sample. |
663 | 678 |
#' @examples |
... | ... |
@@ -670,7 +685,8 @@ importCellRangerV2Sample <- function( |
670 | 685 |
dataDir = NULL, |
671 | 686 |
sampleName = NULL, |
672 | 687 |
class = c("Matrix", "matrix"), |
673 |
- delayedArray = FALSE) { |
|
688 |
+ delayedArray = FALSE, |
|
689 |
+ rowNamesDedup = TRUE) { |
|
674 | 690 |
|
675 | 691 |
class <- match.arg(class) |
676 | 692 |
|
... | ... |
@@ -684,7 +700,8 @@ importCellRangerV2Sample <- function( |
684 | 700 |
barcodesFileNames = "barcodes.tsv", |
685 | 701 |
gzipped = FALSE, |
686 | 702 |
class = class, |
687 |
- delayedArray = delayedArray) |
|
703 |
+ delayedArray = delayedArray, |
|
704 |
+ rowNamesDedup = rowNamesDedup) |
|
688 | 705 |
} |
689 | 706 |
|
690 | 707 |
|
... | ... |
@@ -702,7 +719,8 @@ importCellRangerV3 <- function( |
702 | 719 |
sampleNames = NULL, |
703 | 720 |
dataType = c("filtered", "raw"), |
704 | 721 |
class = c("Matrix", "matrix"), |
705 |
- delayedArray = FALSE) { |
|
722 |
+ delayedArray = FALSE, |
|
723 |
+ rowNamesDedup = TRUE) { |
|
706 | 724 |
|
707 | 725 |
class <- match.arg(class) |
708 | 726 |
dataType <- match.arg(dataType) |
... | ... |
@@ -723,7 +741,8 @@ importCellRangerV3 <- function( |
723 | 741 |
barcodesFileNames = "barcodes.tsv.gz", |
724 | 742 |
gzipped = TRUE, |
725 | 743 |
class = class, |
726 |
- delayedArray = delayedArray) |
|
744 |
+ delayedArray = delayedArray, |
|
745 |
+ rowNamesDedup = rowNamesDedup) |
|
727 | 746 |
|
728 | 747 |
} |
729 | 748 |
|
... | ... |
@@ -742,6 +761,8 @@ importCellRangerV3 <- function( |
742 | 761 |
#' \link[base]{matrix} function). Default "Matrix". |
743 | 762 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
744 | 763 |
#' \link{DelayedArray} object or not. Default \code{FALSE}. |
764 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
765 |
+#' \code{TRUE}. |
|
745 | 766 |
#' @return A \code{SingleCellExperiment} object containing the count |
746 | 767 |
#' matrix, the feature annotations, and the cell annotation for the sample. |
747 | 768 |
#' @examples |
... | ... |
@@ -754,7 +775,8 @@ importCellRangerV3Sample <- function( |
754 | 775 |
dataDir = "./", |
755 | 776 |
sampleName = "sample", |
756 | 777 |
class = c("Matrix", "matrix"), |
757 |
- delayedArray = FALSE) { |
|
778 |
+ delayedArray = FALSE, |
|
779 |
+ rowNamesDedup = TRUE) { |
|
758 | 780 |
|
759 | 781 |
class <- match.arg(class) |
760 | 782 |
|
... | ... |
@@ -768,5 +790,6 @@ importCellRangerV3Sample <- function( |
768 | 790 |
barcodesFileNames = "barcodes.tsv.gz", |
769 | 791 |
gzipped = TRUE, |
770 | 792 |
class = class, |
771 |
- delayedArray = delayedArray) |
|
793 |
+ delayedArray = delayedArray, |
|
794 |
+ rowNamesDedup = rowNamesDedup) |
|
772 | 795 |
} |
... | ... |
@@ -102,7 +102,8 @@ |
102 | 102 |
#' object. Can be one of "Matrix" (as returned by |
103 | 103 |
#' \link{readMM} function), or "matrix" (as returned by |
104 | 104 |
#' \link[base]{matrix} function). Default \code{"Matrix"}. |
105 |
- |
|
105 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
106 |
+#' \code{TRUE}. |
|
106 | 107 |
#' @details |
107 | 108 |
#' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered |
108 | 109 |
#' counts matrix stored as "cm" in the DropEst rds output. |
... | ... |
@@ -125,7 +126,8 @@ importDropEst <- function(sampleDirs = NULL, |
125 | 126 |
rdsFileName = 'cell.counts', |
126 | 127 |
sampleNames = NULL, |
127 | 128 |
delayedArray = FALSE, |
128 |
- class = c("Matrix", "matrix")) { |
|
129 |
+ class = c("Matrix", "matrix"), |
|
130 |
+ rowNamesDedup = TRUE) { |
|
129 | 131 |
dataType <- match.arg(dataType) |
130 | 132 |
class <- match.arg(class) |
131 | 133 |
|
... | ... |
@@ -145,6 +147,15 @@ importDropEst <- function(sampleDirs = NULL, |
145 | 147 |
res[[i]] <- scei |
146 | 148 |
} |
147 | 149 |
sce <- do.call(SingleCellExperiment::cbind, res) |
150 |
+ |
|
151 |
+ if (isTRUE(rowNamesDedup)) { |
|
152 |
+ if (any(duplicated(rownames(sce)))) { |
|
153 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
154 |
+ "... suffix to them.") |
|
155 |
+ } |
|
156 |
+ sce <- dedupRowNames(sce) |
|
157 |
+ } |
|
158 |
+ |
|
148 | 159 |
return(sce) |
149 | 160 |
} |
150 | 161 |
|
... | ... |
@@ -13,6 +13,8 @@ |
13 | 13 |
#' \code{"Matrix"}. |
14 | 14 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
15 | 15 |
#' \link{DelayedArray} object or not. Default \code{FALSE}. |
16 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
17 |
+#' \code{TRUE}. |
|
16 | 18 |
#' @details See the list below for the available datasets and their |
17 | 19 |
#' descriptions. |
18 | 20 |
#' \describe{ |
... | ... |
@@ -43,7 +45,7 @@ |
43 | 45 |
#' @export |
44 | 46 |
#' @importFrom SummarizedExperiment colData rowData colData<- assay assays |
45 | 47 |
importExampleData <- function(dataset, class = c("Matrix", "matrix"), |
46 |
- delayedArray = FALSE) { |
|
48 |
+ delayedArray = FALSE, rowNamesDedup = TRUE) { |
|
47 | 49 |
class <- match.arg(class) |
48 | 50 |
|
49 | 51 |
scRNAseqDatasets <- c("fluidigm_pollen", "allen_tasic") |
... | ... |
@@ -62,13 +64,16 @@ importExampleData <- function(dataset, class = c("Matrix", "matrix"), |
62 | 64 |
temp <- scRNAseq::ReprocessedAllenData() |
63 | 65 |
temp$sample <- paste0(colData(temp)$driver_1_s, "_", colData(temp)$dissection_s) |
64 | 66 |
} |
67 |
+ if (isTRUE(rowNamesDedup)) { |
|
68 |
+ temp <- dedupRowNames(temp) |
|
69 |
+ } |
|
65 | 70 |
} else if (dataset %in% tenxPbmcDatasets) { |
66 | 71 |
if(!("TENxPBMCData" %in% rownames(utils::installed.packages()))) { |
67 | 72 |
stop(paste0("Package 'TENxPBMCData' is not installed. Please install to load dataset '", dataset, "'.")) |
68 | 73 |
} |
69 | 74 |
temp <- TENxPBMCData::TENxPBMCData(dataset = dataset) |
70 | 75 |
colnames(temp) <- paste(temp$Sample, temp$Barcode, sep="_") |
71 |
- rownames(temp) <- rowData(temp)$Symbol_TENx |
|
76 |
+ temp <- setRowNames(temp, "Symbol_TENx", dedup = rowNamesDedup) |
|
72 | 77 |
colData(temp)$sample <- colData(temp)$Sample |
73 | 78 |
} else { |
74 | 79 |
stop("'dataset' must be one of: ", paste(c(scRNAseqDatasets, tenxPbmcDatasets), collapse = ",")) |
... | ... |
@@ -1,121 +1,147 @@ |
1 |
-.checkGzip <- function(path, gzipped){ |
|
2 |
- if (gzipped == "auto") { |
|
3 |
- ext <- tools::file_ext(path) |
|
4 |
- if (ext == "gz") { |
|
5 |
- path <- gzfile(path) |
|
6 |
- } |
|
7 |
- } else if (isTRUE(gzipped)) { |
|
8 |
- path <- gzfile(path) |
|
9 |
- } |
|
10 |
- |
|
11 |
- return(path) |
|
12 |
-} |
|
13 |
- |
|
14 |
-#' Create a SingleCellExperiment object from files |
|
15 |
-#' |
|
16 |
-#' Creates a SingleCellExperiment object from a counts file in various formats. |
|
17 |
-#' and a file of annotation information, . |
|
18 |
-#' |
|
19 |
-#' @param assayFile The path to a file in .mtx, .txt, .csv, .tab, or .tsv format. |
|
20 |
-#' @param annotFile The path to a text file that contains columns of annotation |
|
21 |
-#' information for each sample in the assayFile. This file should have the same |
|
22 |
-#' number of rows as there are columns in the assayFile. If multiple samples are |
|
23 |
-#' represented in these files, this should be denoted by a column called \code{'sample'} |
|
24 |
-#' within the \code{annotFile}. |
|
25 |
-#' @param featureFile The path to a text file that contains columns of |
|
26 |
-#' annotation information for each gene in the count matrix. This file should |
|
27 |
-#' have the same genes in the same order as assayFile. This is optional. |
|
28 |
-#' @param assayName The name of the assay that you are uploading. The default |
|
29 |
-#' is "counts". |
|
30 |
-#' @param inputDataFrames If TRUE, assayFile and annotFile are read as data |
|
31 |
-#' frames instead of file paths. The default is FALSE. |
|
32 |
-#' @param class Character. The class of the expression matrix stored in the SCE |
|
33 |
-#' object. Can be one of "Matrix" (as returned by |
|
34 |
-#' \link{readMM} function), or "matrix" (as returned by |
|
35 |
-#' \link[base]{matrix} function). Default "Matrix". |
|
36 |
-#' @param annotFileHeader Whether there's a header (colnames) in the cell annotation file. Default is FALSE |
|
37 |
-#' @param annotFileRowName Which column is used as the rownames for the cell annotation file. Default is 1 (first column). |
|
38 |
-#' @param annotFileSep Separater used for the cell annotation file. Default is "\\t". |
|
39 |
-#' @param featureHeader Whether there's a header (colnames) in the feature annotation file. Default is FALSE |
|
40 |
-#' @param featureRowName Which column is used as the rownames for the feature annotation file. Default is 1 (first column). |
|
41 |
-#' @param featureSep Separater used for the feature annotation file. Default is "\\t". |
|
42 |
-#' @param gzipped Whether the input file is gzipped. Default is "auto" and it will automatically detect whether the file is gzipped. Other options is TRUE or FALSE. |
|
43 |
-#' @param delayedArray Boolean. Whether to read the expression matrix as |
|
44 |
-#' \link{DelayedArray} object or not. Default \code{FALSE}. |
|
45 |
-#' @return a SingleCellExperiment object |
|
46 |
-#' @export |
|
47 |
- |
|
48 |
-importFromFiles <- function(assayFile, annotFile = NULL, featureFile = NULL, |
|
49 |
- assayName = "counts", inputDataFrames = FALSE, |
|
50 |
- class = c("Matrix", "matrix"), delayedArray = FALSE, |
|
51 |
- annotFileHeader = FALSE, annotFileRowName = 1, |
|
52 |
- annotFileSep = "\t", featureHeader = FALSE, |
|
53 |
- featureRowName = 1, featureSep = "\t", gzipped = "auto" |
|
54 |
- ){ |
|
55 |
- |
|
56 |
- class <- match.arg(class) |
|
57 |
- |
|
58 |
- if (inputDataFrames){ |
|
59 |
- countsin <- assayFile |
|
60 |
- annotin <- annotFile |
|
61 |
- featurein <- featureFile |
|
62 |
- } else{ |
|
63 |
- countsin <- readSingleCellMatrix(assayFile, class = class, delayedArray = delayedArray) |
|
64 |
- if (!is.null(annotFile)){ |
|
65 |
- annotFile <- .checkGzip(annotFile, gzipped = gzipped) |
|
66 |
- annotin <- utils::read.table(annotFile, sep = annotFileSep, header = annotFileHeader, |
|
67 |
- row.names = annotFileRowName, stringsAsFactors = FALSE) |
|
68 |
- } |
|
69 |
- if (!is.null(featureFile)){ |
|
70 |
- featureFile <- .checkGzip(featureFile, gzipped = gzipped) |
|
71 |
- featurein <- utils::read.table(featureFile, sep = featureSep, header = featureHeader, |
|
72 |
- row.names = featureRowName, stringsAsFactors = FALSE) |
|
73 |
- } |
|
74 |
- } |
|
75 |
- if (is.null(annotFile)){ |
|
76 |
- annotin <- data.frame(row.names = colnames(countsin)) |
|
77 |
- annotin <- S4Vectors::DataFrame(annotin) |
|
78 |
- } |
|
79 |
- if (is.null(featureFile)){ |
|
80 |
- featurein <- data.frame(Gene = rownames(countsin)) |
|
81 |
- rownames(featurein) <- featurein$Gene |
|
82 |
- featurein <- S4Vectors::DataFrame(featurein) |
|
83 |
- } |
|
84 |
- |
|
85 |
- if (nrow(annotin) != ncol(countsin)){ |
|
86 |
- stop("Different number of samples in input matrix and annotations: annot: ", |
|
87 |
- nrow(annotin), ", counts: ", ncol(countsin)) |
|
88 |
- } |
|
89 |
- if (nrow(featurein) != nrow(countsin)){ |
|
90 |
- stop("Different number of samples in input matrix and feature annotation", |
|
91 |
- nrow(featurein), ", counts: ", nrow(countsin)) |
|
92 |
- } |
|
93 |
- if (any(rownames(annotin) != colnames(countsin))){ |
|
94 |
- stop("Sample names in input matrix and annotation do not match!\nExample: ", |
|
95 |
- rownames(annotin)[rownames(annotin) != colnames(countsin)][1], " vs. ", |
|
96 |
- colnames(countsin)[rownames(annotin) != colnames(countsin)][1]) |
|
97 |
- } |
|
98 |
- if (any(rownames(featurein) != rownames(countsin))){ |
|
99 |
- stop("Sample names in input matrix and feature annotation do not match!") |
|
100 |
- } |
|
101 |
- |
|
102 |
- assaylist <- list() |
|
103 |
- if (is.null(rownames(countsin))){ |
|
104 |
- rownames(countsin) <- rownames(featurein) |
|
105 |
- } |
|
106 |
- if (is.null(colnames(countsin))){ |
|
107 |
- colnames(countsin) <- rownames(annotin) |
|
108 |
- } |
|
109 |
- #assaylist[[assayName]] <- methods::as(countsin, "dgCMatrix") |
|
110 |
- assaylist[[assayName]] <- .convertToMatrix(countsin) |
|
111 |
- |
|
112 |
- newassay <- SingleCellExperiment::SingleCellExperiment(assays = assaylist, |
|
113 |
- colData = annotin, |
|
114 |
- rowData = featurein) |
|
115 |
- |
|
116 |
- if(is.null(newassay$sample)) { |
|
117 |
- newassay$sample <- "sample" |
|
118 |
- } |
|
119 |
- |
|
120 |
- return(newassay) |
|
121 |
-} |
|
1 |
+.checkGzip <- function(path, gzipped){ |
|
2 |
+ if (gzipped == "auto") { |
|
3 |
+ ext <- tools::file_ext(path) |
|
4 |
+ if (ext == "gz") { |
|
5 |
+ path <- gzfile(path) |
|
6 |
+ } |
|
7 |
+ } else if (isTRUE(gzipped)) { |
|
8 |
+ path <- gzfile(path) |
|
9 |
+ } |
|
10 |
+ |
|
11 |
+ return(path) |
|
12 |
+} |
|
13 |
+ |
|
14 |
+#' Create a SingleCellExperiment object from files |
|
15 |
+#' |
|
16 |
+#' @details Creates a \linkS4class{SingleCellExperiment} object from a counts |
|
17 |
+#' file in various formats, and files of cell and feature annotation. |
|
18 |
+#' @param assayFile The path to a file in .mtx, .txt, .csv, .tab, or .tsv |
|
19 |
+#' format. |
|
20 |
+#' @param annotFile The path to a text file that contains columns of annotation |
|
21 |
+#' information for each cell in the \code{assayFile}. This file should have the |
|
22 |
+#' same number of rows as there are columns in the \code{assayFile}. If multiple |
|
23 |
+#' samples are represented in the dataset, this should be denoted by a column |
|
24 |
+#' called \code{'sample'} within the \code{annotFile}. |
|
25 |
+#' @param featureFile The path to a text file that contains columns of |
|
26 |
+#' annotation information for each gene in the count matrix. This file should |
|
27 |
+#' have the same genes in the same order as \code{assayFile}. This is optional. |
|
28 |
+#' @param assayName The name of the assay that you are uploading. The default |
|
29 |
+#' is \code{"counts"}. |
|
30 |
+#' @param inputDataFrames If \code{TRUE}, \code{assayFile}, \code{annotFile} and |
|
31 |
+#' \code{featureFile} should be \code{data.frames} object (or its inheritance) |
|
32 |
+#' instead of file paths. The default is \code{FALSE}. |
|
33 |
+#' @param class Character. The class of the expression matrix stored in the SCE |
|
34 |
+#' object. Can be one of \code{"Matrix"} (as returned by |
|
35 |
+#' \link{readMM} function), or \code{"matrix"} (as returned by |
|
36 |
+#' \link[base]{matrix} function). Default \code{"Matrix"}. |
|
37 |
+#' @param annotFileHeader Whether there's a header (colnames) in the cell |
|
38 |
+#' annotation file. Default is \code{FALSE}. |
|
39 |
+#' @param annotFileRowName Which column is used as the rownames for the cell |
|
40 |
+#' annotation file. This should match to the colnames of the \code{assayFile}. |
|
41 |
+#' Default is \code{1} (first column). |
|
42 |
+#' @param annotFileSep Separater used for the cell annotation file. Default is |
|
43 |
+#' \code{"\\t"}. |
|
44 |
+#' @param featureHeader Whether there's a header (colnames) in the feature |
|
45 |
+#' annotation file. Default is \code{FALSE}. |
|
46 |
+#' @param featureRowName Which column is used as the rownames for the feature |
|
47 |
+#' annotation file. This should match to the rownames of the \code{assayFile}. |
|
48 |
+#' Default is \code{1}. (first column). |
|
49 |
+#' @param featureSep Separater used for the feature annotation file. Default is |
|
50 |
+#' \code{"\\t"}. |
|
51 |
+#' @param gzipped Whether the input file is gzipped. Default is \code{"auto"} |
|
52 |
+#' and it will automatically detect whether the file is gzipped. Other options |
|
53 |
+#' are \code{TRUE} or \code{FALSE}. |
|
54 |
+#' @param delayedArray Boolean. Whether to read the expression matrix as |
|
55 |
+#' \link{DelayedArray} object or not. Default \code{FALSE}. |
|
56 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
57 |
+#' \code{TRUE}. |
|
58 |
+#' @return a \linkS4class{SingleCellExperiment} object |
|
59 |
+#' @export |
|
60 |
+ |
|
61 |
+importFromFiles <- function(assayFile, annotFile = NULL, featureFile = NULL, |
|
62 |
+ assayName = "counts", inputDataFrames = FALSE, |
|
63 |
+ class = c("Matrix", "matrix"), delayedArray = FALSE, |
|
64 |
+ annotFileHeader = FALSE, annotFileRowName = 1, |
|
65 |
+ annotFileSep = "\t", featureHeader = FALSE, |
|
66 |
+ featureRowName = 1, featureSep = "\t", |
|
67 |
+ gzipped = "auto", rowNamesDedup = TRUE){ |
|
68 |
+ |
|
69 |
+ class <- match.arg(class) |
|
70 |
+ |
|
71 |
+ if (inputDataFrames){ |
|
72 |
+ countsin <- assayFile |
|
73 |
+ annotin <- annotFile |
|
74 |
+ featurein <- featureFile |
|
75 |
+ } else{ |
|
76 |
+ countsin <- readSingleCellMatrix(assayFile, class = class, |
|
77 |
+ delayedArray = delayedArray) |
|
78 |
+ if (!is.null(annotFile)){ |
|
79 |
+ annotFile <- .checkGzip(annotFile, gzipped = gzipped) |
|
80 |
+ annotin <- utils::read.table(annotFile, sep = annotFileSep, |
|
81 |
+ header = annotFileHeader, |
|
82 |
+ row.names = annotFileRowName, |
|
83 |
+ stringsAsFactors = FALSE) |
|
84 |
+ } |
|
85 |
+ if (!is.null(featureFile)){ |
|
86 |
+ featureFile <- .checkGzip(featureFile, gzipped = gzipped) |
|
87 |
+ featurein <- utils::read.table(featureFile, sep = featureSep, |
|
88 |
+ header = featureHeader, |
|
89 |
+ row.names = featureRowName, |
|
90 |
+ stringsAsFactors = FALSE) |
|
91 |
+ } |
|
92 |
+ } |
|
93 |
+ if (is.null(annotFile)){ |
|
94 |
+ annotin <- data.frame(row.names = colnames(countsin)) |
|
95 |
+ annotin <- S4Vectors::DataFrame(annotin) |
|
96 |
+ } |
|
97 |
+ if (is.null(featureFile)){ |
|
98 |
+ featurein <- data.frame(Gene = rownames(countsin)) |
|
99 |
+ rownames(featurein) <- featurein$Gene |
|
100 |
+ featurein <- S4Vectors::DataFrame(featurein) |
|
101 |
+ } |
|
102 |
+ |
|
103 |
+ if (nrow(annotin) != ncol(countsin)){ |
|
104 |
+ stop("Different number of cells in input matrix and annotations: annot: ", |
|
105 |
+ nrow(annotin), ", counts: ", ncol(countsin)) |
|
106 |
+ } |
|
107 |
+ if (nrow(featurein) != nrow(countsin)){ |
|
108 |
+ stop("Different number of features in input matrix and feature annotation", |
|
109 |
+ nrow(featurein), ", counts: ", nrow(countsin)) |
|
110 |
+ } |
|
111 |
+ if (any(rownames(annotin) != colnames(countsin))){ |
|
112 |
+ stop("Cell names in input matrix and annotation do not match!\nExample: ", |
|
113 |
+ rownames(annotin)[rownames(annotin) != colnames(countsin)][1], " vs. ", |
|
114 |
+ colnames(countsin)[rownames(annotin) != colnames(countsin)][1]) |
|
115 |
+ } |
|
116 |
+ if (any(rownames(featurein) != rownames(countsin))){ |
|
117 |
+ stop("Feature names in input matrix and feature annotation do not match!") |
|
118 |
+ } |
|
119 |
+ |
|
120 |
+ assaylist <- list() |
|
121 |
+ if (is.null(rownames(countsin))){ |
|
122 |
+ rownames(countsin) <- rownames(featurein) |
|
123 |
+ } |
|
124 |
+ if (is.null(colnames(countsin))){ |
|
125 |
+ colnames(countsin) <- rownames(annotin) |
|
126 |
+ } |
|
127 |
+ #assaylist[[assayName]] <- methods::as(countsin, "dgCMatrix") |
|
128 |
+ assaylist[[assayName]] <- .convertToMatrix(countsin) |
|
129 |
+ |
|
130 |
+ newassay <- SingleCellExperiment::SingleCellExperiment(assays = assaylist, |
|
131 |
+ colData = annotin, |
|
132 |
+ rowData = featurein) |
|
133 |
+ |
|
134 |
+ if(is.null(newassay$sample)) { |
|
135 |
+ newassay$sample <- "sample" |
|
136 |
+ } |
|
137 |
+ |
|
138 |
+ if (isTRUE(rowNamesDedup)) { |
|
139 |
+ if (any(duplicated(rownames(newassay)))) { |
|
140 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
141 |
+ "... suffix to them.") |
|
142 |
+ } |
|
143 |
+ newassay <- dedupRowNames(newassay) |
|
144 |
+ } |
|
145 |
+ |
|
146 |
+ return(newassay) |
|
147 |
+} |
... | ... |
@@ -10,20 +10,26 @@ |
10 | 10 |
# sparse <- reticulate::import("scipy.sparse") |
11 | 11 |
# numpy <- reticulate::import("numpy") |
12 | 12 |
if (!reticulate::py_module_available(module = "scipy.sparse")) { |
13 |
- stop("Error!", "Cannot find python module 'scipy.sparse', please install Conda and run sctkPythonInstallConda() |
|
14 |
- or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules, |
|
15 |
- make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been |
|
16 |
- restarted since the module installation. Alternatively, scipy can be installed on the local machine |
|
17 |
- with pip (e.g. pip install scipy) and then the 'use_python()' function from the 'reticulate' package |
|
18 |
- can be used to select the correct Python environment.") |
|
13 |
+ stop("Error!", "Cannot find python module 'scipy.sparse', please install |
|
14 |
+ Conda and run sctkPythonInstallConda() or run |
|
15 |
+ sctkPythonInstallVirtualEnv(). If one of these have been previously |
|
16 |
+ run to install the modules, make sure to run selectSCTKConda() or |
|
17 |
+ selectSCTKVirtualEnvironment(), respectively, if R has been restarted |
|
18 |
+ since the module installation. Alternatively, scipy can be installed |
|
19 |
+ on the local machine with pip (e.g. pip install scipy) and then the |
|
20 |
+ 'use_python()' function from the 'reticulate' package can be used to |
|
21 |
+ select the correct Python environment.") |
|
19 | 22 |
} |
20 | 23 |
if (!reticulate::py_module_available(module = "numpy")) { |
21 |
- stop("Error!", "Cannot find python module 'numpy', please install Conda and run sctkPythonInstallConda() |
|
22 |
- or run sctkPythonInstallVirtualEnv(). If one of these have been previously run to install the modules, |
|
23 |
- make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), respectively, if R has been |
|
24 |
- restarted since the module installation. Alternatively, numpy can be installed on the local machine |
|
25 |
- with pip (e.g. pip install numpy) and then the 'use_python()' function from the 'reticulate' package |
|
26 |
- can be used to select the correct Python environment.") |
|
24 |
+ stop("Error!", "Cannot find python module 'numpy', please install Conda and |
|
25 |
+ run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv(). If |
|
26 |
+ one of these have been previously run to install the modules, make |
|
27 |
+ sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(), |
|
28 |
+ respectively, if R has been restarted since the module installation. |
|
29 |
+ Alternatively, numpy can be installed on the local machine with pip |
|
30 |
+ (e.g. pip install numpy) and then the 'use_python()' function from the |
|
31 |
+ 'reticulate' package can be used to select the correct Python |
|
32 |
+ environment.") |
|
27 | 33 |
} |
28 | 34 |
|
29 | 35 |
error <- try({ |
... | ... |
@@ -56,7 +62,9 @@ |
56 | 62 |
}, silent = TRUE) |
57 | 63 |
|
58 | 64 |
if(inherits(error, "try-error")) { |
59 |
- stop(paste0("importOptimus did not complete successfully. SCE could not be generated. Error given during the import process: \n\n", error)) |
|
65 |
+ stop(paste0("importOptimus did not complete successfully. SCE could not be", |
|
66 |
+ "generated. Error given during the import process: \n\n", |
|
67 |
+ error)) |
|
60 | 68 |
} |
61 | 69 |
|
62 | 70 |
if (class == "matrix") { |
... | ... |
@@ -180,7 +188,8 @@ |
180 | 188 |
geneMetricsLocation, |
181 | 189 |
emptyDropsLocation, |
182 | 190 |
class, |
183 |
- delayedArray) { |
|
191 |
+ delayedArray, |
|
192 |
+ rowNamesDedup) { |
|
184 | 193 |
|
185 | 194 |
.checkArgsImportOptimus(OptimusDirs, samples) |
186 | 195 |
|
... | ... |
@@ -201,6 +210,15 @@ |
201 | 210 |
} |
202 | 211 |
|
203 | 212 |
sce <- do.call(SingleCellExperiment::cbind, res) |
213 |
+ |
|
214 |
+ if (isTRUE(rowNamesDedup)) { |
|
215 |
+ if (any(duplicated(rownames(sce)))) { |
|
216 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
217 |
+ "... suffix to them.") |
|
218 |
+ } |
|
219 |
+ sce <- dedupRowNames(sce) |
|
220 |
+ } |
|
221 |
+ |
|
204 | 222 |
return(sce) |
205 | 223 |
} |
206 | 224 |
|
... | ... |
@@ -247,6 +265,8 @@ |
247 | 265 |
#' \link[base]{matrix} function). Default "Matrix". |
248 | 266 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
249 | 267 |
#' \link{DelayedArray} object or not. Default \code{FALSE}. |
268 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
269 |
+#' \code{TRUE}. |
|
250 | 270 |
#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object |
251 | 271 |
#' containing the count |
252 | 272 |
#' matrix, the gene annotation, and the cell annotation. |
... | ... |
@@ -267,7 +287,8 @@ importOptimus <- function(OptimusDirs, |
267 | 287 |
geneMetricsLocation = "call-MergeGeneMetrics/merged-gene-metrics.csv.gz", |
268 | 288 |
emptyDropsLocation = "call-RunEmptyDrops/empty_drops_result.csv", |
269 | 289 |
class = c("Matrix", "matrix"), |
270 |
- delayedArray = FALSE) { |
|
290 |
+ delayedArray = FALSE, |
|
291 |
+ rowNamesDedup = TRUE) { |
|
271 | 292 |
|
272 | 293 |
class <- match.arg(class) |
273 | 294 |
|
... | ... |
@@ -280,6 +301,7 @@ importOptimus <- function(OptimusDirs, |
280 | 301 |
geneMetricsLocation = geneMetricsLocation, |
281 | 302 |
emptyDropsLocation = emptyDropsLocation, |
282 | 303 |
class = class, |
283 |
- delayedArray = delayedArray) |
|
304 |
+ delayedArray = delayedArray, |
|
305 |
+ rowNamesDedup = rowNamesDedup) |
|
284 | 306 |
|
285 | 307 |
} |
... | ... |
@@ -40,7 +40,8 @@ |
40 | 40 |
barcodesFileNames, |
41 | 41 |
gzipped, |
42 | 42 |
class, |
43 |
- delayedArray) { |
|
43 |
+ delayedArray, |
|
44 |
+ rowNamesDedup) { |
|
44 | 45 |
|
45 | 46 |
if (length(STARsoloDirs) != length(samples)) { |
46 | 47 |
stop("'STARsoloDirs' and 'samples' have unequal lengths!") |
... | ... |
@@ -68,6 +69,14 @@ |
68 | 69 |
} |
69 | 70 |
|
70 | 71 |
sce <- do.call(SingleCellExperiment::cbind, res) |
72 |
+ |
|
73 |
+ if (isTRUE(rowNamesDedup)) { |
|
74 |
+ if (any(duplicated(rownames(sce)))) { |
|
75 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
76 |
+ "... suffix to them.") |
|
77 |
+ } |
|
78 |
+ sce <- dedupRowNames(sce) |
|
79 |
+ } |
|
71 | 80 |
return(sce) |
72 | 81 |
} |
73 | 82 |
|
... | ... |
@@ -111,6 +120,8 @@ |
111 | 120 |
#' \link[base]{matrix} function). Default "Matrix". |
112 | 121 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
113 | 122 |
#' \link{DelayedArray} object or not. Default \code{FALSE}. |
123 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default |
|
124 |
+#' \code{TRUE}. |
|
114 | 125 |
#' @return A \code{SingleCellExperiment} object containing the count |
115 | 126 |
#' matrix, the gene annotation, and the cell annotation. |
116 | 127 |
#' @examples |
... | ... |
@@ -151,7 +162,8 @@ importSTARsolo <- function( |
151 | 162 |
barcodesFileNames = "barcodes.tsv", |
152 | 163 |
gzipped = "auto", |
153 | 164 |
class = c("Matrix", "matrix"), |
154 |
- delayedArray = FALSE) { |
|
165 |
+ delayedArray = FALSE, |
|
166 |
+ rowNamesDedup = TRUE) { |
|
155 | 167 |
|
156 | 168 |
class <- match.arg(class) |
157 | 169 |
|
... | ... |
@@ -164,5 +176,6 @@ importSTARsolo <- function( |
164 | 176 |
barcodesFileNames = barcodesFileNames, |
165 | 177 |
gzipped = gzipped, |
166 | 178 |
class = class, |
167 |
- delayedArray = delayedArray) |
|
179 |
+ delayedArray = delayedArray, |
|
180 |
+ rowNamesDedup = rowNamesDedup) |
|
168 | 181 |
} |
... | ... |
@@ -74,7 +74,8 @@ |
74 | 74 |
delayedArray, |
75 | 75 |
cbNotFirstCol, |
76 | 76 |
feNotFirstCol, |
77 |
- combinedSample) { |
|
77 |
+ combinedSample, |
|
78 |
+ rowNamesDedup) { |
|
78 | 79 |
|
79 | 80 |
if (length(seqcDirs) != length(samples)) { |
80 | 81 |
stop("'seqcDirs' and 'samples' have unequal lengths!") |
... | ... |
@@ -121,6 +122,13 @@ |
121 | 122 |
|
122 | 123 |
} |
123 | 124 |
sce <- do.call(SingleCellExperiment::cbind, res) |
125 |
+ if (isTRUE(rowNamesDedup)) { |
|
126 |
+ if (any(duplicated(rownames(sce)))) { |
|
127 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
128 |
+ "... suffix to them.") |
|
129 |
+ } |
|
130 |
+ sce <- dedupRowNames(sce) |
|
131 |
+ } |
|
124 | 132 |
return(sce) |
125 | 133 |
|
126 | 134 |
} else { |
... | ... |
@@ -133,7 +141,15 @@ |
133 | 141 |
res[[i]] <- scei |
134 | 142 |
} |
135 | 143 |
if (length(seqcDirs) == 1) { |
136 |
- return(res[[1]]) |
|
144 |
+ sce <- res[[1]] |
|
145 |
+ if (isTRUE(rowNamesDedup)) { |
|
146 |
+ if (any(duplicated(rownames(sce)))) { |
|
147 |
+ message("Duplicated gene names found, adding '-1', '-2', ", |
|
148 |
+ "... suffix to them.") |
|
149 |
+ } |
|
150 |
+ sce <- dedupRowNames(sce) |
|
151 |
+ } |
|
152 |
+ return(sce) |
|
137 | 153 |
} else { |
138 | 154 |
return(res) |
139 | 155 |
} |
... | ... |
@@ -145,12 +161,11 @@ |
145 | 161 |
#' @rdname importSEQC |
146 | 162 |
#' @title Construct SCE object from seqc output |
147 | 163 |
#' @description Read the filtered barcodes, features, and matrices for all |
148 |
-#' samples from (preferably a single run of) seqc output. Import and |
|
149 |
-#' combine them as one big \link[SingleCellExperiment]{SingleCellExperiment} |
|
150 |
-#' object. |
|
164 |
+#' samples from (preferably a single run of) seqc output. Import and combine |
|
165 |
+#' them as one big \link[SingleCellExperiment]{SingleCellExperiment} object. |
|
151 | 166 |
#' @param seqcDirs A vector of paths to seqc output files. Each sample |
152 |
-#' should have its own path. For example: \code{./pbmc_1k_50x50}. |
|
153 |
-#' Must have the same length as \code{samples}. |
|
167 |
+#' should have its own path. For example: \code{"./pbmc_1k_50x50"}. Must have |
|
168 |
+#' the same length as \code{samples}. |
|
154 | 169 |
#' @param samples A vector of user-defined sample names for the samples to be |
155 | 170 |
#' imported. Must have the same length as \code{seqcDirs}. |
156 | 171 |
#' @param prefix A vector containing the prefix of file names within each |
... | ... |
@@ -158,44 +173,39 @@ |
158 | 173 |
#' length as \emph{samples}. |
159 | 174 |
#' @param gzipped Boolean. \code{TRUE} if the seqc output files |
160 | 175 |
#' (sparse_counts_barcode.csv, sparse_counts_genes.csv, and |
161 |
-#' sparse_molecule_counts.mtx) |
|
162 |
-#' were gzip compressed. \code{FALSE} otherwise. Default seqc outputs are |
|
163 |
-#' not gzipped. |
|
164 |
-#' Default \code{FALSE}. |
|
176 |
+#' sparse_molecule_counts.mtx) were gzip compressed. \code{FALSE} otherwise. |
|
177 |
+#' Default seqc outputs are not gzipped. Default \code{FALSE}. |
|
165 | 178 |
#' @param class Character. The class of the expression matrix stored in the SCE |
166 |
-#' object. Can be one of "Matrix" (as returned by |
|
167 |
-#' \link{readMM} function), or "matrix" (as returned by |
|
168 |
-#' \link[base]{matrix} function). Default "Matrix". |
|
179 |
+#' object. Can be one of \code{"Matrix"} (as returned by \link{readMM} |
|
180 |
+#' function), or \code{"matrix"} (as returned by \link[base]{matrix} function). |
|
181 |
+#' Default \code{"Matrix"}. |
|
169 | 182 |
#' @param delayedArray Boolean. Whether to read the expression matrix as |
170 | 183 |
#' \link{DelayedArray} object or not. Default \code{FALSE}. |
171 | 184 |
#' @param feNotFirstCol Boolean. \code{TRUE} if first column of |
172 |
-#' sparse_counts_genes.csv |
|
173 |
-#' is row index and it will be removed. \code{FALSE} the first column will |
|
174 |
-#' be kept. |
|
185 |
+#' sparse_counts_genes.csv is row index and it will be removed. \code{FALSE} |
|
186 |
+#' the first column will be kept. |
|
175 | 187 |
#' @param cbNotFirstCol Boolean. \code{TRUE} if first column of |
176 |
-#' sparse_counts_barcode.csv |
|
177 |
-#' is row index and it will be removed. \code{FALSE} the first column will |
|
178 |
-#' be kept. |
|
188 |
+#' sparse_counts_barcode.csv is row index and it will be removed. \code{FALSE} |
|
189 |
+#' the first column will be kept. |
|
179 | 190 |
#' @param combinedSample Boolean. If \code{TRUE}, \code{importSEQC} returns a |
180 | 191 |
#' \code{SingleCellExperiment} object containing the combined count matrix, |
181 |
-#' feature annotations |
|
182 |
-#' and the cell annotations. If \code{FALSE}, \code{importSEQC} returns a |
|
183 |
-#' list containing multiple |
|
192 |
+#' feature annotations and the cell annotations. If \code{FALSE}, |
|
193 |
+#' \code{importSEQC} returns a list containing multiple |
|
184 | 194 |
#' \code{SingleCellExperiment} objects. Each \code{SingleCellExperiment} |
185 | 195 |
#' contains count matrix, feature annotations and cell annotations for |
186 | 196 |
#' each sample. |
197 |
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Only applied |
|
198 |
+#' if \code{combinedSample} is \code{TRUE} or only one \code{seqcDirs} |
|
199 |
+#' specified. Default \code{TRUE}. |
|
187 | 200 |
#' @details |
188 |
-#' \code{importSEQC} imports output from seqc. |
|
189 |
-#' The default sparse_counts_barcode.csv or sparse_counts_genes.csv from |
|
190 |
-#' seqc output |
|
201 |
+#' \code{importSEQC} imports output from seqc. The default |
|
202 |
+#' sparse_counts_barcode.csv or sparse_counts_genes.csv from seqc output |
|
191 | 203 |
#' contains two columns. The first column is row index and the second column |
192 |
-#' is cell-barcode |
|
193 |
-#' or gene symbol. \code{importSEQC} will remove first column. Alternatively, |
|
194 |
-#' user can call |
|
204 |
+#' is cell-barcode or gene symbol. \code{importSEQC} will remove first column. |
|
205 |
+#' Alternatively, user can call |
|
195 | 206 |
#' \code{cbNotFirstCol} or \code{feNotFirstCol} as FALSE to keep the first |
196 |
-#' column of these files. |
|
197 |
-#' When \code{combinedSample} is TRUE, \code{importSEQC} will combined count |
|
198 |
-#' matrix with genes detected in at least one sample. |
|
207 |
+#' column of these files. When \code{combinedSample} is TRUE, \code{importSEQC} |
|
208 |
+#' will combined count matrix with genes detected in at least one sample. |
|
199 | 209 |
#' @return A \code{SingleCellExperiment} object containing the combined count |
200 | 210 |
#' matrix, the feature annotations, and the cell annotation. |
201 | 211 |
#' @examples |
... | ... |
@@ -220,7 +230,8 @@ importSEQC <- function( |
220 | 230 |
delayedArray = FALSE, |
221 | 231 |
cbNotFirstCol = TRUE, |
222 | 232 |
feNotFirstCol = TRUE, |
223 |
- combinedSample = TRUE) { |
|
233 |
+ combinedSample = TRUE, |
|
234 |
+ rowNamesDedup = TRUE) { |
|
224 | 235 |
|
225 | 236 |
class <- match.arg(class) |
226 | 237 |
|
... | ... |
@@ -232,5 +243,6 @@ importSEQC <- function( |
232 | 243 |
delayedArray = delayedArray, |
233 | 244 |
cbNotFirstCol = cbNotFirstCol, |
234 | 245 |
feNotFirstCol = feNotFirstCol, |
235 |
- combinedSample = combinedSample) |
|
246 |
+ combinedSample = combinedSample, |
|
247 |
+ rowNamesDedup = rowNamesDedup) |
|
236 | 248 |
} |
... | ... |
@@ -204,7 +204,7 @@ discreteColorPalette <- function(n, palette = c("random", "ggplot", "celda"), |
204 | 204 |
#' insert a new column called \code{"rownames.uniq"} to \code{rowData(x)}, with |
205 | 205 |
#' the deduplicated rownames. |
206 | 206 |
#' @param return.list When set to \code{TRUE}, will return a character vector |
207 |
-#' with deduplicated rownames. |
|
207 |
+#' of the deduplicated rownames. |
|
208 | 208 |
#' @export |
209 | 209 |
#' @return By default, a matrix or /linkS4class{SingleCellExperiment} object |
210 | 210 |
#' with rownames deduplicated. |
... | ... |
@@ -240,14 +240,16 @@ dedupRowNames <- function(x, as.rowData = FALSE, return.list = FALSE){ |
240 | 240 |
|
241 | 241 |
#' Set rownames of SCE with a character vector or a rowData column |
242 | 242 |
#' @description Users can set rownames of an SCE object with either a character |
243 |
-#' vector where the length equals to \code{nrow(inSCE)}, or a single character |
|
244 |
-#' specifying a column in \code{rowData(inSCE)}. Users can set |
|
245 |
-#' \code{dedup = TRUE} to remove duplicated entries in the specification, by |
|
246 |
-#' adding \code{-1, -2, ..., -i} suffix to the duplication of the same |
|
247 |
-#' identifier. |
|
248 |
-#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
249 |
-#' @param rowNames Character. Either of length equal to \code{nrow(inSCE)}, or |
|
250 |
-#' a single character specifying a column in \code{rowData(inSCE)}. |
|
243 |
+#' vector where the length equals to \code{nrow(x)}, or a single character |
|
244 |
+#' specifying a column in \code{rowData(x)}. Also applicable to matrix like |
|
245 |
+#' object where \code{rownames<-} method works, but only allows full size name |
|
246 |
+#' vector. Users can set \code{dedup = TRUE} to remove duplicated entries in the |
|
247 |
+#' specification, by adding \code{-1, -2, ..., -i} suffix to the duplication of |
|
248 |
+#' the same identifier. |
|
249 |
+#' @param x Input object where the rownames will be modified. |
|
250 |
+#' @param rowNames Character vector of the rownames. If \code{x} is an |
|
251 |
+#' \linkS4class{SingleCellExperiment} object, a single character specifying a |
|
252 |
+#' column in \code{rowData(x)}. |
|
251 | 253 |
#' @param dedup Logical. Whether to deduplicate the specified rowNames. Default |
252 | 254 |
#' \code{TRUE} |
253 | 255 |
#' @return The input SCE object with rownames updated. |
... | ... |
@@ -255,31 +257,33 @@ dedupRowNames <- function(x, as.rowData = FALSE, return.list = FALSE){ |
255 | 257 |
#' @examples |
256 | 258 |
#' data("scExample", package = "singleCellTK") |
257 | 259 |
#' head(rownames(sce)) |
258 |
-#' sce <- setSCERowNames(sce, "feature_name") |
|
260 |
+#' sce <- setRowNames(sce, "feature_name") |
|
259 | 261 |
#' head(rownames(sce)) |
260 |
-setSCERowNames <- function(inSCE, rowNames, dedup = TRUE) { |
|
261 |
- if (!inherits(inSCE, "SingleCellExperiment")) { |
|
262 |
- stop("inSCE should be a SingleCellExperiment object") |
|
263 |
- } |
|
262 |
+setRowNames <- function(x, rowNames, dedup = TRUE) { |
|
264 | 263 |
if (!inherits(rowNames, "character")) { |
265 | 264 |
stop("rowNames should be of character class") |
266 | 265 |
} |
267 |
- if (length(rowNames) == 1) { |
|
268 |
- if (rowNames %in% names(SummarizedExperiment::rowData(inSCE))) { |
|
269 |
- rows <- SummarizedExperiment::rowData(inSCE)[[rowNames]] |
|
266 |
+ if (inherits(x, "SingleCellExperiment")) { |
|
267 |
+ if (length(rowNames) == 1) { |
|
268 |
+ if (rowNames %in% names(SummarizedExperiment::rowData(x))) { |
|
269 |
+ rows <- SummarizedExperiment::rowData(x)[[rowNames]] |
|
270 |
+ } else { |
|
271 |
+ stop("Single rowNames specification not found in rowData(x)") |
|
272 |
+ } |
|
273 |
+ } else if (length(rowNames) == nrow(x)) { |
|
274 |
+ rows <- rowNames |
|
270 | 275 |
} else { |
271 |
- stop("Single rowNames specification not found in rowData(inSCE)") |
|
276 |
+ stop("Length of rowNames does not match nrow(x)") |
|
272 | 277 |
} |
273 |
- } else if (length(rowNames) == nrow(inSCE)) { |
|
274 |
- rows <- rowNames |
|
278 |
+ rownames(x) <- rows |
|
275 | 279 |
} else { |
276 |
- stop("Length of rowNames does not match nrow(inSCE)") |
|
280 |
+ rownames(x) <- rowNames |
|
277 | 281 |
} |
278 |
- rownames(inSCE) <- rows |
|
282 |
+ |
|
279 | 283 |
if (isTRUE(dedup)) { |
280 |
- inSCE <- dedupRowNames(inSCE) |
|
284 |
+ x <- dedupRowNames(x) |
|
281 | 285 |
} |
282 |
- return(inSCE) |
|
286 |
+ return(x) |
|
283 | 287 |
} |
284 | 288 |
|
285 | 289 |
#' Retrieve cell/feature index by giving identifiers saved in col/rowData |
... | ... |
@@ -19,7 +19,7 @@ insert a new column called \code{"rownames.uniq"} to \code{rowData(x)}, with |
19 | 19 |
the deduplicated rownames.} |
20 | 20 |
|
21 | 21 |
\item{return.list}{When set to \code{TRUE}, will return a character vector |
22 |
-with deduplicated rownames.} |
|
22 |
+of the deduplicated rownames.} |
|
23 | 23 |
} |
24 | 24 |
\value{ |
25 | 25 |
By default, a matrix or /linkS4class{SingleCellExperiment} object |
... | ... |
@@ -8,7 +8,8 @@ importAlevin( |
8 | 8 |
alevinDir = NULL, |
9 | 9 |
sampleName = "sample", |
10 | 10 |
delayedArray = FALSE, |
11 |
- class = c("Matrix", "matrix") |
|
11 |
+ class = c("Matrix", "matrix"), |
|
12 |
+ rowNamesDedup = TRUE |
|
12 | 13 |
) |
13 | 14 |
} |
14 | 15 |
\arguments{ |
... | ... |
@@ -28,6 +29,9 @@ barcodes. Default is 'sample'.} |
28 | 29 |
object. Can be one of "Matrix" (as returned by |
29 | 30 |
\link{readMM} function), or "matrix" (as returned by |
30 | 31 |
\link[base]{matrix} function). Default "Matrix".} |
32 |
+ |
|
33 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
34 |
+\code{TRUE}.} |
|
31 | 35 |
} |
32 | 36 |
\value{ |
33 | 37 |
A \code{SingleCellExperiment} object containing the count |
... | ... |
@@ -12,7 +12,8 @@ importBUStools( |
12 | 12 |
barcodesFileNames = "genes.barcodes.txt", |
13 | 13 |
gzipped = "auto", |
14 | 14 |
class = c("Matrix", "matrix"), |
15 |
- delayedArray = FALSE |
|
15 |
+ delayedArray = FALSE, |
|
16 |
+ rowNamesDedup = TRUE |
|
16 | 17 |
) |
17 | 18 |
} |
18 | 19 |
\arguments{ |
... | ... |
@@ -47,6 +48,9 @@ object. Can be one of "Matrix" (as returned by |
47 | 48 |
|
48 | 49 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
49 | 50 |
\link[DelayedArray]{DelayedArray-class} object or not. Default \code{FALSE}.} |
51 |
+ |
|
52 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
53 |
+\code{TRUE}.} |
|
50 | 54 |
} |
51 | 55 |
\value{ |
52 | 56 |
A \code{SingleCellExperiment} object containing the count |
... | ... |
@@ -17,7 +17,8 @@ importCellRanger( |
17 | 17 |
barcodesFileNames = "barcodes.tsv.gz", |
18 | 18 |
gzipped = "auto", |
19 | 19 |
class = c("Matrix", "matrix"), |
20 |
- delayedArray = FALSE |
|
20 |
+ delayedArray = FALSE, |
|
21 |
+ rowNamesDedup = TRUE |
|
21 | 22 |
) |
22 | 23 |
|
23 | 24 |
importCellRangerV2( |
... | ... |
@@ -28,7 +29,8 @@ importCellRangerV2( |
28 | 29 |
class = c("Matrix", "matrix"), |
29 | 30 |
delayedArray = FALSE, |
30 | 31 |
reference = NULL, |
31 |
- cellRangerOutsV2 = NULL |
|
32 |
+ cellRangerOutsV2 = NULL, |
|
33 |
+ rowNamesDedup = TRUE |
|
32 | 34 |
) |
33 | 35 |
|
34 | 36 |
importCellRangerV3( |
... | ... |
@@ -37,7 +39,8 @@ importCellRangerV3( |
37 | 39 |
sampleNames = NULL, |
38 | 40 |
dataType = c("filtered", "raw"), |
39 | 41 |
class = c("Matrix", "matrix"), |
40 |
- delayedArray = FALSE |
|
42 |
+ delayedArray = FALSE, |
|
43 |
+ rowNamesDedup = TRUE |
|
41 | 44 |
) |
42 | 45 |
} |
43 | 46 |
\arguments{ |
... | ... |
@@ -140,6 +143,9 @@ object. Can be one of "Matrix" (as returned by |
140 | 143 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
141 | 144 |
\link{DelayedArray} object or not. Default \code{FALSE}.} |
142 | 145 |
|
146 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
147 |
+\code{TRUE}.} |
|
148 |
+ |
|
143 | 149 |
\item{dataTypeV2}{Character. The type of output to import for |
144 | 150 |
Cellranger version below 3.0.0. Whether to import the filtered or the |
145 | 151 |
raw data. Can be one of 'filtered' or 'raw'. Default 'filtered'. When |
... | ... |
@@ -8,7 +8,8 @@ importCellRangerV2Sample( |
8 | 8 |
dataDir = NULL, |
9 | 9 |
sampleName = NULL, |
10 | 10 |
class = c("Matrix", "matrix"), |
11 |
- delayedArray = FALSE |
|
11 |
+ delayedArray = FALSE, |
|
12 |
+ rowNamesDedup = TRUE |
|
12 | 13 |
) |
13 | 14 |
} |
14 | 15 |
\arguments{ |
... | ... |
@@ -24,6 +25,9 @@ object. Can be one of "Matrix" (as returned by |
24 | 25 |
|
25 | 26 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
26 | 27 |
\link{DelayedArray} object or not. Default \code{FALSE}.} |
28 |
+ |
|
29 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
30 |
+\code{TRUE}.} |
|
27 | 31 |
} |
28 | 32 |
\value{ |
29 | 33 |
A \code{SingleCellExperiment} object containing the count |
... | ... |
@@ -8,7 +8,8 @@ importCellRangerV3Sample( |
8 | 8 |
dataDir = "./", |
9 | 9 |
sampleName = "sample", |
10 | 10 |
class = c("Matrix", "matrix"), |
11 |
- delayedArray = FALSE |
|
11 |
+ delayedArray = FALSE, |
|
12 |
+ rowNamesDedup = TRUE |
|
12 | 13 |
) |
13 | 14 |
} |
14 | 15 |
\arguments{ |
... | ... |
@@ -24,6 +25,9 @@ object. Can be one of "Matrix" (as returned by |
24 | 25 |
|
25 | 26 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
26 | 27 |
\link{DelayedArray} object or not. Default \code{FALSE}.} |
28 |
+ |
|
29 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
30 |
+\code{TRUE}.} |
|
27 | 31 |
} |
28 | 32 |
\value{ |
29 | 33 |
A \code{SingleCellExperiment} object containing the count |
... | ... |
@@ -10,7 +10,8 @@ importDropEst( |
10 | 10 |
rdsFileName = "cell.counts", |
11 | 11 |
sampleNames = NULL, |
12 | 12 |
delayedArray = FALSE, |
13 |
- class = c("Matrix", "matrix") |
|
13 |
+ class = c("Matrix", "matrix"), |
|
14 |
+ rowNamesDedup = TRUE |
|
14 | 15 |
) |
15 | 16 |
} |
16 | 17 |
\arguments{ |
... | ... |
@@ -30,6 +31,9 @@ Default "sample".} |
30 | 31 |
object. Can be one of "Matrix" (as returned by |
31 | 32 |
\link{readMM} function), or "matrix" (as returned by |
32 | 33 |
\link[base]{matrix} function). Default \code{"Matrix"}.} |
34 |
+ |
|
35 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
36 |
+\code{TRUE}.} |
|
33 | 37 |
} |
34 | 38 |
\value{ |
35 | 39 |
A \code{SingleCellExperiment} object containing the count matrix, |
... | ... |
@@ -4,7 +4,12 @@ |
4 | 4 |
\alias{importExampleData} |
5 | 5 |
\title{Retrieve example datasets} |
6 | 6 |
\usage{ |
7 |
-importExampleData(dataset, class = c("Matrix", "matrix"), delayedArray = FALSE) |
|
7 |
+importExampleData( |
|
8 |
+ dataset, |
|
9 |
+ class = c("Matrix", "matrix"), |
|
10 |
+ delayedArray = FALSE, |
|
11 |
+ rowNamesDedup = TRUE |
|
12 |
+) |
|
8 | 13 |
} |
9 | 14 |
\arguments{ |
10 | 15 |
\item{dataset}{Character. Name of the dataset to retrieve.} |
... | ... |
@@ -17,6 +22,9 @@ will store the data as a sparse matrix from package \link{Matrix} while |
17 | 22 |
|
18 | 23 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
19 | 24 |
\link{DelayedArray} object or not. Default \code{FALSE}.} |
25 |
+ |
|
26 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
27 |
+\code{TRUE}.} |
|
20 | 28 |
} |
21 | 29 |
\value{ |
22 | 30 |
The specified \link[SingleCellExperiment]{SingleCellExperiment} object. |
... | ... |
@@ -18,54 +18,73 @@ importFromFiles( |
18 | 18 |
featureHeader = FALSE, |
19 | 19 |
featureRowName = 1, |
20 | 20 |
featureSep = "\\t", |
21 |
- gzipped = "auto" |
|
21 |
+ gzipped = "auto", |
|
22 |
+ rowNamesDedup = TRUE |
|
22 | 23 |
) |
23 | 24 |
} |
24 | 25 |
\arguments{ |
25 |
-\item{assayFile}{The path to a file in .mtx, .txt, .csv, .tab, or .tsv format.} |
|
26 |
+\item{assayFile}{The path to a file in .mtx, .txt, .csv, .tab, or .tsv |
|
27 |
+format.} |
|
26 | 28 |
|
27 | 29 |
\item{annotFile}{The path to a text file that contains columns of annotation |
28 |
-information for each sample in the assayFile. This file should have the same |
|
29 |
-number of rows as there are columns in the assayFile. If multiple samples are |
|
30 |
-represented in these files, this should be denoted by a column called \code{'sample'} |
|
31 |
-within the \code{annotFile}.} |
|
30 |
+information for each cell in the \code{assayFile}. This file should have the |
|
31 |
+same number of rows as there are columns in the \code{assayFile}. If multiple |
|
32 |
+samples are represented in the dataset, this should be denoted by a column |
|
33 |
+called \code{'sample'} within the \code{annotFile}.} |
|
32 | 34 |
|
33 | 35 |
\item{featureFile}{The path to a text file that contains columns of |
34 | 36 |
annotation information for each gene in the count matrix. This file should |
35 |
-have the same genes in the same order as assayFile. This is optional.} |
|
37 |
+have the same genes in the same order as \code{assayFile}. This is optional.} |
|
36 | 38 |
|
37 | 39 |
\item{assayName}{The name of the assay that you are uploading. The default |
38 |
-is "counts".} |
|
40 |
+is \code{"counts"}.} |
|
39 | 41 |
|
40 |
-\item{inputDataFrames}{If TRUE, assayFile and annotFile are read as data |
|
41 |
-frames instead of file paths. The default is FALSE.} |
|
42 |
+\item{inputDataFrames}{If \code{TRUE}, \code{assayFile}, \code{annotFile} and |
|
43 |
+\code{featureFile} should be \code{data.frames} object (or its inheritance) |
|
44 |
+instead of file paths. The default is \code{FALSE}.} |
|
42 | 45 |
|
43 | 46 |
\item{class}{Character. The class of the expression matrix stored in the SCE |
44 |
-object. Can be one of "Matrix" (as returned by |
|
45 |
-\link{readMM} function), or "matrix" (as returned by |
|
46 |
-\link[base]{matrix} function). Default "Matrix".} |
|
47 |
+object. Can be one of \code{"Matrix"} (as returned by |
|
48 |
+\link{readMM} function), or \code{"matrix"} (as returned by |
|
49 |
+\link[base]{matrix} function). Default \code{"Matrix"}.} |
|
47 | 50 |
|
48 | 51 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
49 | 52 |
\link{DelayedArray} object or not. Default \code{FALSE}.} |
50 | 53 |
|
51 |
-\item{annotFileHeader}{Whether there's a header (colnames) in the cell annotation file. Default is FALSE} |
|
54 |
+\item{annotFileHeader}{Whether there's a header (colnames) in the cell |
|
55 |
+annotation file. Default is \code{FALSE}.} |
|
52 | 56 |
|
53 |
-\item{annotFileRowName}{Which column is used as the rownames for the cell annotation file. Default is 1 (first column).} |
|
57 |
+\item{annotFileRowName}{Which column is used as the rownames for the cell |
|
58 |
+annotation file. This should match to the colnames of the \code{assayFile}. |
|
59 |
+Default is \code{1} (first column).} |
|
54 | 60 |
|
55 |
-\item{annotFileSep}{Separater used for the cell annotation file. Default is "\\t".} |
|
61 |
+\item{annotFileSep}{Separater used for the cell annotation file. Default is |
|
62 |
+\code{"\\t"}.} |
|
56 | 63 |
|
57 |
-\item{featureHeader}{Whether there's a header (colnames) in the feature annotation file. Default is FALSE} |
|
64 |
+\item{featureHeader}{Whether there's a header (colnames) in the feature |
|
65 |
+annotation file. Default is \code{FALSE}.} |
|
58 | 66 |
|
59 |
-\item{featureRowName}{Which column is used as the rownames for the feature annotation file. Default is 1 (first column).} |
|
67 |
+\item{featureRowName}{Which column is used as the rownames for the feature |
|
68 |
+annotation file. This should match to the rownames of the \code{assayFile}. |
|
69 |
+Default is \code{1}. (first column).} |
|
60 | 70 |
|
61 |
-\item{featureSep}{Separater used for the feature annotation file. Default is "\\t".} |
|
71 |
+\item{featureSep}{Separater used for the feature annotation file. Default is |
|
72 |
+\code{"\\t"}.} |
|
62 | 73 |
|
63 |
-\item{gzipped}{Whether the input file is gzipped. Default is "auto" and it will automatically detect whether the file is gzipped. Other options is TRUE or FALSE.} |
|
74 |
+\item{gzipped}{Whether the input file is gzipped. Default is \code{"auto"} |
|
75 |
+and it will automatically detect whether the file is gzipped. Other options |
|
76 |
+are \code{TRUE} or \code{FALSE}.} |
|
77 |
+ |
|
78 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
79 |
+\code{TRUE}.} |
|
64 | 80 |
} |
65 | 81 |
\value{ |
66 |
-a SingleCellExperiment object |
|
82 |
+a \linkS4class{SingleCellExperiment} object |
|
67 | 83 |
} |
68 | 84 |
\description{ |
69 |
-Creates a SingleCellExperiment object from a counts file in various formats. |
|
70 |
-and a file of annotation information, . |
|
85 |
+Create a SingleCellExperiment object from files |
|
86 |
+} |
|
87 |
+\details{ |
|
88 |
+Creates a \linkS4class{SingleCellExperiment} object from a counts |
|
89 |
+file in various formats, and files of cell and feature annotation. |
|
71 | 90 |
} |
... | ... |
@@ -14,7 +14,8 @@ importOptimus( |
14 | 14 |
geneMetricsLocation = "call-MergeGeneMetrics/merged-gene-metrics.csv.gz", |
15 | 15 |
emptyDropsLocation = "call-RunEmptyDrops/empty_drops_result.csv", |
16 | 16 |
class = c("Matrix", "matrix"), |
17 |
- delayedArray = FALSE |
|
17 |
+ delayedArray = FALSE, |
|
18 |
+ rowNamesDedup = TRUE |
|
18 | 19 |
) |
19 | 20 |
} |
20 | 21 |
\arguments{ |
... | ... |
@@ -63,6 +64,9 @@ object. Can be one of "Matrix" (as returned by |
63 | 64 |
|
64 | 65 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
65 | 66 |
\link{DelayedArray} object or not. Default \code{FALSE}.} |
67 |
+ |
|
68 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default |
|
69 |
+\code{TRUE}.} |
|
66 | 70 |
} |
67 | 71 |
\value{ |
68 | 72 |
A \link[SingleCellExperiment]{SingleCellExperiment} object |
... | ... |
@@ -13,13 +13,14 @@ importSEQC( |
13 | 13 |
delayedArray = FALSE, |
14 | 14 |
cbNotFirstCol = TRUE, |
15 | 15 |
feNotFirstCol = TRUE, |
16 |
- combinedSample = TRUE |
|
16 |
+ combinedSample = TRUE, |
|
17 |
+ rowNamesDedup = TRUE |
|
17 | 18 |
) |
18 | 19 |
} |
19 | 20 |
\arguments{ |
20 | 21 |
\item{seqcDirs}{A vector of paths to seqc output files. Each sample |
21 |
-should have its own path. For example: \code{./pbmc_1k_50x50}. |
|
22 |
-Must have the same length as \code{samples}.} |
|
22 |
+should have its own path. For example: \code{"./pbmc_1k_50x50"}. Must have |
|
23 |
+the same length as \code{samples}.} |
|
23 | 24 |
|
24 | 25 |
\item{samples}{A vector of user-defined sample names for the samples to be |
25 | 26 |
imported. Must have the same length as \code{seqcDirs}.} |
... | ... |
@@ -29,38 +30,37 @@ sample directory. It cannot be null and the vector should have the same |
29 | 30 |
length as \emph{samples}.} |
30 | 31 |
|
31 | 32 |
\item{gzipped}{Boolean. \code{TRUE} if the seqc output files |
32 |
- (sparse_counts_barcode.csv, sparse_counts_genes.csv, and |
|
33 |
- sparse_molecule_counts.mtx) |
|
34 |
- were gzip compressed. \code{FALSE} otherwise. Default seqc outputs are |
|
35 |
- not gzipped. |
|
36 |
-Default \code{FALSE}.} |
|
33 |
+(sparse_counts_barcode.csv, sparse_counts_genes.csv, and |
|
34 |
+sparse_molecule_counts.mtx) were gzip compressed. \code{FALSE} otherwise. |
|
35 |
+Default seqc outputs are not gzipped. Default \code{FALSE}.} |
|
37 | 36 |
|
38 | 37 |
\item{class}{Character. The class of the expression matrix stored in the SCE |
39 |
-object. Can be one of "Matrix" (as returned by |
|
40 |
-\link{readMM} function), or "matrix" (as returned by |
|
41 |
-\link[base]{matrix} function). Default "Matrix".} |
|
38 |
+object. Can be one of \code{"Matrix"} (as returned by \link{readMM} |
|
39 |
+function), or \code{"matrix"} (as returned by \link[base]{matrix} function). |
|
40 |
+Default \code{"Matrix"}.} |
|
42 | 41 |
|
43 | 42 |
\item{delayedArray}{Boolean. Whether to read the expression matrix as |
44 | 43 |
\link{DelayedArray} object or not. Default \code{FALSE}.} |
45 | 44 |
|
46 | 45 |
\item{cbNotFirstCol}{Boolean. \code{TRUE} if first column of |
47 |
- sparse_counts_barcode.csv |
|
48 |
-is row index and it will be removed. \code{FALSE} the first column will |
|
49 |
- be kept.} |
|
46 |
+sparse_counts_barcode.csv is row index and it will be removed. \code{FALSE} |
|
47 |
+the first column will be kept.} |
|
50 | 48 |
|
51 | 49 |
\item{feNotFirstCol}{Boolean. \code{TRUE} if first column of |
52 |
- sparse_counts_genes.csv |
|
53 |
-is row index and it will be removed. \code{FALSE} the first column will |
|
54 |
- be kept.} |
|
50 |
+sparse_counts_genes.csv is row index and it will be removed. \code{FALSE} |
|
51 |
+the first column will be kept.} |
|
55 | 52 |
|
56 | 53 |
\item{combinedSample}{Boolean. If \code{TRUE}, \code{importSEQC} returns a |
57 | 54 |
\code{SingleCellExperiment} object containing the combined count matrix, |
58 |
- feature annotations |
|
59 |
- and the cell annotations. If \code{FALSE}, \code{importSEQC} returns a |
|
60 |
- list containing multiple |
|
55 |
+ feature annotations and the cell annotations. If \code{FALSE}, |
|
56 |
+ \code{importSEQC} returns a list containing multiple |
|
61 | 57 |
\code{SingleCellExperiment} objects. Each \code{SingleCellExperiment} |
62 | 58 |
contains count matrix, feature annotations and cell annotations for |
63 | 59 |
each sample.} |
60 |
+ |
|
61 |
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Only applied |
|
62 |
+if \code{combinedSample} is \code{TRUE} or only one \code{seqcDirs} |
|
63 |
+specified. Default \code{TRUE}.} |
|
64 | 64 |
} |
65 | 65 |
\value{ |
66 | 66 |
A \code{SingleCellExperiment} object containing the combined count |
... | ... |
@@ -68,22 +68,18 @@ A \code{SingleCellExperiment} object containing the combined count |
68 | 68 |
} |
69 | 69 |
\description{ |
70 | 70 |
Read the filtered barcodes, features, and matrices for all |
71 |
- samples from (preferably a single run of) seqc output. Import and |
|
72 |
- combine them as one big \link[SingleCellExperiment]{SingleCellExperiment} |
|
73 |
- object. |
|
71 |
+ samples from (preferably a single run of) seqc output. Import and combine |
|
72 |
+ them as one big \link[SingleCellExperiment]{SingleCellExperiment} object. |
|
74 | 73 |
} |
75 | 74 |