... | ... |
@@ -27,6 +27,7 @@ |
27 | 27 |
#' a specific |
28 | 28 |
#' phenotype based on the first significant enriched gene set. |
29 | 29 |
#' @examples |
30 |
+#' library(scTHI.data) |
|
30 | 31 |
#' data(scExample) |
31 | 32 |
#' Class <- TME_classification(scExample) |
32 | 33 |
#' @return A list with two items: Class (character) and ClassLegend |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,152 @@ |
1 |
+#' TME_classification |
|
2 |
+#' |
|
3 |
+#' The function allows the user to classify non-tumor cells in tumor |
|
4 |
+#' microenvironment. It implements the Mann-Whitney-Wilcoxon Gene |
|
5 |
+#' Set Test |
|
6 |
+#' (MWW-GST) algorithm and tests for each cell the enrichment of |
|
7 |
+#' a collection |
|
8 |
+#' of signatures of different cell types. |
|
9 |
+#' @param expMat Gene expression matrix where rows are genes |
|
10 |
+#' presented with |
|
11 |
+#' Hugo Symbols and columns are cells. Gene expression values |
|
12 |
+#' should be normalized counts. |
|
13 |
+#' @param minLenGeneSet Minimum gene set length |
|
14 |
+#' @param pvalFilter Logical, if TRUE results will be filtered |
|
15 |
+#' for p-Value. |
|
16 |
+#' Defoult is FALSE. |
|
17 |
+#' @param alternative a character string specifying the alternative |
|
18 |
+#' hypothesis of wilcoxon test, must be one of "two.sided" (default), |
|
19 |
+#' "greater" or "less". |
|
20 |
+#' @param fdrFilter Logical, if TRUE results will be filtered for FDR. |
|
21 |
+#' @param pvalCutoff Numeric p-Value (or FDR) threshold. Gene set with |
|
22 |
+#' p-Value (or FDR) greater than pvalCutoff will be discarded |
|
23 |
+#' (default is 0.01). |
|
24 |
+#' @param nesCutoff Numeric threshold. Gene set with NES greater than |
|
25 |
+#' nesCutoff will be discarded (default is 0.58) |
|
26 |
+#' @param nNES Default is 0.58, so each cell is classified with |
|
27 |
+#' a specific |
|
28 |
+#' phenotype based on the first significant enriched gene set. |
|
29 |
+#' @examples |
|
30 |
+#' data(scExample) |
|
31 |
+#' Class <- TME_classification(scExample) |
|
32 |
+#' @return A list with two items: Class (character) and ClassLegend |
|
33 |
+#' (character) |
|
34 |
+#' @export |
|
35 |
+#' |
|
36 |
+#' TME_classification |
|
37 |
+ |
|
38 |
+TME_classification <- function(expMat, |
|
39 |
+ minLenGeneSet = 10, |
|
40 |
+ alternative = "two.sided", |
|
41 |
+ pvalFilter = FALSE, |
|
42 |
+ fdrFilter = TRUE, |
|
43 |
+ pvalCutoff = 0.01, |
|
44 |
+ nesCutoff = 0.58, |
|
45 |
+ nNES = 1) { |
|
46 |
+ |
|
47 |
+ zerogenes <- rowSums(expMat == 0) |
|
48 |
+ expMat <- expMat[zerogenes != ncol(expMat), ] |
|
49 |
+ means <- rowMeans(expMat) |
|
50 |
+ sds <- apply(expMat, 1, sd) |
|
51 |
+ E <- apply(expMat, 2, function(x) { |
|
52 |
+ (x - means) / sds |
|
53 |
+ }) |
|
54 |
+ |
|
55 |
+ E_ <- apply(E, 2, rank) |
|
56 |
+ tmp <- lapply(signatures, function(x) { |
|
57 |
+ sum(x %in% rownames(E)) |
|
58 |
+ }) |
|
59 |
+ signatures_ <- signatures[tmp > minLenGeneSet] |
|
60 |
+ |
|
61 |
+ ans <- vector("list", length(signatures_)) |
|
62 |
+ names(ans) <- names(signatures_) |
|
63 |
+ ans <- lapply(signatures_, function(S) { |
|
64 |
+ geneSet <- S |
|
65 |
+ gs <- geneSet[which(geneSet %in% rownames(E))] |
|
66 |
+ outside_gs <- setdiff(rownames(E), gs) |
|
67 |
+ nx <- length(gs) |
|
68 |
+ ny <- length(outside_gs) |
|
69 |
+ Tstat <- colSums(E_[outside_gs, ]) |
|
70 |
+ Ustat <- nx * ny + ny * (ny + 1) / 2 - Tstat |
|
71 |
+ mu <- nx * ny / 2 |
|
72 |
+ sigma <- sqrt(mu * (nx + ny + 1) / 6) |
|
73 |
+ zValue <- Ustat - mu |
|
74 |
+ correction <- vapply(zValue, function(x) { |
|
75 |
+ switch( |
|
76 |
+ alternative, |
|
77 |
+ two.sided = sign(x) * 0.5, |
|
78 |
+ greater = 0.5, |
|
79 |
+ less = -0.5 |
|
80 |
+ ) |
|
81 |
+ },numeric(1)) |
|
82 |
+ zValue <- (zValue - correction) / sigma |
|
83 |
+ pValue <- vapply(zValue, function(x) { |
|
84 |
+ switch( |
|
85 |
+ alternative, |
|
86 |
+ less = 1 - pnorm(x), |
|
87 |
+ greater = pnorm(-x), |
|
88 |
+ two.sided = 2 * pnorm(-abs(x)) |
|
89 |
+ ) |
|
90 |
+ },numeric(1)) |
|
91 |
+ nes <- Ustat / nx / ny |
|
92 |
+ pu <- nes / (1 - nes) |
|
93 |
+ log.pu <- log2(pu) |
|
94 |
+ names(Ustat) <- colnames(E_) |
|
95 |
+ names(pValue) <- colnames(E_) |
|
96 |
+ names(nes) <- colnames(E_) |
|
97 |
+ names(pu) <- colnames(E_) |
|
98 |
+ names(log.pu) <- colnames(E_) |
|
99 |
+ #ans[[i]] <- |
|
100 |
+ list( |
|
101 |
+ statistic = Ustat, |
|
102 |
+ p.value = pValue, |
|
103 |
+ nes = nes, |
|
104 |
+ pu = pu, |
|
105 |
+ log.pu = log.pu |
|
106 |
+ )}) |
|
107 |
+ |
|
108 |
+ |
|
109 |
+ NES <- vapply(ans, function(x) { |
|
110 |
+ cbind(x$log.pu) |
|
111 |
+ },numeric(ncol(expMat))) |
|
112 |
+ NES <- t(NES) |
|
113 |
+ colnames(NES) <- colnames(expMat) |
|
114 |
+ pValue <- vapply(ans, function(x) { |
|
115 |
+ cbind(x$p.value) |
|
116 |
+ },numeric(ncol(expMat))) |
|
117 |
+ pValue <- t(pValue) |
|
118 |
+ colnames(pValue) <- colnames(expMat) |
|
119 |
+ |
|
120 |
+ FDR <- apply(pValue, 2, function(x) { |
|
121 |
+ p.adjust(x, method = "fdr") |
|
122 |
+ }) |
|
123 |
+ if (pvalFilter == TRUE) { |
|
124 |
+ NES[pValue >= pvalCutoff] <- 0 |
|
125 |
+ } |
|
126 |
+ if (fdrFilter == TRUE) { |
|
127 |
+ NES[FDR >= pvalCutoff] <- 0 |
|
128 |
+ } |
|
129 |
+ NES[NES < nesCutoff] <- 0 |
|
130 |
+ if (nNES == 1) { |
|
131 |
+ Class <- apply(NES, 2, function(x) { |
|
132 |
+ names(which.max(x)) |
|
133 |
+ }) |
|
134 |
+ } |
|
135 |
+ if (nNES != 1) { |
|
136 |
+ Class <- apply(NES, 2, function(x) { |
|
137 |
+ names(sort(x, decreasing = TRUE))[nNES] |
|
138 |
+ }) |
|
139 |
+ } |
|
140 |
+ Class[colSums(NES != 0) < nNES] <- "nc" |
|
141 |
+ phenotype <- signaturesColors[Class, ] |
|
142 |
+ rownames(phenotype) <- names(Class) |
|
143 |
+ Class <- phenotype$ALLPhenotypeFinal |
|
144 |
+ names(Class) <- rownames(phenotype) |
|
145 |
+ ClassLegend <- phenotype$Color |
|
146 |
+ names(ClassLegend) <- phenotype$ALLPhenotypeFinal |
|
147 |
+ ClassLegend <- ClassLegend[!duplicated(ClassLegend)] |
|
148 |
+ #print(sort(table(Class), decreasing = TRUE)) |
|
149 |
+ Classification <- list(Class, ClassLegend) |
|
150 |
+ names(Classification) <- c("Class", "ClassLegend") |
|
151 |
+ return(Classification) |
|
152 |
+} |
|
0 | 153 |
\ No newline at end of file |