Browse code

version 0.99.7

miccec authored on 13/03/2020 18:09:18
Showing 1 changed files
... ...
@@ -27,6 +27,7 @@
27 27
 #'  a specific
28 28
 #'   phenotype based on the first significant enriched gene set.
29 29
 #' @examples
30
+#' library(scTHI.data)
30 31
 #' data(scExample)
31 32
 #' Class <- TME_classification(scExample)
32 33
 #' @return A list with two items: Class (character) and ClassLegend
Browse code

version 5

miccec authored on 02/03/2020 18:19:17
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,152 @@
1
+#' TME_classification
2
+#'
3
+#' The function allows the user to classify non-tumor cells in tumor
4
+#' microenvironment. It implements the Mann-Whitney-Wilcoxon Gene
5
+#' Set Test
6
+#' (MWW-GST) algorithm and tests for each cell the enrichment of
7
+#' a collection
8
+#' of signatures of different cell types.
9
+#' @param expMat Gene expression matrix where rows are genes
10
+#'   presented with
11
+#'   Hugo Symbols and columns are cells. Gene expression values
12
+#'   should be normalized counts.
13
+#' @param minLenGeneSet Minimum gene set length
14
+#' @param pvalFilter Logical, if TRUE results will be filtered
15
+#' for p-Value.
16
+#'   Defoult is FALSE.
17
+#' @param alternative a character string specifying the alternative
18
+#' hypothesis of wilcoxon test, must be one of "two.sided" (default),
19
+#' "greater" or "less".
20
+#' @param fdrFilter Logical, if TRUE results will be filtered for FDR.
21
+#' @param pvalCutoff Numeric p-Value (or FDR) threshold. Gene set with
22
+#'   p-Value (or FDR) greater than pvalCutoff will be discarded
23
+#'   (default is 0.01).
24
+#' @param nesCutoff Numeric threshold. Gene set with NES greater than
25
+#'   nesCutoff will be discarded (default is 0.58)
26
+#' @param nNES Default is 0.58, so each cell is classified with
27
+#'  a specific
28
+#'   phenotype based on the first significant enriched gene set.
29
+#' @examples
30
+#' data(scExample)
31
+#' Class <- TME_classification(scExample)
32
+#' @return A list with two items: Class (character) and ClassLegend
33
+#'   (character)
34
+#' @export
35
+#'
36
+#' TME_classification
37
+
38
+TME_classification <- function(expMat,
39
+                               minLenGeneSet = 10,
40
+                               alternative = "two.sided",
41
+                               pvalFilter = FALSE,
42
+                               fdrFilter = TRUE,
43
+                               pvalCutoff = 0.01,
44
+                               nesCutoff = 0.58,
45
+                               nNES = 1) {
46
+  
47
+  zerogenes <- rowSums(expMat == 0)
48
+  expMat <- expMat[zerogenes != ncol(expMat), ]
49
+  means <- rowMeans(expMat)
50
+  sds <- apply(expMat, 1, sd)
51
+  E <- apply(expMat, 2, function(x) {
52
+    (x - means) / sds
53
+  })
54
+  
55
+  E_ <- apply(E, 2, rank)
56
+  tmp <- lapply(signatures, function(x) {
57
+    sum(x %in% rownames(E))
58
+  })
59
+  signatures_ <- signatures[tmp > minLenGeneSet]
60
+  
61
+  ans <- vector("list", length(signatures_))
62
+  names(ans) <- names(signatures_)
63
+  ans <- lapply(signatures_, function(S) {
64
+    geneSet <- S
65
+    gs <- geneSet[which(geneSet %in% rownames(E))]
66
+    outside_gs <- setdiff(rownames(E), gs)
67
+    nx <- length(gs)
68
+    ny <- length(outside_gs)
69
+    Tstat <- colSums(E_[outside_gs, ])
70
+    Ustat <- nx * ny + ny * (ny + 1) / 2 - Tstat
71
+    mu <- nx * ny / 2
72
+    sigma <- sqrt(mu * (nx + ny + 1) / 6)
73
+    zValue <- Ustat - mu
74
+    correction <- vapply(zValue, function(x) {
75
+      switch(
76
+        alternative,
77
+        two.sided = sign(x) * 0.5,
78
+        greater = 0.5,
79
+        less = -0.5
80
+      )
81
+    },numeric(1))
82
+    zValue <- (zValue - correction) / sigma
83
+    pValue <- vapply(zValue, function(x) {
84
+      switch(
85
+        alternative,
86
+        less = 1 - pnorm(x),
87
+        greater = pnorm(-x),
88
+        two.sided = 2 * pnorm(-abs(x))
89
+      )
90
+    },numeric(1))
91
+    nes <- Ustat / nx / ny
92
+    pu <- nes / (1 - nes)
93
+    log.pu <- log2(pu)
94
+    names(Ustat) <- colnames(E_)
95
+    names(pValue) <- colnames(E_)
96
+    names(nes) <- colnames(E_)
97
+    names(pu) <- colnames(E_)
98
+    names(log.pu) <- colnames(E_)
99
+    #ans[[i]] <- 
100
+    list(
101
+      statistic = Ustat,
102
+      p.value = pValue,
103
+      nes = nes,
104
+      pu = pu,
105
+      log.pu = log.pu
106
+    )})
107
+  
108
+  
109
+  NES <- vapply(ans, function(x) {
110
+    cbind(x$log.pu)
111
+  },numeric(ncol(expMat)))
112
+  NES <- t(NES)
113
+  colnames(NES) <- colnames(expMat)
114
+  pValue <- vapply(ans, function(x) {
115
+    cbind(x$p.value)
116
+  },numeric(ncol(expMat)))
117
+  pValue <- t(pValue)
118
+  colnames(pValue) <- colnames(expMat)
119
+  
120
+  FDR <- apply(pValue, 2, function(x) {
121
+    p.adjust(x, method = "fdr")
122
+  })
123
+  if (pvalFilter == TRUE) {
124
+    NES[pValue >= pvalCutoff] <- 0
125
+  }
126
+  if (fdrFilter == TRUE) {
127
+    NES[FDR >= pvalCutoff] <- 0
128
+  }
129
+  NES[NES < nesCutoff] <- 0
130
+  if (nNES == 1) {
131
+    Class <- apply(NES, 2, function(x) {
132
+      names(which.max(x))
133
+    })
134
+  }
135
+  if (nNES != 1) {
136
+    Class <- apply(NES, 2, function(x) {
137
+      names(sort(x, decreasing = TRUE))[nNES]
138
+    })
139
+  }
140
+  Class[colSums(NES != 0) < nNES] <- "nc"
141
+  phenotype <- signaturesColors[Class, ]
142
+  rownames(phenotype) <- names(Class)
143
+  Class <- phenotype$ALLPhenotypeFinal
144
+  names(Class) <- rownames(phenotype)
145
+  ClassLegend <- phenotype$Color
146
+  names(ClassLegend) <- phenotype$ALLPhenotypeFinal
147
+  ClassLegend <- ClassLegend[!duplicated(ClassLegend)]
148
+  #print(sort(table(Class), decreasing = TRUE))
149
+  Classification <- list(Class, ClassLegend)
150
+  names(Classification) <- c("Class", "ClassLegend")
151
+  return(Classification)
152
+}
0 153
\ No newline at end of file