... | ... |
@@ -182,6 +182,7 @@ nempi <- function(D, unknown = "", Gamma = NULL, type = "null", full = TRUE, |
182 | 182 |
} |
183 | 183 |
if (complete) { |
184 | 184 |
Z <- expG(G, complete = complete) |
185 |
+ Z <- Z*pi |
|
185 | 186 |
Z <- Z/colSums(Z)[col(Z)] |
186 | 187 |
ll <- sum(colSums(Z*(G + log(pi)/log(logtype)))) |
187 | 188 |
probs <- Z |
188 | 189 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,13 @@ |
1 |
+# Files desription: |
|
2 |
+ |
|
3 |
+- TCGA.r: to download and process publicly available TCGA data |
|
4 |
+ |
|
5 |
+- TCGA_nempi.r: anaysis of TCGA data with nempi |
|
6 |
+ |
|
7 |
+- dawnrank.r: analysis of TCGA data with dawnrank |
|
8 |
+ |
|
9 |
+- nempi_loo.r: leave one out classification with nempi and competitors |
|
10 |
+ |
|
11 |
+- perturbseq.r: processing of perturbseq data |
|
12 |
+ |
|
13 |
+- perturbseq_nempi.r: analysis of perturbseq data with nempi and competitors |
|
0 | 14 |
\ No newline at end of file |
1 | 15 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1165 @@ |
1 |
+ |
|
2 |
+library(snowfall) |
|
3 |
+library(TCGAbiolinks) |
|
4 |
+ |
|
5 |
+if (is.null(types)) { |
|
6 |
+ types <- TCGAbiolinks:::getGDCprojects()$project_id |
|
7 |
+ ## special "FM-AD" |
|
8 |
+ dont <- paste0(unique(gsub("-.*", "", types)), "-") |
|
9 |
+ dont <- dont[-grep("TCGA", dont)] |
|
10 |
+ samplenr <- matrix(NA, length(types), 2) |
|
11 |
+ rownames(samplenr) <- types |
|
12 |
+ donr <- TRUE |
|
13 |
+ snrcount <- 0 |
|
14 |
+} else { |
|
15 |
+ dont <- "AGCT" |
|
16 |
+ donr <- FALSE |
|
17 |
+} |
|
18 |
+ |
|
19 |
+sizemat <- matrix(0, 1, 2) |
|
20 |
+colnames(sizemat) <- c("Tumor", "Normal") |
|
21 |
+rownames(sizemat) <- "" |
|
22 |
+ |
|
23 |
+path <- "mutclust/" |
|
24 |
+ |
|
25 |
+for (type in types) { |
|
26 |
+ if (donr) { |
|
27 |
+ snrcount <- snrcount + 1 |
|
28 |
+ } |
|
29 |
+ if (length(grep(paste(dont, collapse = "|"), type)) > 0) { next() } |
|
30 |
+ print(type) |
|
31 |
+ if (file.exists(paste0(path, type, "_final.rda")) & !newmut & !newllr & !newsave) { |
|
32 |
+ load(paste0(path, type, "_final.rda")) |
|
33 |
+ } else { |
|
34 |
+ |
|
35 |
+ summ <- TCGAbiolinks:::getProjectSummary(type) |
|
36 |
+ library(SummarizedExperiment) |
|
37 |
+ |
|
38 |
+ ## get methylation: |
|
39 |
+ if (file.exists(paste0(path, type, "_meth.rda"))) { |
|
40 |
+ load(paste0(path, type, "_meth.rda")) |
|
41 |
+ } else { |
|
42 |
+ data <- GDCquery(project = paste(type, sep = ""), |
|
43 |
+ sample.type = "Primary Tumor", |
|
44 |
+ data.category = "DNA methylation", |
|
45 |
+ data.type = "Methylation beta value", |
|
46 |
+ legacy = TRUE |
|
47 |
+ ) |
|
48 |
+ GDCdownload(data) |
|
49 |
+ data <- GDCprepare(data, summarizedExperiment = 0) |
|
50 |
+ ## map methy sites to genes: |
|
51 |
+ library(methyAnalysis) |
|
52 |
+ if (is.data.frame(data)) { |
|
53 |
+ meth2 <- as.matrix(data[, -(1:3)]) |
|
54 |
+ rownames(meth2) <- gsub(";.*", "", data[, 1]) |
|
55 |
+ } else { |
|
56 |
+ meth <- data@rowRanges |
|
57 |
+ library(TxDb.Hsapiens.UCSC.hg19.knownGene) |
|
58 |
+ probe2gene <- annotateDMRInfo(meth, 'TxDb.Hsapiens.UCSC.hg19.knownGene') |
|
59 |
+ meth2 <- assay(data) |
|
60 |
+ rownames(meth2) <- probe2gene$sigDMRInfo@elementMetadata@listData$GeneSymbol |
|
61 |
+ } |
|
62 |
+ meth <- meth2 |
|
63 |
+ meth <- meth[which(apply(meth, 1, function(x) return(any(is.na(x)))) == FALSE), ] |
|
64 |
+ methm <- meth[which(duplicated(rownames(meth)) == FALSE), ] |
|
65 |
+ count <- 0 |
|
66 |
+ for (i in which(duplicated(rownames(meth)) == FALSE)) { |
|
67 |
+ count <- count + 1 |
|
68 |
+ methm[count, ] <- apply(meth[which(rownames(meth) %in% rownames(methm)[i]), , drop = FALSE], 2, median) |
|
69 |
+ } |
|
70 |
+ meth <- methm |
|
71 |
+ meth <- meth[order(rownames(meth)), order(colnames(meth))] |
|
72 |
+ colnames(meth) <- gsub("A$", "", lapply(strsplit(colnames(meth), "-"), function(x) { y <- x[1:4]; y <- paste(y, collapse = "-"); return(y) })) |
|
73 |
+ methm <- meth[, which(duplicated(colnames(meth)) == FALSE)] |
|
74 |
+ for (i in which(duplicated(colnames(meth)) == TRUE)) { |
|
75 |
+ j <- which(colnames(methm) == colnames(meth)[i]) |
|
76 |
+ methm[, j] <- apply(meth[, which(colnames(meth) %in% colnames(methm)[i]), drop = FALSE], 2, median) |
|
77 |
+ } |
|
78 |
+ meth <- methm |
|
79 |
+ meth <- meth[order(rownames(meth)), order(colnames(meth))] |
|
80 |
+ save(meth, file = paste0(path, type, "_meth.rda")) |
|
81 |
+ } |
|
82 |
+ |
|
83 |
+ print("meth done") |
|
84 |
+ |
|
85 |
+ ## get copy number variation: |
|
86 |
+ if (file.exists(paste0(path, type, "_cnv.rda"))) { |
|
87 |
+ load(paste0(path, type, "_cnv.rda")) |
|
88 |
+ } else { |
|
89 |
+ data <- getGistic(gsub("TCGA-", "", type), type = "thresholded") |
|
90 |
+ ## data <- GDCquery(project = paste(type, sep = ""), |
|
91 |
+ ## sample.type = "Primary solid Tumor", |
|
92 |
+ ## data.category = "Copy Number Variation", |
|
93 |
+ ## data.type = "Copy Number Segment", |
|
94 |
+ ## ) |
|
95 |
+ ## GDCdownload(data) |
|
96 |
+ ## data <- GDCprepare(data) |
|
97 |
+ cnv <- data[, -(1:3)] |
|
98 |
+ cnv <- apply(cnv, c(1,2), as.numeric) |
|
99 |
+ rownames(cnv) <- data[, 1] |
|
100 |
+ colnames(cnv) <- gsub("A$", "", lapply(strsplit(colnames(cnv), "-"), function(x) { y <- x[1:4]; y <- paste(y, collapse = "-"); return(y) })) |
|
101 |
+ cnv <- cnv[order(rownames(cnv)), order(colnames(cnv))] |
|
102 |
+ save(cnv, file = paste0(path, type, "_cnv.rda")) |
|
103 |
+ } |
|
104 |
+ |
|
105 |
+ print("cnv done") |
|
106 |
+ |
|
107 |
+ ## get expression |
|
108 |
+ if (file.exists(paste0(path, type, "_query.rda"))) { |
|
109 |
+ load(paste0(path, type, "_query.rda")) |
|
110 |
+ } else { |
|
111 |
+ if (length(grep("-AML$|-LAML$", type)) > 0) { |
|
112 |
+ sampletype <- c("Primary Blood Derived Cancer - Peripheral Blood", "Solid Tissue Normal") |
|
113 |
+ } else { |
|
114 |
+ sampletype <- c("Primary solid Tumor", "Solid Tissue Normal") |
|
115 |
+ } |
|
116 |
+ data <- GDCquery(project = paste(type, sep = ""), |
|
117 |
+ sample.type = sampletype, |
|
118 |
+ data.category = "Transcriptome Profiling", |
|
119 |
+ data.type = "Gene Expression Quantification", |
|
120 |
+ workflow.type = "HTSeq - Counts" |
|
121 |
+ ) |
|
122 |
+ GDCdownload(data) |
|
123 |
+ if (is.null(data)) { |
|
124 |
+ print("is null") |
|
125 |
+ next() |
|
126 |
+ } |
|
127 |
+ data <- GDCprepare(data) |
|
128 |
+ save(data, file = paste0(path, type, "_query.rda")) |
|
129 |
+ } |
|
130 |
+ ## process expression data: |
|
131 |
+ D <- assay(data) |
|
132 |
+ class <- data@colData@listData$definition |
|
133 |
+ |
|
134 |
+ print("gene expression done") |
|
135 |
+ |
|
136 |
+ ## get mutation |
|
137 |
+ if (file.exists(paste0(path, type, "_mut.rda"))) { |
|
138 |
+ load(paste0(path, type, "_mut.rda")) |
|
139 |
+ } else { |
|
140 |
+ type2 <- gsub(paste(paste0(unique(gsub("-.*", "", types)), "-"), collapse = "|"), "", type) |
|
141 |
+ library(data.table) |
|
142 |
+ GDCquery_Maf(tumor = type2, save.csv = TRUE, pipeline = "varscan2") # mutation file |
|
143 |
+ GDCquery_Maf(tumor = type2, save.csv = TRUE, pipeline = "muse") # mutation file |
|
144 |
+ GDCquery_Maf(tumor = type2, save.csv = TRUE, pipeline = "somaticsniper") # mutation file |
|
145 |
+ GDCquery_Maf(tumor = type2, save.csv = TRUE, pipeline = "mutect2") # mutation file |
|
146 |
+ mut <- list() |
|
147 |
+ count <- 1 |
|
148 |
+ type3 <- gsub("-", "\\.", type) |
|
149 |
+ for (i in list.files("GDCdata")) { |
|
150 |
+ if (length(grep(type3, i)) > 0 & length(grep("csv", i)) > 0) { |
|
151 |
+ mut[[count]] <- fread(paste0("GDCdata/", i)) |
|
152 |
+ count <- count + 1 |
|
153 |
+ } |
|
154 |
+ } |
|
155 |
+ save(mut, file = paste0(path, type, "_mut.rda")) |
|
156 |
+ } |
|
157 |
+ |
|
158 |
+ ## clinical |
|
159 |
+ if (file.exists(paste0(path, type, "_clin.rda"))) { |
|
160 |
+ load(paste0(path, type, "_clin.rda")) |
|
161 |
+ } else { |
|
162 |
+ clinical <- GDCquery_clinic(project = type, type = "clinical") |
|
163 |
+ save(clinical, file = paste0(path, type, "_clin.rda")) |
|
164 |
+ } |
|
165 |
+ |
|
166 |
+ ## process mutation data: (https://cancer.sanger.ac.uk/cosmic/help/mutation/overview) |
|
167 |
+ if (file.exists(paste0(path, type, "_mut0.rda")) & !newmut) { |
|
168 |
+ load(paste0(path, type, "_mut0.rda")) |
|
169 |
+ } else { |
|
170 |
+ n <- nmut # try something to get all patients with at least one mutation |
|
171 |
+ library(snowfall) |
|
172 |
+ countSamples <- function(i, mut.org, genes, mut.mat, coln) { |
|
173 |
+ i <- which(rownames(mut.mat) %in% genes[i]) |
|
174 |
+ tmp <- mut.org[which(mut.org$Hugo_Symbol %in% rownames(mut.mat)[i]), coln] |
|
175 |
+ tmp2 <- mut.mat[i, ] |
|
176 |
+ tmp2[which(colnames(mut.mat) %in% tmp)] <- 1 |
|
177 |
+ return(tmp2) |
|
178 |
+ } |
|
179 |
+ typeSamples <- function(i, mut.org, genes, type.mat, coln, coln2) { |
|
180 |
+ i <- which(rownames(type.mat) %in% genes[i]) |
|
181 |
+ tmp <- mut.org[which(mut.org$Hugo_Symbol %in% rownames(type.mat)[i]), coln] |
|
182 |
+ tmp3 <- mut.org[which(mut.org$Hugo_Symbol %in% rownames(type.mat)[i]), coln2] |
|
183 |
+ tmp2 <- type.mat[i, ] |
|
184 |
+ tmp2[which(colnames(type.mat) %in% tmp)] <- tmp3 |
|
185 |
+ return(tmp2) |
|
186 |
+ } |
|
187 |
+ biggl <- list() |
|
188 |
+ for (i in length(mut)) { |
|
189 |
+ mutation <- mut[[i]] |
|
190 |
+ biggl[[i]] <- mutation$Hugo_Symbol |
|
191 |
+ } |
|
192 |
+ freq <- sort(table(unlist(biggl)), decreasing = TRUE) |
|
193 |
+ if (n == 0) { |
|
194 |
+ allsub <- names(freq) |
|
195 |
+ } else { |
|
196 |
+ allsub <- names(freq)[1:n] |
|
197 |
+ } |
|
198 |
+ M <- Mtype <- list() |
|
199 |
+ for (i in 1:length(mut)) { |
|
200 |
+ mutation <- mut[[i]] |
|
201 |
+ mut.mat <- matrix(0, length(allsub), length(unique(mutation$Tumor_Sample_Barcode))) |
|
202 |
+ type.mat <- matrix("", length(allsub), length(unique(mutation$Tumor_Sample_Barcode))) |
|
203 |
+ colnames(type.mat) <- colnames(mut.mat) <- sort(unique(mutation$Tumor_Sample_Barcode)) |
|
204 |
+ rownames(type.mat) <- rownames(mut.mat) <- allsub |
|
205 |
+ coln <- which(colnames(mutation) %in% "Tumor_Sample_Barcode") |
|
206 |
+ coln2 <- which(colnames(mutation) %in% "Variant_Classification") |
|
207 |
+ mut.org <- mutation[which(mutation$Hugo_Symbol %in% allsub), ] |
|
208 |
+ sfInit(parallel = T, cpus = 4) |
|
209 |
+ sfExport("mut.mat", "coln") |
|
210 |
+ tmp <- sfLapply(as.list(1:length(allsub)), countSamples, mut.org, allsub, mut.mat, coln) |
|
211 |
+ sfStop() |
|
212 |
+ tmp <- do.call("rbind", tmp) |
|
213 |
+ rownames(tmp) <- allsub |
|
214 |
+ colnames(tmp) <- colnames(mut.mat) |
|
215 |
+ M[[i]] <- tmp |
|
216 |
+ sfInit(parallel = T, cpus = 4) |
|
217 |
+ sfExport("type.mat", "coln2") |
|
218 |
+ tmp <- sfLapply(as.list(1:length(allsub)), typeSamples, mut.org, allsub, type.mat, coln, coln2) |
|
219 |
+ sfStop() |
|
220 |
+ tmp <- do.call("rbind", tmp) |
|
221 |
+ rownames(tmp) <- allsub |
|
222 |
+ colnames(tmp) <- colnames(mut.mat) |
|
223 |
+ Mtype[[i]] <- tmp |
|
224 |
+ } |
|
225 |
+ samples <- intersect(intersect(intersect(colnames(M[[1]]), colnames(M[[2]])), colnames(M[[3]])), colnames(M[[4]])) |
|
226 |
+ M0 <- M[[1]][, which(colnames(M[[1]]) %in% samples)] |
|
227 |
+ Mtype0 <- Mtype[[1]][, which(colnames(Mtype[[1]]) %in% samples)] |
|
228 |
+ for (i in 2:length(M)) { |
|
229 |
+ M0 <- M0 + M[[i]][, which(colnames(M[[i]]) %in% samples)] |
|
230 |
+ Mtype0 <- matrix(paste(Mtype0, Mtype[[i]][, which(colnames(Mtype[[i]]) %in% samples)]), nrow(Mtype0)) |
|
231 |
+ } |
|
232 |
+ rownames(Mtype0) <- rownames(M0) |
|
233 |
+ colnames(Mtype0) <- colnames(M0) |
|
234 |
+ save(M0, Mtype0, file = paste0(path, type, "_mut0.rda")) |
|
235 |
+ } |
|
236 |
+ |
|
237 |
+ ## process expression data: |
|
238 |
+ D <- assay(data) |
|
239 |
+ class <- data@colData@listData$definition |
|
240 |
+ M <- M0 |
|
241 |
+ Mtype <- Mtype0 |
|
242 |
+ colnames(M) <- lapply(colnames(M), function(x) { |
|
243 |
+ y <- unlist(strsplit(x, "-")) |
|
244 |
+ y <- paste(y[1:4], collapse = "-") |
|
245 |
+ y <- unlist(strsplit(y, "")) |
|
246 |
+ y <- paste(y[1:(length(y)-1)], collapse = "") |
|
247 |
+ return(y) |
|
248 |
+ }) |
|
249 |
+ colnames(D) <- lapply(colnames(D), function(x) { |
|
250 |
+ y <- unlist(strsplit(x, "-")) |
|
251 |
+ y <- paste(y[1:4], collapse = "-") |
|
252 |
+ y <- unlist(strsplit(y, "")) |
|
253 |
+ y <- paste(y[1:(length(y)-1)], collapse = "") |
|
254 |
+ return(y) |
|
255 |
+ }) |
|
256 |
+ colnames(M) <- gsub("A$", "", lapply(strsplit(colnames(M), "-"), function(x) { y <- x[1:4]; y <- paste(y, collapse = "-"); return(y) })) |
|
257 |
+ M <- M[order(rownames(M)), order(colnames(M))] |
|
258 |
+ Mtype <- Mtype[order(rownames(Mtype)), order(colnames(Mtype))] |
|
259 |
+ |
|
260 |
+ print("mutation done") |
|
261 |
+ |
|
262 |
+ ## log odds: |
|
263 |
+ if (file.exists(paste0(path, type, "_llr.rda")) & !newllr) { |
|
264 |
+ load(paste0(path, type, "_llr.rda")) |
|
265 |
+ } else { |
|
266 |
+ library(edgeR) |
|
267 |
+ if (sum(class %in% "Solid Tissue Normal") < 10) { |
|
268 |
+ distrParNC <- function(i, data) { |
|
269 |
+ data[i, ] <- data[i, ] - median(data[, i]) |
|
270 |
+ llrcol <- numeric(ncol(data)) |
|
271 |
+ div0 <- quantile(data[i, ], 0.25) |
|
272 |
+ div1 <- quantile(data[i, ], 0.75) |
|
273 |
+ sigma <- sd(data[i, ]) |
|
274 |
+ for (j in 1:ncol(data)) { |
|
275 |
+ if (data[i, j] <= 0) { |
|
276 |
+ llrcol[j] <- log2(div0/data[i, j]) |
|
277 |
+ } else { |
|
278 |
+ llrcol[j] <- log2(data[i, j]/div1) |
|
279 |
+ } |
|
280 |
+ } |
|
281 |
+ return(llrcol) |
|
282 |
+ } |
|
283 |
+ highcounts <- which(apply(D, 1, median) >= 10) |
|
284 |
+ DC <- D[highcounts, ] |
|
285 |
+ genenames <- rownames(D)[highcounts, ] |
|
286 |
+ nf <- calcNormFactors(DC) |
|
287 |
+ DC <- t(t(DC)/nf) # DC <- DC2 |
|
288 |
+ ## this is very adventurous: |
|
289 |
+ ## DC <- t(scale(t(DC))) |
|
290 |
+ ## DC <- abs(DC) |
|
291 |
+ ## pc <- 1-min(DC) |
|
292 |
+ ## tmp <- log2(DC+pc) |
|
293 |
+ ## hist(tmp) |
|
294 |
+ sfInit(parallel = T, cpus = 4) |
|
295 |
+ sfExport("DC") |
|
296 |
+ tmp <- do.call("rbind", sfLapply(1:nrow(DC), |
|
297 |
+ distrParNC, DC)) |
|
298 |
+ colnames(tmp) <- colnames(DC) |
|
299 |
+ rownames(tmp) <- genenames |
|
300 |
+ sfStop() |
|
301 |
+ DF <- DC |
|
302 |
+ } else { |
|
303 |
+ library(ks) |
|
304 |
+ mykcde <- function(x, k) { |
|
305 |
+ a <- which.min(abs(k$eval.points - x)) |
|
306 |
+ b <- k$estimate[a] |
|
307 |
+ b <- min(b, 1-b) |
|
308 |
+ return(b) |
|
309 |
+ } |
|
310 |
+ distrParKs <- function(i, data, C) { |
|
311 |
+ llrcol <- numeric(ncol(data)) |
|
312 |
+ ddistr <- list() |
|
313 |
+ dogenes <- unique(colnames(data))[which(!(unique(colnames(data)) %in% ""))] |
|
314 |
+ for (j in dogenes) { |
|
315 |
+ D <- which(colnames(data) %in% j) |
|
316 |
+ ddistr[[j]] <- kcde(data[i, D]) |
|
317 |
+ } |
|
318 |
+ cdistr <- kcde(data[i, C]) |
|
319 |
+ for (j in which(!(colnames(data) %in% ""))) { |
|
320 |
+ gene <- colnames(data)[j] |
|
321 |
+ llrcol[j] <- log2(mykcde(data[i, j], ddistr[[gene]])/mykcde(data[i,j], cdistr)) |
|
322 |
+ } |
|
323 |
+ llrcol <- llrcol[-C] |
|
324 |
+ return(llrcol) |
|
325 |
+ } |
|
326 |
+ DN <- D[, which(class %in% "Solid Tissue Normal")] |
|
327 |
+ DT <- D[, which(class %in% "Primary solid Tumor")] |
|
328 |
+ DF <- cbind(DT, DN) |
|
329 |
+ C <- (ncol(DT)+1):ncol(DF) |
|
330 |
+ highcounts <- which(apply(DF, 1, median) >= 10) |
|
331 |
+ DF <- DF[highcounts, ] |
|
332 |
+ genenames <- rownames(DF) |
|
333 |
+ colnames(DF)[1:ncol(DT)] <- "P" # not knock-down specific! |
|
334 |
+ colnames(DF)[grep(paste(unique(gsub("-.*", "", types)), collapse = "|"), colnames(DF))] <- "" |
|
335 |
+ nf <- calcNormFactors(DF) |
|
336 |
+ DF <- t(t(DF)/nf) |
|
337 |
+ sfInit(parallel = T, cpus = 4) |
|
338 |
+ sfExport("DF", "C", "mykcde") |
|
339 |
+ sfLibrary(ks) |
|
340 |
+ tmp <- do.call("rbind", sfLapply(1:nrow(DF), |
|
341 |
+ distrParKs, DF, C)) |
|
342 |
+ sfStop() |
|
343 |
+ colnames(tmp) <- colnames(DT) |
|
344 |
+ } |
|
345 |
+ save(tmp, DF, file = paste0(path, type, "_llr.rda")) |
|
346 |
+ } |
|
347 |
+ |
|
348 |
+ rownames(tmp) <- rownames(DF) |
|
349 |
+ |
|
350 |
+ par(mfrow=c(1,3)) |
|
351 |
+ hist(tmp) |
|
352 |
+ |
|
353 |
+ tmp[which(is.na(tmp) | is.infinite(tmp))] <- 0 |
|
354 |
+ |
|
355 |
+ hist(tmp) |
|
356 |
+ |
|
357 |
+ D <- tmp |
|
358 |
+ |
|
359 |
+ print("expression done") |
|
360 |
+ |
|
361 |
+ ## sd.glob <- sd(tmp) |
|
362 |
+ |
|
363 |
+ ## tmp <- tmp[-which(apply(tmp, 1, sd) < sd.glob), ] |
|
364 |
+ |
|
365 |
+ ## hist(tmp) |
|
366 |
+ |
|
367 |
+ ## prep clinical data: |
|
368 |
+ clinical[which(clinical$vital_status%in% "dead"), which(colnames(clinical) %in% "vital_status")] <- 1 |
|
369 |
+ clinical[which(clinical$vital_status%in% "alive"), which(colnames(clinical) %in% "vital_status")] <- 0 |
|
370 |
+ count <- 0 |
|
371 |
+ for (stage in sort(unique(clinical$tumor_stage))) { |
|
372 |
+ clinical[which(clinical$tumor_stage%in% stage), which(colnames(clinical) %in% "tumor_stage")] <- count |
|
373 |
+ count <- count + 1 |
|
374 |
+ } |
|
375 |
+ clinical$tumor_stage <- as.numeric(clinical$tumor_stage) |
|
376 |
+ clinical[which(is.na(clinical$days_to_death)), which(colnames(clinical) %in% "days_to_death")] <- clinical[which(is.na(clinical$days_to_death)), which(colnames(clinical) %in% "days_to_last_follow_up")] |
|
377 |
+ clinical$vital_status<- as.numeric(clinical$vital_status) |
|
378 |
+ |
|
379 |
+ print("clinical done") |
|
380 |
+ |
|
381 |
+ save(clinical, D, M, Mtype, DF, class, meth, cnv, file = paste0(path, type, "_final.rda")) |
|
382 |
+ } |
|
383 |
+ print(table(class)) |
|
384 |
+ sizemat <- rbind(sizemat, table(class)) |
|
385 |
+ rownames(sizemat)[nrow(sizemat)] <- type |
|
386 |
+ if (donr) { |
|
387 |
+ samplenr[snrcount, 1] <- sum(class %in% "Primary solid Tumor") |
|
388 |
+ samplenr[snrcount, 2] <- sum(class %in% "Solid Tissue Normal") |
|
389 |
+ } |
|
390 |
+} |
|
391 |
+ |
|
392 |
+stop("done") |
|
393 |
+ |
|
394 |
+samplenr2 <- samplenr[-which(is.na(samplenr[, 1]) == TRUE), ] |
|
395 |
+barplot(t(samplenr2[order(apply(samplenr2, 1, sum)), ]), horiz = 1, space = 1, las = 2) |
|
396 |
+ |
|
397 |
+newllr <- 1; newmut <- 1; nmut <- 1; newsave <- 1; types <- c("TCGA-SKCM","TCGA-UVM"); source("~/Documents/testing/general/TCGA.r") |
|
398 |
+ |
|
399 |
+nonesolid <- c("TCGA-LAML") |
|
400 |
+solidnonormal <- c() |
|
401 |
+ |
|
402 |
+## analysis: |
|
403 |
+ |
|
404 |
+type <- "TCGA-BRCA" |
|
405 |
+ |
|
406 |
+path <- "mutclust/" |
|
407 |
+ |
|
408 |
+load(paste0(path, type, "_final.rda")) |
|
409 |
+ |
|
410 |
+M[which(M < 3)] <- 0 |
|
411 |
+M[which(M > 0)] <- 1 |
|
412 |
+ |
|
413 |
+M[grep("Silent", Mtype)] <- 0 |
|
414 |
+M <- M[order(rownames(M)), order(colnames(M))] |
|
415 |
+ |
|
416 |
+cnv[which(cnv == 0)] <- 0 |
|
417 |
+cnv[which(cnv != 0)] <- 1 |
|
418 |
+cnv <- cnv[order(rownames(cnv)), order(colnames(cnv))] |
|
419 |
+ |
|
420 |
+meth[which(meth > 0.5)] <- 1 |
|
421 |
+meth[which(meth <= 0.5)] <- 0 |
|
422 |
+meth <- meth[order(rownames(meth)), order(colnames(meth))] |
|
423 |
+meth[is.na(meth)] <- 0 |
|
424 |
+ |
|
425 |
+P <- matrix(0, length(unique(c(rownames(M), rownames(cnv), rownames(meth)))), length(unique(c(colnames(M), colnames(cnv), colnames(meth))))) |
|
426 |
+rownames(P) <- sort(unique(c(rownames(M), rownames(cnv), rownames(meth)))) |
|
427 |
+colnames(P) <- sort(unique(c(colnames(M), colnames(cnv), colnames(meth)))) |
|
428 |
+colnames(P) <- gsub("A$", "", lapply(strsplit(colnames(P), "-"), function(x) { y <- x[1:4]; y <- paste(y, collapse = "-"); return(y) })) |
|
429 |
+P <- P[, which(duplicated(colnames(P)) == FALSE)] |
|
430 |
+ |
|
431 |
+P[which(rownames(P) %in% rownames(M)), which(colnames(P) %in% colnames(M))] <- M[which(rownames(M) %in% rownames(P)), which(colnames(M) %in% colnames(P))] |
|
432 |
+ |
|
433 |
+Pmut <- P |
|
434 |
+ |
|
435 |
+P <- Pmut*0 |
|
436 |
+ |
|
437 |
+P[which(rownames(P) %in% rownames(meth)), which(colnames(P) %in% colnames(meth))] <- P[which(rownames(P) %in% rownames(meth)), which(colnames(P) %in% colnames(meth))] + meth[which(rownames(meth) %in% rownames(P)), which(colnames(meth) %in% colnames(P))] |
|
438 |
+ |
|
439 |
+Pmeth <- P |
|
440 |
+ |
|
441 |
+P <- Pmeth*0 |
|
442 |
+ |
|
443 |
+P[which(rownames(P) %in% rownames(cnv)), which(colnames(P) %in% colnames(cnv))] <- P[which(rownames(P) %in% rownames(cnv)), which(colnames(P) %in% colnames(cnv))] + cnv[which(rownames(cnv) %in% rownames(P)), which(colnames(cnv) %in% colnames(P))] |
|
444 |
+ |
|
445 |
+Pcnv <- P |
|
446 |
+ |
|
447 |
+P <- Pmut+Pmeth+Pcnv |
|
448 |
+ |
|
449 |
+P2 <- P # full abberations including cnv and meth |
|
450 |
+ |
|
451 |
+P <- Pmut |
|
452 |
+ |
|
453 |
+## Bailey et al Cell 2018 |
|
454 |
+ |
|
455 |
+goi <- c("MAP2K4", "GATA3", "GPS2", "TBX3", "PTPRD", "NCOR1", "CBFB", "CDKN1B") # BRCA |
|
456 |
+ |
|
457 |
+P <- P[which(rownames(P) %in% goi), ] |
|
458 |
+ |
|
459 |
+P[which(P > 1)] <- 1 |
|
460 |
+P <- apply(P, 2, function(x) return(x/sum(x))) |
|
461 |
+P[is.na(P)] <- 0 |
|
462 |
+ |
|
463 |
+## data imputation: |
|
464 |
+ |
|
465 |
+library(naturalsort) |
|
466 |
+library(nem) |
|
467 |
+library(cluster) |
|
468 |
+library(Rcpp) |
|
469 |
+library(Rgraphviz) |
|
470 |
+library(mnem) |
|
471 |
+ |
|
472 |
+source("~/Documents/mnem/R/mnems.r") |
|
473 |
+source("~/Documents/mnem/R/mnems_low.r") |
|
474 |
+sourceCpp("~/Documents/mnem/src/mm.cpp") |
|
475 |
+ |
|
476 |
+source("~/Documents/nempi/R/nempi_main.r") |
|
477 |
+source("~/Documents/nempi/R/nempi_low.r") |
|
478 |
+ |
|
479 |
+Rho <- cbind(P, matrix(0, nrow(P), sum(!(colnames(D) %in% colnames(P))))) |
|
480 |
+ |
|
481 |
+colnames(Rho) <- c(colnames(P), colnames(D)[which(!(colnames(D) %in% colnames(P)))]) |
|
482 |
+ |
|
483 |
+Rho <- Rho[, colnames(D)] |
|
484 |
+ |
|
485 |
+if (sum(apply(Rho, 1, sum) == 0) > 0) { |
|
486 |
+ Rho <- Rho[-which(apply(Rho, 1, sum) == 0), ] |
|
487 |
+} |
|
488 |
+ |
|
489 |
+Rho[is.na(Rho)] <- 0 |
|
490 |
+ |
|
491 |
+sum(apply(Rho, 2, sum) == 0)/ncol(Rho) # unlabelled |
|
492 |
+ |
|
493 |
+pdf("temp.pdf", width = 12, height = 6) |
|
494 |
+tmp <- Rho |
|
495 |
+colnames(tmp) <- NULL |
|
496 |
+epiNEM::HeatmapOP(tmp, col = "RdBu", Rowv = 0, bordercol = "transparent") |
|
497 |
+dev.off() |
|
498 |
+ |
|
499 |
+inc <- sort(apply(Rho, 1, sum)) |
|
500 |
+ |
|
501 |
+D2 <- D[which(apply(D, 1, median) != 0), ] |
|
502 |
+ |
|
503 |
+D3 <- D2[, which(duplicated(colnames(D2)) == FALSE)] |
|
504 |
+Rho <- Rho[, which(duplicated(colnames(D2)) == FALSE)] |
|
505 |
+for (i in which(duplicated(colnames(D2)) == TRUE)) { |
|
506 |
+ j <- which(colnames(D3) %in% colnames(D2)[i]) |
|
507 |
+ D3[, j] <- apply(D2[, which(colnames(D2) %in% colnames(D2)[i]), drop = FALSE], 1, median) |
|
508 |
+} |
|
509 |
+ |
|
510 |
+D2 <- D3 |
|
511 |
+ |
|
512 |
+colnames(D2) <- c(rownames(Rho), sample(rownames(Rho), ncol(D2)-nrow(Rho), replace = TRUE)) |
|
513 |
+ |
|
514 |
+converged <- 10 |
|
515 |
+ |
|
516 |
+start <- Sys.time() |
|
517 |
+nempires <- nempi(D2, Gamma = Rho, full = TRUE, converged = converged) |
|
518 |
+end <- Sys.time() |
|
519 |
+ |
|
520 |
+print(end - start) |
|
521 |
+ |
|
522 |
+ures <- nempires |
|
523 |
+ |
|
524 |
+sum(ures$lls[2:length(ures$lls)] - ures$lls[1:(length(ures$lls)-1)] < 0) |
|
525 |
+ |
|
526 |
+pdf("temp.pdf", width = 12, height = 6) |
|
527 |
+epiNEM::HeatmapOP(ures$Gamma, bordercol = rgb(0,0,0,0), col = "RdBu") |
|
528 |
+#plot(ures, edgewidth = 30) |
|
529 |
+dev.off() |
|
530 |
+ |
|
531 |
+pdf("temp.pdf", width = 9, height = 6) |
|
532 |
+par(mfrow=c(2,3)) |
|
533 |
+plotConvergence(ures, type = "b", col = "blue") |
|
534 |
+dev.off() |
|
535 |
+ |
|
536 |
+source("~/Documents/nempi/R/nempi_main.r") |
|
537 |
+D4 <- D2 |
|
538 |
+colnames(D4) <- apply(Rho, 2, function(x) { |
|
539 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
540 |
+ return(Sgenes) |
|
541 |
+}) |
|
542 |
+ |
|
543 |
+## run on hpc cluster: |
|
544 |
+ |
|
545 |
+path <- "" |
|
546 |
+type <- "TCGA-BRCA" |
|
547 |
+ |
|
548 |
+if (do == 1) { |
|
549 |
+ |
|
550 |
+ library(e1071) |
|
551 |
+ |
|
552 |
+ load(paste0(path, type, "_nempi.rda")) |
|
553 |
+ |
|
554 |
+ D4 <- D2 |
|
555 |
+ colnames(D4) <- apply(Rho, 2, function(x) { |
|
556 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
557 |
+ return(Sgenes) |
|
558 |
+ }) |
|
559 |
+ |
|
560 |
+ svmres <- classpi(D4, full = TRUE, method = "svm") |
|
561 |
+ |
|
562 |
+ save(svmres, file = paste0("temp_", do, "_", as.numeric(Sys.time()), ".rda")) |
|
563 |
+ |
|
564 |
+} |
|
565 |
+ |
|
566 |
+if (do == 2) { |
|
567 |
+ |
|
568 |
+ library(nnet) |
|
569 |
+ |
|
570 |
+ load(paste0(path, type, "_nempi.rda")) |
|
571 |
+ |
|
572 |
+ D4 <- D2 |
|
573 |
+ colnames(D4) <- apply(Rho, 2, function(x) { |
|
574 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
575 |
+ return(Sgenes) |
|
576 |
+ }) |
|
577 |
+ |
|
578 |
+ nnres <- classpi(D4, full = TRUE, method = "nnet", MaxNWts = 50000, size = 5) # takes forever |
|
579 |
+ |
|
580 |
+ save(nnres, file = paste0("temp_", do, "_", as.numeric(Sys.time()), ".rda")) |
|
581 |
+ |
|
582 |
+} |
|
583 |
+ |
|
584 |
+if (do == 3) { |
|
585 |
+ |
|
586 |
+ library(CALIBERrfimpute) |
|
587 |
+ |
|
588 |
+ load(paste0(path, type, "_nempi.rda")) |
|
589 |
+ |
|
590 |
+ D4 <- D2 |
|
591 |
+ colnames(D4) <- apply(Rho, 2, function(x) { |
|
592 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
593 |
+ return(Sgenes) |
|
594 |
+ }) |
|
595 |
+ |
|
596 |
+ mfdata <- cbind(as.data.frame(t(D4)), colnames(D4)) |
|
597 |
+ mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA |
|
598 |
+ |
|
599 |
+ micedata <- mfdata |
|
600 |
+ colnames(micedata) <- paste0(LETTERS[1:ncol(micedata)], 1:ncol(micedata)) |
|
601 |
+ miceres <- mice(micedata, method = c(rep('rfcont', ncol(micedata)-1), 'rfcat'), m = 2, maxit = 2) |
|
602 |
+ |
|
603 |
+ save(miceres, file = paste0("temp_", do, "_", as.numeric(Sys.time()), ".rda")) |
|
604 |
+ |
|
605 |
+} |
|
606 |
+ |
|
607 |
+if (do == 4) { |
|
608 |
+ |
|
609 |
+ library(e1071) |
|
610 |
+ |
|
611 |
+ load(paste0(path, type, "_nempi.rda")) |
|
612 |
+ |
|
613 |
+ D4 <- D2 |
|
614 |
+ colnames(D4) <- apply(Rho, 2, function(x) { |
|
615 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
616 |
+ return(Sgenes) |
|
617 |
+ }) |
|
618 |
+ |
|
619 |
+ rfres <- classpi(D4, full = TRUE, method = "randomForest") |
|
620 |
+ |
|
621 |
+ save(rfres, file = paste0("temp_", do, "_", as.numeric(Sys.time()), ".rda")) |
|
622 |
+ |
|
623 |
+} |
|
624 |
+ |
|
625 |
+if (do == 5) { |
|
626 |
+ |
|
627 |
+ library(e1071) |
|
628 |
+ |
|
629 |
+ load(paste0(path, type, "_nempi.rda")) |
|
630 |
+ |
|
631 |
+ D4 <- D2 |
|
632 |
+ colnames(D4) <- apply(Rho, 2, function(x) { |
|
633 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
634 |
+ return(Sgenes) |
|
635 |
+ }) |
|
636 |
+ |
|
637 |
+ mfdata <- cbind(as.data.frame(t(D4)), colnames(D4)) |
|
638 |
+ mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA |
|
639 |
+ library(missForest) |
|
640 |
+ mfimp <- missForest(mfdata) |
|
641 |
+ D4 <- D2 |
|
642 |
+ colnames(D4) <- mfimp$ximp[, ncol(mfimp$ximp)] |
|
643 |
+ tmp <- mynem(D4, multi = TRUE) |
|
644 |
+ Gamma <- getGamma(D4) |
|
645 |
+ ures <- list() |
|
646 |
+ ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x))) |
|
647 |
+ ures$res <- list() |
|
648 |
+ ures$res$adj <- tmp$adj |
|
649 |
+ ures$null <- TRUE |
|
650 |
+ ures$combi <- 1 |
|
651 |
+ mfres <- ures |
|
652 |
+ |
|
653 |
+ save(mfres, file = paste0("temp_", do, "_", as.numeric(Sys.time()), ".rda")) |
|
654 |
+ |
|
655 |
+} |
|
656 |
+ |
|
657 |
+## knn |
|
658 |
+ |
|
659 |
+library(class) |
|
660 |
+train <- t(D4[, which(colnames(D4) != "")]) |
|
661 |
+test <- t(D4[, which(colnames(D4) == "")]) |
|
662 |
+knn0 <- 0 |
|
663 |
+if (knn0) { |
|
664 |
+ train <- rbind(train, NULL = rep(0, ncol(train))) |
|
665 |
+ cl <- c(colnames(D4)[which(colnames(D4) != "")], "NULL") |
|
666 |
+} else { |
|
667 |
+ cl <- colnames(D4)[which(colnames(D4) != "")] |
|
668 |
+} |
|
669 |
+knnres <- knn(train, test, cl, prob = TRUE) |
|
670 |
+D3 <- D4 |
|
671 |
+colnames(D3)[which(colnames(D3) %in% "")] <- as.character(knnres) |
|
672 |
+tmp <- mynem(D3, multi = TRUE) |
|
673 |
+Gamma <- getGamma(D3) |
|
674 |
+ures <- list() |
|
675 |
+ures$Gamma <- Gamma # apply(Gamma, 2, function(x) return(x/sum(x))) |
|
676 |
+ures$res <- list() |
|
677 |
+ures$res$adj <- tmp$adj |
|
678 |
+ures$null <- TRUE |
|
679 |
+ures$combi <- 1 |
|
680 |
+ |
|
681 |
+knnres <- ures |
|
682 |
+ |
|
683 |
+## save(nempires, knnres, rfres, mfres, svmres, nnres, Rho, D2, Pmut, Pmeth, Pcnv, file = paste0(path, type, "_nempi.rda")) |
|
684 |
+ |
|
685 |
+path <- "mutclust/"; type <- "TCGA-BRCA" |
|
686 |
+ |
|
687 |
+load(paste0(path, type, "_nempi.rda")) |
|
688 |
+ |
|
689 |
+## ## load("~/Mount/Euler/temp_1_1573814974.40659.rda") # old |
|
690 |
+ |
|
691 |
+## load("~/Mount/Euler/temp_1_1574076694.65703.rda") |
|
692 |
+ |
|
693 |
+## ## load("~/Mount/Euler/temp_4_1573819581.9749.rda") # old |
|
694 |
+ |
|
695 |
+## load("~/Mount/Euler/temp_4_1574080528.67352.rda") |
|
696 |
+ |
|
697 |
+## ## load("~/Mount/Euler/temp_2_1573821112.2412.rda") # old |
|
698 |
+ |
|
699 |
+## load("~/Mount/Euler/temp_2_1574084503.12431.rda") |
|
700 |
+ |
|
701 |
+## load("~/Mount/Euler/temp_5_1574081415.91547.rda") |
|
702 |
+ |
|
703 |
+## ures <- rfres |
|
704 |
+ |
|
705 |
+## ures <- mfres |
|
706 |
+ |
|
707 |
+## ures <- svmres |
|
708 |
+ |
|
709 |
+## ures <- nnres |
|
710 |
+ |
|
711 |
+## ures <- knnres |
|
712 |
+ |
|
713 |
+ures <- nempires |
|
714 |
+ |
|
715 |
+## check against methylation and cnvs: |
|
716 |
+ |
|
717 |
+pdf("nempi_gamma.pdf", width = 12, height = 6) |
|
718 |
+tmp <- ures$Gamma |
|
719 |
+colnames(tmp) <- NULL |
|
720 |
+epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL) |
|
721 |
+dev.off() |
|
722 |
+ |
|
723 |
+pdf("nempi_phi.pdf", width = 6, height = 6) |
|
724 |
+Pgenes <- sort(unique(colnames(D2))) |
|
725 |
+adjtmp <- ures$res$adj |
|
726 |
+colnames(adjtmp) <- rownames(adjtmp) <- Pgenes |
|
727 |
+plotDnf(adjtmp, edgelwd = 2) |
|
728 |
+dev.off() |
|
729 |
+ |
|
730 |
+## cnv/meth enrichment: |
|
731 |
+ |
|
732 |
+methods <- list("NEM$\\pi$" = nempires, knn = knnres, mf = mfres, nn = nnres, rf = rfres, svm = svmres) |
|
733 |
+ |
|
734 |
+mutinc <- 1 |
|
735 |
+ |
|
736 |
+Lall <- Lcnv <- Lmeth <- Lmut <- list() |
|
737 |
+ |
|
738 |
+for (i in 1:length(methods)) { |
|
739 |
+ if (i != 8) { |
|
740 |
+ print(names(methods)[i]) |
|
741 |
+ ures <- methods[[i]] |
|
742 |
+ newGamma <- ures$Gamma |
|
743 |
+ } else { |
|
744 |
+ print("random") |
|
745 |
+ newGamma <- newGamma*0 |
|
746 |
+ newGamma[sample(1:length(newGamma), floor(0.45*length(newGamma)))] <- 1 # well that is included into the test... |
|
747 |
+ } |
|
748 |
+ hist(newGamma) |
|
749 |
+ if (i == 1) { |
|
750 |
+ rntmp <- rownames(newGamma); newGamma <- t(mytc(ures$res$adj))%*%newGamma; rownames(newGamma) <- rntmp |
|
751 |
+ } |
|
752 |
+ P <- Pmut+Pmeth+Pcnv |
|
753 |
+ if (!mutinc) { # include mutations or not (0) |
|
754 |
+ P[which(Pmut == 1)] <- 0 |
|
755 |
+ } |
|
756 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
757 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
758 |
+ P[which(P > 1)] <- 1 |
|
759 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
760 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
761 |
+ P <- Ptmp[, colnames(Rho)] |
|
762 |
+ ## fisher: |
|
763 |
+ if (i %in% c(4,5)) { |
|
764 |
+ cut <- 0.07 |
|
765 |
+ } else if (i == 6) { |
|
766 |
+ cut <- 0.1 |
|
767 |
+ } else { |
|
768 |
+ cut <- 1/8 |
|
769 |
+ } |
|
770 |
+ newGamma[which(newGamma > cut)] <- 1 |
|
771 |
+ newGamma[which(newGamma <= cut)] <- 0 |
|
772 |
+ pmeth <- newGamma |
|
773 |
+ pmeth[which(newGamma == 1 & P == 1)] <- 2 |
|
774 |
+ pmeth[which(newGamma == 0 & P == 1)] <- -2 |
|
775 |
+ if (!mutinc) { # include mutations or not (0) |
|
776 |
+ pmeth[which(Rho > 0)] <- 0 |
|
777 |
+ } |
|
778 |
+ colnames(pmeth) <- NULL |
|
779 |
+ ##pdf(paste0("FigS_", names(methods)[i], ".pdf"), height = 6, width = 12) |
|
780 |
+ setEPS() |
|
781 |
+ postscript(paste0("FigS_", names(methods)[i], ".eps"), height = 6, width = 12) |
|
782 |
+ print(epiNEM::HeatmapOP(pmeth, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL)) |
|
783 |
+ dev.off() |
|
784 |
+ print("cnv") |
|
785 |
+ P <- Pcnv |
|
786 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
787 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
788 |
+ P[which(P > 1)] <- 1 |
|
789 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
790 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
791 |
+ P <- Ptmp[, colnames(Rho)] |
|
792 |
+ F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth == -2 & P == 1), sum(pmeth == 0 & P == 0)), 2) |
|
793 |
+ print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ]))) |
|
794 |
+ Lcnv[[i]] <- F |
|
795 |
+ print("meth") |
|
796 |
+ P <- Pmeth |
|
797 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
798 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
799 |
+ P[which(P > 1)] <- 1 |
|
800 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
801 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
802 |
+ P <- Ptmp[, colnames(Rho)] |
|
803 |
+ F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth == -2 & P == 1), sum(pmeth == 0 & P == 0)), 2) |
|
804 |
+ print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ]))) |
|
805 |
+ Lmeth[[i]] <- F |
|
806 |
+ print("mut") |
|
807 |
+ P <- Pmut |
|
808 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
809 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
810 |
+ P[which(P > 1)] <- 1 |
|
811 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
812 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
813 |
+ P <- Ptmp[, colnames(Rho)] |
|
814 |
+ F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth == -2 & P == 1), sum(pmeth == 0 & P == 0)), 2) |
|
815 |
+ print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ]))) |
|
816 |
+ Lmut[[i]] <- F |
|
817 |
+ Fmat <- matrix(c(sum(pmeth == 2), sum(pmeth == 1), sum(pmeth == -2), sum(pmeth == 0)), 2) |
|
818 |
+ Lall[[i]] <- Fmat |
|
819 |
+ ## print(fisher.test(Fmat, alternative = "greater")) |
|
820 |
+ print("p-value") |
|
821 |
+ print(1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ]))) |
|
822 |
+} |
|
823 |
+ |
|
824 |
+## create tables: |
|
825 |
+ |
|
826 |
+for (i in 1:length(methods)) { |
|
827 |
+ cat(paste0(names(methods)[i], " & ", Lcnv[[i]][1,1], " & ", Lcnv[[i]][2,1], " & ", Lcnv[[i]][2,2], " & ", Lcnv[[i]][1,2], "\\\\\n")) |
|
828 |
+} |
|
829 |
+ |
|
830 |
+for (i in 1:length(methods)) { |
|
831 |
+ cat(paste0(names(methods)[i], " & ", Lmeth[[i]][1,1], " & ", Lmeth[[i]][2,1], " & ", Lmeth[[i]][2,2], " & ", Lmeth[[i]][1,2], "\\\\\n")) |
|
832 |
+} |
|
833 |
+ |
|
834 |
+for (i in 1:length(methods)) { |
|
835 |
+ cat(paste0(names(methods)[i], " & ", Lmut[[i]][1,1], " & ", Lmut[[i]][2,1], " & ", Lmut[[i]][2,2], " & ", Lmut[[i]][1,2], "\\\\\n")) |
|
836 |
+} |
|
837 |
+ |
|
838 |
+for (i in 1:length(methods)) { |
|
839 |
+ if (names(methods)[i] == "nn") { next() } |
|
840 |
+ Fmat <- Lall[[i]] |
|
841 |
+ ptmp <- 1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ])) |
|
842 |
+ if (ptmp == 0) { |
|
843 |
+ ptmp <- "$< 2.2\\times10^{-16}$" |
|
844 |
+ } else { |
|
845 |
+ ptmp <- paste0("$", signif(ptmp), "$") |
|
846 |
+ } |
|
847 |
+ cat(paste0(names(methods)[i], " & ", Lall[[i]][1,1], " & ", Lall[[i]][2,1], " & ", Lall[[i]][2,2], " & ", Lall[[i]][1,2], " & ", ptmp, "\\\\\n")) |
|
848 |
+} |
|
849 |
+ |
|
850 |
+ |
|
851 |
+## check correlation |
|
852 |
+ |
|
853 |
+## P4 <- apply(P3, 2, function(x) return(x/sum(x))) |
|
854 |
+## P4[is.na(P4)] <- 0 |
|
855 |
+ |
|
856 |
+## cor(as.vector(newGamma), as.vector(P3)) |
|
857 |
+ |
|
858 |
+## |
|
859 |
+ |
|
860 |
+cormat <- matrix(0, nrow(pmeth), 2) |
|
861 |
+fishres <- numeric(nrow(pmeth)) |
|
862 |
+names(fishres) <- rownames(pmeth) |
|
863 |
+for (i in 1:nrow(pmeth)) { |
|
864 |
+ Fmat <- matrix(c(sum(pmeth[i, ] == 2), sum(pmeth[i, ] == 1), sum(pmeth[i, ] == -2), sum(pmeth[i, ] == 0)), 2) |
|
865 |
+ fishres[i] <- fisher.test(Fmat, alternative = "g")$p.value |
|
866 |
+ cormat[i, ] <- c(sum(Fmat[1, ]), sum(Fmat[, 1])) |
|
867 |
+} |
|
868 |
+ |
|
869 |
+## GATA3 & PTPRD: |
|
870 |
+ |
|
871 |
+Fmat <- matrix(c(sum(pmeth[3, ] %in% c(2,-2) & pmeth[7, ] %in% c(2,-2)), |
|
872 |
+ sum(pmeth[3, ] %in% c(1,0) & pmeth[7, ] %in% c(-2,2)), |
|
873 |
+ sum(pmeth[3, ] %in% c(-2,2) & pmeth[7, ] %in% c(1,0)), |
|
874 |
+ sum(pmeth[3, ] %in% c(1,0) & pmeth[7, ] %in% c(1,0))), 2) |
|
875 |
+ |
|
876 |
+1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ])) |
|
877 |
+ |
|
878 |
+ |
|
879 |
+## pca: |
|
880 |
+ |
|
881 |
+pca <- princomp(D2) |
|
882 |
+ |
|
883 |
+col <- apply(newGamma, 2, sum) |
|
884 |
+ |
|
885 |
+col <- col/max(col) |
|
886 |
+ |
|
887 |
+K <- kmeans(t(newGamma), nrow(newGamma)) |
|
888 |
+ |
|
889 |
+plot(pca$loadings[, 1:2], col = K$cluster)#rgb(col,0,0,1)) |
|
890 |
+ |
|
891 |
+## tsne: |
|
892 |
+ |
|
893 |
+sne <- tsne(t(newGamma)) |
|
894 |
+ |
|
895 |
+plot(sne, col = K$cluster) |
|
896 |
+ |
|
897 |
+## R profiling: |
|
898 |
+ |
|
899 |
+Rprof("temp.txt", line.profiling=TRUE) |
|
900 |
+ures <- nempi(D2[1:20, ], Gamma = Gamma, complete = 1, full = TRUE, converged = converged, combi = combi) |
|
901 |
+Rprof(NULL) |
|
902 |
+summaryRprof("temp.txt", lines = "show")$sampling.time |
|
903 |
+head(summaryRprof("temp.txt", lines = "show")$by.self, 10) |
|
904 |
+ |
|
905 |
+## |
|
906 |
+ |
|
907 |
+type <- "TCGA-BRCA" |
|
908 |
+load(paste0(path, type, "_nempi.rda")) |
|
909 |
+ |
|
910 |
+pdf("Fig5.pdf", width = 10, height = 10) |
|
911 |
+plot(uresn, edgelwd = 2) |
|
912 |
+dev.off() |
|
913 |
+ |
|
914 |
+pdf("Fig6.pdf", width = 10, height = 5) |
|
915 |
+tmp <- uresn$Gamma |
|
916 |
+colnames(tmp) <- NULL |
|
917 |
+epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL) |
|
918 |
+dev.off() |
|
919 |
+ |
|
920 |
+pdf("Fig7.pdf", width = 10, height = 10) |
|
921 |
+phitmp <- mytc(uresn$res$adj) |
|
922 |
+tmp <- t(phitmp)%*%uresn$Gamma |
|
923 |
+colnames(tmp) <- NULL |
|
924 |
+rownames(tmp) <- rownames(uresn$Gamma) |
|
925 |
+tmp2 <- tmp |
|
926 |
+colnames(tmp) <- NULL |
|
927 |
+tmp <- Gamma |
|
928 |
+colnames(tmp) <- NULL |
|
929 |
+tmp3 <- tmp |
|
930 |
+tmp4 <- tmp2 |
|
931 |
+p1 <- epiNEM::HeatmapOP(tmp2, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
932 |
+p2 <- epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
933 |
+print(p1, position=c(0, .5, 1, 1), more=TRUE) |
|
934 |
+print(p2, position=c(0, 0, 1, .5)) |
|
935 |
+sum(tmp == 1 & tmp2 == 1)/sum(tmp == 1) |
|
936 |
+dev.off() |
|
937 |
+ |
|
938 |
+pdf("Fig8.pdf", width = 10, height = 10) |
|
939 |
+plot(uresf, edgelwd = 2) |
|
940 |
+dev.off() |
|
941 |
+ |
|
942 |
+pdf("Fig9.pdf", width = 10, height = 5) |
|
943 |
+tmp <- uresf$Gamma |
|
944 |
+colnames(tmp) <- NULL |
|
945 |
+epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL) |
|
946 |
+dev.off() |
|
947 |
+ |
|
948 |
+pdf("Fig10.pdf", width = 10, height = 10) |
|
949 |
+phitmp <- mytc(uresf$res$adj) |
|
950 |
+tmp <- t(phitmp)%*%uresf$Gamma |
|
951 |
+colnames(tmp) <- NULL |
|
952 |
+rownames(tmp) <- rownames(uresf$Gamma) |
|
953 |
+tmp2 <- tmp |
|
954 |
+colnames(tmp) <- NULL |
|
955 |
+tmp <- Gamma |
|
956 |
+colnames(tmp) <- NULL |
|
957 |
+p1 <- epiNEM::HeatmapOP(tmp2, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
958 |
+p2 <- epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
959 |
+print(p1, position=c(0, .5, 1, 1), more=TRUE) |
|
960 |
+print(p2, position=c(0, 0, 1, .5)) |
|
961 |
+sum(tmp == 1 & tmp2 == 1)/sum(tmp == 1) |
|
962 |
+dev.off() |
|
963 |
+ |
|
964 |
+## new figure: |
|
965 |
+ |
|
966 |
+P <- t(mytc(uresf$res$adj))%*%uresf$Gamma |
|
967 |
+rownames(P) <- rownames(uresf$Gamma) |
|
968 |
+PM <- P |
|
969 |
+PM[which(PM > 1/6 & Gamma == 1)] <- P[which(PM > 1/6 & Gamma == 1)] + 1 |
|
970 |
+PM[which(PM <= 1/6 & Gamma == 1)] <- P[which(PM <= 1/6 & Gamma == 1)] - 1 |
|
971 |
+epiNEM::HeatmapOP(PM, bordercol = rgb(0,0,0,0), col = "RdYlBu", breaks = seq(-1,2,length.out=5), clusterx = tmp2) |
|
972 |
+ |
|
973 |
+## other plots: |
|
974 |
+ |
|
975 |
+pdf("temp.pdf", width = 10, height = 10) |
|
976 |
+plotDnf(c("M1=M4", "M2=M4", "M3=M5", "M1=M5"), edgelwd = 2) |
|
977 |
+dev.off() |
|
978 |
+ |
|
979 |
+M <- matrix(0, 5, 10) |
|
980 |
+rownames(M) <- paste0("M", 1:5) |
|
981 |
+colnames(M) <- paste0("S", 1:10) |
|
982 |
+ |
|
983 |
+M[1, 1:4] <- 1 |
|
984 |
+M[2, c(2,7:9)] <- 1 |
|
985 |
+M[3, 5:6] <- 1 |
|
986 |
+M[3, 10] <- 1 |
|
987 |
+ |
|
988 |
+phi <- matrix(0, 5, 5) |
|
989 |
+diag(phi) <- 1 |
|
990 |
+phi[1, 1:5] <- 1 |
|
991 |
+phi[2, 3] <- phi[4, 5] <- 1 |
|
992 |
+ |
|
993 |
+## M <- t(phi)%*%M; M[M > 1] <- 1 |
|
994 |
+ |
|
995 |
+rownames(M) <- paste0("M", 1:5) |
|
996 |
+ |
|
997 |
+pdf("temp.pdf", width = 8, height = 4) |
|
998 |
+epiNEM::HeatmapOP(M, Colv = 0, Rowv = 0, col = "RdBu", colorkey = NULL) |
|
999 |
+dev.off() |
|
1000 |
+ |
|
1001 |
+## check for mutation type (das führt zu nichts) |
|
1002 |
+ |
|
1003 |
+colnames(Mtype) <- unlist(lapply(strsplit(colnames(Mtype), "-"), function(x) { |
|
1004 |
+ y <- x[1:3] |
|
1005 |
+ y <- paste(c(y, "01"), collapse = "-") |
|
1006 |
+ return(y) |
|
1007 |
+ })) |
|
1008 |
+ |
|
1009 |
+checkgene <- "CBFB" |
|
1010 |
+ |
|
1011 |
+van <- intersect(colnames(Gamma)[which(PM[checkgene, ] < 0)], colnames(Mtype)) |
|
1012 |
+ |
|
1013 |
+A <- sum(unlist(lapply(strsplit(Mtype[checkgene, van], " "), function(x) if ("Silent" %in% x) { return(1) } else { return(0) }))) |
|
1014 |
+B <- length(van) - A |
|
1015 |
+ |
|
1016 |
+van <- intersect(colnames(Gamma)[which(PM[checkgene, ] > 0)], colnames(Mtype)) |
|
1017 |
+ |
|
1018 |
+C <- sum(unlist(lapply(strsplit(Mtype[checkgene, van], " "), function(x) if ("Silent" %in% x) { return(1) } else { return(0) }))) |
|
1019 |
+D <- length(van) - C |
|
1020 |
+ |
|
1021 |
+table(unlist(lapply(strsplit(Mtype[checkgene, van], " "), function(x) return(names(table(x))[which.max(table(x))])))) |
|
1022 |
+ |
|
1023 |
+## table(unlist(strsplit(as.vector(Mtype), " "))) |
|
1024 |
+ |
|
1025 |
+## pdf("Fig10.pdf", width = 10, height = 10) |
|
1026 |
+## p1 <- epiNEM::HeatmapOP(tmp4, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
1027 |
+## p2 <- epiNEM::HeatmapOP(tmp3, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
1028 |
+## p3 <- epiNEM::HeatmapOP(tmp2, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
1029 |
+## p4 <- epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", clusterx = tmp2, colorkey = NULL) |
|
1030 |
+## print(p1, position=c(0, .5, .5, 1), more=TRUE) |
|
1031 |
+## print(p2, position=c(0, 0, .5, .5), more=TRUE) |
|
1032 |
+## print(p3, position=c(.5, .5, 1, 1), more=TRUE) |
|
1033 |
+## print(p4, position=c(.5, 0, 1, .5)) |
|
1034 |
+## sum(tmp == 1 & tmp2 == 1)/sum(tmp == 1) |
|
1035 |
+## dev.off() |
|
1036 |
+ |
|
1037 |
+source("~/Documents/testing/R/nempi.r") |
|
1038 |
+source("~/Documents/testing/R/nempi_low.r") |
|
1039 |
+ |
|
1040 |
+bsres <- unembs(D2, Gamma = Gamma, complete = 1, full = TRUE, converged = converged, combi = combi, bsruns = 10, bssize = 0.5) |
|
1041 |
+ |
|
1042 |
+pdf("temp.pdf", width = 10, height = 5) |
|
1043 |
+epiNEM::HeatmapOP(bsres$Gamma, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL) |
|
1044 |
+dev.off() |
|
1045 |
+ |
|
1046 |
+pdf("temp.pdf", width = 20, height = 10) |
|
1047 |
+par(mfrow=c(1,2)) |
|
1048 |
+tmp <- bsres$phi |
|
1049 |
+tmp[which(tmp > 0)] <- 1 |
|
1050 |
+diag(tmp) <- 0 |
|
1051 |
+freqtop <- as.vector(t(bsres$phi)[which(lower.tri(bsres$phi) == TRUE)])/10 |
|
1052 |
+freqtop <- freqtop[which(freqtop != 0)] |
|
1053 |
+tmptop <- tmp |
|
1054 |
+tmptop[lower.tri(tmptop)] <- 0 |
|
1055 |
+tmptop <- adj2dnf(tmptop) |
|
1056 |
+tmptop <- tmptop[grep("=", tmptop)] |
|
1057 |
+plotDnf(tmptop, edgelwd = 2, edgelab = freqtop, freq = freqtop) |
|
1058 |
+freqbot <- as.vector(t(bsres$phi)[which(upper.tri(bsres$phi) == TRUE)])/10 |
|
1059 |
+freqbot <- freqbot[which(freqbot != 0)] |
|
1060 |
+tmpbot <- tmp |
|
1061 |
+tmpbot[upper.tri(tmpbot)] <- 0 |
|
1062 |
+tmpbot <- adj2dnf(tmpbot) |
|
1063 |
+tmpbot <- tmpbot[grep("=", tmpbot)] |
|
1064 |
+plotDnf(tmpbot, edgelwd = 2, edgelab = freqbot, freq = freqbot) |
|
1065 |
+dev.off() |
|
1066 |
+ |
|
1067 |
+## |
|
1068 |
+ |
|
1069 |
+source("testing/vignettes/TCGA_cluster.r") |
|
1070 |
+ |
|
1071 |
+pcares <- prcomp(tmp) |
|
1072 |
+ |
|
1073 |
+library(naturalsort) |
|
1074 |
+library(nem) |
|
1075 |
+library(cluster) |
|
1076 |
+library(Rcpp) |
|
1077 |
+ |
|
1078 |
+source("~/Documents/mnem/R/mnems.r") |
|
1079 |
+source("~/Documents/mnem/R/mnems_low.r") |
|
1080 |
+sourceCpp("~/Documents/mnem/src/mm.cpp") |
|
1081 |
+ |
|
1082 |
+res <- mnem(tmp, starts = 10, search = "greedy", type = "cluster3", complete = 1, multi = 1) |
|
1083 |
+ |
|
1084 |
+tmp2 <- tmp |
|
1085 |
+Rprof("temp.txt", line.profiling=TRUE) |
|
1086 |
+res <- mnem(tmp2, starts = 10, search = "greedy", type = "cluster3", complete = 1, multi = 1, k = 2) |
|
1087 |
+Rprof(NULL) |
|
1088 |
+summaryRprof("temp.txt", lines = "show")$sampling.time |
|
1089 |
+head(summaryRprof("temp.txt", lines = "show")$by.self) |
|
1090 |
+ |
|
1091 |
+## resk <- mnemk(tmp2, starts = 10, search = "estimate", type = "cluster3", complete = 1, multi = 1) |
|
1092 |
+ |
|
1093 |
+cluster <- apply(getAffinity(res$probs, mw = res$mw, complete = TRUE), 2, which.max) |
|
1094 |
+ |
|
1095 |
+par(mfrow=c(1,2)) |
|
1096 |
+plot(pcares$rotation[, 1:2], col = cluster) |
|
1097 |
+ |
|
1098 |
+names(cluster) <- gsub("-01$|-03$", "", colnames(M)) |
|
1099 |
+ |
|
1100 |
+cluster <- cluster[which(names(cluster) %in% clinical$submitter_id)] |
|
1101 |
+ |
|
1102 |
+cluster <- cluster[order(names(cluster))] |
|
1103 |
+ |
|
1104 |
+clinical <- rbind(clinical, clinical[which(clinical[, 1] %in% names(cluster)[which(duplicated(names(cluster)))]), ]) |
|
1105 |
+ |
|
1106 |
+clinical <- clinical[order(clinical[, 1]), ] |
|
1107 |
+ |
|
1108 |
+print(all(clinical[, 1] == names(cluster))) |
|
1109 |
+ |
|
1110 |
+fit <- survfit(Surv(days_to_death, vital_status) ~ cluster, clinical) |
|
1111 |
+plot(fit, col = 1:length(table(cluster)), lty = 1:length(table(cluster))) |
|
1112 |
+legend(max(clinical$days_to_death, na.rm = TRUE), 1, 1:length(table(cluster)), lty = 1:length(table(cluster)), col = 1:length(table(cluster)), xjust = 1, yjust = 1) |
|
1113 |
+ |
|
1114 |
+fit <- coxph(Surv(days_to_death, vital_status) ~ cluster + age_at_diagnosis, clinical) |
|
1115 |
+print(fit) |
|
1116 |
+ |
|
1117 |
+fit <- coxph(Surv(days_to_death, vital_status) ~ cluster + age_at_diagnosis + tumor_stage, clinical) |
|
1118 |
+print(fit) |
|
1119 |
+ |
|
1120 |
+fit <- coxph(Surv(days_to_death, vital_status) ~ cluster + tumor_stage, clinical) |
|
1121 |
+print(fit) |
|
1122 |
+ |
|
1123 |
+fit <- coxph(Surv(days_to_death, vital_status) ~ cluster, clinical) |
|
1124 |
+print(fit) |
|
1125 |
+ |
|
1126 |
+kres <- clustNEM(tmp, nem = 0, k = length(res$comp), nstart = 10) |
|
1127 |
+ |
|
1128 |
+## kres <- clustNEM(tmp, nem = 0, nstart = 10) |
|
1129 |
+ |
|
1130 |
+plot(pcares$rotation[, 1:2], col = kres$cluster) |
|
1131 |
+ |
|
1132 |
+kcluster <- kres$cluster |
|
1133 |
+ |
|
1134 |
+names(kcluster) <- gsub("-01$|-03$", "", colnames(M)) |
|
1135 |
+ |
|
1136 |
+kcluster <- kcluster[which(names(kcluster) %in% clinical$submitter_id)] |
|
1137 |
+ |
|
1138 |
+kcluster <- kcluster[order(names(kcluster))] |
|
1139 |
+ |
|
1140 |
+fit <- survfit(Surv(days_to_death, vital_status) ~ kcluster, clinical) |
|
1141 |
+plot(fit, col = 1:length(table(cluster)), lty = 1:length(table(cluster))) |
|
1142 |
+legend(max(clinical$days_to_death, na.rm = TRUE), 1, 1:length(table(cluster)), lty = 1:length(table(cluster)), col = 1:length(table(cluster)), xjust = 1, yjust = 1) |
|
1143 |
+ |
|
1144 |
+fit <- coxph(Surv(days_to_death, vital_status) ~ kcluster + age_at_diagnosis, clinical) |
|
1145 |
+print(fit) |
|
1146 |
+ |
|
1147 |
+fit <- coxph(Surv(days_to_death, vital_status) ~ kcluster, clinical) |
|
1148 |
+print(fit) |
|
1149 |
+ |
|
1150 |
+ |
|
1151 |
+ |
|
1152 |
+ |
|
1153 |
+ |
|
1154 |
+ |
|
1155 |
+ |
|
1156 |
+ |
|
1157 |
+ |
|
1158 |
+ |
|
1159 |
+ |
|
1160 |
+ |
|
1161 |
+ |
|
1162 |
+ |
|
1163 |
+ |
|
1164 |
+ |
|
1165 |
+ |
0 | 1166 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1040 @@ |
1 |
+ |
|
2 |
+method <- as.character(commandArgs(TRUE)[1]) |
|
3 |
+ |
|
4 |
+type <- as.character(commandArgs(TRUE)[2]) |
|
5 |
+ |
|
6 |
+goi <- as.numeric(commandArgs(TRUE)[3]) |
|
7 |
+ |
|
8 |
+goitype <- as.numeric(commandArgs(TRUE)[4]) |
|
9 |
+ |
|
10 |
+path <- "/cluster/work/bewi/members/mpirkl/" |
|
11 |
+ |
|
12 |
+## method <- "nempi"; type <- "TCGA-BRCA"; goi <- 0; goitype <- 1; path <- "~/Mount/Eulershare/" |
|
13 |
+ |
|
14 |
+library(naturalsort) |
|
15 |
+library(cluster) |
|
16 |
+library(Rcpp) |
|
17 |
+library(Rgraphviz) |
|
18 |
+library(mnem) |
|
19 |
+library(randomForest) |
|
20 |
+library(e1071) |
|
21 |
+library(missForest) |
|
22 |
+library(class) |
|
23 |
+library(nnet) |
|
24 |
+ |
|
25 |
+source("mnems.r") |
|
26 |
+source("mnems_low.r") |
|
27 |
+sourceCpp(code=readChar("mm.cpp", file.info("mm.cpp")$size)) |
|
28 |
+source("nempi_main.r") |
|
29 |
+source("nempi_low.r") |
|
30 |
+ |
|
31 |
+## sourceCpp("~/Documents/mnem/src/mm.cpp"); source("~/Documents/mnem/R/mnems.r"); source("~/Documents/mnem/R/mnems_low.r"); source("~/Documents/nempi/R/nempi_main.r"); source("~/Documents/nempi/R/nempi_low.r") |
|
32 |
+## load("mutclust/TCGA-BRCA_final.rda") |
|
33 |
+ |
|
34 |
+type <- "TCGA-BRCA" |
|
35 |
+load(paste0(type, "_final.rda")) |
|
36 |
+ |
|
37 |
+M[which(M < 3)] <- 0 |
|
38 |
+M[which(M > 0)] <- 1 |
|
39 |
+M[grep("Silent", Mtype)] <- 0 |
|
40 |
+M <- M[order(rownames(M)), order(colnames(M))] |
|
41 |
+cnv[which(cnv == 0)] <- 0 |
|
42 |
+cnv[which(cnv != 0)] <- 1 |
|
43 |
+cnv <- cnv[order(rownames(cnv)), order(colnames(cnv))] |
|
44 |
+meth[which(meth > 0.5)] <- 1 |
|
45 |
+meth[which(meth <= 0.5)] <- 0 |
|
46 |
+meth <- meth[order(rownames(meth)), order(colnames(meth))] |
|
47 |
+meth[is.na(meth)] <- 0 |
|
48 |
+P <- matrix(0, length(unique(c(rownames(M), rownames(cnv), rownames(meth)))), length(unique(c(colnames(M), colnames(cnv), colnames(meth))))) |
|
49 |
+rownames(P) <- sort(unique(c(rownames(M), rownames(cnv), rownames(meth)))) |
|
50 |
+colnames(P) <- sort(unique(c(colnames(M), colnames(cnv), colnames(meth)))) |
|
51 |
+colnames(P) <- gsub("A$", "", lapply(strsplit(colnames(P), "-"), function(x) { y <- x[1:4]; y <- paste(y, collapse = "-"); return(y) })) |
|
52 |
+P <- P[, which(duplicated(colnames(P)) == FALSE)] |
|
53 |
+P[which(rownames(P) %in% rownames(M)), which(colnames(P) %in% colnames(M))] <- M[which(rownames(M) %in% rownames(P)), which(colnames(M) %in% colnames(P))] |
|
54 |
+Pmut <- P |
|
55 |
+P <- Pmut*0 |
|
56 |
+P[which(rownames(P) %in% rownames(meth)), which(colnames(P) %in% colnames(meth))] <- P[which(rownames(P) %in% rownames(meth)), which(colnames(P) %in% colnames(meth))] + meth[which(rownames(meth) %in% rownames(P)), which(colnames(meth) %in% colnames(P))] |
|
57 |
+Pmeth <- P |
|
58 |
+P <- Pmeth*0 |
|
59 |
+P[which(rownames(P) %in% rownames(cnv)), which(colnames(P) %in% colnames(cnv))] <- P[which(rownames(P) %in% rownames(cnv)), which(colnames(P) %in% colnames(cnv))] + cnv[which(rownames(cnv) %in% rownames(P)), which(colnames(cnv) %in% colnames(P))] |
|
60 |
+Pcnv <- P |
|
61 |
+P <- Pmut+Pmeth+Pcnv |
|
62 |
+P2 <- P # full abberations including cnv and meth |
|
63 |
+P <- Pmut |
|
64 |
+Ps <- list() |
|
65 |
+Ps$all <- P2 |
|
66 |
+Ps$meth <- Pmeth |
|
67 |
+Ps$cnv <- Pcnv |
|
68 |
+Ps$mut <- Pmut |
|
69 |
+ |
|
70 |
+if (method=="auc") { |
|
71 |
+ ## goitype <- 4 |
|
72 |
+ methods <- c("nempi", "svm", "nn", "rf", "mf", "knn") |
|
73 |
+ mutinc <- 1 |
|
74 |
+ files <- list.files(paste0(path,"nempi_tcga/")) |
|
75 |
+ if (goitype==1) { |
|
76 |
+ files <- files[grep("_pan",files)] |
|
77 |
+ } else if (goitype==2) { |
|
78 |
+ files <- files[grep("_all",files)] |
|
79 |
+ } else if (goitype==3) { |
|
80 |
+ files <- files[grep("_mut",files)] |
|
81 |
+ } else if (goitype==4) { |
|
82 |
+ files <- files[grep("_org",files)] |
|
83 |
+ } |
|
84 |
+ pr <- roc <- list() |
|
85 |
+ aucrows <- max(unlist(lapply(methods,function(x) { |
|
86 |
+ y <- length(grep(paste0("^",x),files)) |
|
87 |
+ return(y) |
|
88 |
+ }))) |
|
89 |
+ pr$all <- pr$mut <- pr$cnv <- pr$meth <- |
|
90 |
+ pr$allp <- pr$mutp <- pr$cnvp <- pr$methp <- |
|
91 |
+ pr$time <- matrix(NA,aucrows,length(methods)+2, |
|
92 |
+ dimnames=list(rownames=NULL,colnames=c(methods,c("random bin","random cont")))) |
|
93 |
+ roc$all <- roc$mut <- roc$cnv <- roc$meth <- |
|
94 |
+ roc$allp <- roc$mutp <- roc$cnvp <- roc$methp <- |
|
95 |
+ roc$time <- matrix(NA,aucrows,length(methods)+2, |
|
96 |
+ dimnames=list(rownames=NULL,colnames=c(methods,c("random bin","random cont")))) |
|
97 |
+ pr$missing <- roc$missing <- numeric(aucrows) |
|
98 |
+ for (method in methods) { |
|
99 |
+ ## method <- "nempi" |
|
100 |
+ print(method) |
|
101 |
+ files.method <- files[grep(paste0("^",method),files)] |
|
102 |
+ idx <- which(methods==method) |
|
103 |
+ idx2 <- 0 |
|
104 |
+ for (file in files.method) { |
|
105 |
+ if (length(grep(paste0("^",method),file))==0) { next() } |
|
106 |
+ idx2 <- idx2 + 1 |
|
107 |
+ cat(paste0(idx2,".")) |
|
108 |
+ ## file <- "nempi_1.rds" |
|
109 |
+ ures <- readRDS(paste0(path,"nempi_tcga/",file)) |
|
110 |
+ pr$time[idx2,idx] <- ures$time |
|
111 |
+ goi <- rownames(ures$Gamma) |
|
112 |
+ P <- Pmut |
|
113 |
+ P <- P[which(rownames(P) %in% goi), ] |
|
114 |
+ P[which(P > 1)] <- 1 |
|
115 |
+ ## P <- apply(P, 2, function(x) return(x/sum(x))) |
|
116 |
+ ## P[is.na(P)] <- 0 |
|
117 |
+ Rho <- cbind(P, matrix(0, nrow(P), sum(!(colnames(D) %in% colnames(P))))) |
|
118 |
+ colnames(Rho) <- c(colnames(P), colnames(D)[which(!(colnames(D) %in% colnames(P)))]) |
|
119 |
+ Rho <- Rho[, colnames(D)] |
|
120 |
+ if (sum(apply(Rho, 1, sum) == 0) > 0) { |
|
121 |
+ Rho <- Rho[-which(apply(Rho, 1, sum) == 0), ] |
|
122 |
+ } |
|
123 |
+ Rho[is.na(Rho)] <- 0 |
|
124 |
+ if (method=="nempi") { |
|
125 |
+ pr$missing[idx2] <- roc$missing[idx2] <- sum(apply(Rho, 2, sum) == 0)/ncol(Rho) # unlabelled |
|
126 |
+ } |
|
127 |
+ inc <- sort(apply(Rho, 1, sum)) |
|
128 |
+ D2 <- D[which(apply(D, 1, median) != 0), ] |
|
129 |
+ D3 <- D2[, which(duplicated(colnames(D2)) == FALSE)] |
|
130 |
+ D4 <- D3 |
|
131 |
+ Rho <- Rho[, which(duplicated(colnames(D2)) == FALSE)] |
|
132 |
+ colnames(D4) <- apply(Rho, 2, function(x) { |
|
133 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
134 |
+ return(Sgenes) |
|
135 |
+ }) |
|
136 |
+ for (mtype in c("all")) { # ,"mut","cnv","meth")) { |
|
137 |
+ ## mtype <- "all" |
|
138 |
+ P <- Ps[[mtype]] # P2 |
|
139 |
+ if (!mutinc) { # include mutations or not (0) |
|
140 |
+ P[which(Pmut == 1)] <- 0 |
|
141 |
+ } |
|
142 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
143 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
144 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
145 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
146 |
+ P <- Ptmp[, colnames(Rho)] |
|
147 |
+ P[which(P > 1)] <- 1 |
|
148 |
+ meas <- list() |
|
149 |
+ meas$data <- P |
|
150 |
+ colnames(meas$data) <- apply(P, 2, function(x) { |
|
151 |
+ y <- paste(sort(rownames(P)[which(x==1)]), collapse = "_") |
|
152 |
+ return(y) |
|
153 |
+ }) |
|
154 |
+ measnet <- matrix(0, nrow(P), nrow(P)) |
|
155 |
+ diag(measnet) <- 1 |
|
156 |
+ meas$Nem[[1]] <- measnet |
|
157 |
+ meas$Theta[[1]] <- ures$res$subtopo |
|
158 |
+ if (idx == 1) { |
|
159 |
+ pitmp <- pifit(ures, meas, D4) |
|
160 |
+ randres <- ures |
|
161 |
+ randres$Gamma[1:length(randres$Gamma)] <- sample(c(0,1),length(randres$Gamma),replace=TRUE) |
|
162 |
+ rand <- pifit(randres, meas, D4, propagate=FALSE) |
|
163 |
+ pr[[mtype]][idx2,length(methods)+1] <- rand$auc |
|
164 |
+ roc[[mtype]][idx2,length(methods)+1] <- rand$roc |
|
165 |
+ randres2 <- randres |
|
166 |
+ randres2$Gamma[1:length(randres2$Gamma)] <- runif(length(randres2$Gamma),0,1) |
|
167 |
+ randres2$Gamma <- apply(randres2$Gamma, 2, function(x) { |
|
168 |
+ y <- runif(1,0,1) |
|
169 |
+ return(x/(sum(x)+y)) |
|
170 |
+ }) |
|
171 |
+ rand <- pifit(randres2, meas, D4, propagate=FALSE) |
|
172 |
+ pr[[mtype]][idx2,length(methods)+2] <- rand$auc |
|
173 |
+ roc[[mtype]][idx2,length(methods)+2] <- rand$roc |
|
174 |
+ } else { |
|
175 |
+ pitmp <- pifit(ures, meas, D4, propagate=FALSE) |
|
176 |
+ } |
|
177 |
+ pr[[mtype]][idx2,idx] <- pitmp$auc |
|
178 |
+ roc[[mtype]][idx2,idx] <- pitmp$roc |
|
179 |
+ meas$Nem[[1]] <- ures$res$adj |
|
180 |
+ if (idx == 1) { |
|
181 |
+ pitmp <- pifit(ures, meas, D4) |
|
182 |
+ rand <- pifit(randres, meas, D4, propagate=FALSE) |
|
183 |
+ pr[[paste0(mtype,"p")]][idx2,length(methods)+1] <- rand$auc |
|
184 |
+ roc[[paste0(mtype,"p")]][idx2,length(methods)+1] <- rand$roc |
|
185 |
+ rand <- pifit(randres2, meas, D4, propagate=FALSE) |
|
186 |
+ pr[[paste0(mtype,"p")]][idx2,length(methods)+2] <- rand$auc |
|
187 |
+ roc[[paste0(mtype,"p")]][idx2,length(methods)+2] <- rand$roc |
|
188 |
+ } else { |
|
189 |
+ pitmp <- pifit(ures, meas, D4, propagate=FALSE) |
|
190 |
+ } |
|
191 |
+ pr[[paste0(mtype,"p")]][idx2,idx] <- pitmp$auc |
|
192 |
+ roc[[paste0(mtype,"p")]][idx2,idx] <- pitmp$roc |
|
193 |
+ } |
|
194 |
+ } |
|
195 |
+ } |
|
196 |
+ if (goitype==1) { |
|
197 |
+ saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_pan.rds")) |
|
198 |
+ saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_pan.rds")) |
|
199 |
+ } else if (goitype==2) { |
|
200 |
+ saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_all.rds")) |
|
201 |
+ saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_all.rds")) |
|
202 |
+ } else if (goitype==3) { |
|
203 |
+ saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_mut.rds")) |
|
204 |
+ saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_mut.rds")) |
|
205 |
+ } else if (goitype==4) { |
|
206 |
+ saveRDS(pr,file=paste0(path,"nempi_tcga/pr_results_org.rds")) |
|
207 |
+ saveRDS(roc,file=paste0(path,"nempi_tcga/roc_results_org.rds")) |
|
208 |
+ } |
|
209 |
+ stop("success") |
|
210 |
+} |
|
211 |
+ |
|
212 |
+## genes of interest |
|
213 |
+ |
|
214 |
+if (goi!=0) { |
|
215 |
+ ## goi <- 1 |
|
216 |
+ goi0 <- goi |
|
217 |
+ gois <- rownames(Pmut)[which(apply(Pmut,1,sum)>0)] |
|
218 |
+ prob <- rep(1/length(gois),length(gois)) |
|
219 |
+ if (goitype==1) { |
|
220 |
+ gois <- intersect(c("JAK1","FGFR1","MSH2","HIST1H1C","MGMT","SMARCA1","ZC3H12A","SETBP1","PIK3CB","KRT222","HGF","IL7R","DAZAP1","LEMD2","MACF1","MYC","THRAP3","RPL5","H3F3C","AXIN2","POLQ","AR","UNCX","PIK3CG","ABL1","PMS1","IRF6","ALK","ESR1","PGR","CHD8","H3F3A","PTPRC","CACNA1A","TBL1XR1","EGR3","JAK3","GRIN2D","POLE","MSH3","RARA","ATXN3","PMS2","ACVR1B","TLR4","PLXNB2","RNF111","CDKN2C","MLH1","BRCA2","ARAF","IRF2","CHEK2","USP9X","TRAF3","EPHA3","NOTCH2","CBWD3","PAX5","RHEB","KMT2A","BRAF1","BCL2","JAK2"),gois) # pan-cancer drivers: too mutated/cnved/methed ?? no, don't think so. |
|
221 |
+ } else if (goitype==3) { |
|
222 |
+ prob0 <- apply(Pmut[which(apply(Pmut,1,sum)>0),],1,function(x) return(sum(x!=0))) |
|
223 |
+ prob <- prob0 |
|
224 |
+ prob[which(prob<20)] <- 0 |
|
225 |
+ prob <- prob/sum(prob) |
|
226 |
+ } |
|
227 |
+ ## goi <- 1 |
|
228 |
+ set.seed(goi*9247) |
|
229 |
+ goi <- sample(gois,10,prob=prob) |
|
230 |
+ print(goi) |
|
231 |
+ epiNEM::HeatmapOP(Pmut[goi,]) |
|
232 |
+ apply(Pmut[goi,],1,sum) |
|
233 |
+} else { |
|
234 |
+ goi0 <- goi |
|
235 |
+ goi <- c("MAP2K4", "GATA3", "GPS2", "TBX3", "PTPRD", "NCOR1", "CBFB", "CDKN1B") # BRCA |
|
236 |
+} |
|
237 |
+ |
|
238 |
+P <- Pmut |
|
239 |
+P <- P[which(rownames(P) %in% goi), ] |
|
240 |
+P[which(P > 1)] <- 1 |
|
241 |
+P <- apply(P, 2, function(x) return(x/sum(x))) |
|
242 |
+P[is.na(P)] <- 0 |
|
243 |
+ |
|
244 |
+## data imputation: |
|
245 |
+ |
|
246 |
+Rho <- cbind(P, matrix(0, nrow(P), sum(!(colnames(D) %in% colnames(P))))) |
|
247 |
+colnames(Rho) <- c(colnames(P), colnames(D)[which(!(colnames(D) %in% colnames(P)))]) |
|
248 |
+Rho <- Rho[, colnames(D)] |
|
249 |
+if (sum(apply(Rho, 1, sum) == 0) > 0) { |
|
250 |
+ Rho <- Rho[-which(apply(Rho, 1, sum) == 0), ] |
|
251 |
+} |
|
252 |
+Rho[is.na(Rho)] <- 0 |
|
253 |
+print("unlabelled:") |
|
254 |
+print(sum(apply(Rho, 2, sum) == 0)/ncol(Rho)) |
|
255 |
+inc <- sort(apply(Rho, 1, sum)) |
|
256 |
+D2 <- D[which(apply(D, 1, median) != 0), ] |
|
257 |
+D3 <- D2[, which(duplicated(colnames(D2)) == FALSE)] |
|
258 |
+Rho <- Rho[, which(duplicated(colnames(D2)) == FALSE)] |
|
259 |
+for (i in which(duplicated(colnames(D2)) == TRUE)) { |
|
260 |
+ j <- which(colnames(D3) %in% colnames(D2)[i]) |
|
261 |
+ D3[, j] <- apply(D2[, which(colnames(D2) %in% colnames(D2)[i]), drop = FALSE], 1, median) |
|
262 |
+} |
|
263 |
+D2 <- D3 |
|
264 |
+colnames(D2) <- c(rownames(Rho), sample(rownames(Rho), ncol(D2)-nrow(Rho), replace = TRUE)) |
|
265 |
+D4 <- D2 |
|
266 |
+colnames(D4) <- apply(Rho, 2, function(x) { |
|
267 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
268 |
+ return(Sgenes) |
|
269 |
+}) |
|
270 |
+ |
|
271 |
+## pdf("temp.pdf", width = 12, height = 6);tmp <- Rho;colnames(tmp) <- NULL;epiNEM::HeatmapOP(tmp, col = "RdBu", Rowv = 0, bordercol = "transparent");dev.off() |
|
272 |
+ |
|
273 |
+start <- as.numeric(Sys.time()) |
|
274 |
+if (method=="nempi") { |
|
275 |
+ converged <- 10 |
|
276 |
+ nempires <- nempi(D2, Gamma = Rho, full = TRUE, converged = converged) |
|
277 |
+ ures <- nempires |
|
278 |
+} else if (method=="svm") { |
|
279 |
+ svmres <- classpi(D4, full = TRUE, method = "svm") |
|
280 |
+ ures <- svmres |
|
281 |
+} else if (method=="nn") { |
|
282 |
+ nnres <- classpi(D4, full = TRUE, method = "nnet", MaxNWts = 50000, size = 2) |
|
283 |
+ ures <- nnres |
|
284 |
+} else if (method=="mf") { |
|
285 |
+ mfdata <- cbind(as.data.frame(t(D4)), factor(colnames(D4))) |
|
286 |
+ mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA |
|
287 |
+ mfimp <- missForest(mfdata) |
|
288 |
+ D4 <- D2 |
|
289 |
+ colnames(D4) <- mfimp$ximp[, ncol(mfimp$ximp)] |
|
290 |
+ tmp <- nem(D4, multi = TRUE) |
|
291 |
+ Gamma <- getGamma(D4) |
|
292 |
+ ures <- list() |
|
293 |
+ ures$Gamma <- apply(Gamma, 2, function(x) return(x/sum(x))) |
|
294 |
+ ures$res <- list() |
|
295 |
+ ures$res$adj <- tmp$adj |
|
296 |
+ ures$null <- TRUE |
|
297 |
+ ures$combi <- 1 |
|
298 |
+ mfres <- ures |
|
299 |
+} else if (method=="rf") { |
|
300 |
+ rfres <- classpi(D4, full = TRUE, method = "randomForest") |
|
301 |
+ ures <- rfres |
|
302 |
+} else if (method=="knn") { |
|
303 |
+ train <- t(D4[, which(colnames(D4) != "")]) |
|
304 |
+ test <- t(D4[, which(colnames(D4) == "")]) |
|
305 |
+ knn0 <- 0 |
|
306 |
+ if (knn0) { |
|
307 |
+ train <- rbind(train, NULL = rep(0, ncol(train))) |
|
308 |
+ cl <- c(colnames(D4)[which(colnames(D4) != "")], "NULL") |
|
309 |
+ } else { |
|
310 |
+ cl <- colnames(D4)[which(colnames(D4) != "")] |
|
311 |
+ } |
|
312 |
+ knnres <- knn(train, test, cl, prob = TRUE) |
|
313 |
+ D3 <- D4 |
|
314 |
+ colnames(D3)[which(colnames(D3) %in% "")] <- as.character(knnres) |
|
315 |
+ tmp <- nem(D3, multi = TRUE) |
|
316 |
+ Gamma <- getGamma(D3) |
|
317 |
+ ures <- list() |
|
318 |
+ ures$Gamma <- Gamma # apply(Gamma, 2, function(x) return(x/sum(x))) |
|
319 |
+ ures$res <- list() |
|
320 |
+ ures$res$adj <- tmp$adj |
|
321 |
+ ures$null <- TRUE |
|
322 |
+ ures$combi <- 1 |
|
323 |
+ knnres <- ures |
|
324 |
+} else if (method=="mice") { # takes forever |
|
325 |
+ mfdata <- cbind(as.data.frame(t(D4)), factor(colnames(D4))) |
|
326 |
+ mfdata[which(mfdata == "", arr.ind = TRUE)] <- NA |
|
327 |
+ |
|
328 |
+ micedata <- mfdata |
|
329 |
+ colnames(micedata) <- paste0(LETTERS[1:ncol(micedata)], 1:ncol(micedata)) |
|
330 |
+ miceres <- mice(micedata, method = c(rep('rfcont', ncol(micedata)-1), 'rfcat'), m = 2, maxit = 2) |
|
331 |
+ ures <- miceres |
|
332 |
+} |
|
333 |
+end <- as.numeric(Sys.time()) |
|
334 |
+print(end - start) |
|
335 |
+ures$time <- end-start |
|
336 |
+ |
|
337 |
+if (goi0!=0) { |
|
338 |
+ if (goitype==1) { |
|
339 |
+ saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_", goi0, "_pan.rds")) |
|
340 |
+ } else if (goitype==2) { |
|
341 |
+ saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_", goi0, "_all.rds")) |
|
342 |
+ } else if (goitype==3) { |
|
343 |
+ saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_", goi0, "_mut.rds")) |
|
344 |
+ } |
|
345 |
+} else { |
|
346 |
+ saveRDS(ures, file=paste0(path, "nempi_tcga/", method, "_org.rds")) |
|
347 |
+} |
|
348 |
+stop("tcga done") |
|
349 |
+ |
|
350 |
+## commands for hpc: |
|
351 |
+ |
|
352 |
+system("scp ~/Documents/mnem/R/mnems.r euler.ethz.ch:") |
|
353 |
+system("scp ~/Documents/mnem/R/mnems_low.r euler.ethz.ch:") |
|
354 |
+system("scp ~/Documents/nempi/R/nempi_main.r euler.ethz.ch:") |
|
355 |
+system("scp ~/Documents/nempi/R/nempi_low.r euler.ethz.ch:") |
|
356 |
+ |
|
357 |
+system("scp testing/nempi/other/TCGA_nempi.r euler.ethz.ch:") |
|
358 |
+ |
|
359 |
+rm error.txt |
|
360 |
+rm output.txt |
|
361 |
+rm .RData |
|
362 |
+ |
|
363 |
+ram=8000 # 4h: nempi 8, mf 8, knn 8, svm 32, nn 32, rf 32, auc 16 |
|
364 |
+queue=4 |
|
365 |
+method="auc" |
|
366 |
+type="TCGA-BRCA" |
|
367 |
+goi=1 |
|
368 |
+goitype=3 |
|
369 |
+ |
|
370 |
+bsub -M ${ram} -q normal.${queue}h -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --max-ppsize=500000 --vanilla --silent --no-save --args '${method}' '${type}' '${goi}' '${goitype}' < TCGA_nempi.r" |
|
371 |
+ |
|
372 |
+for goi in {2..100}; do |
|
373 |
+bsub -M ${ram} -q normal.${queue}h -e error.txt -o output.txt -R "rusage[mem=${ram}]" "R/bin/R --max-ppsize=500000 --vanilla --silent --no-save --args '${method}' '${type}' '${goi}' '${goitype}' < TCGA_nempi.r" |
|
374 |
+done |
|
375 |
+ |
|
376 |
+for i in {141716598..141716731}; do |
|
377 |
+bkill ${i} |
|
378 |
+done |
|
379 |
+ |
|
380 |
+## analysis: |
|
381 |
+ |
|
382 |
+method <- "nempi" |
|
383 |
+ures <- readRDS(paste0("~/Mount/Eulershare/nempi_tcga/",method,"_org.rds")) |
|
384 |
+ |
|
385 |
+sum(ures$lls[2:length(ures$lls)] - ures$lls[1:(length(ures$lls)-1)] < 0) |
|
386 |
+ |
|
387 |
+pdf("temp.pdf", width = 12, height = 6) |
|
388 |
+epiNEM::HeatmapOP(ures$Gamma, bordercol = rgb(0,0,0,0), col = "RdBu") |
|
389 |
+#plot(ures, edgewidth = 30) |
|
390 |
+dev.off() |
|
391 |
+ |
|
392 |
+pdf("temp.pdf", width = 9, height = 6) |
|
393 |
+par(mfrow=c(2,3)) |
|
394 |
+plotConvergence(ures, type = "b", col = "blue") |
|
395 |
+dev.off() |
|
396 |
+ |
|
397 |
+pdf("temp.pdf", width = 6, height = 6) |
|
398 |
+Pgenes <- sort(unique(rownames(ures$Gamma))) |
|
399 |
+adjtmp <- ures$res$adj |
|
400 |
+colnames(adjtmp) <- rownames(adjtmp) <- Pgenes |
|
401 |
+plotDnf(adj2dnf(adjtmp), edgelwd = 2) |
|
402 |
+dev.off() |
|
403 |
+ |
|
404 |
+pdf("temp.pdf", width = 12, height = 6) |
|
405 |
+epiNEM::HeatmapOP(t(mytc(ures$res$adj))%*%ures$Gamma, bordercol = rgb(0,0,0,0), col = "RdBu") |
|
406 |
+#plot(ures, edgewidth = 30) |
|
407 |
+dev.off() |
|
408 |
+ |
|
409 |
+source("~/Documents/nempi/R/nempi_main.r") |
|
410 |
+D4 <- D2 |
|
411 |
+colnames(D4) <- apply(Rho, 2, function(x) { |
|
412 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
413 |
+ return(Sgenes) |
|
414 |
+}) |
|
415 |
+ |
|
416 |
+## new analysis: |
|
417 |
+ |
|
418 |
+goitype <- "pan" |
|
419 |
+stat <- readRDS(paste0("~/Mount/Eulershare/nempi_tcga/pr_results_",goitype,".rds")) |
|
420 |
+ |
|
421 |
+cex.main <- 1.5 |
|
422 |
+cex.lab <- 1.5 |
|
423 |
+idx <- c(1:6,8) |
|
424 |
+setEPS() |
|
425 |
+postscript("temp.eps", height = 5, width = 10) |
|
426 |
+par(mar=c(3.85,4,2.75,1),oma=c(0,0,0,0)) |
|
427 |
+layout.mat <- matrix(c(rep(rep(1:3,each=1),3),4:6),4,byrow=1) |
|
428 |
+layout(layout.mat) |
|
429 |
+methods <- c("nempi", "svm", "nn", "rf", "mf", "knn") |
|
430 |
+wc.ps <- numeric((length(idx)-1)) |
|
431 |
+wc.ps[1:length(wc.ps)] <- NA |
|
432 |
+for (i in 1:(length(idx)-1)) { |
|
433 |
+ wc.ps[i] <- wilcox.test(stat$all[,1],stat$all[,idx[1+i]],alternative="greater")$p.value |
|
434 |
+} |
|
435 |
+wc.psp <- numeric((length(idx)-1)) |
|
436 |
+wc.psp[1:length(wc.psp)] <- NA |
|
437 |
+for (i in 1:(length(idx)-1)) { |
|
438 |
+ wc.psp[i] <- wilcox.test(stat$allp[,1],stat$allp[,idx[1+i]],alternative="greater")$p.value |
|
439 |
+} |
|
440 |
+col <- c("red", "blue", "darkgreen", "brown", "orange", "turquoise","grey","darkgrey") |
|
441 |
+ylim <- c(min(stat$all,na.rm=TRUE),1) |
|
442 |
+mnem:::myboxplot(stat$all[,idx],col=col,dens=0,xaxt = "n",main = "no perturbation propagation",ylab ="area under the precision-recall curve",box=1,border=col,medcol="black",ylim=ylim,cex.main=cex.main,cex.lab=cex.lab) |
|
443 |
+max.val <- apply(stat$all[,idx[-1]],2,max,na.rm=TRUE) + 0.05 |
|
444 |
+if (goitype=="pan") { |
|
445 |
+ max.val <- max.val + rep(0.05,length(max.val)) |
|
446 |
+} |
|
447 |
+text(2:(length(idx[-1])+1),max.val,labels=round(wc.ps,7),srt=90) |
|
448 |
+mnem:::myboxplot(stat$allp[,idx],col=col,dens=0,xaxt = "n",main = "perturbation propagation",ylab ="area under the precision-recall curve",box=1,border=col,medcol="black",ylim=ylim,cex.main=cex.main,cex.lab=cex.lab) |
|
449 |
+max.valp <- apply(stat$allp[,idx[-1]],2,min,na.rm=TRUE) - 0.05 |
|
450 |
+if (goitype=="pan") { |
|
451 |
+ max.valp <- max.valp + c(0,0,0,-0.01,-0.02,-0.05) |
|
452 |
+} |
|
453 |
+##text(2:(length(idx[-1])+1),max.valp,labels=round(wc.psp,7)) |
|
454 |
+text((length(idx[-1])+1),max.valp[length(max.valp)],labels=round(wc.psp[length(wc.psp)],7),srt=90) |
|
455 |
+mnem:::myboxplot(stat$time[,1:6],col=col,dens=0,xaxt = "n",main = "running time",ylab ="seconds",box=1,border=col,medcol="black",log="y",cex.main=cex.main,cex.lab=cex.lab) |
|
456 |
+##mnem:::myboxplot(stat$missing,col=col,dens=0,xaxt = "n",main = "fraction of unlabelled samples",ylab ="fraction",box=1,border=col,medcol="black",log="y") |
|
457 |
+cex.leg <- 1.5 |
|
458 |
+par(mar=rep(0, 4)) |
|
459 |
+plot.new() |
|
460 |
+legend("topleft",legend=c(expression(NEM~pi), "svm", "neural net"),col=c("red", "blue", "darkgreen"),fill=c("red", "blue", "darkgreen"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent") |
|
461 |
+plot.new() |
|
462 |
+legend("topleft",legend=c("random forest", "missForest", "knn"),col=c("brown", "orange", "turquoise"),fill=c("brown", "orange", "turquoise"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent") |
|
463 |
+plot.new() |
|
464 |
+legend("topleft",legend=c("random"),col=c("grey"),fill=c("grey"),box.col = "transparent",cex = cex.leg,ncol = 1,border="transparent") |
|
465 |
+dev.off() |
|
466 |
+ |
|
467 |
+## colnames |
|
468 |
+## rownames nempi svm nn rf mf knn random bin |
|
469 |
+## [1,] 0.8830668 0.8682249 0.7571648 0.7754192 0.7362392 0.7826912 0.6653634 |
|
470 |
+## colnames |
|
471 |
+## rownames random cont |
|
472 |
+## [1,] 0.7385025 |
|
473 |
+ |
|
474 |
+## colnames |
|
475 |
+## rownames nempi svm nn rf mf knn random bin |
|
476 |
+## [1,] 0.6007286 0.6215111 0.57883 0.6321455 0.6049589 0.5877279 0.5210848 |
|
477 |
+## colnames |
|
478 |
+## rownames random cont |
|
479 |
+## [1,] 0.5418864 |
|
480 |
+ |
|
481 |
+## old analysis: |
|
482 |
+ |
|
483 |
+## save(nempires, knnres, rfres, mfres, svmres, nnres, Rho, D2, Pmut, Pmeth, Pcnv, file = paste0(path, type, "_nempi.rda")) |
|
484 |
+ |
|
485 |
+path <- "mutclust/"; type <- "TCGA-BRCA" |
|
486 |
+ |
|
487 |
+load(paste0(path, type, "_nempi.rda")) |
|
488 |
+ |
|
489 |
+source("~/Documents/nempi/R/nempi_main.r") |
|
490 |
+D4 <- D2 |
|
491 |
+colnames(D4) <- apply(Rho, 2, function(x) { |
|
492 |
+ Sgenes <- paste(sort(rownames(Rho)[which(x > 0)]), collapse = "_") |
|
493 |
+ return(Sgenes) |
|
494 |
+}) |
|
495 |
+ |
|
496 |
+ures <- nempires |
|
497 |
+ |
|
498 |
+## check against methylation and cnvs: |
|
499 |
+ |
|
500 |
+pdf("temp.pdf", width = 12, height = 6) |
|
501 |
+tmp <- ures$Gamma |
|
502 |
+colnames(tmp) <- NULL |
|
503 |
+epiNEM::HeatmapOP(tmp, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL) |
|
504 |
+dev.off() |
|
505 |
+ |
|
506 |
+pdf("temp.pdf", width = 6, height = 6) |
|
507 |
+Pgenes <- sort(unique(colnames(D2))) |
|
508 |
+adjtmp <- ures$res$adj |
|
509 |
+colnames(adjtmp) <- rownames(adjtmp) <- Pgenes |
|
510 |
+plotDnf(adjtmp, edgelwd = 2) |
|
511 |
+dev.off() |
|
512 |
+ |
|
513 |
+## cnv/meth enrichment: |
|
514 |
+ |
|
515 |
+# load("~/Documents/nempi_backup/backup3/TCGA-BRCA_nempi.rda") |
|
516 |
+ |
|
517 |
+source("~/Documents/nempi/R/nempi_main.r") |
|
518 |
+source("~/Documents/nempi/R/nempi_low.r") |
|
519 |
+ |
|
520 |
+methods <- list("NEM$\\pi$" = nempires)#, knn = knnres, mf = mfres, nn = nnres, rf = rfres, svm = svmres) |
|
521 |
+ |
|
522 |
+mutinc <- 1 |
|
523 |
+ |
|
524 |
+Lall <- Lcnv <- Lmeth <- Lmut <- Laucor <- list() |
|
525 |
+ |
|
526 |
+for (i in 1:length(methods)) { |
|
527 |
+ if (i != 8) { |
|
528 |
+ print(names(methods)[i]) |
|
529 |
+ ures <- methods[[i]] |
|
530 |
+ newGamma <- ures$Gamma |
|
531 |
+ } else { |
|
532 |
+ print("random") |
|
533 |
+ newGamma <- newGamma*0 |
|
534 |
+ newGamma[sample(1:length(newGamma), floor(0.45*length(newGamma)))] <- 1 # well that is included into the test... |
|
535 |
+ } |
|
536 |
+ hist(newGamma) |
|
537 |
+ if (i == 1) { |
|
538 |
+ rntmp <- rownames(newGamma); newGamma <- t(mytc(ures$res$adj))%*%newGamma; rownames(newGamma) <- rntmp |
|
539 |
+ } |
|
540 |
+ P <- Pmut+Pmeth+Pcnv |
|
541 |
+ if (!mutinc) { # include mutations or not (0) |
|
542 |
+ P[which(Pmut == 1)] <- 0 |
|
543 |
+ } |
|
544 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
545 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
546 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
547 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
548 |
+ P <- Ptmp[, colnames(Rho)] |
|
549 |
+ P <- t(mytc(methods[[1]]$res$adj))%*%P |
|
550 |
+ P[which(P > 1)] <- 1 |
|
551 |
+ ## fisher: |
|
552 |
+ if (i %in% c(4,5)) { |
|
553 |
+ cut <- 0.07 |
|
554 |
+ } else if (i == 6) { |
|
555 |
+ cut <- 0.1 |
|
556 |
+ } else { |
|
557 |
+ cut <- 1/8 |
|
558 |
+ } |
|
559 |
+ newGamma[which(newGamma > cut)] <- 1 |
|
560 |
+ newGamma[which(newGamma <= cut)] <- 0 |
|
561 |
+ pmeth <- newGamma |
|
562 |
+ pmeth[which(newGamma == 1 & P == 1)] <- 2 |
|
563 |
+ pmeth[which(newGamma == 0 & P == 1)] <- -2 |
|
564 |
+ if (!mutinc) { # include mutations or not (0) |
|
565 |
+ pmeth[which(Rho > 0)] <- 0 |
|
566 |
+ } |
|
567 |
+ colnames(pmeth) <- NULL |
|
568 |
+ ##pdf(paste0("FigS_", names(methods)[i], ".pdf"), height = 6, width = 12) |
|
569 |
+ setEPS() |
|
570 |
+ postscript(paste0("FigS_", names(methods)[i], ".eps"), height = 6, width = 12) |
|
571 |
+ print(epiNEM::HeatmapOP(pmeth, bordercol = rgb(0,0,0,0), col = "RdBu", colorkey = NULL)) |
|
572 |
+ dev.off() |
|
573 |
+ print("cnv") |
|
574 |
+ P <- Pcnv |
|
575 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
576 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
577 |
+ P[which(P > 1)] <- 1 |
|
578 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
579 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
580 |
+ P <- Ptmp[, colnames(Rho)] |
|
581 |
+ F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth <= 0 & P == 1), sum(pmeth <= 0 & P == 0)), 2) |
|
582 |
+ print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ]))) |
|
583 |
+ Lcnv[[i]] <- F |
|
584 |
+ print("meth") |
|
585 |
+ P <- Pmeth |
|
586 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
587 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
588 |
+ P[which(P > 1)] <- 1 |
|
589 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
590 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
591 |
+ P <- Ptmp[, colnames(Rho)] |
|
592 |
+ F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth <= 0 & P == 1), sum(pmeth <= 0 & P == 0)), 2) |
|
593 |
+ print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ]))) |
|
594 |
+ Lmeth[[i]] <- F |
|
595 |
+ print("mut") |
|
596 |
+ P <- Pmut |
|
597 |
+ P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
598 |
+ P <- P[order(rownames(P)), order(colnames(P))] |
|
599 |
+ P[which(P > 1)] <- 1 |
|
600 |
+ Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
601 |
+ colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
602 |
+ P <- Ptmp[, colnames(Rho)] |
|
603 |
+ F <- matrix(c(sum(pmeth >= 1 & P == 1), sum(pmeth >= 1 & P == 0), sum(pmeth <= 0 & P == 1), sum(pmeth <= 0 & P == 0)), 2) |
|
604 |
+ print(1 - phyper(F[1,1]-1, sum(F[, 1]), sum(F[, 2]), sum(F[1, ]))) |
|
605 |
+ Lmut[[i]] <- F |
|
606 |
+ Fmat <- matrix(c(sum(pmeth == 2), sum(pmeth == 1), sum(pmeth == -2), sum(pmeth == 0)), 2) |
|
607 |
+ Lall[[i]] <- Fmat |
|
608 |
+ ## print(fisher.test(Fmat, alternative = "greater")) |
|
609 |
+ print("p-value") |
|
610 |
+ print(1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ]))) |
|
611 |
+ meas <- list() |
|
612 |
+ meas$data <- matrix(0, nrow(pmeth), ncol(pmeth)) |
|
613 |
+ colnames(meas$data) <- apply(pmeth, 2, function(x) { |
|
614 |
+ y <- paste(sort(rownames(pmeth)[which(x %in% c(2,-2))]), collapse = "_") |
|
615 |
+ return(y) |
|
616 |
+ }) |
|
617 |
+ measnet <- matrix(0, nrow(pmeth), nrow(pmeth)) |
|
618 |
+ diag(measnet) <- 1 |
|
619 |
+ meas$Nem[[1]] <- measnet |
|
620 |
+ meas$Theta[[1]] <- ures$res$subtopo |
|
621 |
+ print("AUC") |
|
622 |
+ if (i == 1) { |
|
623 |
+ pitmp <- pifit(ures, meas, D4) |
|
624 |
+ } else { |
|
625 |
+ pitmp <- pifit(ures, meas, D4, propagate=FALSE) |
|
626 |
+ } |
|
627 |
+ print(pitmp$auc) |
|
628 |
+ print("Cor") |
|
629 |
+ print(pitmp$cor) |
|
630 |
+ Laucor[[i]] <- c(pitmp$auc, pitmp$cor) |
|
631 |
+} |
|
632 |
+ |
|
633 |
+source("~/Documents/nempi/R/nempi_main.r") |
|
634 |
+source("~/Documents/nempi/R/nempi_low.r") |
|
635 |
+ |
|
636 |
+P <- Pmut+Pmeth+Pcnv |
|
637 |
+if (!mutinc) { # include mutations or not (0) |
|
638 |
+ P[which(Pmut == 1)] <- 0 |
|
639 |
+} |
|
640 |
+P <- P[which(rownames(P) %in% rownames(Rho)), which(colnames(P) %in% colnames(Rho))] |
|
641 |
+P <- P[order(rownames(P)), order(colnames(P))] |
|
642 |
+Ptmp <- cbind(P, matrix(0, nrow(P), sum(!(colnames(Rho) %in% colnames(P))))) |
|
643 |
+colnames(Ptmp) <- c(colnames(P), colnames(Rho)[which(!(colnames(Rho) %in% colnames(P)))]) |
|
644 |
+P <- Ptmp[, colnames(Rho)] |
|
645 |
+# P <- t(mytc(methods[[1]]$res$adj))%*%P |
|
646 |
+P[which(P > 1)] <- 1 |
|
647 |
+ |
|
648 |
+## plot aucs: |
|
649 |
+setEPS() |
|
650 |
+postscript(paste0("Fig_auc.eps"), height = 5, width = 10) |
|
651 |
+cols <- c("red","turquoise","orange","darkgreen","brown","blue") |
|
652 |
+par(mfrow=c(1,2)) |
|
653 |
+for (prop in 1:2) { |
|
654 |
+ for (i in 1:length(methods)) { |
|
655 |
+ print(names(methods)[i]) |
|
656 |
+ ures <- methods[[i]] |
|
657 |
+ meas <- list() |
|
658 |
+ meas$data <- matrix(0, nrow(P), ncol(P)) |
|
659 |
+ colnames(meas$data) <- apply(P, 2, function(x) { |
|
660 |
+ y <- paste(sort(rownames(P)[which(x == 1)]), collapse = "_") |
|
661 |
+ return(y) |
|
662 |
+ }) |
|
663 |
+ meas$Theta[[1]] <- methods[[1]]$res$subtopo |
|
664 |
+ print("AUC") |
|
665 |
+ if (prop == 1) { |
|
666 |
+ meas$Nem[[1]] <- methods[[1]]$res$adj |
|
667 |
+ } else { |
|
668 |
+ meas$Nem[[1]] <- diag(nrow(pmeth)) |
|
669 |
+ } |
|
670 |
+ if (i == 1) { |
|
671 |
+ pitmp <- pifit(ures, meas, D4) |
|
672 |
+ } else { |
|
673 |
+ pitmp <- pifit(ures, meas, D4, propagate = FALSE) |
|
674 |
+ } |
|
675 |
+ if (i == 1) { |
|
676 |
+ if (prop == 1) { |
|
677 |
+ main <- "precision-recall curves (propagated)" |
|
678 |
+ } else { |
|
679 |
+ main <- "precision-recall curves (not propagated)" |
|
680 |
+ } |
|
681 |
+ plot(pitmp$rec, pitmp$ppv, type="l", xlim = c(0,1), ylim = c(0,1), main = main, xlab = "recall", ylab = "precision", col = cols[i]) |
|
682 |
+ abline(h=0.5,col=rgb(0,0,0,1),lty=3) |
|
683 |
+ } else { |
|
684 |
+ lines(pitmp$rec, pitmp$ppv, type="l", col = cols[i]) |
|
685 |
+ } |
|
686 |
+ print(pitmp$auc) |
|
687 |
+ } |
|
688 |
+ if (prop == 1) { |
|
689 |
+ cols <- c("red", "blue", "darkgreen", "brown", "orange", "turquoise") |
|
690 |
+ legend <- c("svm","neural net","random forest","missForest","knn") |
|
691 |
+ legend("bottomright",legend=c(expression(NEM~pi),legend),fill=cols,col=cols) |
|
692 |
+ cols <- c("red","turquoise","orange","darkgreen","brown","blue") |
|
693 |
+ } |
|
694 |
+} |
|
695 |
+dev.off() |
|
696 |
+ |
|
697 |
+source("~/Documents/nempi/R/nempi_main.r") |
|
698 |
+source("~/Documents/nempi/R/nempi_low.r") |
|
699 |
+ |
|
700 |
+## create tables: |
|
701 |
+ |
|
702 |
+for (i in 1:length(methods)) { |
|
703 |
+ cat(paste0(names(methods)[i], " & ", Lcnv[[i]][1,1], " & ", Lcnv[[i]][2,1], " & ", Lcnv[[i]][2,2], " & ", Lcnv[[i]][1,2], "\\\\\n")) |
|
704 |
+} |
|
705 |
+ |
|
706 |
+for (i in 1:length(methods)) { |
|
707 |
+ cat(paste0(names(methods)[i], " & ", Lmeth[[i]][1,1], " & ", Lmeth[[i]][2,1], " & ", Lmeth[[i]][2,2], " & ", Lmeth[[i]][1,2], "\\\\\n")) |
|
708 |
+} |
|
709 |
+ |
|
710 |
+for (i in 1:length(methods)) { |
|
711 |
+ cat(paste0(names(methods)[i], " & ", Lmut[[i]][1,1], " & ", Lmut[[i]][2,1], " & ", Lmut[[i]][2,2], " & ", Lmut[[i]][1,2], "\\\\\n")) |
|
712 |
+} |
|
713 |
+ |
|
714 |
+for (i in 1:length(methods)) { |
|
715 |
+ if (names(methods)[i] == "nn") { next() } |
|
716 |
+ Fmat <- Lall[[i]] |
|
717 |
+ ptmp <- 1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ])) |
|
718 |
+ if (ptmp == 0) { |
|
719 |
+ ptmp <- "$< 2.2\\times10^{-16}$" |
|
720 |
+ } else { |
|
721 |
+ ptmp <- paste0("$", signif(ptmp), "$") |
|
722 |
+ } |
|
723 |
+ cat(paste0(names(methods)[i], " & ", Lall[[i]][1,1], " & ", Lall[[i]][2,1], " & ", Lall[[i]][2,2], " & ", Lall[[i]][1,2], " & ", round(Lall[[i]][1,1]/(Lall[[i]][1,1]+Lall[[i]][2,1])*100), "\\% & ", round(Lall[[i]][1,1]/(Lall[[i]][1,1]+Lall[[i]][1,2])*100), "\\% & ", ptmp, " & ", round(Laucor[[i]][1], 2), "\\\\\n")) |
|
724 |
+} |
|
725 |
+ |
|
726 |
+ |
|
727 |
+## check correlation |
|
728 |
+ |
|
729 |
+## P4 <- apply(P3, 2, function(x) return(x/sum(x))) |
|
730 |
+## P4[is.na(P4)] <- 0 |
|
731 |
+ |
|
732 |
+## cor(as.vector(newGamma), as.vector(P3)) |
|
733 |
+ |
|
734 |
+## |
|
735 |
+ |
|
736 |
+cormat <- matrix(0, nrow(pmeth), 2) |
|
737 |
+fishres <- numeric(nrow(pmeth)) |
|
738 |
+names(fishres) <- rownames(pmeth) |
|
739 |
+for (i in 1:nrow(pmeth)) { |
|
740 |
+ Fmat <- matrix(c(sum(pmeth[i, ] == 2), sum(pmeth[i, ] == 1), sum(pmeth[i, ] == -2), sum(pmeth[i, ] == 0)), 2) |
|
741 |
+ fishres[i] <- fisher.test(Fmat, alternative = "g")$p.value |
|
742 |
+ cormat[i, ] <- c(sum(Fmat[1, ]), sum(Fmat[, 1])) |
|
743 |
+} |
|
744 |
+ |
|
745 |
+## GATA3 & PTPRD: |
|
746 |
+ |
|
747 |
+Fmat <- matrix(c(sum(pmeth[3, ] %in% c(2,-2) & pmeth[7, ] %in% c(2,-2)), |
|
748 |
+ sum(pmeth[3, ] %in% c(1,0) & pmeth[7, ] %in% c(-2,2)), |
|
749 |
+ sum(pmeth[3, ] %in% c(-2,2) & pmeth[7, ] %in% c(1,0)), |
|
750 |
+ sum(pmeth[3, ] %in% c(1,0) & pmeth[7, ] %in% c(1,0))), 2) |
|
751 |
+ |
|
752 |
+1 - phyper(Fmat[1,1]-1, sum(Fmat[, 1]), sum(Fmat[, 2]), sum(Fmat[1, ])) |
|
753 |
+ |
|
754 |
+ |
|