1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,117 +0,0 @@ |
1 |
- |
|
2 |
-set.seed(888) |
|
3 |
- |
|
4 |
-type = c(rep("Tumor", 10), rep("Control", 10)) |
|
5 |
-gender = sample(c("F", "M"), 20, replace = TRUE) |
|
6 |
-gender[sample(1:20, 2)] = NA |
|
7 |
-age = runif(20, min = 30, max = 80) |
|
8 |
-mutation = data.frame(mut1 = sample(c(TRUE, FALSE), 20, p = c(0.2, 0.8), replace = TRUE), |
|
9 |
- mut2 = sample(c(TRUE, FALSE), 20, p = c(0.3, 0.7), replace = TRUE)) |
|
10 |
-anno = data.frame(type = type, gender = gender, age = age, mutation, stringsAsFactors = FALSE) |
|
11 |
- |
|
12 |
-###################################### |
|
13 |
-# generate methylation matrix |
|
14 |
-rand_meth = function(k, mean) { |
|
15 |
- (runif(k) - 0.5)*min(c(1-mean), mean) + mean |
|
16 |
-} |
|
17 |
- |
|
18 |
-mean_meth = c(rand_meth(300, 0.3), rand_meth(700, 0.7)) |
|
19 |
-mat_meth = as.data.frame(lapply(mean_meth, function(m) { |
|
20 |
- if(m < 0.3) { |
|
21 |
- c(rand_meth(10, m), rand_meth(10, m + 0.2)) |
|
22 |
- } else if(m > 0.7) { |
|
23 |
- c(rand_meth(10, m), rand_meth(10, m - 0.2)) |
|
24 |
- } else { |
|
25 |
- c(rand_meth(10, m), rand_meth(10, m + sample(c(1, -1), 1)*0.2)) |
|
26 |
- } |
|
27 |
- |
|
28 |
-})) |
|
29 |
-mat_meth = t(mat_meth) |
|
30 |
-rownames(mat_meth) = NULL |
|
31 |
-colnames(mat_meth) = paste0("sample", 1:20) |
|
32 |
- |
|
33 |
-###################################### |
|
34 |
-# generate directions for methylation |
|
35 |
-direction = rowMeans(mat_meth[, 1:10]) - rowMeans(mat_meth[, 11:20]) |
|
36 |
-direction = ifelse(direction > 0, "hyper", "hypo") |
|
37 |
- |
|
38 |
-####################################### |
|
39 |
-# generate expression matrix |
|
40 |
-mat_expr = t(apply(mat_meth, 1, function(x) { |
|
41 |
- x = x + rnorm(length(x), sd = abs(runif(1)-0.5)*0.4 + 0.1) |
|
42 |
- -scale(x) |
|
43 |
-})) |
|
44 |
-dimnames(mat_expr) = dimnames(mat_meth) |
|
45 |
- |
|
46 |
-############################################################# |
|
47 |
-# matrix for correlation between methylation and expression |
|
48 |
-cor_pvalue = -log10(sapply(seq_len(nrow(mat_meth)), function(i) { |
|
49 |
- cor.test(mat_meth[i, ], mat_expr[i, ])$p.value |
|
50 |
-})) |
|
51 |
- |
|
52 |
-##################################################### |
|
53 |
-# matrix for types of genes |
|
54 |
-gene_type = sample(c("protein_coding", "lincRNA", "microRNA", "psedo-gene", "others"), |
|
55 |
- nrow(mat_meth), replace = TRUE, prob = c(6, 1, 1, 1, 1)) |
|
56 |
- |
|
57 |
-################################################# |
|
58 |
-# annotation to genes |
|
59 |
-anno_gene = sapply(mean_meth, function(m) { |
|
60 |
- if(m > 0.6) { |
|
61 |
- if(runif(1) < 0.8) return("intragenic") |
|
62 |
- } |
|
63 |
- if(m < 0.4) { |
|
64 |
- if(runif(1) < 0.4) return("TSS") |
|
65 |
- } |
|
66 |
- return("intergenic") |
|
67 |
-}) |
|
68 |
- |
|
69 |
-############################################ |
|
70 |
-# distance to genes |
|
71 |
-tss_dist = sapply(mean_meth, function(m) { |
|
72 |
- if(m < 0.3) { |
|
73 |
- if(runif(1) < 0.5) { |
|
74 |
- return(round( (runif(1) - 0.5)*1000 + 500)) |
|
75 |
- } else { |
|
76 |
- return(round( (runif(1) - 0.5)*10000 + 500)) |
|
77 |
- } |
|
78 |
- } else if(m < 0.6) { |
|
79 |
- if(runif(1) < 0.8) { |
|
80 |
- return(round( (runif(1)-0.5)*100000 + 50000 )) |
|
81 |
- } else { |
|
82 |
- return(round( (runif(1)-0.5)*1000000 + 500000 )) |
|
83 |
- } |
|
84 |
- } |
|
85 |
- return(round( (runif(1) - 0.5)*1000000 + 500000)) |
|
86 |
-}) |
|
87 |
- |
|
88 |
- |
|
89 |
-####################################### |
|
90 |
-# annotation to enhancers |
|
91 |
-rand_tss = function(m) { |
|
92 |
- if(m < 0.4) { |
|
93 |
- if(runif(1) < 0.25) return(runif(1)) |
|
94 |
- } else if (runif(1) < 0.1) { |
|
95 |
- return(runif(1)) |
|
96 |
- } |
|
97 |
- return(0) |
|
98 |
-} |
|
99 |
-rand_enhancer = function(m) { |
|
100 |
- if(m < 0.4) { |
|
101 |
- if(runif(1) < 0.6) return(runif(1)) |
|
102 |
- } else if (runif(1) < 0.1) { |
|
103 |
- return(runif(1)) |
|
104 |
- } |
|
105 |
- return(0) |
|
106 |
-} |
|
107 |
-rand_repressive = function(m) { |
|
108 |
- if(m > 0.4) { |
|
109 |
- if(runif(1) < 0.8) return(runif(1)) |
|
110 |
- } |
|
111 |
- return(0) |
|
112 |
-} |
|
113 |
-anno_states = data.frame( |
|
114 |
- tss = sapply(mean_meth, rand_tss), |
|
115 |
- enhancer = sapply(mean_meth, rand_enhancer), |
|
116 |
- rand_repressive = sapply(mean_meth, rand_repressive)) |
|
117 |
- |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,117 @@ |
1 |
+ |
|
2 |
+set.seed(888) |
|
3 |
+ |
|
4 |
+type = c(rep("Tumor", 10), rep("Control", 10)) |
|
5 |
+gender = sample(c("F", "M"), 20, replace = TRUE) |
|
6 |
+gender[sample(1:20, 2)] = NA |
|
7 |
+age = runif(20, min = 30, max = 80) |
|
8 |
+mutation = data.frame(mut1 = sample(c(TRUE, FALSE), 20, p = c(0.2, 0.8), replace = TRUE), |
|
9 |
+ mut2 = sample(c(TRUE, FALSE), 20, p = c(0.3, 0.7), replace = TRUE)) |
|
10 |
+anno = data.frame(type = type, gender = gender, age = age, mutation, stringsAsFactors = FALSE) |
|
11 |
+ |
|
12 |
+###################################### |
|
13 |
+# generate methylation matrix |
|
14 |
+rand_meth = function(k, mean) { |
|
15 |
+ (runif(k) - 0.5)*min(c(1-mean), mean) + mean |
|
16 |
+} |
|
17 |
+ |
|
18 |
+mean_meth = c(rand_meth(300, 0.3), rand_meth(700, 0.7)) |
|
19 |
+mat_meth = as.data.frame(lapply(mean_meth, function(m) { |
|
20 |
+ if(m < 0.3) { |
|
21 |
+ c(rand_meth(10, m), rand_meth(10, m + 0.2)) |
|
22 |
+ } else if(m > 0.7) { |
|
23 |
+ c(rand_meth(10, m), rand_meth(10, m - 0.2)) |
|
24 |
+ } else { |
|
25 |
+ c(rand_meth(10, m), rand_meth(10, m + sample(c(1, -1), 1)*0.2)) |
|
26 |
+ } |
|
27 |
+ |
|
28 |
+})) |
|
29 |
+mat_meth = t(mat_meth) |
|
30 |
+rownames(mat_meth) = NULL |
|
31 |
+colnames(mat_meth) = paste0("sample", 1:20) |
|
32 |
+ |
|
33 |
+###################################### |
|
34 |
+# generate directions for methylation |
|
35 |
+direction = rowMeans(mat_meth[, 1:10]) - rowMeans(mat_meth[, 11:20]) |
|
36 |
+direction = ifelse(direction > 0, "hyper", "hypo") |
|
37 |
+ |
|
38 |
+####################################### |
|
39 |
+# generate expression matrix |
|
40 |
+mat_expr = t(apply(mat_meth, 1, function(x) { |
|
41 |
+ x = x + rnorm(length(x), sd = abs(runif(1)-0.5)*0.4 + 0.1) |
|
42 |
+ -scale(x) |
|
43 |
+})) |
|
44 |
+dimnames(mat_expr) = dimnames(mat_meth) |
|
45 |
+ |
|
46 |
+############################################################# |
|
47 |
+# matrix for correlation between methylation and expression |
|
48 |
+cor_pvalue = -log10(sapply(seq_len(nrow(mat_meth)), function(i) { |
|
49 |
+ cor.test(mat_meth[i, ], mat_expr[i, ])$p.value |
|
50 |
+})) |
|
51 |
+ |
|
52 |
+##################################################### |
|
53 |
+# matrix for types of genes |
|
54 |
+gene_type = sample(c("protein_coding", "lincRNA", "microRNA", "psedo-gene", "others"), |
|
55 |
+ nrow(mat_meth), replace = TRUE, prob = c(6, 1, 1, 1, 1)) |
|
56 |
+ |
|
57 |
+################################################# |
|
58 |
+# annotation to genes |
|
59 |
+anno_gene = sapply(mean_meth, function(m) { |
|
60 |
+ if(m > 0.6) { |
|
61 |
+ if(runif(1) < 0.8) return("intragenic") |
|
62 |
+ } |
|
63 |
+ if(m < 0.4) { |
|
64 |
+ if(runif(1) < 0.4) return("TSS") |
|
65 |
+ } |
|
66 |
+ return("intergenic") |
|
67 |
+}) |
|
68 |
+ |
|
69 |
+############################################ |
|
70 |
+# distance to genes |
|
71 |
+tss_dist = sapply(mean_meth, function(m) { |
|
72 |
+ if(m < 0.3) { |
|
73 |
+ if(runif(1) < 0.5) { |
|
74 |
+ return(round( (runif(1) - 0.5)*1000 + 500)) |
|
75 |
+ } else { |
|
76 |
+ return(round( (runif(1) - 0.5)*10000 + 500)) |
|
77 |
+ } |
|
78 |
+ } else if(m < 0.6) { |
|
79 |
+ if(runif(1) < 0.8) { |
|
80 |
+ return(round( (runif(1)-0.5)*100000 + 50000 )) |
|
81 |
+ } else { |
|
82 |
+ return(round( (runif(1)-0.5)*1000000 + 500000 )) |
|
83 |
+ } |
|
84 |
+ } |
|
85 |
+ return(round( (runif(1) - 0.5)*1000000 + 500000)) |
|
86 |
+}) |
|
87 |
+ |
|
88 |
+ |
|
89 |
+####################################### |
|
90 |
+# annotation to enhancers |
|
91 |
+rand_tss = function(m) { |
|
92 |
+ if(m < 0.4) { |
|
93 |
+ if(runif(1) < 0.25) return(runif(1)) |
|
94 |
+ } else if (runif(1) < 0.1) { |
|
95 |
+ return(runif(1)) |
|
96 |
+ } |
|
97 |
+ return(0) |
|
98 |
+} |
|
99 |
+rand_enhancer = function(m) { |
|
100 |
+ if(m < 0.4) { |
|
101 |
+ if(runif(1) < 0.6) return(runif(1)) |
|
102 |
+ } else if (runif(1) < 0.1) { |
|
103 |
+ return(runif(1)) |
|
104 |
+ } |
|
105 |
+ return(0) |
|
106 |
+} |
|
107 |
+rand_repressive = function(m) { |
|
108 |
+ if(m > 0.4) { |
|
109 |
+ if(runif(1) < 0.8) return(runif(1)) |
|
110 |
+ } |
|
111 |
+ return(0) |
|
112 |
+} |
|
113 |
+anno_states = data.frame( |
|
114 |
+ tss = sapply(mean_meth, rand_tss), |
|
115 |
+ enhancer = sapply(mean_meth, rand_enhancer), |
|
116 |
+ rand_repressive = sapply(mean_meth, rand_repressive)) |
|
117 |
+ |