Adds .assignClass() to reduce repeated code.
Reorders if statements for clarity. Removes
unnecessary EM function args.
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
# Applied from Lai et al 2007 Bioinformatics. |
2 | 2 |
# |
3 |
-# Code written by Charlotte Siska |
|
3 |
+# Code written by Charlotte Siska and Max McGrath |
|
4 | 4 |
# R functions fishersTrans, createVectors, discordantRun and makeTable |
5 | 5 |
# Email: charlotte.siska@ucdenver.edu |
6 | 6 |
# |
... | ... |
@@ -12,163 +12,124 @@ |
12 | 12 |
# R function for SparCC, adapted from https://bitbucket.org/yonatanf/sparcc |
13 | 13 |
# Email: hyfang@pku.edu.cn |
14 | 14 |
|
15 |
+#' @export |
|
15 | 16 |
discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, |
16 | 17 |
subsampling = FALSE, subSize = dim(x)[1], iter = 100, |
17 | 18 |
components = 3) { |
18 | 19 |
|
19 |
- .checkDiscordantInputs(v1, v2, x, y, transform, subsampling, subSize, iter, |
|
20 |
- components) |
|
21 |
- |
|
22 |
- if (transform) { |
|
23 |
- v1 <- fishersTrans(v1) |
|
24 |
- v2 <- fishersTrans(v2) |
|
25 |
- } |
|
26 |
- |
|
27 |
- x <- exprs(x) |
|
28 |
- if(is.null(y) == FALSE) { y <- exprs(y) } |
|
29 |
- featureSize = dim(x)[1] |
|
30 |
- |
|
31 |
- pdata <- cbind(v1, v2) |
|
32 |
- |
|
33 |
- param1 <- sd(v1) |
|
34 |
- param2 <- sd(v2) |
|
35 |
- |
|
36 |
- if(components == 3) { |
|
37 |
- class <- cbind(rep(0, dim(pdata)[1]), rep(0, dim(pdata)[1])) |
|
38 |
- class[pdata[,1]>0+param1,1] <- 2 |
|
39 |
- class[pdata[,1]<0-param1,1] <- 1 |
|
40 |
- class[pdata[,2]>0+param2,2] <- 2 |
|
41 |
- class[pdata[,2]<0-param2,2] <- 1 |
|
42 |
- discordClass <- c(2,3,4,6,7,8) |
|
43 |
- } |
|
44 |
- |
|
45 |
- if(components == 5) { |
|
46 |
- class <- cbind(rep(0, dim(pdata)[1]), rep(0, dim(pdata)[1])) |
|
47 |
- class[pdata[,1]>0+param1,1] <- 2 |
|
48 |
- class[pdata[,1]<0-param1,1] <- 1 |
|
49 |
- class[pdata[,1]>0+(2*param1),1] <- 4 |
|
50 |
- class[pdata[,1]<0-(2*param1),1] <- 3 |
|
51 |
- class[pdata[,2]>0+param2,2] <- 2 |
|
52 |
- class[pdata[,2]<0-param2,2] <- 1 |
|
53 |
- class[pdata[,2]>0+(2*param2),2] <- 4 |
|
54 |
- class[pdata[,2]<0-(2*param2),2] <- 3 |
|
55 |
- concordClass <- c(1,7,13,19,25) |
|
56 |
- discordClass <- setdiff(1:25,concordClass) |
|
57 |
- } |
|
20 |
+ .checkDiscordantInputs(v1, v2, x, y, transform, subsampling, subSize, iter, |
|
21 |
+ components) |
|
22 |
+ |
|
23 |
+ if (transform) { |
|
24 |
+ v1 <- fishersTrans(v1) |
|
25 |
+ v2 <- fishersTrans(v2) |
|
26 |
+ } |
|
58 | 27 |
|
59 |
- if(subsampling == TRUE) { |
|
60 |
- subSize = nrow(x) |
|
61 |
- if(is.null(y) == FALSE & nrow(y) < subSize) { |
|
62 |
- subSize = nrow(y) |
|
28 |
+ x <- exprs(x) |
|
29 |
+ if (is.null(y) == FALSE) { y <- exprs(y) } |
|
30 |
+ featureSize = dim(x)[1] |
|
31 |
+ |
|
32 |
+ pdata <- cbind(v1, v2) |
|
33 |
+ param1 <- sd(v1) |
|
34 |
+ param2 <- sd(v2) |
|
35 |
+ class <- cbind(.assignClass(v1, param1, components), |
|
36 |
+ .assignClass(v2, param2, components)) |
|
37 |
+ |
|
38 |
+ if (components == 3) { |
|
39 |
+ discordClass <- c(2,3,4,6,7,8) |
|
40 |
+ } else { |
|
41 |
+ discordClass <- setdiff(1:25, c(1, 7, 13, 19, 25)) |
|
63 | 42 |
} |
64 |
- total_mu <- rep(0,3) |
|
65 |
- total_sigma <- rep(0,3) |
|
66 |
- total_nu <- rep(0,3) |
|
67 |
- total_tau <- rep(0,3) |
|
68 |
- total_pi <- rep(0,3) |
|
69 |
- for(i in 1:iter) { |
|
70 |
- # make sure pairs are independent |
|
71 |
- rowIndex <- sample(nrow(x), subSize) |
|
72 |
- colIndex <- sample(nrow(y), subSize) |
|
73 |
- mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE) |
|
74 |
- mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE) |
|
43 |
+ |
|
44 |
+ if (subsampling) { |
|
45 |
+ subSize <- min(nrow(x), nrow(y)) |
|
46 |
+ total_mu <- total_sigma <- total_nu <- |
|
47 |
+ total_tau <- total_pi <- rep(0, components) |
|
48 |
+ # NOTE: changed this from default 3 to components... need to figure out |
|
49 |
+ # if that's right |
|
50 |
+ |
|
51 |
+ for(i in 1:iter) { |
|
52 |
+ # make sure pairs are independent |
|
53 |
+ rowIndex <- sample(nrow(x), subSize) |
|
54 |
+ colIndex <- sample(nrow(y), subSize) |
|
55 |
+ mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE) |
|
56 |
+ mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE) |
|
57 |
+ |
|
58 |
+ subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], |
|
59 |
+ colIndex[x]]) |
|
60 |
+ subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], |
|
61 |
+ colIndex[x]]) |
|
62 |
+ |
|
63 |
+ sub.pdata <- cbind(subSampV1, subSampV2) |
|
64 |
+ sub.class <- cbind(.assignClass(subSampV1, param1, components), |
|
65 |
+ .assignClass(subSampV2, param2, components)) |
|
66 |
+ |
|
67 |
+ pd <- em.normal.partial.concordant(sub.pdata, sub.class, components) |
|
68 |
+ total_mu <- total_mu + pd$mu_sigma[1,] |
|
69 |
+ total_sigma <- total_sigma + pd$mu_sigma[2,] |
|
70 |
+ total_nu <- total_nu + pd$nu_tau[1,] |
|
71 |
+ total_tau <- total_tau + pd$nu_tau[2,] |
|
72 |
+ total_pi <- total_pi + pd$pi |
|
73 |
+ } |
|
75 | 74 |
|
76 |
- subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], |
|
77 |
- colIndex[x]]) |
|
78 |
- subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], |
|
79 |
- colIndex[x]]) |
|
75 |
+ mu <- total_mu / iter |
|
76 |
+ sigma <- total_sigma / iter |
|
77 |
+ nu <- total_nu / iter |
|
78 |
+ tau <- total_tau / iter |
|
79 |
+ pi <- total_pi / iter |
|
80 | 80 |
|
81 |
- sub.pdata <- cbind(subSampV1, subSampV2) |
|
82 |
- sub.class <- cbind(rep(0, subSize), rep(0, subSize)) |
|
83 |
- if(components == 3) { |
|
84 |
- sub.class[sub.pdata[,1]>0+param1,1] <- 2 |
|
85 |
- sub.class[sub.pdata[,1]<0-param1,1] <- 1 |
|
86 |
- sub.class[sub.pdata[,2]>0+param2,2] <- 2 |
|
87 |
- sub.class[sub.pdata[,2]<0-param2,2] <- 1 |
|
88 |
- } |
|
89 |
- if(components == 5) { |
|
90 |
- sub.class[sub.pdata[,1]>0+param1,1] <- 2 |
|
91 |
- sub.class[sub.pdata[,1]<0-param1,1] <- 1 |
|
92 |
- sub.class[sub.pdata[,1]>0+(2*param1),1] <- 4 |
|
93 |
- sub.class[sub.pdata[,1]<0-(2*param1),1] <- 3 |
|
94 |
- sub.class[pdata[,2]>0+param2,2] <- 2 |
|
95 |
- sub.class[pdata[,2]<0-param2,2] <- 1 |
|
96 |
- sub.class[pdata[,2]>0+(2*param2),2] <- 4 |
|
97 |
- sub.class[pdata[,2]<0-(2*param2),2] <- 3 |
|
98 |
- } |
|
99 |
- pd <- em.normal.partial.concordant(sub.pdata, sub.class, tol = 0.001, |
|
100 |
- restriction = 0, |
|
101 |
- constrain = c(0,-sd(pdata),sd(pdata)), |
|
102 |
- iteration = 1000, |
|
103 |
- components = components) |
|
104 |
- total_mu <- total_mu + pd$mu_sigma[1,] |
|
105 |
- total_sigma <- total_sigma + pd$mu_sigma[2,] |
|
106 |
- total_nu <- total_nu + pd$nu_tau[1,] |
|
107 |
- total_tau <- total_tau + pd$nu_tau[2,] |
|
108 |
- total_pi <- total_pi + pd$pi |
|
81 |
+ finalResult <- subSampleData(pdata, class, mu, sigma, nu, tau, pi, |
|
82 |
+ components) |
|
83 |
+ zTable <- finalResult$z |
|
84 |
+ classVector <- finalResult$class |
|
85 |
+ } else { |
|
86 |
+ pd <- em.normal.partial.concordant(pdata, class, components) |
|
87 |
+ zTable <- pd$z |
|
88 |
+ classVector <- pd$class |
|
109 | 89 |
} |
110 | 90 |
|
111 |
- mu <- total_mu / iter |
|
112 |
- sigma <- total_sigma / iter |
|
113 |
- nu <- total_nu / iter |
|
114 |
- tau <- total_tau / iter |
|
115 |
- pi <- total_pi / iter |
|
91 |
+ discordPPV <- apply(zTable, 1, function(x) sum(x[discordClass])/sum(x)) |
|
116 | 92 |
|
117 |
- finalResult <- subSampleData(pdata, class, mu, sigma, nu, tau, pi, |
|
118 |
- components) |
|
119 |
- zTable <- finalResult$z |
|
120 |
- classVector <- finalResult$class |
|
121 |
- } else { |
|
122 |
- pd <- em.normal.partial.concordant(pdata, class, tol = 0.001, |
|
123 |
- restriction = 0, |
|
124 |
- constrain = c(0, -sd(pdata), sd(pdata)), |
|
125 |
- iteration = 1000, |
|
126 |
- components = components) |
|
127 |
- zTable <- pd$z |
|
128 |
- classVector <- pd$class |
|
129 |
- } |
|
130 |
- |
|
131 |
- discordPPV <- apply(zTable, 1, function(x) sum(x[discordClass])/sum(x)) |
|
132 |
- |
|
133 |
- if(is.null(y) == FALSE) { |
|
134 |
- discordPPMatrix <- matrix(discordPPV, nrow = featureSize, |
|
135 |
- byrow = FALSE) |
|
136 |
- classMatrix <- matrix(classVector, nrow = featureSize, byrow = FALSE) |
|
137 |
- rownames(discordPPMatrix) <- rownames(x) |
|
138 |
- colnames(discordPPMatrix) <- rownames(y) |
|
139 |
- rownames(classMatrix) <- rownames(x) |
|
140 |
- colnames(classMatrix) <- rownames(y) |
|
93 |
+ if(is.null(y)) { |
|
94 |
+ discordPPMatrix <- matrix(NA,nrow = featureSize, ncol = featureSize) |
|
95 |
+ classMatrix <- discordPPMatrix |
|
96 |
+ diag <- lower.tri(discordPPMatrix, diag = FALSE) |
|
97 |
+ discordPPMatrix[diag] <- discordPPV |
|
98 |
+ classMatrix[diag] <- classVector |
|
99 |
+ rownames(discordPPMatrix) <- rownames(x) |
|
100 |
+ colnames(discordPPMatrix) <- rownames(x) |
|
101 |
+ rownames(classMatrix) <- rownames(x) |
|
102 |
+ colnames(classMatrix) <- rownames(x) |
|
103 |
+ vector_names <- getNames(x) |
|
104 |
+ names(discordPPV) <- vector_names |
|
105 |
+ names(classVector) <- vector_names |
|
106 |
+ } else { |
|
107 |
+ discordPPMatrix <- matrix(discordPPV, nrow = featureSize, |
|
108 |
+ byrow = FALSE) |
|
109 |
+ classMatrix <- matrix(classVector, nrow = featureSize, byrow = FALSE) |
|
110 |
+ rownames(discordPPMatrix) <- rownames(x) |
|
111 |
+ colnames(discordPPMatrix) <- rownames(y) |
|
112 |
+ rownames(classMatrix) <- rownames(x) |
|
113 |
+ colnames(classMatrix) <- rownames(y) |
|
114 |
+ |
|
115 |
+ vector_names <- getNames(x,y) |
|
116 |
+ names(discordPPV) <- vector_names |
|
117 |
+ names(classVector) <- vector_names |
|
118 |
+ } |
|
141 | 119 |
|
142 |
- vector_names <- getNames(x,y) |
|
143 |
- names(discordPPV) <- vector_names |
|
144 |
- names(classVector) <- vector_names |
|
145 |
- } |
|
146 |
- |
|
147 |
- if(is.null(y)) { |
|
148 |
- discordPPMatrix <- matrix(NA,nrow = featureSize, ncol = featureSize) |
|
149 |
- classMatrix <- discordPPMatrix |
|
150 |
- diag <- lower.tri(discordPPMatrix, diag = FALSE) |
|
151 |
- discordPPMatrix[diag] <- discordPPV |
|
152 |
- classMatrix[diag] <- classVector |
|
153 |
- rownames(discordPPMatrix) <- rownames(x) |
|
154 |
- colnames(discordPPMatrix) <- rownames(x) |
|
155 |
- rownames(classMatrix) <- rownames(x) |
|
156 |
- colnames(classMatrix) <- rownames(x) |
|
157 |
- vector_names <- getNames(x) |
|
158 |
- names(discordPPV) <- vector_names |
|
159 |
- names(classVector) <- vector_names |
|
160 |
- } |
|
161 |
- |
|
162 |
- zTable <- t(apply(zTable, 1, function(x) x/sum(x))) |
|
163 |
- rownames(zTable) <- vector_names |
|
164 |
- |
|
165 |
- return(list(discordPPMatrix = discordPPMatrix, discordPPVector = |
|
166 |
- discordPPV, classMatrix = classMatrix, classVector = classVector, |
|
167 |
- probMatrix = zTable, loglik = pd$loglik)) |
|
120 |
+ zTable <- t(apply(zTable, 1, function(x) x/sum(x))) |
|
121 |
+ rownames(zTable) <- vector_names |
|
122 |
+ |
|
123 |
+ return(list(discordPPMatrix = discordPPMatrix, discordPPVector = discordPPV, |
|
124 |
+ classMatrix = classMatrix, classVector = classVector, |
|
125 |
+ probMatrix = zTable, loglik = pd$loglik)) |
|
168 | 126 |
} |
169 | 127 |
|
170 |
-em.normal.partial.concordant <- function(data, class, tol=0.001, |
|
171 |
- restriction=0, constrain=0, iteration=1000, components) { |
|
128 |
+em.normal.partial.concordant <- function(data, class, components) { |
|
129 |
+ tol <- 0.001 |
|
130 |
+ restriction <- 0 |
|
131 |
+ constrain <- 0 |
|
132 |
+ iteration <- 1000 |
|
172 | 133 |
n <- as.integer(dim(data)[1]) |
173 | 134 |
g <- as.integer(nlevels(as.factor(class))) |
174 | 135 |
|
... | ... |
@@ -209,6 +170,21 @@ em.normal.partial.concordant <- function(data, class, tol=0.001, |
209 | 170 |
z = array(results[[3]], dim = c(n, g*g)))) |
210 | 171 |
} |
211 | 172 |
|
173 |
+ |
|
174 |
+# Internal function to assign class to vector based on number of components and |
|
175 |
+# each elements distance from zero |
|
176 |
+#' @importFrom dplyr case_when |
|
177 |
+.assignClass <- function(x, param, components) { |
|
178 |
+ if (components == 3) { |
|
179 |
+ rtn <- case_when(x < -param ~ 1, x > param ~ 2, TRUE ~ 0) |
|
180 |
+ } else { |
|
181 |
+ rtn <- case_when(x < -2*param ~ 3, x > 2*param ~ 4, x < -param ~ 1, |
|
182 |
+ x > param ~ 2, TRUE ~ 0) |
|
183 |
+ } |
|
184 |
+ return(rtn) |
|
185 |
+} |
|
186 |
+ |
|
187 |
+# Internal function to validate user inputs for discordantRun() |
|
212 | 188 |
.checkDiscordantInputs <- function(v1, v2, x, y, transform, |
213 | 189 |
subsampling, subSize, iter, |
214 | 190 |
components) { |
... | ... |
@@ -217,6 +193,11 @@ em.normal.partial.concordant <- function(data, class, tol=0.001, |
217 | 193 |
stop("x and y (if present) must be type ExpressionSet") |
218 | 194 |
} |
219 | 195 |
|
196 |
+ # Need to double check if this is true w/ Katerina |
|
197 |
+ if (is.null(y) && subsampling) { |
|
198 |
+ stop("y cannot be NULL if subsampling is TRUE") |
|
199 |
+ } |
|
200 |
+ |
|
220 | 201 |
if (!(components %in% c(3, 5))) { |
221 | 202 |
stop ("components must be equal to 3 or 5") |
222 | 203 |
} |