Browse code

Improves param validation, fixes SparCC bug

Removes checkInputs in favor of seperate functions
for checking inputs for discordantRun and
createVectors. Also adds previous SparCC fix
to case where y is not null

Max McGrath authored on 22/07/2021 18:40:36
Showing3 changed files

... ...
@@ -6,21 +6,20 @@
6 6
 #' @import tools
7 7
 #' 
8 8
 #' @export
9
-createVectors <- function(x, y = NULL, groups, cor.method = c("spearman")) {
10
-    print(x)
11
-    if(checkInputs(x,y,groups)) {
12
-        stop("Please fix inputs.")
9
+createVectors <- function(x, y = NULL, groups, 
10
+                          cor.method = c("pearson", "spearman", "bwmc", 
11
+                                         "sparcc")) {
12
+    
13
+    cor.method <- match.arg(cor.method)
14
+    if (unique(unique(groups) != c(1,2))) {
15
+        stop("groups vector must consist of 1s and 2s corresponding to first
16
+             and second group")
13 17
     }
14 18
     
19
+    print(x)
20
+    
15 21
     index1 <- which(groups == 1)
16 22
     index2 <- which(groups == 2)
17
-    
18
-    check <- match(cor.method, c("spearman", "pearson", "bwmc", "sparcc"))
19
-    if(is.na(check)) {
20
-        stop("Please enter spearman, pearson, bwmc or sparcc for correlation 
21
-            metric.")
22
-    }
23
-    
24 23
     x <- exprs(x)
25 24
     
26 25
     if(is.null(y) == FALSE) {
... ...
@@ -39,10 +38,10 @@ createVectors <- function(x, y = NULL, groups, cor.method = c("spearman")) {
39 38
         }
40 39
         if(cor.method == c("sparcc")) {
41 40
             if(min(data) < 0) {
42
-                stop("SparCC can only be applied to sequencing data.")
41
+                stop("Negative values found. SparCC can only be applied to sequencing data.")
43 42
             }
44
-            statMatrix1 <- SparCC.count(t(data1))
45
-            statMatrix2 <- SparCC.count(t(data2))
43
+            statMatrix1 <- SparCC.count(t(data1))$cor.w
44
+            statMatrix2 <- SparCC.count(t(data2))$cor.w
46 45
         }
47 46
         statMatrix1 <- statMatrix1[1:featureSize,
48 47
                                    (featureSize + 1):dim(data1)[1]]
... ...
@@ -16,25 +16,14 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE,
16 16
                           subsampling = FALSE, subSize = dim(x)[1], iter = 100, 
17 17
                           components = 3) {
18 18
   
19
-  if(checkInputs(x,y)) {
20
-    stop("Please fix inputs.")
21
-  }
19
+  .checkDiscordantInputs(v1, v2, x, y, transform, subsampling, subSize, iter, 
20
+                         components)
22 21
   
23
-  if(transform == TRUE) {
24
-    if(range(v1)[1] < -1 || range(v1)[2] > 1 || range(v2)[1] < -1 || 
25
-       range(v2)[2] > 1) {
26
-      stop("correlation vectors have values less than -1 and/or greater 
27
-                than 1.")
28
-    }
22
+  if (transform) {
29 23
     v1 <- fishersTrans(v1)
30 24
     v2 <- fishersTrans(v2)
31 25
   }
32 26
   
33
-  check <- match(components, c(3,5))
34
-  if(is.na(check)) {
35
-    stop("components must be 3 or 5")
36
-  }
37
-  
38 27
   x <- exprs(x)
39 28
   if(is.null(y) == FALSE) { y <- exprs(y) }
40 29
   featureSize = dim(x)[1]
... ...
@@ -125,7 +114,8 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE,
125 114
     tau <- total_tau / iter
126 115
     pi <- total_pi / iter
127 116
     
128
-    finalResult <- subSampleData(pdata, class, mu, sigma, nu, tau, pi, components)
117
+    finalResult <- subSampleData(pdata, class, mu, sigma, nu, tau, pi, 
118
+                                 components)
129 119
     zTable <- finalResult$z
130 120
     classVector <- finalResult$class
131 121
   } else {
... ...
@@ -177,30 +167,14 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE,
177 167
               probMatrix = zTable, loglik = pd$loglik))
178 168
 }
179 169
 
180
-#modified from package mclust
181
-unmap <- function(classification, components){
182
-    n <- length(classification)
183
-    # u <- sort(unique(classification)) # OG Code
184
-    u <- 0:(components - 1) # Max's potential fix
185
-    labs <- as.character(u)
186
-    k <- length(u)
187
-    z <- matrix(0, n, k)
188
-    for (j in 1:k) z[classification == u[j], j] <- 1
189
-    dimnames(z) <- list(NULL, labs)
190
-    return(z)
191
-}
192
-
193 170
 em.normal.partial.concordant <- function(data, class, tol=0.001, 
194
-    restriction=0, constrain=0, iteration=1000, components){
171
+    restriction=0, constrain=0, iteration=1000, components) {
195 172
     n <- as.integer(dim(data)[1])
196 173
     g <- as.integer(nlevels(as.factor(class)))
197 174
 
198 175
     yl.outer <- function(k, zx, zy){
199 176
         return( c(zx[k,] %o% zy[k,]) )
200 177
     }
201
-    yl.diag <- function(k, z){
202
-        return( c(diag(z[k,])) )
203
-    }
204 178
 
205 179
     zx <- unmap(class[,1], components = components)
206 180
     zy <- unmap(class[,2], components = components)
... ...
@@ -224,9 +198,31 @@ em.normal.partial.concordant <- function(data, class, tol=0.001,
224 198
                                                 as.integer(iteration), 
225 199
                                                 convergence)
226 200
     
227
-    return(list(model="PCD", convergence=results[[16]], 
228
-        pi=t(array(results[[5]], dim=c(g,g))), mu_sigma=rbind(results[[6]], 
229
-        results[[7]]), nu_tau=rbind(results[[8]], results[[9]]), 
230
-        loglik=results[[11]], class=apply(array(results[[3]], dim=c(n,g*g)),1,
231
-        order,decreasing=TRUE)[1,], z=array(results[[3]], dim=c(n,g*g))))
201
+    return(list(model = "PCD", 
202
+                convergence = results[[16]],
203
+                pi = t(array(results[[5]], dim=c(g,g))), 
204
+                mu_sigma = rbind(results[[6]], results[[7]]), 
205
+                nu_tau = rbind(results[[8]], results[[9]]),
206
+                loglik = results[[11]], 
207
+                class = apply(array(results[[3]], dim = c(n,g*g)), 
208
+                              1, order, decreasing = TRUE)[1,], 
209
+                z = array(results[[3]], dim = c(n, g*g))))
210
+}
211
+
212
+.checkDiscordantInputs <- function(v1, v2, x, y, transform, 
213
+                                   subsampling, subSize, iter, 
214
+                                   components) {
215
+  
216
+  if (!is(x, "ExpressionSet") || (!is.null(y) && !is(y, "ExpressionSet"))) {
217
+    stop("x and y (if present) must be type ExpressionSet")
218
+  }
219
+  
220
+  if (!(components %in% c(3, 5))) {
221
+    stop ("components must be equal to 3 or 5")
222
+  }
223
+  
224
+  if (transform && (range(v1)[1] < -1 || range(v1)[2] > 1 || 
225
+                    range(v2)[1] < -1 || range(v2)[2] > 1)) {
226
+    stop ("correlation vectors have values less than -1 and/or greater than 1.")
227
+  }
232 228
 }
233 229
\ No newline at end of file
... ...
@@ -12,9 +12,6 @@ subSampleData <- function(pdata, class, mu, sigma, nu, tau, pi, components) {
12 12
     yl.outer <- function(k, zx, zy){
13 13
         return( c(zx[k,] %o% zy[k,]) )
14 14
     }
15
-    yl.diag <- function(k, z){
16
-        return( c(diag(z[k,])) )
17
-    }
18 15
     
19 16
     zx <- unmap(class[,1], components = components)
20 17
     zy <- unmap(class[,2], components = components)
... ...
@@ -33,6 +30,19 @@ subSampleData <- function(pdata, class, mu, sigma, nu, tau, pi, components) {
33 30
                 z = array(results[[3]], dim=c(n,g*g))))
34 31
 } 
35 32
 
33
+# modified from package mclust
34
+unmap <- function(classification, components){
35
+    n <- length(classification)
36
+    # u <- sort(unique(classification)) # OG Code
37
+    u <- 0:(components - 1) # Max's potential fix
38
+    labs <- as.character(u)
39
+    k <- length(u)
40
+    z <- matrix(0, n, k)
41
+    for (j in 1:k) z[classification == u[j], j] <- 1
42
+    dimnames(z) <- list(NULL, labs)
43
+    return(z)
44
+}
45
+
36 46
 getNames <- function(x, y = NULL) {
37 47
     if(is.null(y) == FALSE) {
38 48
         namesMatrix <- NULL
... ...
@@ -61,18 +71,4 @@ getNames <- function(x, y = NULL) {
61 71
     vector_names <- apply(namesMatrix, 1, function(k) paste(k[1],"_",k[2],
62 72
                                                             sep = ""))
63 73
     return(vector_names)
64
-}
65
-
66
-checkInputs <- function(x, y, groups = NULL) {
67
-    issue = FALSE
68
-    if(is.null(groups) == FALSE  && unique(unique(groups) != c(1,2))) {
69
-        print("groups vector must consist of 1s and 2s corresponding to first
70
-             and second group.")
71
-        issue = TRUE
72
-    }
73
-    if(mode(x) != "S4" || (is.null(y) == FALSE && mode(y) != "S4")) {
74
-        print("data matrices x and/or y must be type ExpressionSet")
75
-        issue = TRUE
76
-    }
77
-    return(issue)
78 74
 }
79 75
\ No newline at end of file