Browse code

2.5.8; handle trivial cases in genotFitness

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@125266 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 17/12/2016 12:59:03
Showing5 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.5.7
5
-Date: 2016-12-15
4
+Version: 2.5.8
5
+Date: 2016-12-17
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -99,12 +99,49 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
99 99
 from_genotype_fitness <- function(x) {
100 100
     ## Would break with output from allFitnessEffects and
101 101
     ## output from allGenotypeAndMut
102
+    
103
+    ## For the very special and weird case of
104
+    ## a matrix but only a single gene so with a 0 and 1
105
+    ## No, this is a silly and meaningless case.
106
+    ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
107
+    
108
+    ## } else  blabla: 
109
+    
110
+    if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
111
+        stop("Input must inherit from matrix or data.frame.")
112
+    
113
+    ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
114
+    ##     stop(paste0("Genotype fitness input either two-column data frame",
115
+    ##          " or a numeric matrix with > 2 columns."))
116
+    ## if( (ncol(x) > 2) && (nrow(x) == 1) )
117
+    ##     stop(paste0("It looks like you have a matrix for a single genotype",
118
+    ##                 " of a single gene. For this degenerate cases use",
119
+    ##                 " a data frame specification."))
120
+    
102 121
     if(ncol(x) > 2) {
122
+        if(inherits(x, "matrix")) {
123
+            if(!is.numeric(x))
124
+                stop("A genotype fitness matrix/data.frame must be numeric.")
125
+        } else if(inherits(x, "data.frame")) {
126
+            if(!all(unlist(lapply(x, is.numeric))))
127
+                stop("A genotype fitness matrix/data.frame must be numeric.")
128
+        }
129
+        
130
+        ## We are expecting here a matrix of 0/1 where columns are genes
131
+        ## except for the last column, that is Fitness
103 132
         ## Of course, can ONLY work with epistastis, NOT order
104 133
         return(genot_fitness_to_epistasis(x))
105 134
     } else {
135
+        if(!inherits(x, "data.frame"))
136
+            stop("genotFitness: if two-column must be data frame")
106 137
         ## Make sure no factors
107 138
         if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
139
+        ## Make sure no numbers
140
+        if(any(is.numeric(x[, 1])))
141
+            stop(paste0("genotFitness: first column of data frame is numeric.",
142
+                        " Ambiguous and suggests possible error. If sure,",
143
+                        " enter that column as character"))
144
+        
108 145
         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
109 146
         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
110 147
         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
... ...
@@ -120,17 +157,21 @@ from_genotype_fitness <- function(x) {
120 157
             ## involve epistasis and order. But they must have different
121 158
             ## genes. Otherwise, it is not manageable.
122 159
         }
123
-        if(emarker) {
124
-            x <- x[, c(1, 2)]
160
+        if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
161
+            ## the second case above corresponds to passing just single letter genotypes
162
+            ## as there is not a single marker
163
+            x <- x[, c(1, 2), drop = FALSE]
125 164
             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
126 165
                 message("Column names of object not Genotype and Fitness.",
127 166
                         " Renaming them assuming that is what you wanted")
128 167
                 colnames(x) <- c("Genotype", "Fitness")
129 168
             }
169
+            if((!omarker) && (!emarker) && (!nogoodepi)) {
170
+                message("All single-gene genotypes as input to from_genotype_fitness")
171
+            }
130 172
             ## Yes, we need to do this to  scale the fitness and put the "-"
131 173
             return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
132 174
         }
133
-        
134 175
     }
135 176
 }
136 177
 
... ...
@@ -161,7 +202,7 @@ genot_fitness_to_epistasis <- function(x) {
161 202
     ## Why should I stop?
162 203
     if(any(f < 0))
163 204
         message("Negative fitnesses. Watch out if you divide by the wildtype")
164
-    x <- x[, -ncol(x)]
205
+    x <- x[, -ncol(x), drop = FALSE]
165 206
     wt <- which(rowSums(x) == 0)
166 207
     fwt <- 1
167 208
     if(length(wt) == 1)
... ...
@@ -209,6 +250,7 @@ allGenotypes_to_matrix <- function(x) {
209 250
     if(length(anywt) == 1) {
210 251
         fwt <- x[anywt, 2]
211 252
         x <- x[-anywt, ]
253
+        ## Trivial case of passing just a WT?
212 254
     } else {
213 255
         fwt <- 1
214 256
     }
... ...
@@ -230,8 +272,8 @@ allGenotypes_to_matrix <- function(x) {
230 272
                m)
231 273
     ## Ensure sorted
232 274
     m <- data.frame(m)
233
-    rs <- rowSums(m[, -ncol(m)])
234
-    m <- m[order(rs), ]
275
+    rs <- rowSums(m[, -ncol(m), drop = FALSE])
276
+    m <- m[order(rs), , drop = FALSE]
235 277
     ## m <- m[do.call(order, as.list(cbind(rs, m[, -ncol(m)]))), ]
236 278
     return(m)
237 279
 }
... ...
@@ -1,3 +1,6 @@
1
+Changes in version 2.5.8 (2016-12-17):
2
+	- Handle trivial cases in genotFitness
3
+
1 4
 Changes in version 2.5.7 (2016-12-15):
2 5
 	- Clarified McFarland parameterization.
3 6
 
... ...
@@ -127,12 +127,14 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
127 127
   Genotypes can be specified in two ways:
128 128
   \itemize{
129 129
     
130
-    \item As a matrix (or data frame) with g + 1 columns. Each of the
131
-    first g columns contains a 1 or a 0 indicating that the gene of that
132
-    column is mutated or not. Column g+ 1 contains the fitness
133
-    values. This is, for instance, the output you will get from
130
+    \item As a matrix (or data frame) with g + 1 columns (where g >
131
+    1). Each of the first g columns contains a 1 or a 0 indicating that
132
+    the gene of that column is mutated or not. Column g+ 1 contains the
133
+    fitness values. This is, for instance, the output you will get from
134 134
     \code{\link{rfitness}}. If the matrix has all columns named, those
135
-    will be used for the names of the genes.
135
+    will be used for the names of the genes. Of course, except for
136
+    column or row names, all entries in this matrix or data frame must
137
+    be numeric.
136 138
     
137 139
     \item As a two column data frame. The second column is fitness, and
138 140
     the first column are genotypes, given as a character vector. For
... ...
@@ -1190,5 +1190,147 @@ test_that("not all genes named", {
1190 1190
                  fixed = TRUE)
1191 1191
 })
1192 1192
 
1193
+
1194
+test_that("We can deal with single-gene genotypes and trivial cases" ,{
1195
+
1196
+    ## we get the message
1197
+    expect_message(allFitnessEffects(
1198
+        genotFitness = data.frame(g = c("A", "B"),
1199
+                                  y = c(1, 2))), "All single-gene genotypes",
1200
+        fixed = TRUE)
1201
+
1202
+    
1203
+    expect_true(identical(
1204
+        data.frame(Genotype = c("WT", "A", "B", "A, B"),
1205
+                   Fitness = c(1.0, 1.0, 2.0, 1.0),
1206
+                   stringsAsFactors = FALSE),
1207
+        as.data.frame(evalAllGenotypes(
1208
+            allFitnessEffects(genotFitness = data.frame(g = c("A", "B"),
1209
+                                                        y = c(1, 2))),
1210
+            addwt = TRUE))
1211
+    ))
1212
+
1213
+    expect_true(identical(
1214
+        data.frame(Genotype = c("WT", "A", "B", "A, B"),
1215
+                   Fitness = c(1.0, 1.5, 2.9, 1.0),
1216
+                   stringsAsFactors = FALSE),
1217
+        as.data.frame(evalAllGenotypes(
1218
+            allFitnessEffects(genotFitness = data.frame(g = c("A", "B"),
1219
+                                                        y = c(1.5, 2.9))),
1220
+            addwt = TRUE))
1221
+    ))
1222
+
1223
+    expect_true(identical(
1224
+        data.frame(Genotype = c("WT", "A", "B", "E", "A, B", "A, E", "B, E", "A, B, E"),
1225
+                   Fitness = c(1.0, 1.3, 2.4, 3.2, rep(1.0, 4)),
1226
+                   stringsAsFactors = FALSE),
1227
+        as.data.frame(evalAllGenotypes(
1228
+            allFitnessEffects(genotFitness = data.frame(g = c("A", "B", "E"),
1229
+                                                        y = c(1.3, 2.4, 3.2))),
1230
+            addwt = TRUE))
1231
+    ))
1232
+
1233
+
1234
+    expect_true(identical(
1235
+        data.frame(Genotype = c("WT", "A"),
1236
+                   Fitness = c(1.0, 1.0),
1237
+                   stringsAsFactors = FALSE),
1238
+        as.data.frame(evalAllGenotypes(
1239
+            allFitnessEffects(genotFitness = data.frame(g = c("A"),
1240
+                                                        y = c(1))),
1241
+            addwt = TRUE))
1242
+    ))
1243
+    
1244
+    expect_true(identical(
1245
+        data.frame(Genotype = c("WT", "A"),
1246
+                   Fitness = c(1.0, 0.6),
1247
+                   stringsAsFactors = FALSE),
1248
+        as.data.frame(evalAllGenotypes(
1249
+            allFitnessEffects(genotFitness = data.frame(g = c("A"),
1250
+                                                        y = c(0.6))),
1251
+            addwt = TRUE))
1252
+    ))
1253
+
1254
+    expect_true(identical(
1255
+        data.frame(Genotype = c("WT", "A", "D", "F", "A, D", "A, F", "D, F", "A, D, F"),
1256
+                   Fitness = c(rep(1, 7), 1.7),
1257
+                   stringsAsFactors = FALSE),
1258
+        as.data.frame(evalAllGenotypes(
1259
+            allFitnessEffects(genotFitness = data.frame(g = c("A, D, F"),
1260
+                                                        y = c(1.7))),
1261
+            addwt = TRUE))
1262
+    ))    
1263
+
1264
+
1265
+    m <- rbind(c(1, 0, 1.2),
1266
+               c(0, 1, 2.4))
1267
+
1268
+    expect_true(identical(
1269
+        data.frame(Genotype = c("WT", "A", "B", "A, B"),
1270
+                   Fitness = c(1.0, 1.2, 2.4, 1.0),
1271
+                   stringsAsFactors = FALSE),
1272
+        as.data.frame(evalAllGenotypes(
1273
+            allFitnessEffects(genotFitness = m),
1274
+            addwt = TRUE))
1275
+    ))    
1276
+
1277
+    m2 <- rbind(c(1, 0, 1.2),
1278
+               c(0, 1, 2.4))
1279
+    colnames(m2) <- c("U", "M", "Fitness")
1280
+    expect_true(identical(
1281
+        data.frame(Genotype = c("WT", "M", "U", "M, U"),
1282
+                   Fitness = c(1.0, 2.4, 1.2, 1.0),
1283
+                   stringsAsFactors = FALSE),
1284
+        as.data.frame(evalAllGenotypes(
1285
+            allFitnessEffects(genotFitness = m2),
1286
+            addwt = TRUE))
1287
+    ))
1288
+    
1289
+    m2df <- data.frame(rbind(c(1, 0, 1.2),
1290
+               c(0, 1, 2.4)))
1291
+    colnames(m2df) <- c("U", "M", "Fitness")
1292
+    expect_true(identical(
1293
+        data.frame(Genotype = c("WT", "M", "U", "M, U"),
1294
+                   Fitness = c(1.0, 2.4, 1.2, 1.0),
1295
+                   stringsAsFactors = FALSE),
1296
+        as.data.frame(evalAllGenotypes(
1297
+            allFitnessEffects(genotFitness = m2df),
1298
+            addwt = TRUE))
1299
+    ))   
1300
+
1301
+    m3 <- matrix(c(1, 1.2), ncol = 2)
1302
+    colnames(m3) <- c("U", "Fitness")
1303
+    expect_error(
1304
+        allFitnessEffects(genotFitness = m3),
1305
+        "genotFitness: if two-column must be data frame",
1306
+        fixed = TRUE)
1307
+
1308
+    ## Stupid
1309
+    m5 <- data.frame(x = 1, y = 2, stringsAsFactors= FALSE)
1310
+    expect_error(evalAllGenotypes(allFitnessEffects(genotFitness = m5)),
1311
+                 "genotFitness: first column of data frame is numeric.",
1312
+                 fixed = TRUE)
1313
+
1314
+    m6 <- matrix(letters[1:4], ncol = 4)
1315
+    expect_error(allFitnessEffects(genotFitness = m6),
1316
+                 "A genotype fitness matrix/data.frame must be numeric",
1317
+                 fixed = TRUE)
1318
+
1319
+    m7 <- as.data.frame(matrix(letters[1:4], ncol = 4))
1320
+    expect_error(allFitnessEffects(genotFitness = m7),
1321
+                 "A genotype fitness matrix/data.frame must be numeric",
1322
+                 fixed = TRUE)
1323
+
1324
+    m8 <- 1:9
1325
+    expect_error(allFitnessEffects(genotFitness = m8),
1326
+                 "Input must inherit from matrix or data.frame",
1327
+                 fixed = TRUE)
1328
+
1329
+
1330
+    
1331
+
1332
+})
1333
+
1334
+
1193 1335
 cat(paste("\n Ending all-fitness at", date()))
1194 1336