git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@125266 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: OncoSimulR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Forward Genetic Simulation of Cancer Progression with Epistasis |
4 |
-Version: 2.5.7 |
|
5 |
-Date: 2016-12-15 |
|
4 |
+Version: 2.5.8 |
|
5 |
+Date: 2016-12-17 |
|
6 | 6 |
Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"), |
7 | 7 |
email = "rdiaz02@gmail.com"), |
8 | 8 |
person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com")) |
... | ... |
@@ -99,12 +99,49 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) { |
99 | 99 |
from_genotype_fitness <- function(x) { |
100 | 100 |
## Would break with output from allFitnessEffects and |
101 | 101 |
## output from allGenotypeAndMut |
102 |
+ |
|
103 |
+ ## For the very special and weird case of |
|
104 |
+ ## a matrix but only a single gene so with a 0 and 1 |
|
105 |
+ ## No, this is a silly and meaningless case. |
|
106 |
+ ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) { |
|
107 |
+ |
|
108 |
+ ## } else blabla: |
|
109 |
+ |
|
110 |
+ if(! (inherits(x, "matrix") || inherits(x, "data.frame")) ) |
|
111 |
+ stop("Input must inherit from matrix or data.frame.") |
|
112 |
+ |
|
113 |
+ ## if((ncol(x) > 2) && !(inherits(x, "matrix")) |
|
114 |
+ ## stop(paste0("Genotype fitness input either two-column data frame", |
|
115 |
+ ## " or a numeric matrix with > 2 columns.")) |
|
116 |
+ ## if( (ncol(x) > 2) && (nrow(x) == 1) ) |
|
117 |
+ ## stop(paste0("It looks like you have a matrix for a single genotype", |
|
118 |
+ ## " of a single gene. For this degenerate cases use", |
|
119 |
+ ## " a data frame specification.")) |
|
120 |
+ |
|
102 | 121 |
if(ncol(x) > 2) { |
122 |
+ if(inherits(x, "matrix")) { |
|
123 |
+ if(!is.numeric(x)) |
|
124 |
+ stop("A genotype fitness matrix/data.frame must be numeric.") |
|
125 |
+ } else if(inherits(x, "data.frame")) { |
|
126 |
+ if(!all(unlist(lapply(x, is.numeric)))) |
|
127 |
+ stop("A genotype fitness matrix/data.frame must be numeric.") |
|
128 |
+ } |
|
129 |
+ |
|
130 |
+ ## We are expecting here a matrix of 0/1 where columns are genes |
|
131 |
+ ## except for the last column, that is Fitness |
|
103 | 132 |
## Of course, can ONLY work with epistastis, NOT order |
104 | 133 |
return(genot_fitness_to_epistasis(x)) |
105 | 134 |
} else { |
135 |
+ if(!inherits(x, "data.frame")) |
|
136 |
+ stop("genotFitness: if two-column must be data frame") |
|
106 | 137 |
## Make sure no factors |
107 | 138 |
if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1]) |
139 |
+ ## Make sure no numbers |
|
140 |
+ if(any(is.numeric(x[, 1]))) |
|
141 |
+ stop(paste0("genotFitness: first column of data frame is numeric.", |
|
142 |
+ " Ambiguous and suggests possible error. If sure,", |
|
143 |
+ " enter that column as character")) |
|
144 |
+ |
|
108 | 145 |
omarker <- any(grepl(">", x[, 1], fixed = TRUE)) |
109 | 146 |
emarker <- any(grepl(",", x[, 1], fixed = TRUE)) |
110 | 147 |
nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE)) |
... | ... |
@@ -120,17 +157,21 @@ from_genotype_fitness <- function(x) { |
120 | 157 |
## involve epistasis and order. But they must have different |
121 | 158 |
## genes. Otherwise, it is not manageable. |
122 | 159 |
} |
123 |
- if(emarker) { |
|
124 |
- x <- x[, c(1, 2)] |
|
160 |
+ if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) { |
|
161 |
+ ## the second case above corresponds to passing just single letter genotypes |
|
162 |
+ ## as there is not a single marker |
|
163 |
+ x <- x[, c(1, 2), drop = FALSE] |
|
125 | 164 |
if(!all(colnames(x) == c("Genotype", "Fitness"))) { |
126 | 165 |
message("Column names of object not Genotype and Fitness.", |
127 | 166 |
" Renaming them assuming that is what you wanted") |
128 | 167 |
colnames(x) <- c("Genotype", "Fitness") |
129 | 168 |
} |
169 |
+ if((!omarker) && (!emarker) && (!nogoodepi)) { |
|
170 |
+ message("All single-gene genotypes as input to from_genotype_fitness") |
|
171 |
+ } |
|
130 | 172 |
## Yes, we need to do this to scale the fitness and put the "-" |
131 | 173 |
return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x))) |
132 | 174 |
} |
133 |
- |
|
134 | 175 |
} |
135 | 176 |
} |
136 | 177 |
|
... | ... |
@@ -161,7 +202,7 @@ genot_fitness_to_epistasis <- function(x) { |
161 | 202 |
## Why should I stop? |
162 | 203 |
if(any(f < 0)) |
163 | 204 |
message("Negative fitnesses. Watch out if you divide by the wildtype") |
164 |
- x <- x[, -ncol(x)] |
|
205 |
+ x <- x[, -ncol(x), drop = FALSE] |
|
165 | 206 |
wt <- which(rowSums(x) == 0) |
166 | 207 |
fwt <- 1 |
167 | 208 |
if(length(wt) == 1) |
... | ... |
@@ -209,6 +250,7 @@ allGenotypes_to_matrix <- function(x) { |
209 | 250 |
if(length(anywt) == 1) { |
210 | 251 |
fwt <- x[anywt, 2] |
211 | 252 |
x <- x[-anywt, ] |
253 |
+ ## Trivial case of passing just a WT? |
|
212 | 254 |
} else { |
213 | 255 |
fwt <- 1 |
214 | 256 |
} |
... | ... |
@@ -230,8 +272,8 @@ allGenotypes_to_matrix <- function(x) { |
230 | 272 |
m) |
231 | 273 |
## Ensure sorted |
232 | 274 |
m <- data.frame(m) |
233 |
- rs <- rowSums(m[, -ncol(m)]) |
|
234 |
- m <- m[order(rs), ] |
|
275 |
+ rs <- rowSums(m[, -ncol(m), drop = FALSE]) |
|
276 |
+ m <- m[order(rs), , drop = FALSE] |
|
235 | 277 |
## m <- m[do.call(order, as.list(cbind(rs, m[, -ncol(m)]))), ] |
236 | 278 |
return(m) |
237 | 279 |
} |
... | ... |
@@ -127,12 +127,14 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL, |
127 | 127 |
Genotypes can be specified in two ways: |
128 | 128 |
\itemize{ |
129 | 129 |
|
130 |
- \item As a matrix (or data frame) with g + 1 columns. Each of the |
|
131 |
- first g columns contains a 1 or a 0 indicating that the gene of that |
|
132 |
- column is mutated or not. Column g+ 1 contains the fitness |
|
133 |
- values. This is, for instance, the output you will get from |
|
130 |
+ \item As a matrix (or data frame) with g + 1 columns (where g > |
|
131 |
+ 1). Each of the first g columns contains a 1 or a 0 indicating that |
|
132 |
+ the gene of that column is mutated or not. Column g+ 1 contains the |
|
133 |
+ fitness values. This is, for instance, the output you will get from |
|
134 | 134 |
\code{\link{rfitness}}. If the matrix has all columns named, those |
135 |
- will be used for the names of the genes. |
|
135 |
+ will be used for the names of the genes. Of course, except for |
|
136 |
+ column or row names, all entries in this matrix or data frame must |
|
137 |
+ be numeric. |
|
136 | 138 |
|
137 | 139 |
\item As a two column data frame. The second column is fitness, and |
138 | 140 |
the first column are genotypes, given as a character vector. For |
... | ... |
@@ -1190,5 +1190,147 @@ test_that("not all genes named", { |
1190 | 1190 |
fixed = TRUE) |
1191 | 1191 |
}) |
1192 | 1192 |
|
1193 |
+ |
|
1194 |
+test_that("We can deal with single-gene genotypes and trivial cases" ,{ |
|
1195 |
+ |
|
1196 |
+ ## we get the message |
|
1197 |
+ expect_message(allFitnessEffects( |
|
1198 |
+ genotFitness = data.frame(g = c("A", "B"), |
|
1199 |
+ y = c(1, 2))), "All single-gene genotypes", |
|
1200 |
+ fixed = TRUE) |
|
1201 |
+ |
|
1202 |
+ |
|
1203 |
+ expect_true(identical( |
|
1204 |
+ data.frame(Genotype = c("WT", "A", "B", "A, B"), |
|
1205 |
+ Fitness = c(1.0, 1.0, 2.0, 1.0), |
|
1206 |
+ stringsAsFactors = FALSE), |
|
1207 |
+ as.data.frame(evalAllGenotypes( |
|
1208 |
+ allFitnessEffects(genotFitness = data.frame(g = c("A", "B"), |
|
1209 |
+ y = c(1, 2))), |
|
1210 |
+ addwt = TRUE)) |
|
1211 |
+ )) |
|
1212 |
+ |
|
1213 |
+ expect_true(identical( |
|
1214 |
+ data.frame(Genotype = c("WT", "A", "B", "A, B"), |
|
1215 |
+ Fitness = c(1.0, 1.5, 2.9, 1.0), |
|
1216 |
+ stringsAsFactors = FALSE), |
|
1217 |
+ as.data.frame(evalAllGenotypes( |
|
1218 |
+ allFitnessEffects(genotFitness = data.frame(g = c("A", "B"), |
|
1219 |
+ y = c(1.5, 2.9))), |
|
1220 |
+ addwt = TRUE)) |
|
1221 |
+ )) |
|
1222 |
+ |
|
1223 |
+ expect_true(identical( |
|
1224 |
+ data.frame(Genotype = c("WT", "A", "B", "E", "A, B", "A, E", "B, E", "A, B, E"), |
|
1225 |
+ Fitness = c(1.0, 1.3, 2.4, 3.2, rep(1.0, 4)), |
|
1226 |
+ stringsAsFactors = FALSE), |
|
1227 |
+ as.data.frame(evalAllGenotypes( |
|
1228 |
+ allFitnessEffects(genotFitness = data.frame(g = c("A", "B", "E"), |
|
1229 |
+ y = c(1.3, 2.4, 3.2))), |
|
1230 |
+ addwt = TRUE)) |
|
1231 |
+ )) |
|
1232 |
+ |
|
1233 |
+ |
|
1234 |
+ expect_true(identical( |
|
1235 |
+ data.frame(Genotype = c("WT", "A"), |
|
1236 |
+ Fitness = c(1.0, 1.0), |
|
1237 |
+ stringsAsFactors = FALSE), |
|
1238 |
+ as.data.frame(evalAllGenotypes( |
|
1239 |
+ allFitnessEffects(genotFitness = data.frame(g = c("A"), |
|
1240 |
+ y = c(1))), |
|
1241 |
+ addwt = TRUE)) |
|
1242 |
+ )) |
|
1243 |
+ |
|
1244 |
+ expect_true(identical( |
|
1245 |
+ data.frame(Genotype = c("WT", "A"), |
|
1246 |
+ Fitness = c(1.0, 0.6), |
|
1247 |
+ stringsAsFactors = FALSE), |
|
1248 |
+ as.data.frame(evalAllGenotypes( |
|
1249 |
+ allFitnessEffects(genotFitness = data.frame(g = c("A"), |
|
1250 |
+ y = c(0.6))), |
|
1251 |
+ addwt = TRUE)) |
|
1252 |
+ )) |
|
1253 |
+ |
|
1254 |
+ expect_true(identical( |
|
1255 |
+ data.frame(Genotype = c("WT", "A", "D", "F", "A, D", "A, F", "D, F", "A, D, F"), |
|
1256 |
+ Fitness = c(rep(1, 7), 1.7), |
|
1257 |
+ stringsAsFactors = FALSE), |
|
1258 |
+ as.data.frame(evalAllGenotypes( |
|
1259 |
+ allFitnessEffects(genotFitness = data.frame(g = c("A, D, F"), |
|
1260 |
+ y = c(1.7))), |
|
1261 |
+ addwt = TRUE)) |
|
1262 |
+ )) |
|
1263 |
+ |
|
1264 |
+ |
|
1265 |
+ m <- rbind(c(1, 0, 1.2), |
|
1266 |
+ c(0, 1, 2.4)) |
|
1267 |
+ |
|
1268 |
+ expect_true(identical( |
|
1269 |
+ data.frame(Genotype = c("WT", "A", "B", "A, B"), |
|
1270 |
+ Fitness = c(1.0, 1.2, 2.4, 1.0), |
|
1271 |
+ stringsAsFactors = FALSE), |
|
1272 |
+ as.data.frame(evalAllGenotypes( |
|
1273 |
+ allFitnessEffects(genotFitness = m), |
|
1274 |
+ addwt = TRUE)) |
|
1275 |
+ )) |
|
1276 |
+ |
|
1277 |
+ m2 <- rbind(c(1, 0, 1.2), |
|
1278 |
+ c(0, 1, 2.4)) |
|
1279 |
+ colnames(m2) <- c("U", "M", "Fitness") |
|
1280 |
+ expect_true(identical( |
|
1281 |
+ data.frame(Genotype = c("WT", "M", "U", "M, U"), |
|
1282 |
+ Fitness = c(1.0, 2.4, 1.2, 1.0), |
|
1283 |
+ stringsAsFactors = FALSE), |
|
1284 |
+ as.data.frame(evalAllGenotypes( |
|
1285 |
+ allFitnessEffects(genotFitness = m2), |
|
1286 |
+ addwt = TRUE)) |
|
1287 |
+ )) |
|
1288 |
+ |
|
1289 |
+ m2df <- data.frame(rbind(c(1, 0, 1.2), |
|
1290 |
+ c(0, 1, 2.4))) |
|
1291 |
+ colnames(m2df) <- c("U", "M", "Fitness") |
|
1292 |
+ expect_true(identical( |
|
1293 |
+ data.frame(Genotype = c("WT", "M", "U", "M, U"), |
|
1294 |
+ Fitness = c(1.0, 2.4, 1.2, 1.0), |
|
1295 |
+ stringsAsFactors = FALSE), |
|
1296 |
+ as.data.frame(evalAllGenotypes( |
|
1297 |
+ allFitnessEffects(genotFitness = m2df), |
|
1298 |
+ addwt = TRUE)) |
|
1299 |
+ )) |
|
1300 |
+ |
|
1301 |
+ m3 <- matrix(c(1, 1.2), ncol = 2) |
|
1302 |
+ colnames(m3) <- c("U", "Fitness") |
|
1303 |
+ expect_error( |
|
1304 |
+ allFitnessEffects(genotFitness = m3), |
|
1305 |
+ "genotFitness: if two-column must be data frame", |
|
1306 |
+ fixed = TRUE) |
|
1307 |
+ |
|
1308 |
+ ## Stupid |
|
1309 |
+ m5 <- data.frame(x = 1, y = 2, stringsAsFactors= FALSE) |
|
1310 |
+ expect_error(evalAllGenotypes(allFitnessEffects(genotFitness = m5)), |
|
1311 |
+ "genotFitness: first column of data frame is numeric.", |
|
1312 |
+ fixed = TRUE) |
|
1313 |
+ |
|
1314 |
+ m6 <- matrix(letters[1:4], ncol = 4) |
|
1315 |
+ expect_error(allFitnessEffects(genotFitness = m6), |
|
1316 |
+ "A genotype fitness matrix/data.frame must be numeric", |
|
1317 |
+ fixed = TRUE) |
|
1318 |
+ |
|
1319 |
+ m7 <- as.data.frame(matrix(letters[1:4], ncol = 4)) |
|
1320 |
+ expect_error(allFitnessEffects(genotFitness = m7), |
|
1321 |
+ "A genotype fitness matrix/data.frame must be numeric", |
|
1322 |
+ fixed = TRUE) |
|
1323 |
+ |
|
1324 |
+ m8 <- 1:9 |
|
1325 |
+ expect_error(allFitnessEffects(genotFitness = m8), |
|
1326 |
+ "Input must inherit from matrix or data.frame", |
|
1327 |
+ fixed = TRUE) |
|
1328 |
+ |
|
1329 |
+ |
|
1330 |
+ |
|
1331 |
+ |
|
1332 |
+}) |
|
1333 |
+ |
|
1334 |
+ |
|
1193 | 1335 |
cat(paste("\n Ending all-fitness at", date())) |
1194 | 1336 |
|