#### added the Rtreemix package and removed the bim package Herve Pages authored on 16/11/2007 21:25:16
 1 1 `new file mode 100644` ... ... `@@ -0,0 +1,468 @@` 1 `+################################################################################` 2 `+## FUNCTIONS FOR STABILITY ANALYSIS OF THE ONCOGENETIC TREES MIXTURE MODEL ##` 3 `+################################################################################` 4 `+## Function for calculating the P value of a given similarity measure.` 5 `+## What is the probability for obtaining the same or smaller` 6 `+## value in a random vector of similarity values.` 7 `+Pval.dist <- function(dist.val, random.vals) {` 8 `+ return((sum(random.vals <= dist.val) + 1) /(length(random.vals) + 1))` 9 `+}` 10 `+######################################################################` 11 `+## Function for calculating the Kullback-Leibler divergence between` 12 `+## two discrete probability distributions p and q.` 13 `+kullback.leibler <- function(p, q) {` 14 `+ if(length(p) != length(q))` 15 `+ stop("Error: The distribution vectors have different lengths!") ` 16 `+ return(sum(p * log(p / q)))` 17 `+}` 18 `+######################################################################` 19 `+## Function for calculating the L1 distance between` 20 `+## two discrete probability distributions p and q.` 21 `+L1.dist <- function(p, q) {` 22 `+ if(length(p) != length(q))` 23 `+ stop("Error: The distribution vectors have different lengths!")` 24 `+` 25 `+ return(sum(abs(p - q)))` 26 `+}` 27 `+######################################################################` 28 `+## Function for calculating the cosine distance between` 29 `+## two discrete probability distributions p and q.` 30 `+cosin.dist <- function(p, q) {` 31 `+ if(length(p) != length(q))` 32 `+ stop("Error: The distribution vectors have different lengths!")` 33 `+` 34 `+ return(1 - (sum (p * q) / (L2.norm(p) * L2.norm(q)) ))` 35 `+}` 36 `+######################################################################` 37 `+## Function for calculating the L2 norm of a given vector x.` 38 `+L2.norm <- function(x) {` 39 `+ return(sqrt(sum(x * x)))` 40 `+}` 41 `+######################################################################` 42 `+## Function for calculating the Euclidian distance between` 43 `+## two vectors x and y.` 44 `+euclidian.dist <- function(x, y) {` 45 `+ if(length(x) != length(y))` 46 `+ stop("Error: The vectors have different lengths!")` 47 `+` 48 `+ return(sqrt(drop((x - y) %*% (x - y))))` 49 `+}` 50 `+######################################################################` 51 `+## Function for calculating the rank-correlation distance between` 52 `+## two vectors x and y.` 53 `+rank.cor.dist <- function(x, y) {` 54 `+ if(length(x) != length(y))` 55 `+ stop("Error: The vectors have different length!")` 56 `+` 57 `+ return(1 - rcorr(x, y, type = "spearman")[][1, 2])` 58 `+}` 59 `+######################################################################` 60 `+## Function that implements a similarity measure for comparing the topologies` 61 `+## of the trees of two mixture models mixture1 and mixture2.` 62 `+## It returns a value that uses the number of different edges for caracterizing` 63 `+## the difference between the models.` 64 `+## For the comparisson it is necessary that the two models have the same` 65 `+## number of tree components build on the same number of genetic events.` 66 `+## It is assumed that the mixtures have at least two components (the first one is the noise).` 67 `+comp.models <- function(mixture1, mixture2) {` 68 `+ if((class(mixture1) != "RtreemixModel") || (class(mixture2) != "RtreemixModel"))` 69 `+ stop("The specified mixture models should be of type RtreemixModel!")` 70 `+ ` 71 `+ if(numTrees(mixture1) != numTrees(mixture2))` 72 `+ stop("The two specified mixture models don't have the same number of trees!")` 73 `+ ` 74 `+ if(eventsNum(mixture1) != eventsNum(mixture2))` 75 `+ stop("The two specified mixture models don't have the same number of events!")` 76 `+ no.trees <- numTrees(mixture1)` 77 `+ if (no.trees == 1)` 78 `+ stop("The mixture models should have at least two tree components, where the first one is the noise component!")` 79 `+ ` 80 `+ no.events <- eventsNum(mixture1)` 81 `+ ` 82 `+ true.order <- list() ## list specifying the edges in the components of mixture1` 83 `+ fit.order <- list() ## list specifying the edges in the components of mixture2` 84 `+` 85 `+ ## Build the vectors that characterize the edges of the components of the mixture models.` 86 `+ ## The noise components are ignored since they always have the same (star) topology.` 87 `+ for(k in 2:no.trees) {` 88 `+ true.vec <- character(0)` 89 `+ fit.vec <- character(0)` 90 `+ for(l in 2:no.events) {` 91 `+ ch <- names(which(sapply(edges(getTree(mixture1, k)), function(x, el) {el %in% x}, nodes(getTree(mixture1, k))[l])))` 92 `+ true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch)) ` 93 `+ ch <- names(which(sapply(edges(getTree(mixture2, k)), function(x, el) {el %in% x}, nodes(getTree(mixture2, k))[l])))` 94 `+ fit.vec <- c(fit.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch)) ` 95 `+ }` 96 `+ names(true.vec) <- nodes(getTree(mixture1, k))[-1]` 97 `+ true.order <- c(true.order, list(true.vec))` 98 `+ names(fit.vec) <- nodes(getTree(mixture2, k))[-1]` 99 `+ fit.order <- c(fit.order, list(fit.vec))` 100 `+ }` 101 `+ ## Build the comparisson matrix.` 102 `+ ## The (i, j) element is the number of distinct edges of the` 103 `+ ## (i + 1)-th and (j + 1)-th component of the models` 104 `+ ## mixture1 and mixture2 respectively. ` 105 `+ comp.mat <- matrix(nrow = no.trees - 1, ncol = no.trees - 1)` 106 `+ for(i in 1:(no.trees - 1))` 107 `+ for(j in 1:(no.trees - 1)) {` 108 `+ comp.mat[i, j] <- sum(true.order[[i]] != fit.order[[j]])` 109 `+ }` 110 `+ ## Form the similarity pairs and calculate the similarity of the models` 111 `+ ## as a sum of the number of different edges of the trees in the similarity pairs:` 112 `+ if(no.trees > 2) {` 113 `+ rc <- which(comp.mat == min(comp.mat), arr.ind = TRUE)` 114 `+ diff.sum <- min(comp.mat)` 115 `+ row.index <- c(rc[1, 1]) ## get the row index of the minimum` 116 `+ col.index <- c(rc[1, 2]) ## get the column index of the minimum` 117 `+` 118 `+ for(m in 1:(nrow(comp.mat) - 1)) {` 119 `+ rc <- which(comp.mat == min(comp.mat[-(row.index), -(col.index)]), arr.ind = TRUE)` 120 `+ diff.sum <- diff.sum + min(comp.mat[-(row.index), -(col.index)])` 121 `+ row.index <- c(row.index, rc[1, 1])` 122 `+ col.index <- c(col.index, rc[1, 2]) ` 123 `+ }` 124 `+ } else {` 125 `+ diff.sum <- comp.mat[1, 1]` 126 `+ }` 127 `+ ## Return a result between 0 and 1. ` 128 `+ return(diff.sum/((no.trees - 1) * (no.events - 1))) ` 129 `+` 130 `+}` 131 `+######################################################################` 132 `+## Function that assignes to each node the level at which that node is` 133 `+## in a specific tree (tree.num) of the trees mixture model.` 134 `+## The start.val is the maximum depth of the tree with which the tree` 135 `+## specified with tree.num will be compared.` 136 `+get.tree.levels <- function(mixture, tree.num, start.val) {` 137 `+ root <- nodes(getTree(mixture, tree.num))` 138 `+ levels <- acc(getTree(mixture, tree.num), root)` 139 `+ ## vec <- rep(max(levels[]), eventsNum(mixture) - 1)` 140 `+ vec <- rep(start.val, eventsNum(mixture) - 1)` 141 `+ names(vec) <- nodes(getTree(mixture, tree.num))[-1]` 142 `+ ` 143 `+ vec[names(levels[])] <- levels[]` 144 `+` 145 `+ return(vec)` 146 `+}` 147 `+######################################################################` 148 `+## Function that implements a similarity measure for comparing the topologies` 149 `+## of the trees of two mixture models mixture1 and mixture2.` 150 `+## It returns a value that uses the number of different edges and the depth at` 151 `+## which the events are, for caracterizing the difference between the models.` 152 `+## This measure is more application specific, since the depth at which the` 153 `+## events are in a tree is very important for disease progression.` 154 `+## For the comparisson it is necessary that the two models have the same` 155 `+## number of tree components build on the same number of genetic events.` 156 `+## It is assumed that the mixtures have at least two components (the first one is the noise).` 157 `+comp.models.levels <- function(mixture1, mixture2) {` 158 `+ if((class(mixture1) != "RtreemixModel") || (class(mixture2) != "RtreemixModel"))` 159 `+ stop("The specified mixture models should be of type RtreemixModel!")` 160 `+ ` 161 `+ if(numTrees(mixture1) != numTrees(mixture2))` 162 `+ stop("The two specified mixture models don't have the same number of trees!")` 163 `+` 164 `+ if(eventsNum(mixture1) != eventsNum(mixture2))` 165 `+ stop("The two specified mixture models don't have the same number of events!")` 166 `+ no.trees <- numTrees(mixture1)` 167 `+ if (no.trees == 1)` 168 `+ stop("The mixture models should have at least two tree components, where the first one is the noise component!") ` 169 `+ ` 170 `+ no.events <- eventsNum(mixture1)` 171 `+ ` 172 `+ true.order <- list() ## list specifying the edges in the components of mixture1` 173 `+ fit.order <- list() ## list specifying the edges in the components of mixture2` 174 `+` 175 `+ start.vals1 <- vector(mode = "numeric", length = (no.trees - 1)) ## the depth of the tree components in mixture1 (without the noise component) ` 176 `+ start.vals2 <- vector(mode = "numeric", length = (no.trees - 1)) ## the depth of the tree components in mixture2 (without the noise component)` 177 `+` 178 `+ ## Build the vectors that characterize the edges of the components of the mixture models.` 179 `+ ## The noise components are ignored since they always have the same (star) topology. ` 180 `+ for(k in 2:no.trees) {` 181 `+ true.vec <- character(0)` 182 `+ fit.vec <- character(0)` 183 `+ for(l in 2:no.events) {` 184 `+ ch <- names(which(sapply(edges(getTree(mixture1, k)), function(x, el) {el %in% x}, nodes(getTree(mixture1, k))[l])))` 185 `+ true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch)) ` 186 `+ ch <- names(which(sapply(edges(getTree(mixture2, k)), function(x, el) {el %in% x}, nodes(getTree(mixture2, k))[l])))` 187 `+ fit.vec <- c(fit.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch)) ` 188 `+ }` 189 `+ names(true.vec) <- nodes(getTree(mixture1, k))[-1]` 190 `+ true.order <- c(true.order, list(true.vec))` 191 `+ names(fit.vec) <- nodes(getTree(mixture2, k))[-1]` 192 `+ fit.order <- c(fit.order, list(fit.vec))` 193 `+` 194 `+ ## Assign the (maximal depths + 1) for each tree in the mixtures` 195 `+ start.vals1[k - 1] <- max(acc(getTree(mixture1, k), nodes(getTree(mixture1, k)))[] + 1, na.remove = TRUE)` 196 `+ start.vals2[k - 1] <- max(acc(getTree(mixture2, k), nodes(getTree(mixture2, k)))[] + 1, na.remove = TRUE) ` 197 `+ }` 198 `+ ## Build the comparisson matrix.` 199 `+ ## The (i, j) element is the number of distinct edges of the` 200 `+ ## (i + 1)-th and (j + 1)-th component of the models` 201 `+ ## mixture1 and mixture2 respectively. ` 202 `+ comp.mat <- matrix(nrow = no.trees - 1, ncol = no.trees - 1)` 203 `+ for(i in 1:(no.trees - 1))` 204 `+ for(j in 1:(no.trees - 1)) {` 205 `+ comp.mat[i, j] <- sum(true.order[[i]] != fit.order[[j]])` 206 `+ }` 207 `+ ## Form the similarity pairs and calculate the similarity of the models` 208 `+ ## as a sum of the number of different edges of the trees in the similarity pairs` 209 `+ ## and their corresponding L1-distance of the levels of the events:` 210 `+ if(no.trees > 2) {` 211 `+ rc <- which(comp.mat == min(comp.mat), arr.ind = TRUE)` 212 `+ diff.sum <- min(comp.mat) +` 213 `+ L1.dist(get.tree.levels(mixture1, rc[1, 1] + 1, start.vals2[rc[1, 2]]),` 214 `+ get.tree.levels(mixture2, rc[1, 2] + 1, start.vals1[rc[1, 1]]))` 215 `+ row.index <- c(rc[1, 1]) ## get the row index of the minimum` 216 `+ col.index <- c(rc[1, 2]) ## get the column index of the minimum` 217 `+` 218 `+ for(m in 1:(nrow(comp.mat) - 1)) {` 219 `+ rc <- which(comp.mat == min(comp.mat[-(row.index), -(col.index)]), arr.ind = TRUE)` 220 `+ diff.sum <- diff.sum + min(comp.mat[-(row.index), -(col.index)]) +` 221 `+ L1.dist(get.tree.levels(mixture1, rc[1, 1] + 1, start.vals2[rc[1, 2]]),` 222 `+ get.tree.levels(mixture2, rc[1, 2] + 1, start.vals1[rc[1, 1]]))` 223 `+ row.index <- c(row.index, rc[1, 1])` 224 `+ col.index <- c(col.index, rc[1, 2]) ` 225 `+ }` 226 `+ } else {` 227 `+ if(no.trees == 2) {` 228 `+ diff.sum <- comp.mat[1, 1] +` 229 `+ L1.dist(get.tree.levels(mixture1, 2, start.vals2),` 230 `+ get.tree.levels(mixture2, 2, start.vals1))` 231 `+ } else {` 232 `+ stop("The specified mixture models must have at least two tree components, where the first one is the noise component!")` 233 `+ }` 234 `+ }` 235 `+ ` 236 `+ return(diff.sum) ` 237 `+` 238 `+}` 239 `+######################################################################` 240 `+## Function that implements a similarity measure for comparing the topologies` 241 `+## of the nontrivial tree components of a specified mixture model.` 242 `+## It returns a value that uses the number of different edges for caracterizing` 243 `+## the difference of the trees in the model.` 244 `+## For the comparisson it is necessary that the model has at least two` 245 `+## nontrivial components.` 246 `+comp.trees <- function(model) {` 247 `+ no.trees <- numTrees(model)` 248 `+ if(no.trees <= 2)` 249 `+ stop("The specified mixture model should have at least two nontrivial tree components.")` 250 `+ ` 251 `+ no.events <- eventsNum(model)` 252 `+ true.order <- list() ## list specifying the edges in the nontrivial components of the model ` 253 `+ ## Build the vectors that characterize the` 254 `+ ## edges of the nontrivial components of the mixture model.` 255 `+ for(k in 2:no.trees) {` 256 `+ true.vec <- character(0)` 257 `+ for(l in 2:no.events) {` 258 `+ ch <- names(which(sapply(edges(getTree(model, k)), function(x, el) {el %in% x}, nodes(getTree(model, k))[l])))` 259 `+ true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch)) ` 260 `+ }` 261 `+ names(true.vec) <- nodes(getTree(model, k))[-1]` 262 `+ true.order <- c(true.order, list(true.vec)) ` 263 `+ }` 264 `+ ## Calculate the similarity of the nontrivial components of the model ` 265 `+ ## as a sum of the number of different edges of all combinations of` 266 `+ ## two different nontrivial trees:` 267 `+ diff.sum <- 0` 268 `+ for(k1 in 2:(no.trees - 1)) ` 269 `+ for(k2 in (k1 + 1):no.trees) ` 270 `+ diff.sum <- diff.sum + sum(true.order[[k1 - 1]] != true.order[[k2 - 1]])` 271 `+ ` 272 `+ ## Return a result between 0 and 1.` 273 `+ return(diff.sum / (choose((no.trees - 1), 2) * (no.events - 1)))` 274 `+}` 275 `+######################################################################` 276 `+## Function that implements a similarity measure for comparing the topologies` 277 `+## of the nontrivial tree components of a specified mixture model.` 278 `+## It returns a value that uses the number of different edges and the depth at` 279 `+## which the events are, for caracterizing the difference of the trees in the model.` 280 `+## For the comparisson it is necessary that the model has at least two` 281 `+## nontrivial components.` 282 `+comp.trees.levels <- function(model) {` 283 `+ no.trees <- numTrees(model)` 284 `+ if(no.trees <= 2)` 285 `+ stop("The specified mixture model should have at least two nontrivial tree components.")` 286 `+ ` 287 `+ no.events <- eventsNum(model)` 288 `+ start.vals <- vector(mode = "numeric", length = (no.trees - 1)) ## the depth of the nontrivial tree components in the model ` 289 `+ true.order <- list() ## list specifying the edges in the nontrivial components of the model ` 290 `+ ` 291 `+ ## Build the vectors that characterize the` 292 `+ ## edges of the nontrivial components of the mixture model. ` 293 `+ for(k in 2:no.trees) {` 294 `+ true.vec <- character(0)` 295 `+ for(l in 2:no.events) {` 296 `+ ch <- names(which(sapply(edges(getTree(model, k)), function(x, el) {el %in% x}, nodes(getTree(model, k))[l])))` 297 `+ true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch)) ` 298 `+ }` 299 `+ names(true.vec) <- nodes(getTree(model, k))[-1]` 300 `+ true.order <- c(true.order, list(true.vec))` 301 `+ ` 302 `+ ## Assign the maximal depths + 1 for each tree in the mixtures` 303 `+ start.vals[k - 1] <- max(acc(getTree(model, k), nodes(getTree(model, k)))[] + 1, na.remove = TRUE) ` 304 `+ }` 305 `+ ## Calculate the similarity of the models as a sum of the number of` 306 `+ ## different edges of all possible combinations of two different` 307 `+ ## nontrivial trees in the model and their corresponding L1-distance` 308 `+ ## of the levels of the events:` 309 `+ diff.sum <- 0` 310 `+ for(k1 in 2:(no.trees - 1)) {` 311 `+ for(k2 in (k1 + 1):no.trees) {` 312 `+ diff.sum <- diff.sum + (sum(true.order[[k1 - 1]] != true.order[[k2 - 1]])` 313 `+ + L1.dist(get.tree.levels(model, k1, start.vals[k2 - 1]),` 314 `+ get.tree.levels(model, k2, start.vals[k1 - 1])))` 315 `+ }` 316 `+ }` 317 `+ return(diff.sum)` 318 `+}` 319 `+######################################################################` 320 `+## Stability analysis of the oncogenetic trees mixture model.` 321 `+## This includes stability analysis on different levels of the mixture` 322 `+## model: GPS values, encoded probability distribution, tree topologies.` 323 `+## The function outputs a list of analysis for the mentioned features.` 324 `+## Each analysis contains the values of different similarity measures` 325 `+## with their corresponding p-values.` 326 `+## The models should have at least two components (the first one is the noise).` 327 `+stability.sim <- function(no.trees = 3, ## number of tree components` 328 `+ no.events = 9, ## number of genetic events` 329 `+ prob = c(0.2, 0.8), ## interval for the edgeweights of the random mixture models ` 330 `+ no.draws = 300, ## number of samples drawn from the random models used for learning a mixture model` 331 `+ no.rands = 100, ## number of rands for calculatin the p-values` 332 `+ no.sim = 1 ## number of simulation iterations` 333 `+ ) {` 334 `+ ## Set the true types.` 335 `+ no.trees <- as.integer(no.trees)` 336 `+ no.events <- as.integer(no.events)` 337 `+ no.draws <- as.integer(no.draws)` 338 `+ no.rands <- as.integer(no.rands)` 339 `+ no.sim <- as.integer(no.sim)` 340 `+ ## Check if the necessary parameters are provided and have the correct form.` 341 `+ if(no.trees < 2)` 342 `+ stop("The specified mixture model should have at least two` 343 `+tree components (where the first one is the noise).")` 344 `+ if (no.events < 1)` 345 `+ stop("The number of events must be an integer greater than zero!")` 346 `+ if(no.draws < 1)` 347 `+ stop("The number of draws (number of samples) must be an integer greater than zero!") ` 348 `+ if (no.rands < 1)` 349 `+ stop("The number of random models for the p-values must be an integer greater than zero!") ` 350 `+ if (no.sim < 1)` 351 `+ stop("The number of iterations for the waiting time simulation` 352 `+must be an integer greater than zero!")` 353 `+ if(!missing(prob)) {` 354 `+ if(!is.numeric(prob) || (length(prob) != 2))` 355 `+ stop("Specify the boundaries of the conditional probabilities as a numeric vector` 356 `+of length two = c(min, max)!") ` 357 `+ if(prob < prob)` 358 `+ stop("In the probability vector the lower boundary must be smaller than the` 359 `+upper boundary!")` 360 `+ }` 361 `+ ` 362 `+ simGPS <- numeric(0) ## results from the stability analysis of the GPS values` 363 `+ topo.dif <- numeric(0) ## results from the stability analysis of the topologies of the tree components of mixture models based on comp.models` 364 `+ topo.levels.dif <- numeric(0) ## results from the stability analysis of the topologies of the tree components of mixture models based on comp.models.levels` 365 `+ result.distr <- numeric(0) ## results from the stability analysis of the probability distribution encoded by the model` 366 `+ ` 367 `+ mat.true.gps <- numeric(0) ## matrix containing the true GPS values from each simulation iteration` 368 `+ mat.fit.gps <- numeric(0) ## matrix containing the corresponding fitted GPS values from each simulation iteration` 369 `+` 370 `+ list.true.models <- list() ## list containing the true models from each simulation iteration` 371 `+ list.fit.models <- list() ## list containing the corresponding fitted models from each simulation iteration` 372 `+ ## Simulation iterations:` 373 `+ for(i in 1:as.integer(no.sim)) {` 374 `+ print(i) ## simulation iteration` 375 `+ ## Pick a true model from the space of random models and draw data from it.` 376 `+ true.m <- generate(K = no.trees, no.events = no.events, prob = prob)` 377 `+ Weights(true.m) <- c(0.05, rep(0.95/(no.trees - 1), (no.trees - 1)))` 378 `+ sim.data <- sim(model = true.m, no.draws = no.draws) ` 379 `+ list.true.models <- c(list.true.models, true.m)` 380 `+ ## Calculate the GPS and the distribution encoded by true.m` 381 `+ true.gps <- GPS(gps(model = true.m, data = sim.data, no.sim = 10000))` 382 `+ mat.true.gps <- cbind(mat.true.gps, true.gps)` 383 `+ true.distr <- distribution(model = true.m)\$probability` 384 `+ ## Fit a mixture model to the data sim.data ` 385 `+ fm <- fit(data = sim.data, K = no.trees, noise = TRUE, equal.edgeweights = TRUE, eps = 0.01)` 386 `+ list.fit.models <- c(list.fit.models, fm)` 387 `+ ## Calculate the GPS and the distribution encoded by fm` 388 `+ fit.gps <- GPS(gps(model = fm, data = sim.data, no.sim = 10000))` 389 `+ mat.fit.gps <- cbind(mat.fit.gps, fit.gps)` 390 `+ fit.distr <- distribution(model = fm)\$probability` 391 `+ ## Compute different distances between the distributions induced` 392 `+ ## by the true and fitted models:` 393 `+ true.cosin <- cosin.dist(true.distr, fit.distr)` 394 `+ true.l1 <- L1.dist(true.distr, fit.distr)` 395 `+ true.kull <- kullback.leibler(true.distr, fit.distr)` 396 `+ ## Compute different distances between the GPS values` 397 `+ ## for the true and fitted models:` 398 `+ true.euclGPS <- euclidian.dist(true.gps, fit.gps)` 399 `+ true.rank.dist <- rank.cor.dist(true.gps, fit.gps)` 400 `+ ## Compute different similarity measures for comparing the` 401 `+ ## topologies of the nontrivial components of the true and fitted models:` 402 `+ true.topo.dif <- comp.models(true.m, fm)` 403 `+ true.topo.levels.dif <- comp.models.levels(true.m, fm)` 404 `+ ## Vectors for keeping the values for the different features` 405 `+ ## of the random mixture models needed for the p-values calculation:` 406 `+ rand.euclGPS <- numeric(0)` 407 `+ rand.rankGPS <- numeric(0)` 408 `+` 409 `+ rand.cos.distr <- numeric(0)` 410 `+ rand.l1.distr <- numeric(0)` 411 `+ rand.kull.distr <- numeric(0)` 412 `+` 413 `+ rand.topo <- numeric(0)` 414 `+ rand.topo.levels <- numeric(0)` 415 `+ ## Create the vectors with random values for the p-values calculation:` 416 `+ for(j in 1:(as.integer(no.rands) - 1)) {` 417 `+ ## Generate random model and calculate the GPS and distribution:` 418 `+ model <- generate(K = no.trees, no.events = no.events, prob = prob)` 419 `+ Weights(model) <- c(0.05, rep(0.95/(no.trees - 1), (no.trees - 1))) ` 420 `+ random.gps <- GPS(gps(model = model, data = sim.data, no.sim = 10000))` 421 `+ random.distr <- distribution(model = model)\$probability` 422 `+ ## GPS:` 423 `+ rand.euclGPS <- c(rand.euclGPS, euclidian.dist(true.gps, random.gps)) ` 424 `+ rand.rankGPS <- c(rand.rankGPS, rank.cor.dist(true.gps, random.gps))` 425 `+ ## Distribution:` 426 `+ rand.cos.distr <- c(rand.cos.distr, cosin.dist(true.distr, random.distr))` 427 `+ rand.l1.distr <- c(rand.l1.distr, L1.dist(true.distr, random.distr)) ` 428 `+ rand.kull.distr <- c(rand.kull.distr, kullback.leibler(true.distr, random.distr))` 429 `+ ## Tree topologies:` 430 `+ rand.topo <- c(rand.topo, comp.models(true.m, model))` 431 `+ rand.topo.levels <- c(rand.topo.levels, comp.models.levels(true.m, model))` 432 `+ }` 433 `+ ## Stability analysis of GPS values` 434 `+ ## (using Euclidian distance and rank correlation distance as similarity measures):` 435 `+ simGPS <- rbind(simGPS,` 436 `+ c(true.euclGPS, Pval.dist(true.euclGPS, rand.euclGPS), ` 437 `+ true.rank.dist, Pval.dist(true.rank.dist, rand.rankGPS)))` 438 `+ ## Stability analysis of induced distributions` 439 `+ ## (using cosine distance, L1 distance, Kullback-Leibler divergence as similarity measures):` 440 `+ result.distr <- rbind(result.distr,` 441 `+ c(true.cosin, Pval.dist(true.cosin, rand.cos.distr),` 442 `+ true.l1, Pval.dist(true.l1, rand.l1.distr),` 443 `+ true.kull, Pval.dist(true.kull, rand.kull.distr)))` 444 `+ ## Stability analysis of tree topologies` 445 `+ ## (using comp.models and comp.models.levels as similarity measures):` 446 `+ topo.dif <- rbind(topo.dif,` 447 `+ c(true.topo.dif, Pval.dist(true.topo.dif, rand.topo)))` 448 `+ topo.levels.dif <- rbind(topo.levels.dif,` 449 `+ c(true.topo.levels.dif, Pval.dist(true.topo.levels.dif, rand.topo.levels)))` 450 `+ ` 451 `+ }` 452 `+ ## Organize the output:` 453 `+ colnames(simGPS) <- c("eucl.dist", "p.val.eucl", "rank.cor.dist", "p.val.rank")` 454 `+ colnames(result.distr) <- c("cos", "p.val.cos", "L1", "p.val.L1", "KL", "p.val.KL")` 455 `+ colnames(topo.dif) <- c("topo.dif", "p.value")` 456 `+ colnames(topo.levels.dif) <- c("topo.levels.dif", "p.value")` 457 `+ colnames(mat.true.gps) <- c(1:no.sim)` 458 `+ colnames(mat.fit.gps) <- c(1:no.sim)` 459 `+ output <- list(simGPS, result.distr,` 460 `+ topo.dif, topo.levels.dif,` 461 `+ mat.true.gps, mat.fit.gps,` 462 `+ list.true.models, list.fit.models)` 463 `+ names(output) <- c("GPS", "Distribution", "Tree topologies (distinct edges)",` 464 `+ "Tree topologies (distinct edges + levelsL1dist)", "True GPS",` 465 `+ "Fitted GPS", "True models", "Fitted models")` 466 `+ ` 467 `+ return(output) ` 468 `+}`