Browse code

added the Rtreemix package and removed the bim package

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/Rtreemix@28790 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 16/11/2007 21:25:16
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,468 @@
1
+################################################################################
2
+##  FUNCTIONS FOR STABILITY ANALYSIS OF THE ONCOGENETIC TREES MIXTURE MODEL   ##
3
+################################################################################
4
+## Function for calculating the P value of a given similarity measure.
5
+## What is the probability for obtaining the same or smaller
6
+## value in a random vector of similarity values.
7
+Pval.dist <- function(dist.val, random.vals) {
8
+  return((sum(random.vals <= dist.val) + 1) /(length(random.vals) + 1))
9
+}
10
+######################################################################
11
+## Function for calculating the Kullback-Leibler divergence between
12
+## two discrete probability distributions p and q.
13
+kullback.leibler <- function(p, q) {
14
+  if(length(p) != length(q))
15
+    stop("Error: The distribution vectors have different lengths!")  
16
+  return(sum(p * log(p / q)))
17
+}
18
+######################################################################
19
+## Function for calculating the L1 distance between
20
+## two discrete probability distributions p and q.
21
+L1.dist <- function(p, q) {
22
+  if(length(p) != length(q))
23
+    stop("Error: The distribution vectors have different lengths!")
24
+
25
+  return(sum(abs(p - q)))
26
+}
27
+######################################################################
28
+## Function for calculating the cosine distance between
29
+## two discrete probability distributions p and q.
30
+cosin.dist <- function(p, q) {
31
+  if(length(p) != length(q))
32
+    stop("Error: The distribution vectors have different lengths!")
33
+
34
+  return(1 - (sum (p * q) / (L2.norm(p) * L2.norm(q)) ))
35
+}
36
+######################################################################
37
+## Function for calculating the L2 norm of a given vector x.
38
+L2.norm <- function(x) {
39
+  return(sqrt(sum(x * x)))
40
+}
41
+######################################################################
42
+## Function for calculating the Euclidian distance between
43
+## two vectors x and y.
44
+euclidian.dist <- function(x, y) {
45
+  if(length(x) != length(y))
46
+    stop("Error: The vectors have different lengths!")
47
+
48
+  return(sqrt(drop((x - y) %*% (x - y))))
49
+}
50
+######################################################################
51
+## Function for calculating the rank-correlation distance between
52
+## two vectors x and y.
53
+rank.cor.dist <- function(x, y) {
54
+  if(length(x) != length(y))
55
+    stop("Error: The vectors have different length!")
56
+
57
+  return(1 - rcorr(x, y, type = "spearman")[[1]][1, 2])
58
+}
59
+######################################################################
60
+## Function that implements a similarity measure for comparing the topologies
61
+## of the trees of two mixture models mixture1 and mixture2.
62
+## It returns a value that uses the number of different edges for caracterizing
63
+## the difference between the models.
64
+## For the comparisson it is necessary that the two models have the same
65
+## number of tree components build on the same number of genetic events.
66
+## It is assumed that the mixtures have at least two components (the first one is the noise).
67
+comp.models <- function(mixture1, mixture2) {
68
+  if((class(mixture1) != "RtreemixModel") || (class(mixture2) != "RtreemixModel"))
69
+    stop("The specified mixture models should be of type RtreemixModel!")
70
+  
71
+  if(numTrees(mixture1) != numTrees(mixture2))
72
+    stop("The two specified mixture models don't have the same number of trees!")
73
+  
74
+  if(eventsNum(mixture1) != eventsNum(mixture2))
75
+    stop("The two specified mixture models don't have the same number of events!")
76
+  no.trees <- numTrees(mixture1)
77
+  if (no.trees == 1)
78
+    stop("The mixture models should have at least two tree components, where the first one is the noise component!")
79
+  
80
+  no.events <- eventsNum(mixture1)
81
+  
82
+  true.order <- list() ## list specifying the edges in the components of mixture1
83
+  fit.order <- list() ## list specifying the edges in the components of mixture2
84
+
85
+  ## Build the vectors that characterize the edges of the components of the mixture models.
86
+  ## The noise components are ignored since they always have the same (star) topology.
87
+  for(k in 2:no.trees) {
88
+    true.vec <- character(0)
89
+    fit.vec <- character(0)
90
+    for(l in 2:no.events) {
91
+      ch <- names(which(sapply(edges(getTree(mixture1, k)), function(x, el) {el %in% x}, nodes(getTree(mixture1, k))[l])))
92
+      true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch))       
93
+      ch <- names(which(sapply(edges(getTree(mixture2, k)), function(x, el) {el %in% x}, nodes(getTree(mixture2, k))[l])))
94
+      fit.vec <- c(fit.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch))        
95
+    }
96
+    names(true.vec) <- nodes(getTree(mixture1, k))[-1]
97
+    true.order <- c(true.order, list(true.vec))
98
+    names(fit.vec) <- nodes(getTree(mixture2, k))[-1]
99
+    fit.order <- c(fit.order, list(fit.vec))
100
+  }
101
+  ## Build the comparisson matrix.
102
+  ## The (i, j) element is the number of distinct edges of the
103
+  ## (i + 1)-th and (j + 1)-th component of the models
104
+  ## mixture1 and mixture2 respectively.  
105
+  comp.mat <- matrix(nrow = no.trees - 1, ncol = no.trees - 1)
106
+  for(i in 1:(no.trees - 1))
107
+    for(j in 1:(no.trees - 1)) {
108
+      comp.mat[i, j] <- sum(true.order[[i]] != fit.order[[j]])
109
+    }
110
+  ## Form the similarity pairs and calculate the similarity of the models
111
+  ## as a sum of the number of different edges of the trees in the similarity pairs:
112
+  if(no.trees > 2) {
113
+    rc <- which(comp.mat == min(comp.mat), arr.ind = TRUE)
114
+    diff.sum <- min(comp.mat)
115
+    row.index <- c(rc[1, 1]) ## get the row index of the minimum
116
+    col.index <- c(rc[1, 2]) ## get the column index of the minimum
117
+
118
+    for(m in 1:(nrow(comp.mat) - 1)) {
119
+      rc <- which(comp.mat == min(comp.mat[-(row.index), -(col.index)]), arr.ind = TRUE)
120
+      diff.sum <- diff.sum + min(comp.mat[-(row.index), -(col.index)])
121
+      row.index <- c(row.index, rc[1, 1])
122
+      col.index <- c(col.index, rc[1, 2])    
123
+    }
124
+  } else {
125
+    diff.sum <- comp.mat[1, 1]
126
+  }
127
+  ## Return a result between 0 and 1.    
128
+  return(diff.sum/((no.trees - 1) * (no.events - 1)))  
129
+
130
+}
131
+######################################################################
132
+## Function that assignes to each node the level at which that node is
133
+## in a specific tree (tree.num) of the trees mixture model.
134
+## The start.val is the maximum depth of the tree with which the tree
135
+## specified with tree.num will be compared.
136
+get.tree.levels <- function(mixture, tree.num, start.val) {
137
+  root <- nodes(getTree(mixture, tree.num))[1]
138
+  levels <- acc(getTree(mixture, tree.num), root)
139
+  ## vec <- rep(max(levels[[1]]), eventsNum(mixture) - 1)
140
+  vec <- rep(start.val, eventsNum(mixture) - 1)
141
+  names(vec) <- nodes(getTree(mixture, tree.num))[-1]
142
+  
143
+  vec[names(levels[[1]])] <- levels[[1]]
144
+
145
+  return(vec)
146
+}
147
+######################################################################
148
+## Function that implements a similarity measure for comparing the topologies
149
+## of the trees of two mixture models mixture1 and mixture2.
150
+## It returns a value that uses the number of different edges and the depth at
151
+## which the events are, for caracterizing the difference between the models.
152
+## This measure is more application specific, since the depth at which the
153
+## events are in a tree is very important for disease progression.
154
+## For the comparisson it is necessary that the two models have the same
155
+## number of tree components build on the same number of genetic events.
156
+## It is assumed that the mixtures have at least two components (the first one is the noise).
157
+comp.models.levels <- function(mixture1, mixture2) {
158
+  if((class(mixture1) != "RtreemixModel") || (class(mixture2) != "RtreemixModel"))
159
+    stop("The specified mixture models should be of type RtreemixModel!")
160
+  
161
+  if(numTrees(mixture1) != numTrees(mixture2))
162
+    stop("The two specified mixture models don't have the same number of trees!")
163
+
164
+  if(eventsNum(mixture1) != eventsNum(mixture2))
165
+    stop("The two specified mixture models don't have the same number of events!")
166
+  no.trees <- numTrees(mixture1)
167
+  if (no.trees == 1)
168
+    stop("The mixture models should have at least two tree components, where the first one is the noise component!")  
169
+  
170
+  no.events <- eventsNum(mixture1)
171
+  
172
+  true.order <- list() ## list specifying the edges in the components of mixture1
173
+  fit.order <- list() ## list specifying the edges in the components of mixture2
174
+
175
+  start.vals1 <- vector(mode = "numeric", length = (no.trees - 1)) ## the depth of the tree components in mixture1 (without the noise component) 
176
+  start.vals2 <- vector(mode = "numeric", length = (no.trees - 1)) ## the depth of the tree components in mixture2 (without the noise component)
177
+
178
+  ## Build the vectors that characterize the edges of the components of the mixture models.
179
+  ## The noise components are ignored since they always have the same (star) topology.  
180
+  for(k in 2:no.trees) {
181
+    true.vec <- character(0)
182
+    fit.vec <- character(0)
183
+    for(l in 2:no.events) {
184
+      ch <- names(which(sapply(edges(getTree(mixture1, k)), function(x, el) {el %in% x}, nodes(getTree(mixture1, k))[l])))
185
+      true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch))       
186
+      ch <- names(which(sapply(edges(getTree(mixture2, k)), function(x, el) {el %in% x}, nodes(getTree(mixture2, k))[l])))
187
+      fit.vec <- c(fit.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch))        
188
+    }
189
+    names(true.vec) <- nodes(getTree(mixture1, k))[-1]
190
+    true.order <- c(true.order, list(true.vec))
191
+    names(fit.vec) <- nodes(getTree(mixture2, k))[-1]
192
+    fit.order <- c(fit.order, list(fit.vec))
193
+
194
+    ## Assign the (maximal depths + 1) for each tree in the mixtures
195
+    start.vals1[k - 1] <- max(acc(getTree(mixture1, k), nodes(getTree(mixture1, k))[1])[[1]] + 1, na.remove = TRUE)
196
+    start.vals2[k - 1] <- max(acc(getTree(mixture2, k), nodes(getTree(mixture2, k))[1])[[1]] + 1, na.remove = TRUE)                          
197
+  }
198
+  ## Build the comparisson matrix.
199
+  ## The (i, j) element is the number of distinct edges of the
200
+  ## (i + 1)-th and (j + 1)-th component of the models
201
+  ## mixture1 and mixture2 respectively.    
202
+  comp.mat <- matrix(nrow = no.trees - 1, ncol = no.trees - 1)
203
+  for(i in 1:(no.trees - 1))
204
+    for(j in 1:(no.trees - 1)) {
205
+      comp.mat[i, j] <- sum(true.order[[i]] != fit.order[[j]])
206
+    }
207
+  ## Form the similarity pairs and calculate the similarity of the models
208
+  ## as a sum of the number of different edges of the trees in the similarity pairs
209
+  ## and their corresponding L1-distance of the levels of the events:
210
+  if(no.trees > 2) {
211
+    rc <- which(comp.mat == min(comp.mat), arr.ind = TRUE)
212
+    diff.sum <- min(comp.mat) +
213
+      L1.dist(get.tree.levels(mixture1, rc[1, 1] + 1, start.vals2[rc[1, 2]]),
214
+              get.tree.levels(mixture2, rc[1, 2] + 1, start.vals1[rc[1, 1]]))
215
+    row.index <- c(rc[1, 1]) ## get the row index of the minimum
216
+    col.index <- c(rc[1, 2]) ## get the column index of the minimum
217
+
218
+    for(m in 1:(nrow(comp.mat) - 1)) {
219
+      rc <- which(comp.mat == min(comp.mat[-(row.index), -(col.index)]), arr.ind = TRUE)
220
+      diff.sum <- diff.sum + min(comp.mat[-(row.index), -(col.index)]) +
221
+        L1.dist(get.tree.levels(mixture1, rc[1, 1] + 1, start.vals2[rc[1, 2]]),
222
+                get.tree.levels(mixture2, rc[1, 2] + 1, start.vals1[rc[1, 1]]))
223
+      row.index <- c(row.index, rc[1, 1])
224
+      col.index <- c(col.index, rc[1, 2])   
225
+    }
226
+  } else {
227
+    if(no.trees == 2) {
228
+      diff.sum <- comp.mat[1, 1] +
229
+        L1.dist(get.tree.levels(mixture1, 2, start.vals2[1]),
230
+                get.tree.levels(mixture2, 2, start.vals1[1]))
231
+    } else {
232
+      stop("The specified mixture models must have at least two tree components, where the first one is the noise component!")
233
+    }
234
+  }
235
+    
236
+  return(diff.sum)  
237
+
238
+}
239
+######################################################################
240
+## Function that implements a similarity measure for comparing the topologies
241
+## of the nontrivial tree components of a specified mixture model.
242
+## It returns a value that uses the number of different edges for caracterizing
243
+## the difference of the trees in the model.
244
+## For the comparisson it is necessary that the model has at least two
245
+## nontrivial components.
246
+comp.trees <- function(model) {
247
+  no.trees <- numTrees(model)
248
+  if(no.trees <= 2)
249
+    stop("The specified mixture model should have at least two nontrivial tree components.")
250
+  
251
+  no.events <- eventsNum(model)
252
+  true.order <- list() ## list specifying the edges in the nontrivial components of the model  
253
+  ## Build the vectors that characterize the
254
+  ## edges of the nontrivial components of the mixture model.
255
+  for(k in 2:no.trees) {
256
+    true.vec <- character(0)
257
+    for(l in 2:no.events) {
258
+      ch <- names(which(sapply(edges(getTree(model, k)), function(x, el) {el %in% x}, nodes(getTree(model, k))[l])))
259
+      true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch))             
260
+    }
261
+    names(true.vec) <- nodes(getTree(model, k))[-1]
262
+    true.order <- c(true.order, list(true.vec))    
263
+  }
264
+  ## Calculate the similarity of the nontrivial components of the model 
265
+  ## as a sum of the number of different edges of all combinations of
266
+  ## two different nontrivial trees:
267
+  diff.sum <- 0
268
+  for(k1 in 2:(no.trees - 1)) 
269
+    for(k2 in (k1 + 1):no.trees) 
270
+      diff.sum <- diff.sum + sum(true.order[[k1 - 1]] != true.order[[k2 - 1]])
271
+    
272
+  ## Return a result between 0 and 1.
273
+  return(diff.sum / (choose((no.trees - 1), 2) * (no.events - 1)))
274
+}
275
+######################################################################
276
+## Function that implements a similarity measure for comparing the topologies
277
+## of the nontrivial tree components of a specified mixture model.
278
+## It returns a value that uses the  number of different edges and the depth at
279
+## which the events are, for caracterizing the difference of the trees in the model.
280
+## For the comparisson it is necessary that the model has at least two
281
+## nontrivial components.
282
+comp.trees.levels <- function(model) {
283
+  no.trees <- numTrees(model)
284
+  if(no.trees <= 2)
285
+    stop("The specified mixture model should have at least two nontrivial tree components.")
286
+  
287
+  no.events <- eventsNum(model)
288
+  start.vals <- vector(mode = "numeric", length = (no.trees - 1)) ## the depth of the nontrivial tree components in the model 
289
+  true.order <- list() ## list specifying the edges in the nontrivial components of the model 
290
+  
291
+  ## Build the vectors that characterize the
292
+  ## edges of the nontrivial components of the mixture model.  
293
+  for(k in 2:no.trees) {
294
+    true.vec <- character(0)
295
+    for(l in 2:no.events) {
296
+      ch <- names(which(sapply(edges(getTree(model, k)), function(x, el) {el %in% x}, nodes(getTree(model, k))[l])))
297
+      true.vec <- c(true.vec, ifelse((identical(ch, character(0)) || is.null(ch)), "e", ch))             
298
+    }
299
+    names(true.vec) <- nodes(getTree(model, k))[-1]
300
+    true.order <- c(true.order, list(true.vec))
301
+    
302
+    ## Assign the maximal depths + 1 for each tree in the mixtures
303
+    start.vals[k - 1] <- max(acc(getTree(model, k), nodes(getTree(model, k))[1])[[1]] + 1, na.remove = TRUE)    
304
+  }
305
+  ## Calculate the similarity of the models as a sum of the number of
306
+  ## different edges of all possible combinations of two different
307
+  ## nontrivial trees in the model and their corresponding L1-distance
308
+  ## of the levels of the events:
309
+  diff.sum <- 0
310
+  for(k1 in 2:(no.trees - 1)) {
311
+    for(k2 in (k1 + 1):no.trees) {
312
+      diff.sum <- diff.sum + (sum(true.order[[k1 - 1]] != true.order[[k2 - 1]])
313
+                              +  L1.dist(get.tree.levels(model, k1, start.vals[k2 - 1]),
314
+                                         get.tree.levels(model, k2, start.vals[k1 - 1])))
315
+    }
316
+  }
317
+  return(diff.sum)
318
+}
319
+######################################################################
320
+## Stability analysis of the oncogenetic trees mixture model.
321
+## This includes stability analysis on different levels of the mixture
322
+## model: GPS values, encoded probability distribution, tree topologies.
323
+## The function outputs a list of analysis for the mentioned features.
324
+## Each analysis contains the values of different similarity measures
325
+## with their corresponding p-values.
326
+## The models should have at least two components (the first one is the noise).
327
+stability.sim <- function(no.trees = 3, ## number of tree components
328
+                          no.events = 9, ## number of genetic events
329
+                          prob = c(0.2, 0.8), ## interval for the edgeweights of the random mixture models 
330
+                          no.draws = 300, ## number of samples drawn from the random models used for learning a mixture model
331
+                          no.rands = 100, ## number of rands for calculatin the p-values
332
+                          no.sim = 1 ## number of simulation iterations
333
+                          ) {
334
+  ## Set the true types.
335
+  no.trees <- as.integer(no.trees)
336
+  no.events <- as.integer(no.events)
337
+  no.draws <- as.integer(no.draws)
338
+  no.rands <- as.integer(no.rands)
339
+  no.sim <- as.integer(no.sim)
340
+  ## Check if the necessary parameters are provided and have the correct form.
341
+  if(no.trees < 2)
342
+    stop("The specified mixture model should have at least two
343
+tree components (where the first one is the noise).")
344
+  if (no.events < 1)
345
+    stop("The number of events must be an integer greater than zero!")
346
+  if(no.draws < 1)
347
+    stop("The number of draws (number of samples) must be an integer greater than zero!")  
348
+  if (no.rands < 1)
349
+    stop("The number of random models for the p-values  must be an integer greater than zero!")   
350
+  if (no.sim < 1)
351
+    stop("The number of iterations for the waiting time simulation
352
+must be an integer greater than zero!")
353
+  if(!missing(prob)) {
354
+    if(!is.numeric(prob) || (length(prob) != 2))
355
+      stop("Specify the boundaries of the conditional probabilities as a numeric vector
356
+of length two = c(min, max)!")  
357
+    if(prob[2] < prob[1])
358
+      stop("In the probability vector the lower boundary must be smaller than the
359
+upper boundary!")
360
+  }
361
+  
362
+  simGPS <- numeric(0) ## results from the stability analysis of the GPS values
363
+  topo.dif <- numeric(0) ## results from the stability analysis of the topologies of the tree components of mixture models based on comp.models
364
+  topo.levels.dif <- numeric(0) ## results from the stability analysis of the topologies of the tree components of mixture models based on comp.models.levels
365
+  result.distr <- numeric(0) ## results from the stability analysis of the probability distribution encoded by the model
366
+  
367
+  mat.true.gps <- numeric(0) ## matrix containing the true GPS values from each simulation iteration
368
+  mat.fit.gps <- numeric(0) ## matrix containing the corresponding fitted GPS values from each simulation iteration
369
+
370
+  list.true.models <- list() ## list containing the true models from each simulation iteration
371
+  list.fit.models <- list() ## list containing the corresponding fitted models from each simulation iteration
372
+  ## Simulation iterations:
373
+  for(i in 1:as.integer(no.sim)) {
374
+    print(i) ## simulation iteration
375
+    ## Pick a true model from the space of random models and draw data from it.
376
+    true.m <- generate(K = no.trees, no.events = no.events, prob = prob)
377
+    Weights(true.m) <- c(0.05, rep(0.95/(no.trees - 1), (no.trees - 1)))
378
+    sim.data <- sim(model = true.m, no.draws = no.draws)    
379
+    list.true.models <- c(list.true.models, true.m)
380
+    ## Calculate the GPS and the distribution encoded by true.m
381
+    true.gps <- GPS(gps(model = true.m, data = sim.data, no.sim = 10000))
382
+    mat.true.gps <- cbind(mat.true.gps, true.gps)
383
+    true.distr <- distribution(model = true.m)$probability
384
+    ## Fit a mixture model to the data sim.data    
385
+    fm <- fit(data = sim.data, K = no.trees, noise = TRUE, equal.edgeweights = TRUE, eps = 0.01)
386
+    list.fit.models <- c(list.fit.models, fm)
387
+    ## Calculate the GPS and the distribution encoded by fm
388
+    fit.gps <- GPS(gps(model = fm, data = sim.data, no.sim = 10000))
389
+    mat.fit.gps <- cbind(mat.fit.gps, fit.gps)
390
+    fit.distr <- distribution(model = fm)$probability
391
+    ## Compute different distances between the distributions induced
392
+    ## by the true and fitted models:
393
+    true.cosin <- cosin.dist(true.distr, fit.distr)
394
+    true.l1 <- L1.dist(true.distr, fit.distr)
395
+    true.kull <- kullback.leibler(true.distr, fit.distr)
396
+    ## Compute different distances between the GPS values
397
+    ## for the true and fitted models:
398
+    true.euclGPS <- euclidian.dist(true.gps, fit.gps)
399
+    true.rank.dist <-  rank.cor.dist(true.gps, fit.gps)
400
+    ## Compute different similarity measures for comparing the
401
+    ## topologies of the nontrivial components of the true and fitted models:
402
+    true.topo.dif <- comp.models(true.m, fm)
403
+    true.topo.levels.dif <- comp.models.levels(true.m, fm)
404
+    ## Vectors for keeping the values for the different features
405
+    ## of the random mixture models needed for the p-values calculation:
406
+    rand.euclGPS <- numeric(0)
407
+    rand.rankGPS <- numeric(0)
408
+
409
+    rand.cos.distr <- numeric(0)
410
+    rand.l1.distr <- numeric(0)
411
+    rand.kull.distr <- numeric(0)
412
+
413
+    rand.topo <- numeric(0)
414
+    rand.topo.levels <- numeric(0)
415
+    ## Create the vectors with random values for the p-values calculation:
416
+    for(j in 1:(as.integer(no.rands) - 1)) {
417
+      ## Generate random model and calculate the GPS and distribution:
418
+      model <- generate(K = no.trees, no.events = no.events, prob = prob)
419
+      Weights(model) <- c(0.05, rep(0.95/(no.trees - 1), (no.trees - 1)))      
420
+      random.gps <- GPS(gps(model = model, data = sim.data, no.sim = 10000))
421
+      random.distr <- distribution(model = model)$probability
422
+      ## GPS:
423
+      rand.euclGPS <- c(rand.euclGPS, euclidian.dist(true.gps, random.gps))      
424
+      rand.rankGPS <- c(rand.rankGPS, rank.cor.dist(true.gps, random.gps))
425
+      ## Distribution:
426
+      rand.cos.distr <- c(rand.cos.distr, cosin.dist(true.distr, random.distr))
427
+      rand.l1.distr <- c(rand.l1.distr, L1.dist(true.distr, random.distr))      
428
+      rand.kull.distr <- c(rand.kull.distr, kullback.leibler(true.distr, random.distr))
429
+      ## Tree topologies:
430
+      rand.topo <- c(rand.topo, comp.models(true.m, model))
431
+      rand.topo.levels <- c(rand.topo.levels, comp.models.levels(true.m, model))
432
+    }
433
+    ## Stability analysis of GPS values
434
+    ## (using Euclidian distance and rank correlation distance as similarity measures):
435
+    simGPS <- rbind(simGPS,
436
+                    c(true.euclGPS, Pval.dist(true.euclGPS, rand.euclGPS),   
437
+                      true.rank.dist, Pval.dist(true.rank.dist, rand.rankGPS)))
438
+    ## Stability analysis of induced distributions
439
+    ## (using cosine distance, L1 distance, Kullback-Leibler divergence as similarity measures):
440
+    result.distr <- rbind(result.distr,
441
+                          c(true.cosin, Pval.dist(true.cosin, rand.cos.distr),
442
+                            true.l1, Pval.dist(true.l1, rand.l1.distr),
443
+                            true.kull, Pval.dist(true.kull, rand.kull.distr)))
444
+    ## Stability analysis of tree topologies
445
+    ## (using comp.models and comp.models.levels as similarity measures):
446
+    topo.dif <- rbind(topo.dif,
447
+                      c(true.topo.dif, Pval.dist(true.topo.dif, rand.topo)))
448
+    topo.levels.dif <- rbind(topo.levels.dif,
449
+                             c(true.topo.levels.dif, Pval.dist(true.topo.levels.dif, rand.topo.levels)))
450
+    
451
+  }
452
+  ## Organize the output:
453
+  colnames(simGPS) <- c("eucl.dist", "p.val.eucl", "rank.cor.dist", "p.val.rank")
454
+  colnames(result.distr) <- c("cos", "p.val.cos", "L1", "p.val.L1", "KL", "p.val.KL")
455
+  colnames(topo.dif) <- c("topo.dif", "p.value")
456
+  colnames(topo.levels.dif) <- c("topo.levels.dif", "p.value")
457
+  colnames(mat.true.gps) <- c(1:no.sim)
458
+  colnames(mat.fit.gps) <- c(1:no.sim)
459
+  output <- list(simGPS, result.distr,
460
+                 topo.dif, topo.levels.dif,
461
+                 mat.true.gps, mat.fit.gps,
462
+                 list.true.models, list.fit.models)
463
+  names(output) <- c("GPS", "Distribution", "Tree topologies (distinct edges)",
464
+                     "Tree topologies (distinct edges + levelsL1dist)", "True GPS",
465
+                     "Fitted GPS", "True models", "Fitted models")
466
+    
467
+  return(output)  
468
+}