1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,394 +0,0 @@ |
1 |
-## ##' site mask |
|
2 |
-## ##' |
|
3 |
-## ##' |
|
4 |
-## ##' @title mask |
|
5 |
-## ##' @param tree_object tree object |
|
6 |
-## ##' @param field selected field |
|
7 |
-## ##' @param site site |
|
8 |
-## ##' @param mask_site if TRUE, site will be masked. |
|
9 |
-## ##' if FALSE, selected site will not be masked, while other sites will be masked. |
|
10 |
-## ##' @return updated tree object |
|
11 |
-## ##' @export |
|
12 |
-## ##' @author Guangchuang Yu |
|
13 |
-## mask <- function(tree_object, field, site, mask_site=FALSE) { |
|
14 |
-## has_field <- has.field(tree_object, field) |
|
15 |
-## if (has_field == FALSE) { |
|
16 |
-## stop("'field' is not available in 'tree_object'...") |
|
17 |
-## } |
|
18 |
- |
|
19 |
-## has_slot <- attr(has_field, "has_slot") |
|
20 |
-## is_codeml <- attr(has_field, "is_codeml") |
|
21 |
- |
|
22 |
-## if (has_slot) { |
|
23 |
-## if (is_codeml) { |
|
24 |
-## field_info <- slot(tree_object@rst, field) |
|
25 |
-## } else { |
|
26 |
-## field_info <- slot(tree_object, field) |
|
27 |
-## } |
|
28 |
-## field_data <- field_info[,2] |
|
29 |
-## } else { |
|
30 |
-## field_data <- tree_object@extraInfo[, field] |
|
31 |
-## } |
|
32 |
- |
|
33 |
-## field_data <- sapply(field_data, gsub, pattern="\n", replacement="/") |
|
34 |
- |
|
35 |
-## x <- field_data[field_data != ""] |
|
36 |
-## x <- x[!is.na(x)] |
|
37 |
-## pos <- strsplit(x, " / ") %>% unlist %>% |
|
38 |
-## gsub("^[a-zA-Z]+", "", . ) %>% |
|
39 |
-## gsub("[a-zA-Z]\\s*$", "", .) %>% |
|
40 |
-## as.numeric |
|
41 |
- |
|
42 |
-## if (mask_site == FALSE) { |
|
43 |
-## pos2 <- 1:max(pos) |
|
44 |
-## pos2 <- pos2[-site] |
|
45 |
-## site <- pos2 |
|
46 |
-## } |
|
47 |
- |
|
48 |
-## site <- site[site %in% pos] |
|
49 |
- |
|
50 |
-## for (i in seq_along(field_data)) { |
|
51 |
-## if (is.na(field_data[i])) |
|
52 |
-## next |
|
53 |
-## for (j in seq_along(site)) { |
|
54 |
-## pattern <- paste0("/*\\s*[a-zA-Z]", site[j], "[a-zA-Z]\\s*") |
|
55 |
-## field_data[i] <- gsub(pattern, "", field_data[i]) |
|
56 |
-## } |
|
57 |
-## field_data[i] <- gsub("^/\\s", "", field_data[i]) %>% .add_new_line |
|
58 |
-## } |
|
59 |
- |
|
60 |
-## if (has_slot) { |
|
61 |
-## field_info[,2] <- field_data |
|
62 |
-## if (is_codeml) { |
|
63 |
-## slot(tree_object@rst, field) <- field_info |
|
64 |
-## } else { |
|
65 |
-## slot(tree_object, field) <- field_info |
|
66 |
-## } |
|
67 |
-## } else { |
|
68 |
-## tree_object@extraInfo[, field] <- field_data |
|
69 |
-## } |
|
70 |
-## tree_object |
|
71 |
-## } |
|
72 |
- |
|
73 |
- |
|
74 |
-## read.tip_seq_mlc <- function(mlcfile) { |
|
75 |
-## info <- getPhyInfo(mlcfile) |
|
76 |
-## mlc <- readLines(mlcfile) |
|
77 |
-## ## remove blank lines |
|
78 |
-## blk <- grep("^\\s*$", mlc) |
|
79 |
-## if (length(blk) > 0) { |
|
80 |
-## mlc <- mlc[-blk] |
|
81 |
-## } |
|
82 |
- |
|
83 |
-## seqs <- mlc[2:(info$num+1)] |
|
84 |
-## seqs <- gsub("\\s+", "", seqs) |
|
85 |
-## wd <- info$width |
|
86 |
-## res <- sapply(seqs, function(x) substring(x, (base::nchar(x) - wd + 1), base::nchar(x))) |
|
87 |
-## nn <- sapply(seqs, function(x) substring(x, 1, (base::nchar(x) - wd))) |
|
88 |
-## names(res) <- nn |
|
89 |
-## return(res) |
|
90 |
-## } |
|
91 |
- |
|
92 |
-## read.tip_seq_mlb <- read.tip_seq_mlc |
|
93 |
- |
|
94 |
-## read.dnds_mlc <- function(mlcfile) { |
|
95 |
-## mlc <- readLines(mlcfile) |
|
96 |
-## i <- grep("dN & dS for each branch", mlc) |
|
97 |
-## j <- grep("tree length for dN", mlc) |
|
98 |
- |
|
99 |
-## mlc <- mlc[i:j] |
|
100 |
-## hi <- grep("dN/dS", mlc) |
|
101 |
-## cn <- strsplit(mlc[hi], " ") %>% unlist %>% `[`(nzchar(.)) |
|
102 |
- |
|
103 |
-## ii <- grep("\\d+\\.\\.\\d+", mlc) |
|
104 |
-## info <- mlc[ii] |
|
105 |
-## info %<>% sub("^\\s+", "", .) |
|
106 |
-## info %<>% sub("\\s+$", "", .) |
|
107 |
-## res <- t(sapply(info, function(x) { |
|
108 |
-## y <- unlist(strsplit(x, "\\s+")) |
|
109 |
-## edge <- unlist(strsplit(y[1], "\\.\\.")) |
|
110 |
-## yy <- c(edge, y[-1]) |
|
111 |
-## as.numeric(yy) |
|
112 |
-## })) |
|
113 |
- |
|
114 |
-## row.names(res) <- NULL |
|
115 |
-## colnames(res) <- c("parent", "node", cn[-1]) |
|
116 |
-## colnames(res) <- gsub("\\*", "_x_", colnames(res)) |
|
117 |
-## colnames(res) <- gsub("\\/", "_vs_", colnames(res)) |
|
118 |
-## return(res) |
|
119 |
-## } |
|
120 |
- |
|
121 |
-## read.treetext_paml_mlc <- function(mlcfile) { |
|
122 |
-## read.treetext_paml(mlcfile, "mlc") |
|
123 |
-## } |
|
124 |
- |
|
125 |
-## read.treetext_paml_rst<- function(rstfile) { |
|
126 |
-## read.treetext_paml(rstfile, "rst") |
|
127 |
-## } |
|
128 |
- |
|
129 |
-## read.treetext_paml <- function(file, by) { |
|
130 |
-## ## works fine with baseml and codeml |
|
131 |
-## x <- readLines(file) |
|
132 |
-## tr.idx <- get_tree_index_paml(x) |
|
133 |
- |
|
134 |
-## if ( by == "rst" ) { |
|
135 |
-## ii <- 1 |
|
136 |
-## } else if (by == "mlc") { |
|
137 |
-## ii <- 3 |
|
138 |
-## } else { |
|
139 |
-## stop("_by_ should be one of 'rst' or 'mlc'") |
|
140 |
-## } |
|
141 |
- |
|
142 |
-## return(x[tr.idx][ii]) |
|
143 |
-## } |
|
144 |
- |
|
145 |
-## read.phylo_paml_mlc <- function(mlcfile) { |
|
146 |
-## parent <- node <- label <- NULL |
|
147 |
- |
|
148 |
-## mlc <- readLines(mlcfile) |
|
149 |
-## edge <- get_tree_edge_paml(mlc) |
|
150 |
- |
|
151 |
-## tr.idx <- get_tree_index_paml(mlc) |
|
152 |
-## tr2 <- read.tree(text=mlc[tr.idx[2]]) |
|
153 |
-## tr3 <- read.tree(text=mlc[tr.idx[3]]) |
|
154 |
- |
|
155 |
-## treeinfo <- as.data.frame(edge) |
|
156 |
-## colnames(treeinfo) <- c("parent", "node") |
|
157 |
-## treeinfo$length <- NA |
|
158 |
-## treeinfo$label <- NA |
|
159 |
-## treeinfo$isTip <- FALSE |
|
160 |
-## ntip <- Ntip(tr3) |
|
161 |
-## ii <- match(tr2$tip.label, treeinfo[, "node"]) |
|
162 |
-## treeinfo[ii, "label"] <- tr3$tip.label |
|
163 |
-## treeinfo[ii, "isTip"] <- TRUE |
|
164 |
-## ## jj <- match(1:ntip, tr3$edge[,2]) |
|
165 |
-## treeinfo[ii, "length"] <- tr2$edge.length[ii] ##tr3$edge.length[jj] |
|
166 |
- |
|
167 |
-## root <- getRoot(tr3) ## always == (Ntip(tr3) + 1) |
|
168 |
-## currentNode <- treeinfo$label[ii] |
|
169 |
-## treeinfo.tr3 <- fortify(tr3) |
|
170 |
- |
|
171 |
-## treeinfo$visited <- FALSE |
|
172 |
-## while(any(treeinfo$visited == FALSE)) { |
|
173 |
-## pNode <- c() |
|
174 |
-## for( kk in currentNode ) { |
|
175 |
-## i <- which(treeinfo$label == kk) |
|
176 |
-## treeinfo[i, "visited"] <- TRUE |
|
177 |
-## j <- which(treeinfo.tr3$label == kk) |
|
178 |
-## ip <- treeinfo[i, "parent"] |
|
179 |
-## if (ip != root) { |
|
180 |
-## ii <- which(treeinfo[, "node"] == ip) |
|
181 |
-## if (treeinfo$visited[ii] == FALSE) { |
|
182 |
-## jp <- treeinfo.tr3[j, "parent"] |
|
183 |
-## jj <- which(treeinfo.tr3[, "node"] == jp) |
|
184 |
-## treeinfo[ii, "label"] <- as.character(ip) |
|
185 |
-## treeinfo.tr3[jj, "label"] <- as.character(ip) |
|
186 |
-## treeinfo[ii, "length"] <- treeinfo.tr3[jj, "branch.length"] |
|
187 |
-## pNode <- c(pNode, ip) |
|
188 |
-## } |
|
189 |
-## treeinfo[ii, "visited"] <- TRUE |
|
190 |
-## } |
|
191 |
- |
|
192 |
-## } |
|
193 |
-## currentNode <- unique(pNode) |
|
194 |
-## } |
|
195 |
- |
|
196 |
-## phylo <- with(treeinfo, |
|
197 |
-## list(edge= cbind(as.numeric(parent), |
|
198 |
-## as.numeric(node)), |
|
199 |
-## edge.length = length, |
|
200 |
-## tip.label = label[order(node)][1:ntip], |
|
201 |
-## Nnode = tr3$Nnode, |
|
202 |
-## node.label = c(root, label[order(node)][-c(1:ntip)]) |
|
203 |
-## ) |
|
204 |
-## ) |
|
205 |
-## class(phylo) <- "phylo" |
|
206 |
-## phylo <- reorder.phylo(phylo, "cladewise") |
|
207 |
-## return(phylo) |
|
208 |
-## } |
|
209 |
- |
|
210 |
-## ##' @importFrom ape reorder.phylo |
|
211 |
-## read.phylo_paml_rst <- function(rstfile) { |
|
212 |
-## parent <- node <- label <- NULL |
|
213 |
- |
|
214 |
-## ## works fine with baseml and codeml |
|
215 |
-## rst <- readLines(rstfile) |
|
216 |
-## tr.idx <- get_tree_index_paml(rst) |
|
217 |
- |
|
218 |
-## tr1 <- read.tree(text=rst[tr.idx][1]) |
|
219 |
-## tr3 <- read.tree(text=rst[tr.idx][3]) |
|
220 |
- |
|
221 |
-## edge <- get_tree_edge_paml(rst) |
|
222 |
- |
|
223 |
-## label=c(tr3$tip.label, tr3$node.label) |
|
224 |
-## root <- getRoot(tr3) |
|
225 |
-## label %<>% `[`(. != root) |
|
226 |
- |
|
227 |
-## node.length <- data.frame(label=label, |
|
228 |
-## length=tr1$edge.length) |
|
229 |
- |
|
230 |
-## ## node.length$node <- sub("_\\w+", "", node.length$label |
|
231 |
-## node.length$node <- gsub("^(\\d+)_.*", "\\1", node.length$label) |
|
232 |
-## node.length$label %<>% sub("\\d+_", "", .) |
|
233 |
- |
|
234 |
-## edge <- as.data.frame(edge) |
|
235 |
-## colnames(edge) <- c("parent", "node") |
|
236 |
- |
|
237 |
-## treeinfo <- merge(edge, node.length, by.x="node", by.y="node") |
|
238 |
-## edge2 <- treeinfo[, c("parent", "node")] |
|
239 |
-## edge2 %<>% as.matrix |
|
240 |
- |
|
241 |
-## ntip <- Ntip(tr3) |
|
242 |
- |
|
243 |
-## phylo <- with(treeinfo, |
|
244 |
-## list(edge= cbind(as.numeric(parent), |
|
245 |
-## as.numeric(node)), |
|
246 |
-## edge.length = length, |
|
247 |
-## tip.label = label[order(node)][1:ntip], |
|
248 |
-## Nnode = tr3$Nnode, |
|
249 |
-## node.label = c(root, label[order(node)][-c(1:ntip)]) |
|
250 |
-## ) |
|
251 |
-## ) |
|
252 |
- |
|
253 |
-## class(phylo) <- "phylo" |
|
254 |
-## phylo <- reorder.phylo(phylo, "cladewise") |
|
255 |
- |
|
256 |
-## return(phylo) |
|
257 |
-## } |
|
258 |
- |
|
259 |
-## read.ancseq_paml_rst <- function(rstfile, by="Marginal") { |
|
260 |
-## ## works fine with baseml and codeml |
|
261 |
-## rst <- readLines(rstfile) |
|
262 |
- |
|
263 |
-## by <- match.arg(by, c("Marginal", "Joint")) |
|
264 |
-## query <- paste(by, "reconstruction of ancestral sequences") |
|
265 |
-## idx <- grep(query, rst) |
|
266 |
-## if(length(idx) == 0) { |
|
267 |
-## ## in some paml setting, joint_ancseq are not available. |
|
268 |
-## return("") |
|
269 |
-## } |
|
270 |
-## si <- grep("reconstructed sequences", rst) |
|
271 |
-## idx <- si[which.min(abs(si-idx))] |
|
272 |
- |
|
273 |
-## nl <- strsplit(rst[idx+2], split=" ") %>% unlist %<>% `[`(nzchar(.)) |
|
274 |
-## N <- as.numeric(nl[1]) |
|
275 |
-## seq.leng <- as.numeric(nl[2]) |
|
276 |
- |
|
277 |
-## seqs <- rst[(idx+4):(idx+3+N)] |
|
278 |
- |
|
279 |
-## seq.name <- character(N) |
|
280 |
-## res <- character(N) |
|
281 |
-## for (i in 1:N) { |
|
282 |
-## ss <- gsub(" ", "", seqs[i]) |
|
283 |
-## nn <- base::nchar(ss) |
|
284 |
-## res[i] <- substring(ss, nn-seq.leng+1,nn) |
|
285 |
-## seq.name[i] <- substring(ss, 1, nn-seq.leng) |
|
286 |
-## } |
|
287 |
-## seq.name <- sub("\\w+#", "", seq.name) |
|
288 |
-## names(res) <- seq.name |
|
289 |
- |
|
290 |
-## return(res) |
|
291 |
-## } |
|
292 |
- |
|
293 |
- |
|
294 |
-## get_tree_index_paml <- function(paml) { |
|
295 |
-## grep("\\)[ \\.0-9]*;", paml) |
|
296 |
-## } |
|
297 |
- |
|
298 |
-## get_tree_edge_index_paml <- function(paml) { |
|
299 |
-## grep("\\d+\\.\\.\\d+", paml) |
|
300 |
-## } |
|
301 |
- |
|
302 |
-## get_tree_edge_paml <- function(paml) { |
|
303 |
-## tr.idx <- get_tree_index_paml(paml) |
|
304 |
- |
|
305 |
-## edge.idx <- get_tree_edge_index_paml(paml) |
|
306 |
-## edge.idx <- edge.idx[edge.idx < tr.idx[3]] |
|
307 |
- |
|
308 |
-## nodeNum <- strsplit(paml[edge.idx], split="\\.\\.") %>% |
|
309 |
-## unlist %>% strsplit(split="[[:space:]]") %>% unlist |
|
310 |
- |
|
311 |
-## nodeNum %<>% `[`(nzchar(.)) |
|
312 |
- |
|
313 |
-## edge <- matrix(as.numeric(nodeNum), ncol=2, byrow = TRUE) |
|
314 |
- |
|
315 |
-## return(edge) |
|
316 |
-## } |
|
317 |
- |
|
318 |
-## set.paml_rst_ <- function(object) { |
|
319 |
-## if (!is(object, "paml_rst")) { |
|
320 |
-## stop("object should be an instance of 'paml_rst'") |
|
321 |
-## } |
|
322 |
-## if (length(object@tip_seq) == 0) { |
|
323 |
-## return(object) |
|
324 |
-## } |
|
325 |
- |
|
326 |
-## types <- get.fields(object) |
|
327 |
-## for (type in types) { |
|
328 |
-## value <- subs_paml_rst(object, type) |
|
329 |
-## if (all(is.na(value))) |
|
330 |
-## next |
|
331 |
- |
|
332 |
-## if (type == "marginal_subs") { |
|
333 |
-## object@marginal_subs <- value |
|
334 |
-## } else if (type == "marginal_AA_subs") { |
|
335 |
-## object@marginal_AA_subs <- value |
|
336 |
-## } else if (type == "joint_subs") { |
|
337 |
-## object@joint_subs <- value |
|
338 |
-## } else if (type == "joint_AA_subs") { |
|
339 |
-## object@joint_AA_subs <- value |
|
340 |
-## } |
|
341 |
-## } |
|
342 |
-## return(object) |
|
343 |
-## } |
|
344 |
- |
|
345 |
- |
|
346 |
-## get.subs_paml_rst <- function(object, type) { |
|
347 |
-## if (!is(object, "paml_rst")) { |
|
348 |
-## stop("object should be an instance of 'paml_rst'") |
|
349 |
-## } |
|
350 |
-## if (type == "marginal_subs") { |
|
351 |
-## res <- object@marginal_subs |
|
352 |
-## } else if (type == "marginal_AA_subs") { |
|
353 |
-## res <- object@marginal_AA_subs |
|
354 |
-## } else if (type == "joint_subs") { |
|
355 |
-## res <- object@joint_subs |
|
356 |
-## } else if (type == "joint_AA_subs") { |
|
357 |
-## res <- object@joint_AA_subs |
|
358 |
-## } else { |
|
359 |
-## stop("type should be one of 'marginal_subs', |
|
360 |
-## 'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ") |
|
361 |
-## } |
|
362 |
-## return(res) |
|
363 |
-## } |
|
364 |
- |
|
365 |
-## subs_paml_rst <- function(x, type, ...) { |
|
366 |
-## if (class(x) != "paml_rst") { |
|
367 |
-## stop("x should be an object of paml_rst...") |
|
368 |
-## } |
|
369 |
-## seqs <- x@tip_seq |
|
370 |
-## if (length(seqs) == 0) { |
|
371 |
-## stop("tip sequences is not available...") |
|
372 |
-## } |
|
373 |
-## if (type %in% c("marginal_subs", "marginal_AA_subs")) { |
|
374 |
-## ancseq <- x@marginal_ancseq |
|
375 |
-## ## seqs <- c(seqs, x@marginal_ancseq) |
|
376 |
-## } else if (type %in% c("joint_subs", "joint_AA_subs")){ |
|
377 |
-## ancseq <- x@joint_ancseq |
|
378 |
-## ## seqs <- c(seqs, x@joint_ancseq) |
|
379 |
-## } else { |
|
380 |
-## stop("type should be one of 'marginal_subs', |
|
381 |
-## 'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ") |
|
382 |
-## } |
|
383 |
-## if( type %in% c("marginal_subs", "joint_subs")) { |
|
384 |
-## translate <- FALSE |
|
385 |
-## } else { |
|
386 |
-## translate <- TRUE |
|
387 |
-## } |
|
388 |
- |
|
389 |
-## if (all(ancseq == "")) { |
|
390 |
-## return(NA) |
|
391 |
-## } |
|
392 |
-## seqs <- c(seqs, ancseq) |
|
393 |
-## get.subs_(x@phylo, seqs, translate=translate, ...) |
|
394 |
-## } |
... | ... |
@@ -1,394 +1,394 @@ |
1 |
-##' site mask |
|
2 |
-##' |
|
3 |
-##' |
|
4 |
-##' @title mask |
|
5 |
-##' @param tree_object tree object |
|
6 |
-##' @param field selected field |
|
7 |
-##' @param site site |
|
8 |
-##' @param mask_site if TRUE, site will be masked. |
|
9 |
-##' if FALSE, selected site will not be masked, while other sites will be masked. |
|
10 |
-##' @return updated tree object |
|
11 |
-##' @export |
|
12 |
-##' @author Guangchuang Yu |
|
13 |
-mask <- function(tree_object, field, site, mask_site=FALSE) { |
|
14 |
- has_field <- has.field(tree_object, field) |
|
15 |
- if (has_field == FALSE) { |
|
16 |
- stop("'field' is not available in 'tree_object'...") |
|
17 |
- } |
|
18 |
- |
|
19 |
- has_slot <- attr(has_field, "has_slot") |
|
20 |
- is_codeml <- attr(has_field, "is_codeml") |
|
21 |
- |
|
22 |
- if (has_slot) { |
|
23 |
- if (is_codeml) { |
|
24 |
- field_info <- slot(tree_object@rst, field) |
|
25 |
- } else { |
|
26 |
- field_info <- slot(tree_object, field) |
|
27 |
- } |
|
28 |
- field_data <- field_info[,2] |
|
29 |
- } else { |
|
30 |
- field_data <- tree_object@extraInfo[, field] |
|
31 |
- } |
|
32 |
- |
|
33 |
- field_data <- sapply(field_data, gsub, pattern="\n", replacement="/") |
|
34 |
- |
|
35 |
- x <- field_data[field_data != ""] |
|
36 |
- x <- x[!is.na(x)] |
|
37 |
- pos <- strsplit(x, " / ") %>% unlist %>% |
|
38 |
- gsub("^[a-zA-Z]+", "", . ) %>% |
|
39 |
- gsub("[a-zA-Z]\\s*$", "", .) %>% |
|
40 |
- as.numeric |
|
41 |
- |
|
42 |
- if (mask_site == FALSE) { |
|
43 |
- pos2 <- 1:max(pos) |
|
44 |
- pos2 <- pos2[-site] |
|
45 |
- site <- pos2 |
|
46 |
- } |
|
47 |
- |
|
48 |
- site <- site[site %in% pos] |
|
49 |
- |
|
50 |
- for (i in seq_along(field_data)) { |
|
51 |
- if (is.na(field_data[i])) |
|
52 |
- next |
|
53 |
- for (j in seq_along(site)) { |
|
54 |
- pattern <- paste0("/*\\s*[a-zA-Z]", site[j], "[a-zA-Z]\\s*") |
|
55 |
- field_data[i] <- gsub(pattern, "", field_data[i]) |
|
56 |
- } |
|
57 |
- field_data[i] <- gsub("^/\\s", "", field_data[i]) %>% .add_new_line |
|
58 |
- } |
|
59 |
- |
|
60 |
- if (has_slot) { |
|
61 |
- field_info[,2] <- field_data |
|
62 |
- if (is_codeml) { |
|
63 |
- slot(tree_object@rst, field) <- field_info |
|
64 |
- } else { |
|
65 |
- slot(tree_object, field) <- field_info |
|
66 |
- } |
|
67 |
- } else { |
|
68 |
- tree_object@extraInfo[, field] <- field_data |
|
69 |
- } |
|
70 |
- tree_object |
|
71 |
-} |
|
72 |
- |
|
73 |
- |
|
74 |
-read.tip_seq_mlc <- function(mlcfile) { |
|
75 |
- info <- getPhyInfo(mlcfile) |
|
76 |
- mlc <- readLines(mlcfile) |
|
77 |
- ## remove blank lines |
|
78 |
- blk <- grep("^\\s*$", mlc) |
|
79 |
- if (length(blk) > 0) { |
|
80 |
- mlc <- mlc[-blk] |
|
81 |
- } |
|
82 |
- |
|
83 |
- seqs <- mlc[2:(info$num+1)] |
|
84 |
- seqs <- gsub("\\s+", "", seqs) |
|
85 |
- wd <- info$width |
|
86 |
- res <- sapply(seqs, function(x) substring(x, (base::nchar(x) - wd + 1), base::nchar(x))) |
|
87 |
- nn <- sapply(seqs, function(x) substring(x, 1, (base::nchar(x) - wd))) |
|
88 |
- names(res) <- nn |
|
89 |
- return(res) |
|
90 |
-} |
|
91 |
- |
|
92 |
-read.tip_seq_mlb <- read.tip_seq_mlc |
|
93 |
- |
|
94 |
-read.dnds_mlc <- function(mlcfile) { |
|
95 |
- mlc <- readLines(mlcfile) |
|
96 |
- i <- grep("dN & dS for each branch", mlc) |
|
97 |
- j <- grep("tree length for dN", mlc) |
|
98 |
- |
|
99 |
- mlc <- mlc[i:j] |
|
100 |
- hi <- grep("dN/dS", mlc) |
|
101 |
- cn <- strsplit(mlc[hi], " ") %>% unlist %>% `[`(nzchar(.)) |
|
102 |
- |
|
103 |
- ii <- grep("\\d+\\.\\.\\d+", mlc) |
|
104 |
- info <- mlc[ii] |
|
105 |
- info %<>% sub("^\\s+", "", .) |
|
106 |
- info %<>% sub("\\s+$", "", .) |
|
107 |
- res <- t(sapply(info, function(x) { |
|
108 |
- y <- unlist(strsplit(x, "\\s+")) |
|
109 |
- edge <- unlist(strsplit(y[1], "\\.\\.")) |
|
110 |
- yy <- c(edge, y[-1]) |
|
111 |
- as.numeric(yy) |
|
112 |
- })) |
|
113 |
- |
|
114 |
- row.names(res) <- NULL |
|
115 |
- colnames(res) <- c("parent", "node", cn[-1]) |
|
116 |
- colnames(res) <- gsub("\\*", "_x_", colnames(res)) |
|
117 |
- colnames(res) <- gsub("\\/", "_vs_", colnames(res)) |
|
118 |
- return(res) |
|
119 |
-} |
|
120 |
- |
|
121 |
-read.treetext_paml_mlc <- function(mlcfile) { |
|
122 |
- read.treetext_paml(mlcfile, "mlc") |
|
123 |
-} |
|
124 |
- |
|
125 |
-read.treetext_paml_rst<- function(rstfile) { |
|
126 |
- read.treetext_paml(rstfile, "rst") |
|
127 |
-} |
|
128 |
- |
|
129 |
-read.treetext_paml <- function(file, by) { |
|
130 |
- ## works fine with baseml and codeml |
|
131 |
- x <- readLines(file) |
|
132 |
- tr.idx <- get_tree_index_paml(x) |
|
133 |
- |
|
134 |
- if ( by == "rst" ) { |
|
135 |
- ii <- 1 |
|
136 |
- } else if (by == "mlc") { |
|
137 |
- ii <- 3 |
|
138 |
- } else { |
|
139 |
- stop("_by_ should be one of 'rst' or 'mlc'") |
|
140 |
- } |
|
141 |
- |
|
142 |
- return(x[tr.idx][ii]) |
|
143 |
-} |
|
144 |
- |
|
145 |
-read.phylo_paml_mlc <- function(mlcfile) { |
|
146 |
- parent <- node <- label <- NULL |
|
147 |
- |
|
148 |
- mlc <- readLines(mlcfile) |
|
149 |
- edge <- get_tree_edge_paml(mlc) |
|
150 |
- |
|
151 |
- tr.idx <- get_tree_index_paml(mlc) |
|
152 |
- tr2 <- read.tree(text=mlc[tr.idx[2]]) |
|
153 |
- tr3 <- read.tree(text=mlc[tr.idx[3]]) |
|
154 |
- |
|
155 |
- treeinfo <- as.data.frame(edge) |
|
156 |
- colnames(treeinfo) <- c("parent", "node") |
|
157 |
- treeinfo$length <- NA |
|
158 |
- treeinfo$label <- NA |
|
159 |
- treeinfo$isTip <- FALSE |
|
160 |
- ntip <- Ntip(tr3) |
|
161 |
- ii <- match(tr2$tip.label, treeinfo[, "node"]) |
|
162 |
- treeinfo[ii, "label"] <- tr3$tip.label |
|
163 |
- treeinfo[ii, "isTip"] <- TRUE |
|
164 |
- ## jj <- match(1:ntip, tr3$edge[,2]) |
|
165 |
- treeinfo[ii, "length"] <- tr2$edge.length[ii] ##tr3$edge.length[jj] |
|
166 |
- |
|
167 |
- root <- getRoot(tr3) ## always == (Ntip(tr3) + 1) |
|
168 |
- currentNode <- treeinfo$label[ii] |
|
169 |
- treeinfo.tr3 <- fortify(tr3) |
|
170 |
- |
|
171 |
- treeinfo$visited <- FALSE |
|
172 |
- while(any(treeinfo$visited == FALSE)) { |
|
173 |
- pNode <- c() |
|
174 |
- for( kk in currentNode ) { |
|
175 |
- i <- which(treeinfo$label == kk) |
|
176 |
- treeinfo[i, "visited"] <- TRUE |
|
177 |
- j <- which(treeinfo.tr3$label == kk) |
|
178 |
- ip <- treeinfo[i, "parent"] |
|
179 |
- if (ip != root) { |
|
180 |
- ii <- which(treeinfo[, "node"] == ip) |
|
181 |
- if (treeinfo$visited[ii] == FALSE) { |
|
182 |
- jp <- treeinfo.tr3[j, "parent"] |
|
183 |
- jj <- which(treeinfo.tr3[, "node"] == jp) |
|
184 |
- treeinfo[ii, "label"] <- as.character(ip) |
|
185 |
- treeinfo.tr3[jj, "label"] <- as.character(ip) |
|
186 |
- treeinfo[ii, "length"] <- treeinfo.tr3[jj, "branch.length"] |
|
187 |
- pNode <- c(pNode, ip) |
|
188 |
- } |
|
189 |
- treeinfo[ii, "visited"] <- TRUE |
|
190 |
- } |
|
191 |
- |
|
192 |
- } |
|
193 |
- currentNode <- unique(pNode) |
|
194 |
- } |
|
195 |
- |
|
196 |
- phylo <- with(treeinfo, |
|
197 |
- list(edge= cbind(as.numeric(parent), |
|
198 |
- as.numeric(node)), |
|
199 |
- edge.length = length, |
|
200 |
- tip.label = label[order(node)][1:ntip], |
|
201 |
- Nnode = tr3$Nnode, |
|
202 |
- node.label = c(root, label[order(node)][-c(1:ntip)]) |
|
203 |
- ) |
|
204 |
- ) |
|
205 |
- class(phylo) <- "phylo" |
|
206 |
- phylo <- reorder.phylo(phylo, "cladewise") |
|
207 |
- return(phylo) |
|
208 |
-} |
|
209 |
- |
|
210 |
-##' @importFrom ape reorder.phylo |
|
211 |
-read.phylo_paml_rst <- function(rstfile) { |
|
212 |
- parent <- node <- label <- NULL |
|
213 |
- |
|
214 |
- ## works fine with baseml and codeml |
|
215 |
- rst <- readLines(rstfile) |
|
216 |
- tr.idx <- get_tree_index_paml(rst) |
|
217 |
- |
|
218 |
- tr1 <- read.tree(text=rst[tr.idx][1]) |
|
219 |
- tr3 <- read.tree(text=rst[tr.idx][3]) |
|
220 |
- |
|
221 |
- edge <- get_tree_edge_paml(rst) |
|
222 |
- |
|
223 |
- label=c(tr3$tip.label, tr3$node.label) |
|
224 |
- root <- getRoot(tr3) |
|
225 |
- label %<>% `[`(. != root) |
|
226 |
- |
|
227 |
- node.length <- data.frame(label=label, |
|
228 |
- length=tr1$edge.length) |
|
229 |
- |
|
230 |
- ## node.length$node <- sub("_\\w+", "", node.length$label |
|
231 |
- node.length$node <- gsub("^(\\d+)_.*", "\\1", node.length$label) |
|
232 |
- node.length$label %<>% sub("\\d+_", "", .) |
|
233 |
- |
|
234 |
- edge <- as.data.frame(edge) |
|
235 |
- colnames(edge) <- c("parent", "node") |
|
236 |
- |
|
237 |
- treeinfo <- merge(edge, node.length, by.x="node", by.y="node") |
|
238 |
- edge2 <- treeinfo[, c("parent", "node")] |
|
239 |
- edge2 %<>% as.matrix |
|
240 |
- |
|
241 |
- ntip <- Ntip(tr3) |
|
242 |
- |
|
243 |
- phylo <- with(treeinfo, |
|
244 |
- list(edge= cbind(as.numeric(parent), |
|
245 |
- as.numeric(node)), |
|
246 |
- edge.length = length, |
|
247 |
- tip.label = label[order(node)][1:ntip], |
|
248 |
- Nnode = tr3$Nnode, |
|
249 |
- node.label = c(root, label[order(node)][-c(1:ntip)]) |
|
250 |
- ) |
|
251 |
- ) |
|
252 |
- |
|
253 |
- class(phylo) <- "phylo" |
|
254 |
- phylo <- reorder.phylo(phylo, "cladewise") |
|
255 |
- |
|
256 |
- return(phylo) |
|
257 |
-} |
|
258 |
- |
|
259 |
-read.ancseq_paml_rst <- function(rstfile, by="Marginal") { |
|
260 |
- ## works fine with baseml and codeml |
|
261 |
- rst <- readLines(rstfile) |
|
262 |
- |
|
263 |
- by <- match.arg(by, c("Marginal", "Joint")) |
|
264 |
- query <- paste(by, "reconstruction of ancestral sequences") |
|
265 |
- idx <- grep(query, rst) |
|
266 |
- if(length(idx) == 0) { |
|
267 |
- ## in some paml setting, joint_ancseq are not available. |
|
268 |
- return("") |
|
269 |
- } |
|
270 |
- si <- grep("reconstructed sequences", rst) |
|
271 |
- idx <- si[which.min(abs(si-idx))] |
|
272 |
- |
|
273 |
- nl <- strsplit(rst[idx+2], split=" ") %>% unlist %<>% `[`(nzchar(.)) |
|
274 |
- N <- as.numeric(nl[1]) |
|
275 |
- seq.leng <- as.numeric(nl[2]) |
|
276 |
- |
|
277 |
- seqs <- rst[(idx+4):(idx+3+N)] |
|
278 |
- |
|
279 |
- seq.name <- character(N) |
|
280 |
- res <- character(N) |
|
281 |
- for (i in 1:N) { |
|
282 |
- ss <- gsub(" ", "", seqs[i]) |
|
283 |
- nn <- base::nchar(ss) |
|
284 |
- res[i] <- substring(ss, nn-seq.leng+1,nn) |
|
285 |
- seq.name[i] <- substring(ss, 1, nn-seq.leng) |
|
286 |
- } |
|
287 |
- seq.name <- sub("\\w+#", "", seq.name) |
|
288 |
- names(res) <- seq.name |
|
289 |
- |
|
290 |
- return(res) |
|
291 |
-} |
|
292 |
- |
|
293 |
- |
|
294 |
-get_tree_index_paml <- function(paml) { |
|
295 |
- grep("\\)[ \\.0-9]*;", paml) |
|
296 |
-} |
|
297 |
- |
|
298 |
-get_tree_edge_index_paml <- function(paml) { |
|
299 |
- grep("\\d+\\.\\.\\d+", paml) |
|
300 |
-} |
|
301 |
- |
|
302 |
-get_tree_edge_paml <- function(paml) { |
|
303 |
- tr.idx <- get_tree_index_paml(paml) |
|
304 |
- |
|
305 |
- edge.idx <- get_tree_edge_index_paml(paml) |
|
306 |
- edge.idx <- edge.idx[edge.idx < tr.idx[3]] |
|
307 |
- |
|
308 |
- nodeNum <- strsplit(paml[edge.idx], split="\\.\\.") %>% |
|
309 |
- unlist %>% strsplit(split="[[:space:]]") %>% unlist |
|
310 |
- |
|
311 |
- nodeNum %<>% `[`(nzchar(.)) |
|
312 |
- |
|
313 |
- edge <- matrix(as.numeric(nodeNum), ncol=2, byrow = TRUE) |
|
314 |
- |
|
315 |
- return(edge) |
|
316 |
-} |
|
317 |
- |
|
318 |
-set.paml_rst_ <- function(object) { |
|
319 |
- if (!is(object, "paml_rst")) { |
|
320 |
- stop("object should be an instance of 'paml_rst'") |
|
321 |
- } |
|
322 |
- if (length(object@tip_seq) == 0) { |
|
323 |
- return(object) |
|
324 |
- } |
|
325 |
- |
|
326 |
- types <- get.fields(object) |
|
327 |
- for (type in types) { |
|
328 |
- value <- subs_paml_rst(object, type) |
|
329 |
- if (all(is.na(value))) |
|
330 |
- next |
|
331 |
- |
|
332 |
- if (type == "marginal_subs") { |
|
333 |
- object@marginal_subs <- value |
|
334 |
- } else if (type == "marginal_AA_subs") { |
|
335 |
- object@marginal_AA_subs <- value |
|
336 |
- } else if (type == "joint_subs") { |
|
337 |
- object@joint_subs <- value |
|
338 |
- } else if (type == "joint_AA_subs") { |
|
339 |
- object@joint_AA_subs <- value |
|
340 |
- } |
|
341 |
- } |
|
342 |
- return(object) |
|
343 |
-} |
|
344 |
- |
|
345 |
- |
|
346 |
-get.subs_paml_rst <- function(object, type) { |
|
347 |
- if (!is(object, "paml_rst")) { |
|
348 |
- stop("object should be an instance of 'paml_rst'") |
|
349 |
- } |
|
350 |
- if (type == "marginal_subs") { |
|
351 |
- res <- object@marginal_subs |
|
352 |
- } else if (type == "marginal_AA_subs") { |
|
353 |
- res <- object@marginal_AA_subs |
|
354 |
- } else if (type == "joint_subs") { |
|
355 |
- res <- object@joint_subs |
|
356 |
- } else if (type == "joint_AA_subs") { |
|
357 |
- res <- object@joint_AA_subs |
|
358 |
- } else { |
|
359 |
- stop("type should be one of 'marginal_subs', |
|
360 |
- 'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ") |
|
361 |
- } |
|
362 |
- return(res) |
|
363 |
-} |
|
364 |
- |
|
365 |
-subs_paml_rst <- function(x, type, ...) { |
|
366 |
- if (class(x) != "paml_rst") { |
|
367 |
- stop("x should be an object of paml_rst...") |
|
368 |
- } |
|
369 |
- seqs <- x@tip_seq |
|
370 |
- if (length(seqs) == 0) { |
|
371 |
- stop("tip sequences is not available...") |
|
372 |
- } |
|
373 |
- if (type %in% c("marginal_subs", "marginal_AA_subs")) { |
|
374 |
- ancseq <- x@marginal_ancseq |
|
375 |
- ## seqs <- c(seqs, x@marginal_ancseq) |
|
376 |
- } else if (type %in% c("joint_subs", "joint_AA_subs")){ |
|
377 |
- ancseq <- x@joint_ancseq |
|
378 |
- ## seqs <- c(seqs, x@joint_ancseq) |
|
379 |
- } else { |
|
380 |
- stop("type should be one of 'marginal_subs', |
|
381 |
- 'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ") |
|
382 |
- } |
|
383 |
- if( type %in% c("marginal_subs", "joint_subs")) { |
|
384 |
- translate <- FALSE |
|
385 |
- } else { |
|
386 |
- translate <- TRUE |
|
387 |
- } |
|
388 |
- |
|
389 |
- if (all(ancseq == "")) { |
|
390 |
- return(NA) |
|
391 |
- } |
|
392 |
- seqs <- c(seqs, ancseq) |
|
393 |
- get.subs_(x@phylo, seqs, translate=translate, ...) |
|
394 |
-} |
|
1 |
+## ##' site mask |
|
2 |
+## ##' |
|
3 |
+## ##' |
|
4 |
+## ##' @title mask |
|
5 |
+## ##' @param tree_object tree object |
|
6 |
+## ##' @param field selected field |
|
7 |
+## ##' @param site site |
|
8 |
+## ##' @param mask_site if TRUE, site will be masked. |
|
9 |
+## ##' if FALSE, selected site will not be masked, while other sites will be masked. |
|
10 |
+## ##' @return updated tree object |
|
11 |
+## ##' @export |
|
12 |
+## ##' @author Guangchuang Yu |
|
13 |
+## mask <- function(tree_object, field, site, mask_site=FALSE) { |
|
14 |
+## has_field <- has.field(tree_object, field) |
|
15 |
+## if (has_field == FALSE) { |
|
16 |
+## stop("'field' is not available in 'tree_object'...") |
|
17 |
+## } |
|
18 |
+ |
|
19 |
+## has_slot <- attr(has_field, "has_slot") |
|
20 |
+## is_codeml <- attr(has_field, "is_codeml") |
|
21 |
+ |
|
22 |
+## if (has_slot) { |
|
23 |
+## if (is_codeml) { |
|
24 |
+## field_info <- slot(tree_object@rst, field) |
|
25 |
+## } else { |
|
26 |
+## field_info <- slot(tree_object, field) |
|
27 |
+## } |
|
28 |
+## field_data <- field_info[,2] |
|
29 |
+## } else { |
|
30 |
+## field_data <- tree_object@extraInfo[, field] |
|
31 |
+## } |
|
32 |
+ |
|
33 |
+## field_data <- sapply(field_data, gsub, pattern="\n", replacement="/") |
|
34 |
+ |
|
35 |
+## x <- field_data[field_data != ""] |
|
36 |
+## x <- x[!is.na(x)] |
|
37 |
+## pos <- strsplit(x, " / ") %>% unlist %>% |
|
38 |
+## gsub("^[a-zA-Z]+", "", . ) %>% |
|
39 |
+## gsub("[a-zA-Z]\\s*$", "", .) %>% |
|
40 |
+## as.numeric |
|
41 |
+ |
|
42 |
+## if (mask_site == FALSE) { |
|
43 |
+## pos2 <- 1:max(pos) |
|
44 |
+## pos2 <- pos2[-site] |
|
45 |
+## site <- pos2 |
|
46 |
+## } |
|
47 |
+ |
|
48 |
+## site <- site[site %in% pos] |
|
49 |
+ |
|
50 |
+## for (i in seq_along(field_data)) { |
|
51 |
+## if (is.na(field_data[i])) |
|
52 |
+## next |
|
53 |
+## for (j in seq_along(site)) { |
|
54 |
+## pattern <- paste0("/*\\s*[a-zA-Z]", site[j], "[a-zA-Z]\\s*") |
|
55 |
+## field_data[i] <- gsub(pattern, "", field_data[i]) |
|
56 |
+## } |
|
57 |
+## field_data[i] <- gsub("^/\\s", "", field_data[i]) %>% .add_new_line |
|
58 |
+## } |
|
59 |
+ |
|
60 |
+## if (has_slot) { |
|
61 |
+## field_info[,2] <- field_data |
|
62 |
+## if (is_codeml) { |
|
63 |
+## slot(tree_object@rst, field) <- field_info |
|
64 |
+## } else { |
|
65 |
+## slot(tree_object, field) <- field_info |
|
66 |
+## } |
|
67 |
+## } else { |
|
68 |
+## tree_object@extraInfo[, field] <- field_data |
|
69 |
+## } |
|
70 |
+## tree_object |
|
71 |
+## } |
|
72 |
+ |
|
73 |
+ |
|
74 |
+## read.tip_seq_mlc <- function(mlcfile) { |
|
75 |
+## info <- getPhyInfo(mlcfile) |
|
76 |
+## mlc <- readLines(mlcfile) |
|
77 |
+## ## remove blank lines |
|
78 |
+## blk <- grep("^\\s*$", mlc) |
|
79 |
+## if (length(blk) > 0) { |
|
80 |
+## mlc <- mlc[-blk] |
|
81 |
+## } |
|
82 |
+ |
|
83 |
+## seqs <- mlc[2:(info$num+1)] |
|
84 |
+## seqs <- gsub("\\s+", "", seqs) |
|
85 |
+## wd <- info$width |
|
86 |
+## res <- sapply(seqs, function(x) substring(x, (base::nchar(x) - wd + 1), base::nchar(x))) |
|
87 |
+## nn <- sapply(seqs, function(x) substring(x, 1, (base::nchar(x) - wd))) |
|
88 |
+## names(res) <- nn |
|
89 |
+## return(res) |
|
90 |
+## } |
|
91 |
+ |
|
92 |
+## read.tip_seq_mlb <- read.tip_seq_mlc |
|
93 |
+ |
|
94 |
+## read.dnds_mlc <- function(mlcfile) { |
|
95 |
+## mlc <- readLines(mlcfile) |
|
96 |
+## i <- grep("dN & dS for each branch", mlc) |
|
97 |
+## j <- grep("tree length for dN", mlc) |
|
98 |
+ |
|
99 |
+## mlc <- mlc[i:j] |
|
100 |
+## hi <- grep("dN/dS", mlc) |
|
101 |
+## cn <- strsplit(mlc[hi], " ") %>% unlist %>% `[`(nzchar(.)) |
|
102 |
+ |
|
103 |
+## ii <- grep("\\d+\\.\\.\\d+", mlc) |
|
104 |
+## info <- mlc[ii] |
|
105 |
+## info %<>% sub("^\\s+", "", .) |
|
106 |
+## info %<>% sub("\\s+$", "", .) |
|
107 |
+## res <- t(sapply(info, function(x) { |
|
108 |