Browse code

clean up code

guangchuang yu authored on 14/12/2017 08:47:21
Showing 1 changed files
1 1
deleted file mode 100644
... ...
@@ -1,394 +0,0 @@
1
-## ##' site mask
2
-## ##'
3
-## ##'
4
-## ##' @title mask
5
-## ##' @param tree_object tree object
6
-## ##' @param field selected field
7
-## ##' @param site site
8
-## ##' @param mask_site if TRUE, site will be masked.
9
-## ##'                  if FALSE, selected site will not be masked, while other sites will be masked.
10
-## ##' @return updated tree object
11
-## ##' @export
12
-## ##' @author Guangchuang Yu
13
-## mask <- function(tree_object, field, site, mask_site=FALSE) {
14
-##     has_field <- has.field(tree_object, field)
15
-##     if (has_field == FALSE) {
16
-##         stop("'field' is not available in 'tree_object'...")
17
-##     }
18
-
19
-##     has_slot <- attr(has_field, "has_slot")
20
-##     is_codeml <- attr(has_field, "is_codeml")
21
-
22
-##     if (has_slot) {
23
-##         if (is_codeml) {
24
-##             field_info <- slot(tree_object@rst, field)
25
-##         } else {
26
-##             field_info <- slot(tree_object, field)
27
-##         }
28
-##         field_data <- field_info[,2]
29
-##     } else {
30
-##         field_data <- tree_object@extraInfo[, field]
31
-##     }
32
-
33
-##     field_data <- sapply(field_data, gsub, pattern="\n", replacement="/")
34
-
35
-##     x <- field_data[field_data != ""]
36
-##     x <- x[!is.na(x)]
37
-##     pos <- strsplit(x, " / ") %>% unlist %>%
38
-##         gsub("^[a-zA-Z]+", "", . ) %>%
39
-##         gsub("[a-zA-Z]\\s*$", "", .) %>%
40
-##         as.numeric
41
-
42
-##     if (mask_site == FALSE) {
43
-##         pos2 <- 1:max(pos)
44
-##         pos2 <- pos2[-site]
45
-##         site <- pos2
46
-##     }
47
-
48
-##     site <- site[site %in% pos]
49
-
50
-##     for (i in seq_along(field_data)) {
51
-##         if (is.na(field_data[i]))
52
-##             next
53
-##         for (j in seq_along(site)) {
54
-##             pattern <- paste0("/*\\s*[a-zA-Z]", site[j], "[a-zA-Z]\\s*")
55
-##             field_data[i] <- gsub(pattern, "",  field_data[i])
56
-##         }
57
-##         field_data[i] <- gsub("^/\\s", "", field_data[i]) %>% .add_new_line
58
-##     }
59
-
60
-##     if (has_slot) {
61
-##         field_info[,2] <- field_data
62
-##         if (is_codeml) {
63
-##             slot(tree_object@rst, field) <- field_info
64
-##         } else {
65
-##             slot(tree_object, field) <- field_info
66
-##         }
67
-##     } else {
68
-##         tree_object@extraInfo[, field] <- field_data
69
-##     }
70
-##     tree_object
71
-## }
72
-
73
-
74
-## read.tip_seq_mlc <- function(mlcfile) {
75
-##     info <- getPhyInfo(mlcfile)
76
-##     mlc <- readLines(mlcfile)
77
-##     ## remove blank lines
78
-##     blk <- grep("^\\s*$", mlc)
79
-##     if (length(blk) > 0) {
80
-##         mlc <- mlc[-blk]
81
-##     }
82
-
83
-##     seqs <- mlc[2:(info$num+1)]
84
-##     seqs <- gsub("\\s+", "", seqs)
85
-##     wd <- info$width
86
-##     res <- sapply(seqs, function(x) substring(x, (base::nchar(x) - wd + 1), base::nchar(x)))
87
-##     nn <- sapply(seqs, function(x) substring(x, 1, (base::nchar(x) - wd)))
88
-##     names(res) <- nn
89
-##     return(res)
90
-## }
91
-
92
-## read.tip_seq_mlb <- read.tip_seq_mlc
93
-
94
-## read.dnds_mlc <- function(mlcfile) {
95
-##     mlc <- readLines(mlcfile)
96
-##     i <- grep("dN & dS for each branch", mlc)
97
-##     j <- grep("tree length for dN", mlc)
98
-
99
-##     mlc <- mlc[i:j]
100
-##     hi <- grep("dN/dS", mlc)
101
-##     cn <- strsplit(mlc[hi], " ") %>% unlist %>% `[`(nzchar(.))
102
-
103
-##     ii <- grep("\\d+\\.\\.\\d+", mlc)
104
-##     info <- mlc[ii]
105
-##     info %<>% sub("^\\s+", "", .)
106
-##     info %<>% sub("\\s+$", "", .)
107
-##     res <- t(sapply(info, function(x) {
108
-##         y <- unlist(strsplit(x, "\\s+"))
109
-##         edge <- unlist(strsplit(y[1], "\\.\\."))
110
-##         yy <- c(edge, y[-1])
111
-##         as.numeric(yy)
112
-##     }))
113
-
114
-##     row.names(res) <- NULL
115
-##     colnames(res) <- c("parent", "node", cn[-1])
116
-##     colnames(res) <- gsub("\\*", "_x_", colnames(res))
117
-##     colnames(res) <- gsub("\\/", "_vs_", colnames(res))
118
-##     return(res)
119
-## }
120
-
121
-## read.treetext_paml_mlc <- function(mlcfile) {
122
-##     read.treetext_paml(mlcfile, "mlc")
123
-## }
124
-
125
-## read.treetext_paml_rst<- function(rstfile) {
126
-##     read.treetext_paml(rstfile, "rst")
127
-## }
128
-
129
-## read.treetext_paml <- function(file, by) {
130
-##     ## works fine with baseml and codeml
131
-##     x <- readLines(file)
132
-##     tr.idx <- get_tree_index_paml(x)
133
-
134
-##     if ( by == "rst" ) {
135
-##         ii <- 1
136
-##     } else if (by == "mlc") {
137
-##         ii <- 3
138
-##     } else {
139
-##         stop("_by_ should be one of 'rst' or 'mlc'")
140
-##     }
141
-
142
-##     return(x[tr.idx][ii])
143
-## }
144
-
145
-## read.phylo_paml_mlc <- function(mlcfile) {
146
-##     parent <- node <- label <- NULL
147
-
148
-##     mlc <- readLines(mlcfile)
149
-##     edge <- get_tree_edge_paml(mlc)
150
-
151
-##     tr.idx <- get_tree_index_paml(mlc)
152
-##     tr2 <- read.tree(text=mlc[tr.idx[2]])
153
-##     tr3 <- read.tree(text=mlc[tr.idx[3]])
154
-
155
-##     treeinfo <- as.data.frame(edge)
156
-##     colnames(treeinfo) <- c("parent", "node")
157
-##     treeinfo$length <- NA
158
-##     treeinfo$label <- NA
159
-##     treeinfo$isTip <- FALSE
160
-##     ntip <- Ntip(tr3)
161
-##     ii <- match(tr2$tip.label, treeinfo[, "node"])
162
-##     treeinfo[ii, "label"] <- tr3$tip.label
163
-##     treeinfo[ii, "isTip"] <- TRUE
164
-##     ## jj <- match(1:ntip, tr3$edge[,2])
165
-##     treeinfo[ii, "length"] <- tr2$edge.length[ii] ##tr3$edge.length[jj]
166
-
167
-##     root <- getRoot(tr3) ## always == (Ntip(tr3) + 1)
168
-##     currentNode <- treeinfo$label[ii]
169
-##     treeinfo.tr3 <- fortify(tr3)
170
-
171
-##     treeinfo$visited <- FALSE
172
-##     while(any(treeinfo$visited == FALSE)) {
173
-##         pNode <- c()
174
-##         for( kk in currentNode ) {
175
-##             i <- which(treeinfo$label == kk)
176
-##             treeinfo[i, "visited"] <- TRUE
177
-##             j <- which(treeinfo.tr3$label == kk)
178
-##             ip <- treeinfo[i, "parent"]
179
-##             if (ip != root) {
180
-##                 ii <- which(treeinfo[, "node"] == ip)
181
-##                 if (treeinfo$visited[ii] == FALSE) {
182
-##                     jp <- treeinfo.tr3[j, "parent"]
183
-##                     jj <- which(treeinfo.tr3[, "node"] == jp)
184
-##                     treeinfo[ii, "label"] <- as.character(ip)
185
-##                     treeinfo.tr3[jj, "label"] <- as.character(ip)
186
-##                     treeinfo[ii, "length"] <- treeinfo.tr3[jj, "branch.length"]
187
-##                     pNode <- c(pNode, ip)
188
-##                 }
189
-##                 treeinfo[ii, "visited"] <- TRUE
190
-##             }
191
-
192
-##         }
193
-##         currentNode <- unique(pNode)
194
-##     }
195
-
196
-##     phylo <- with(treeinfo,
197
-##                   list(edge= cbind(as.numeric(parent),
198
-##                            as.numeric(node)),
199
-##                        edge.length = length,
200
-##                        tip.label = label[order(node)][1:ntip],
201
-##                        Nnode = tr3$Nnode,
202
-##                        node.label = c(root, label[order(node)][-c(1:ntip)])
203
-##                        )
204
-##                   )
205
-##     class(phylo) <- "phylo"
206
-##     phylo <- reorder.phylo(phylo, "cladewise")
207
-##     return(phylo)
208
-## }
209
-
210
-## ##' @importFrom ape reorder.phylo
211
-## read.phylo_paml_rst <- function(rstfile) {
212
-##     parent <- node <- label <- NULL
213
-
214
-##     ## works fine with baseml and codeml
215
-##     rst <- readLines(rstfile)
216
-##     tr.idx <- get_tree_index_paml(rst)
217
-
218
-##     tr1 <- read.tree(text=rst[tr.idx][1])
219
-##     tr3 <- read.tree(text=rst[tr.idx][3])
220
-
221
-##     edge <- get_tree_edge_paml(rst)
222
-
223
-##     label=c(tr3$tip.label, tr3$node.label)
224
-##     root <- getRoot(tr3)
225
-##     label %<>% `[`(. != root)
226
-
227
-##     node.length <- data.frame(label=label,
228
-##                               length=tr1$edge.length)
229
-
230
-##     ## node.length$node <- sub("_\\w+", "", node.length$label
231
-##     node.length$node <- gsub("^(\\d+)_.*", "\\1", node.length$label)
232
-##     node.length$label %<>% sub("\\d+_", "", .)
233
-
234
-##     edge <- as.data.frame(edge)
235
-##     colnames(edge) <- c("parent", "node")
236
-
237
-##     treeinfo <- merge(edge, node.length, by.x="node", by.y="node")
238
-##     edge2 <- treeinfo[, c("parent", "node")]
239
-##     edge2 %<>% as.matrix
240
-
241
-##     ntip <- Ntip(tr3)
242
-
243
-##     phylo <- with(treeinfo,
244
-##                   list(edge= cbind(as.numeric(parent),
245
-##                       as.numeric(node)),
246
-##                        edge.length = length,
247
-##                        tip.label = label[order(node)][1:ntip],
248
-##                        Nnode = tr3$Nnode,
249
-##                        node.label = c(root, label[order(node)][-c(1:ntip)])
250
-##                        )
251
-##                   )
252
-
253
-##     class(phylo) <- "phylo"
254
-##     phylo <- reorder.phylo(phylo, "cladewise")
255
-
256
-##     return(phylo)
257
-## }
258
-
259
-## read.ancseq_paml_rst <- function(rstfile, by="Marginal") {
260
-##     ## works fine with baseml and codeml
261
-##     rst <- readLines(rstfile)
262
-
263
-##     by <- match.arg(by, c("Marginal", "Joint"))
264
-##     query <- paste(by, "reconstruction of ancestral sequences")
265
-##     idx <- grep(query, rst)
266
-##     if(length(idx) == 0) {
267
-##         ## in some paml setting, joint_ancseq are not available.
268
-##         return("")
269
-##     }
270
-##     si <- grep("reconstructed sequences", rst)
271
-##     idx <- si[which.min(abs(si-idx))]
272
-
273
-##     nl <- strsplit(rst[idx+2], split=" ") %>% unlist %<>% `[`(nzchar(.))
274
-##     N <- as.numeric(nl[1])
275
-##     seq.leng <- as.numeric(nl[2])
276
-
277
-##     seqs <- rst[(idx+4):(idx+3+N)]
278
-
279
-##     seq.name <- character(N)
280
-##     res <- character(N)
281
-##     for (i in 1:N) {
282
-##         ss <- gsub(" ", "", seqs[i])
283
-##         nn <- base::nchar(ss)
284
-##         res[i] <- substring(ss, nn-seq.leng+1,nn)
285
-##         seq.name[i] <- substring(ss, 1, nn-seq.leng)
286
-##     }
287
-##     seq.name <- sub("\\w+#", "", seq.name)
288
-##     names(res) <- seq.name
289
-
290
-##     return(res)
291
-## }
292
-
293
-
294
-## get_tree_index_paml <- function(paml) {
295
-##     grep("\\)[ \\.0-9]*;", paml)
296
-## }
297
-
298
-## get_tree_edge_index_paml <- function(paml) {
299
-##     grep("\\d+\\.\\.\\d+", paml)
300
-## }
301
-
302
-## get_tree_edge_paml <- function(paml) {
303
-##     tr.idx <- get_tree_index_paml(paml)
304
-
305
-##     edge.idx <- get_tree_edge_index_paml(paml)
306
-##     edge.idx <- edge.idx[edge.idx < tr.idx[3]]
307
-
308
-##     nodeNum <- strsplit(paml[edge.idx], split="\\.\\.") %>%
309
-##         unlist %>% strsplit(split="[[:space:]]") %>% unlist
310
-
311
-##     nodeNum %<>% `[`(nzchar(.))
312
-
313
-##     edge <- matrix(as.numeric(nodeNum), ncol=2, byrow = TRUE)
314
-
315
-##     return(edge)
316
-## }
317
-
318
-## set.paml_rst_ <- function(object) {
319
-##     if (!is(object, "paml_rst")) {
320
-##         stop("object should be an instance of 'paml_rst'")
321
-##     }
322
-##     if (length(object@tip_seq) == 0) {
323
-##         return(object)
324
-##     }
325
-
326
-##     types <- get.fields(object)
327
-##     for (type in types) {
328
-##         value <- subs_paml_rst(object, type)
329
-##         if (all(is.na(value)))
330
-##             next
331
-
332
-##         if (type == "marginal_subs") {
333
-##             object@marginal_subs <- value
334
-##         } else if (type == "marginal_AA_subs") {
335
-##             object@marginal_AA_subs <- value
336
-##         } else if (type == "joint_subs") {
337
-##             object@joint_subs <- value
338
-##         } else if (type == "joint_AA_subs") {
339
-##             object@joint_AA_subs <- value
340
-##         }
341
-##     }
342
-##     return(object)
343
-## }
344
-
345
-
346
-## get.subs_paml_rst <- function(object, type) {
347
-##     if (!is(object, "paml_rst")) {
348
-##         stop("object should be an instance of 'paml_rst'")
349
-##     }
350
-##     if (type == "marginal_subs") {
351
-##         res <- object@marginal_subs
352
-##     } else if (type == "marginal_AA_subs") {
353
-##         res <- object@marginal_AA_subs
354
-##     } else if (type == "joint_subs") {
355
-##         res <- object@joint_subs
356
-##     } else if (type == "joint_AA_subs") {
357
-##         res <- object@joint_AA_subs
358
-##     } else {
359
-##         stop("type should be one of 'marginal_subs',
360
-##                              'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ")
361
-##     }
362
-##     return(res)
363
-## }
364
-
365
-## subs_paml_rst <- function(x, type, ...) {
366
-##     if (class(x) != "paml_rst") {
367
-##         stop("x should be an object of paml_rst...")
368
-##     }
369
-##     seqs <- x@tip_seq
370
-##     if (length(seqs) == 0) {
371
-##         stop("tip sequences is not available...")
372
-##     }
373
-##     if (type %in% c("marginal_subs", "marginal_AA_subs")) {
374
-##         ancseq <- x@marginal_ancseq
375
-##         ## seqs <- c(seqs, x@marginal_ancseq)
376
-##     } else if (type %in% c("joint_subs", "joint_AA_subs")){
377
-##         ancseq <- x@joint_ancseq
378
-##         ## seqs <- c(seqs, x@joint_ancseq)
379
-##     } else {
380
-##         stop("type should be one of 'marginal_subs',
381
-##                              'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ")
382
-##     }
383
-##     if( type %in% c("marginal_subs", "joint_subs")) {
384
-##         translate <- FALSE
385
-##     } else {
386
-##         translate <- TRUE
387
-##     }
388
-
389
-##     if (all(ancseq == "")) {
390
-##         return(NA)
391
-##     }
392
-##     seqs <- c(seqs, ancseq)
393
-##     get.subs_(x@phylo, seqs, translate=translate, ...)
394
-## }
Browse code

move code to treeio

guangchuang yu authored on 21/12/2016 08:57:38
Showing 1 changed files
... ...
@@ -1,394 +1,394 @@
1
-##' site mask
2
-##'
3
-##'
4
-##' @title mask
5
-##' @param tree_object tree object
6
-##' @param field selected field
7
-##' @param site site
8
-##' @param mask_site if TRUE, site will be masked.
9
-##'                  if FALSE, selected site will not be masked, while other sites will be masked.
10
-##' @return updated tree object
11
-##' @export
12
-##' @author Guangchuang Yu
13
-mask <- function(tree_object, field, site, mask_site=FALSE) {
14
-    has_field <- has.field(tree_object, field)
15
-    if (has_field == FALSE) {
16
-        stop("'field' is not available in 'tree_object'...")
17
-    }
18
-
19
-    has_slot <- attr(has_field, "has_slot")
20
-    is_codeml <- attr(has_field, "is_codeml")
21
-
22
-    if (has_slot) {
23
-        if (is_codeml) {
24
-            field_info <- slot(tree_object@rst, field)
25
-        } else {
26
-            field_info <- slot(tree_object, field)
27
-        }
28
-        field_data <- field_info[,2]
29
-    } else {
30
-        field_data <- tree_object@extraInfo[, field]
31
-    }
32
-
33
-    field_data <- sapply(field_data, gsub, pattern="\n", replacement="/")
34
-
35
-    x <- field_data[field_data != ""]
36
-    x <- x[!is.na(x)]
37
-    pos <- strsplit(x, " / ") %>% unlist %>%
38
-        gsub("^[a-zA-Z]+", "", . ) %>%
39
-        gsub("[a-zA-Z]\\s*$", "", .) %>%
40
-        as.numeric
41
-
42
-    if (mask_site == FALSE) {
43
-        pos2 <- 1:max(pos)
44
-        pos2 <- pos2[-site]
45
-        site <- pos2
46
-    }
47
-
48
-    site <- site[site %in% pos]
49
-
50
-    for (i in seq_along(field_data)) {
51
-        if (is.na(field_data[i]))
52
-            next
53
-        for (j in seq_along(site)) {
54
-            pattern <- paste0("/*\\s*[a-zA-Z]", site[j], "[a-zA-Z]\\s*")
55
-            field_data[i] <- gsub(pattern, "",  field_data[i])
56
-        }
57
-        field_data[i] <- gsub("^/\\s", "", field_data[i]) %>% .add_new_line
58
-    }
59
-
60
-    if (has_slot) {
61
-        field_info[,2] <- field_data
62
-        if (is_codeml) {
63
-            slot(tree_object@rst, field) <- field_info
64
-        } else {
65
-            slot(tree_object, field) <- field_info
66
-        }
67
-    } else {
68
-        tree_object@extraInfo[, field] <- field_data
69
-    }
70
-    tree_object
71
-}
72
-
73
-
74
-read.tip_seq_mlc <- function(mlcfile) {
75
-    info <- getPhyInfo(mlcfile)
76
-    mlc <- readLines(mlcfile)
77
-    ## remove blank lines
78
-    blk <- grep("^\\s*$", mlc)
79
-    if (length(blk) > 0) {
80
-        mlc <- mlc[-blk]
81
-    }
82
-
83
-    seqs <- mlc[2:(info$num+1)]
84
-    seqs <- gsub("\\s+", "", seqs)
85
-    wd <- info$width
86
-    res <- sapply(seqs, function(x) substring(x, (base::nchar(x) - wd + 1), base::nchar(x)))
87
-    nn <- sapply(seqs, function(x) substring(x, 1, (base::nchar(x) - wd)))
88
-    names(res) <- nn
89
-    return(res)
90
-}
91
-
92
-read.tip_seq_mlb <- read.tip_seq_mlc
93
-
94
-read.dnds_mlc <- function(mlcfile) {
95
-    mlc <- readLines(mlcfile)
96
-    i <- grep("dN & dS for each branch", mlc)
97
-    j <- grep("tree length for dN", mlc)
98
-
99
-    mlc <- mlc[i:j]
100
-    hi <- grep("dN/dS", mlc)
101
-    cn <- strsplit(mlc[hi], " ") %>% unlist %>% `[`(nzchar(.))
102
-
103
-    ii <- grep("\\d+\\.\\.\\d+", mlc)
104
-    info <- mlc[ii]
105
-    info %<>% sub("^\\s+", "", .)
106
-    info %<>% sub("\\s+$", "", .)
107
-    res <- t(sapply(info, function(x) {
108
-        y <- unlist(strsplit(x, "\\s+"))
109
-        edge <- unlist(strsplit(y[1], "\\.\\."))
110
-        yy <- c(edge, y[-1])
111
-        as.numeric(yy)
112
-    }))
113
-
114
-    row.names(res) <- NULL
115
-    colnames(res) <- c("parent", "node", cn[-1])
116
-    colnames(res) <- gsub("\\*", "_x_", colnames(res))
117
-    colnames(res) <- gsub("\\/", "_vs_", colnames(res))
118
-    return(res)
119
-}
120
-
121
-read.treetext_paml_mlc <- function(mlcfile) {
122
-    read.treetext_paml(mlcfile, "mlc")
123
-}
124
-
125
-read.treetext_paml_rst<- function(rstfile) {
126
-    read.treetext_paml(rstfile, "rst")
127
-}
128
-
129
-read.treetext_paml <- function(file, by) {
130
-    ## works fine with baseml and codeml
131
-    x <- readLines(file)
132
-    tr.idx <- get_tree_index_paml(x)
133
-
134
-    if ( by == "rst" ) {
135
-        ii <- 1
136
-    } else if (by == "mlc") {
137
-        ii <- 3
138
-    } else {
139
-        stop("_by_ should be one of 'rst' or 'mlc'")
140
-    }
141
-
142
-    return(x[tr.idx][ii])
143
-}
144
-
145
-read.phylo_paml_mlc <- function(mlcfile) {
146
-    parent <- node <- label <- NULL
147
-
148
-    mlc <- readLines(mlcfile)
149
-    edge <- get_tree_edge_paml(mlc)
150
-
151
-    tr.idx <- get_tree_index_paml(mlc)
152
-    tr2 <- read.tree(text=mlc[tr.idx[2]])
153
-    tr3 <- read.tree(text=mlc[tr.idx[3]])
154
-
155
-    treeinfo <- as.data.frame(edge)
156
-    colnames(treeinfo) <- c("parent", "node")
157
-    treeinfo$length <- NA
158
-    treeinfo$label <- NA
159
-    treeinfo$isTip <- FALSE
160
-    ntip <- Ntip(tr3)
161
-    ii <- match(tr2$tip.label, treeinfo[, "node"])
162
-    treeinfo[ii, "label"] <- tr3$tip.label
163
-    treeinfo[ii, "isTip"] <- TRUE
164
-    ## jj <- match(1:ntip, tr3$edge[,2])
165
-    treeinfo[ii, "length"] <- tr2$edge.length[ii] ##tr3$edge.length[jj]
166
-
167
-    root <- getRoot(tr3) ## always == (Ntip(tr3) + 1)
168
-    currentNode <- treeinfo$label[ii]
169
-    treeinfo.tr3 <- fortify(tr3)
170
-
171
-    treeinfo$visited <- FALSE
172
-    while(any(treeinfo$visited == FALSE)) {
173
-        pNode <- c()
174
-        for( kk in currentNode ) {
175
-            i <- which(treeinfo$label == kk)
176
-            treeinfo[i, "visited"] <- TRUE
177
-            j <- which(treeinfo.tr3$label == kk)
178
-            ip <- treeinfo[i, "parent"]
179
-            if (ip != root) {
180
-                ii <- which(treeinfo[, "node"] == ip)
181
-                if (treeinfo$visited[ii] == FALSE) {
182
-                    jp <- treeinfo.tr3[j, "parent"]
183
-                    jj <- which(treeinfo.tr3[, "node"] == jp)
184
-                    treeinfo[ii, "label"] <- as.character(ip)
185
-                    treeinfo.tr3[jj, "label"] <- as.character(ip)
186
-                    treeinfo[ii, "length"] <- treeinfo.tr3[jj, "branch.length"]
187
-                    pNode <- c(pNode, ip)
188
-                }
189
-                treeinfo[ii, "visited"] <- TRUE
190
-            }
191
-
192
-        }
193
-        currentNode <- unique(pNode)
194
-    }
195
-
196
-    phylo <- with(treeinfo,
197
-                  list(edge= cbind(as.numeric(parent),
198
-                           as.numeric(node)),
199
-                       edge.length = length,
200
-                       tip.label = label[order(node)][1:ntip],
201
-                       Nnode = tr3$Nnode,
202
-                       node.label = c(root, label[order(node)][-c(1:ntip)])
203
-                       )
204
-                  )
205
-    class(phylo) <- "phylo"
206
-    phylo <- reorder.phylo(phylo, "cladewise")
207
-    return(phylo)
208
-}
209
-
210
-##' @importFrom ape reorder.phylo
211
-read.phylo_paml_rst <- function(rstfile) {
212
-    parent <- node <- label <- NULL
213
-
214
-    ## works fine with baseml and codeml
215
-    rst <- readLines(rstfile)
216
-    tr.idx <- get_tree_index_paml(rst)
217
-
218
-    tr1 <- read.tree(text=rst[tr.idx][1])
219
-    tr3 <- read.tree(text=rst[tr.idx][3])
220
-
221
-    edge <- get_tree_edge_paml(rst)
222
-
223
-    label=c(tr3$tip.label, tr3$node.label)
224
-    root <- getRoot(tr3)
225
-    label %<>% `[`(. != root)
226
-
227
-    node.length <- data.frame(label=label,
228
-                              length=tr1$edge.length)
229
-
230
-    ## node.length$node <- sub("_\\w+", "", node.length$label
231
-    node.length$node <- gsub("^(\\d+)_.*", "\\1", node.length$label)
232
-    node.length$label %<>% sub("\\d+_", "", .)
233
-
234
-    edge <- as.data.frame(edge)
235
-    colnames(edge) <- c("parent", "node")
236
-
237
-    treeinfo <- merge(edge, node.length, by.x="node", by.y="node")
238
-    edge2 <- treeinfo[, c("parent", "node")]
239
-    edge2 %<>% as.matrix
240
-
241
-    ntip <- Ntip(tr3)
242
-
243
-    phylo <- with(treeinfo,
244
-                  list(edge= cbind(as.numeric(parent),
245
-                      as.numeric(node)),
246
-                       edge.length = length,
247
-                       tip.label = label[order(node)][1:ntip],
248
-                       Nnode = tr3$Nnode,
249
-                       node.label = c(root, label[order(node)][-c(1:ntip)])
250
-                       )
251
-                  )
252
-
253
-    class(phylo) <- "phylo"
254
-    phylo <- reorder.phylo(phylo, "cladewise")
255
-
256
-    return(phylo)
257
-}
258
-
259
-read.ancseq_paml_rst <- function(rstfile, by="Marginal") {
260
-    ## works fine with baseml and codeml
261
-    rst <- readLines(rstfile)
262
-
263
-    by <- match.arg(by, c("Marginal", "Joint"))
264
-    query <- paste(by, "reconstruction of ancestral sequences")
265
-    idx <- grep(query, rst)
266
-    if(length(idx) == 0) {
267
-        ## in some paml setting, joint_ancseq are not available.
268
-        return("")
269
-    }
270
-    si <- grep("reconstructed sequences", rst)
271
-    idx <- si[which.min(abs(si-idx))]
272
-
273
-    nl <- strsplit(rst[idx+2], split=" ") %>% unlist %<>% `[`(nzchar(.))
274
-    N <- as.numeric(nl[1])
275
-    seq.leng <- as.numeric(nl[2])
276
-
277
-    seqs <- rst[(idx+4):(idx+3+N)]
278
-
279
-    seq.name <- character(N)
280
-    res <- character(N)
281
-    for (i in 1:N) {
282
-        ss <- gsub(" ", "", seqs[i])
283
-        nn <- base::nchar(ss)
284
-        res[i] <- substring(ss, nn-seq.leng+1,nn)
285
-        seq.name[i] <- substring(ss, 1, nn-seq.leng)
286
-    }
287
-    seq.name <- sub("\\w+#", "", seq.name)
288
-    names(res) <- seq.name
289
-
290
-    return(res)
291
-}
292
-
293
-
294
-get_tree_index_paml <- function(paml) {
295
-    grep("\\)[ \\.0-9]*;", paml)
296
-}
297
-
298
-get_tree_edge_index_paml <- function(paml) {
299
-    grep("\\d+\\.\\.\\d+", paml)
300
-}
301
-
302
-get_tree_edge_paml <- function(paml) {
303
-    tr.idx <- get_tree_index_paml(paml)
304
-
305
-    edge.idx <- get_tree_edge_index_paml(paml)
306
-    edge.idx <- edge.idx[edge.idx < tr.idx[3]]
307
-
308
-    nodeNum <- strsplit(paml[edge.idx], split="\\.\\.") %>%
309
-        unlist %>% strsplit(split="[[:space:]]") %>% unlist
310
-
311
-    nodeNum %<>% `[`(nzchar(.))
312
-
313
-    edge <- matrix(as.numeric(nodeNum), ncol=2, byrow = TRUE)
314
-
315
-    return(edge)
316
-}
317
-
318
-set.paml_rst_ <- function(object) {
319
-    if (!is(object, "paml_rst")) {
320
-        stop("object should be an instance of 'paml_rst'")
321
-    }
322
-    if (length(object@tip_seq) == 0) {
323
-        return(object)
324
-    }
325
-
326
-    types <- get.fields(object)
327
-    for (type in types) {
328
-        value <- subs_paml_rst(object, type)
329
-        if (all(is.na(value)))
330
-            next
331
-
332
-        if (type == "marginal_subs") {
333
-            object@marginal_subs <- value
334
-        } else if (type == "marginal_AA_subs") {
335
-            object@marginal_AA_subs <- value
336
-        } else if (type == "joint_subs") {
337
-            object@joint_subs <- value
338
-        } else if (type == "joint_AA_subs") {
339
-            object@joint_AA_subs <- value
340
-        }
341
-    }
342
-    return(object)
343
-}
344
-
345
-
346
-get.subs_paml_rst <- function(object, type) {
347
-    if (!is(object, "paml_rst")) {
348
-        stop("object should be an instance of 'paml_rst'")
349
-    }
350
-    if (type == "marginal_subs") {
351
-        res <- object@marginal_subs
352
-    } else if (type == "marginal_AA_subs") {
353
-        res <- object@marginal_AA_subs
354
-    } else if (type == "joint_subs") {
355
-        res <- object@joint_subs
356
-    } else if (type == "joint_AA_subs") {
357
-        res <- object@joint_AA_subs
358
-    } else {
359
-        stop("type should be one of 'marginal_subs',
360
-                             'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ")
361
-    }
362
-    return(res)
363
-}
364
-
365
-subs_paml_rst <- function(x, type, ...) {
366
-    if (class(x) != "paml_rst") {
367
-        stop("x should be an object of paml_rst...")
368
-    }
369
-    seqs <- x@tip_seq
370
-    if (length(seqs) == 0) {
371
-        stop("tip sequences is not available...")
372
-    }
373
-    if (type %in% c("marginal_subs", "marginal_AA_subs")) {
374
-        ancseq <- x@marginal_ancseq
375
-        ## seqs <- c(seqs, x@marginal_ancseq)
376
-    } else if (type %in% c("joint_subs", "joint_AA_subs")){
377
-        ancseq <- x@joint_ancseq
378
-        ## seqs <- c(seqs, x@joint_ancseq)
379
-    } else {
380
-        stop("type should be one of 'marginal_subs',
381
-                             'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ")
382
-    }
383
-    if( type %in% c("marginal_subs", "joint_subs")) {
384
-        translate <- FALSE
385
-    } else {
386
-        translate <- TRUE
387
-    }
388
-
389
-    if (all(ancseq == "")) {
390
-        return(NA)
391
-    }
392
-    seqs <- c(seqs, ancseq)
393
-    get.subs_(x@phylo, seqs, translate=translate, ...)
394
-}
1
+## ##' site mask
2
+## ##'
3
+## ##'
4
+## ##' @title mask
5
+## ##' @param tree_object tree object
6
+## ##' @param field selected field
7
+## ##' @param site site
8
+## ##' @param mask_site if TRUE, site will be masked.
9
+## ##'                  if FALSE, selected site will not be masked, while other sites will be masked.
10
+## ##' @return updated tree object
11
+## ##' @export
12
+## ##' @author Guangchuang Yu
13
+## mask <- function(tree_object, field, site, mask_site=FALSE) {
14
+##     has_field <- has.field(tree_object, field)
15
+##     if (has_field == FALSE) {
16
+##         stop("'field' is not available in 'tree_object'...")
17
+##     }
18
+
19
+##     has_slot <- attr(has_field, "has_slot")
20
+##     is_codeml <- attr(has_field, "is_codeml")
21
+
22
+##     if (has_slot) {
23
+##         if (is_codeml) {
24
+##             field_info <- slot(tree_object@rst, field)
25
+##         } else {
26
+##             field_info <- slot(tree_object, field)
27
+##         }
28
+##         field_data <- field_info[,2]
29
+##     } else {
30
+##         field_data <- tree_object@extraInfo[, field]
31
+##     }
32
+
33
+##     field_data <- sapply(field_data, gsub, pattern="\n", replacement="/")
34
+
35
+##     x <- field_data[field_data != ""]
36
+##     x <- x[!is.na(x)]
37
+##     pos <- strsplit(x, " / ") %>% unlist %>%
38
+##         gsub("^[a-zA-Z]+", "", . ) %>%
39
+##         gsub("[a-zA-Z]\\s*$", "", .) %>%
40
+##         as.numeric
41
+
42
+##     if (mask_site == FALSE) {
43
+##         pos2 <- 1:max(pos)
44
+##         pos2 <- pos2[-site]
45
+##         site <- pos2
46
+##     }
47
+
48
+##     site <- site[site %in% pos]
49
+
50
+##     for (i in seq_along(field_data)) {
51
+##         if (is.na(field_data[i]))
52
+##             next
53
+##         for (j in seq_along(site)) {
54
+##             pattern <- paste0("/*\\s*[a-zA-Z]", site[j], "[a-zA-Z]\\s*")
55
+##             field_data[i] <- gsub(pattern, "",  field_data[i])
56
+##         }
57
+##         field_data[i] <- gsub("^/\\s", "", field_data[i]) %>% .add_new_line
58
+##     }
59
+
60
+##     if (has_slot) {
61
+##         field_info[,2] <- field_data
62
+##         if (is_codeml) {
63
+##             slot(tree_object@rst, field) <- field_info
64
+##         } else {
65
+##             slot(tree_object, field) <- field_info
66
+##         }
67
+##     } else {
68
+##         tree_object@extraInfo[, field] <- field_data
69
+##     }
70
+##     tree_object
71
+## }
72
+
73
+
74
+## read.tip_seq_mlc <- function(mlcfile) {
75
+##     info <- getPhyInfo(mlcfile)
76
+##     mlc <- readLines(mlcfile)
77
+##     ## remove blank lines
78
+##     blk <- grep("^\\s*$", mlc)
79
+##     if (length(blk) > 0) {
80
+##         mlc <- mlc[-blk]
81
+##     }
82
+
83
+##     seqs <- mlc[2:(info$num+1)]
84
+##     seqs <- gsub("\\s+", "", seqs)
85
+##     wd <- info$width
86
+##     res <- sapply(seqs, function(x) substring(x, (base::nchar(x) - wd + 1), base::nchar(x)))
87
+##     nn <- sapply(seqs, function(x) substring(x, 1, (base::nchar(x) - wd)))
88
+##     names(res) <- nn
89
+##     return(res)
90
+## }
91
+
92
+## read.tip_seq_mlb <- read.tip_seq_mlc
93
+
94
+## read.dnds_mlc <- function(mlcfile) {
95
+##     mlc <- readLines(mlcfile)
96
+##     i <- grep("dN & dS for each branch", mlc)
97
+##     j <- grep("tree length for dN", mlc)
98
+
99
+##     mlc <- mlc[i:j]
100
+##     hi <- grep("dN/dS", mlc)
101
+##     cn <- strsplit(mlc[hi], " ") %>% unlist %>% `[`(nzchar(.))
102
+
103
+##     ii <- grep("\\d+\\.\\.\\d+", mlc)
104
+##     info <- mlc[ii]
105
+##     info %<>% sub("^\\s+", "", .)
106
+##     info %<>% sub("\\s+$", "", .)
107
+##     res <- t(sapply(info, function(x) {
108