######################################################################### # File Name: AssemblyStats.R # Author: Ge Tan # mail: gtan@me.com # Created Time: Sun Nov 16 17:23:28 2014 ######################################################################### ### ----------------------------------------------------------------- ### N50 calculation for a assembly ### Exported! NXX <- function(filepath, XX=50){ if(grepl("\\.2bit$", filepath, ignore.case=TRUE)){ lengths <- seqlengths(TwoBitFile(filepath)) }else if(grepl("(\\.fa$|\\.fasta$)", filepath, ignore.case=TRUE)){ lengths <- fasta.info(filepath) }else{ stop("The suffix can only be .2bit, .fa, .fasta!") } lengths <- as.numeric(sort(lengths, decreasing=TRUE)) index <- which(cumsum(lengths) / sum(lengths) >= XX/100)[1] return(lengths[index]) } N50 <- function(filepath){ return(NXX(filepath, XX=50)) } N90 <- function(filepath){ return(NXX(filepath, XX=90)) }