R/aggregateByTaxonomy.R
fdfd989f
 #' Aggregates a MRexperiment object or counts matrix to a particular level.
700b534d
 #' 
fdfd989f
 #' Using the featureData information in the MRexperiment, calling aggregateByTaxonomy on a
700b534d
 #' MRexperiment and a particular featureData column (i.e. 'genus') will aggregate counts
2d031e15
 #' to the desired level using the aggfun function (default colSums). Possible aggfun alternatives
700b534d
 #' include colMeans and colMedians.
 #' 
34792370
 #' @param obj A MRexperiment object or count matrix.
 #' @param lvl featureData column name from the MRexperiment object or if count matrix object a vector of labels.
2d031e15
 #' @param alternate Use the rowname for undefined OTUs instead of aggregating to "no_match".
700b534d
 #' @param norm Whether to aggregate normalized counts or not.
b327ef52
 #' @param log Whether or not to log2 transform the counts - if MRexperiment object.
700b534d
 #' @param aggfun Aggregation function.
a94b8b09
 #' @param sl scaling value, default is 1000.
34792370
 #' @param out Either 'MRexperiment' or 'matrix'
700b534d
 #' @return An aggregated count matrix.
42eba9b4
 #' @aliases aggTax
 #' @rdname aggregateByTaxonomy
700b534d
 #' @export
 #' @examples
 #' 
dd097c73
 #' data(mouseData)
 #' aggregateByTaxonomy(mouseData[1:100,],lvl="class",norm=TRUE,aggfun=colSums)
700b534d
 #' # not run
dd097c73
 #' # aggregateByTaxonomy(mouseData,lvl="class",norm=TRUE,aggfun=colMedians)
bb840b98
 #' # aggTax(mouseData,lvl='phylum',norm=FALSE,aggfun=colSums)
700b534d
 #' 
bb840b98
 aggregateByTaxonomy<-function(obj,lvl,alternate=FALSE,norm=FALSE,log=FALSE,aggfun = colSums,sl=1000,out="MRexperiment"){
e7c65158
 	if(class(obj)=="MRexperiment"){
a94b8b09
 		mat = MRcounts(obj,norm=norm,log=log,sl=sl)
f12f833b
 		if(length(lvl)==1) levels = as.character(fData(obj)[,lvl])
 		else levels = as.character(lvl)
34792370
 	} else {
 		mat = obj
 		levels = as.character(lvl)
 		if(length(levels)!=nrow(mat)) stop("If input is a count matrix, lvl must be a vector of length = nrow(count matrix)")
 	}
 	if(!(out%in%c("MRexperiment","matrix"))){
 		stop("The variable out must either be 'MRexperiment' or 'matrix'")
 	}
 	
 	nafeatures = is.na(levels)
 	if(length(nafeatures)>0){
 		if(alternate==FALSE){
 			levels[nafeatures] = "no_match"
 		} else {
 			levels[nafeatures] = paste("OTU_",rownames(obj)[nafeatures],sep="")
 		}
 	}
700b534d
 	grps = split(seq_along(levels),levels)
 	
 	newMat = array(NA,dim=c(length(grps),ncol(obj)))
49754a92
 	for(i in seq_along(grps)){
700b534d
 		newMat[i,] = aggfun(mat[grps[[i]],,drop=FALSE])
 	}
 	rownames(newMat) = names(grps)
34792370
 	colnames(newMat) = colnames(obj)
 	if(out=='matrix') return(newMat)
 	if(out=='MRexperiment'){
 		taxa = data.frame(names(grps))
 		colnames(taxa) = "Taxa"
 		rownames(taxa) = names(grps)
 		taxa = as(taxa,"AnnotatedDataFrame")
 		if(class(obj)=="MRexperiment"){
46a01528
 			pd = phenoData(obj)
34792370
 			newObj = newMRexperiment(newMat,featureData=taxa,phenoData=pd)
 		} else {
 			newObj = newMRexperiment(newMat,featureData=taxa)
 		}
 		return(newObj)
 	}
42eba9b4
 }
 #' @rdname aggregateByTaxonomy
 #' @export
bb840b98
 aggTax<-function(obj,lvl,alternate=FALSE,norm=FALSE,log=FALSE,aggfun = colSums,sl=1000,out='MRexperiment'){
34792370
 	aggregateByTaxonomy(obj,lvl,alternate=alternate,norm=norm,log=log,aggfun = aggfun,sl=sl,out=out)
e093bc69
 }