R/MRtable.R
1f4053bb
 #' Table of top microbial marker gene from linear model fit including sequence
 #' information
 #' 
 #' Extract a table of the top-ranked features from a linear model fit. This
 #' function will be updated soon to provide better flexibility similar to
 #' limma's topTable. This function differs from \code{link{MRcoefs}} in that it
 #' provides other information about the presence or absence of features to help
 #' ensure significant features called are moderately present.
 #' 
 #' 
e2fe187b
 #' @param obj Output of fitFeatureModel or fitZig.
1f4053bb
 #' @param by Column number or column name specifying which coefficient or
 #' contrast of the linear model is of interest.
 #' @param coef Column number(s) or column name(s) specifying which coefficient
 #' or contrast of the linear model to display.
 #' @param number The number of bacterial features to pick out.
 #' @param taxa Taxa list.
 #' @param uniqueNames Number the various taxa.
e2fe187b
 #' @param adjustMethod Method to adjust p-values by. Default is "FDR". Options
1f4053bb
 #' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr",
 #' "none". See \code{\link{p.adjust}} for more details.
b6bea4a3
 #' @param group One of five choices, 0,1,2,3,4. 0: the sort is ordered by a
1f4053bb
 #' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw
 #' coefficient fit in decreasing order. 2: the sort is ordered by the raw
 #' coefficient fit in increasing order. 3: the sort is ordered by the p-value
efec27e9
 #' of the coefficient fit in increasing order. 4: no sorting.
b6bea4a3
 #' @param eff Filter features to have at least a "eff" quantile or number of effective samples.
efec27e9
 #' @param numberEff Boolean, whether eff should represent quantile (default/FALSE) or number.
e2fe187b
 #' @param ncounts Filter features to have at least 'counts' of counts.
efec27e9
 #' @param file Name of file, including location, to save the table.
1f4053bb
 #' @return Table of the top-ranked features determined by the linear fit's
 #' coefficient.
41613241
 #' @seealso \code{\link{fitZig}} \code{\link{fitFeatureModel}} \code{\link{MRcoefs}} \code{\link{MRfulltable}}
1f4053bb
 #' @examples
 #' 
 #' data(lungData)
 #' k = grep("Extraction.Control",pData(lungData)$SampleType)
 #' lungTrim = lungData[,-k]
41613241
 #' lungTrim=filterData(lungTrim,present=30)
 #' lungTrim=cumNorm(lungTrim,p=0.5)
1f4053bb
 #' smokingStatus = pData(lungTrim)$SmokingStatus
 #' mod = model.matrix(~smokingStatus)
41613241
 #' fit = fitZig(obj = lungTrim,mod=mod)
1f4053bb
 #' head(MRtable(fit))
e2fe187b
 #' ####
 #' fit = fitFeatureModel(obj = lungTrim,mod=mod)
 #' head(MRtable(fit))
41613241
 #'
e2fe187b
 MRtable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,
     uniqueNames=FALSE,adjustMethod="fdr",group=0,eff=0,numberEff=FALSE,ncounts=0,file=NULL){
 
     if(length(grep("fitFeatureModel",obj$call))){
         groups = factor(obj$design[,by])
         by = "logFC"; coef = 1:2;
         tb = data.frame(logFC=obj$fitZeroLogNormal$logFC,se=obj$fitZeroLogNormal$se)
         p  = obj$pvalues
     } else {
         tb = obj$fit$coefficients
         if(is.null(coef)){
             coef = 1:ncol(tb)
         }
         p=obj$eb$p.value[,by]
         groups = factor(obj$fit$design[,by])
         if(eff>0){
             effectiveSamples = calculateEffectiveSamples(obj)
             if(numberEff == FALSE){
                 valid = which(effectiveSamples>=quantile(effectiveSamples,p=eff,na.rm=TRUE))
             } else {
                 valid = which(effectiveSamples>=eff)
             }
         }        
     }
f7060c99
     
e2fe187b
     tx = as.character(taxa)
03d3c28b
     if(uniqueNames==TRUE){
82c621a7
         for (nm in unique(tx)) {
             ii=which(tx==nm)
e2fe187b
             tx[ii]=paste(tx[ii],seq_along(ii),sep=":")
82c621a7
         }
f7060c99
     }
e2fe187b
     padj = p.adjust(p,method=adjustMethod)
     cnts = obj$counts
     posIndices = cnts>0
f7060c99
     
e2fe187b
     np0 = rowSums(posIndices[,groups==0])
     np1 = rowSums(posIndices[,groups==1])
f7060c99
 
e2fe187b
     nc0 = rowSums(cnts[,groups==0])
     nc1 = rowSums(cnts[,groups==1])
f7060c99
 
     if(group==0){
679d9067
         srt = order(abs(tb[,by]),decreasing=TRUE)
f7060c99
     } else if(group==1){
679d9067
         srt = order((tb[,by]),decreasing=TRUE)
f7060c99
     } else if(group==2){
679d9067
         srt = order((tb[,by]),decreasing=FALSE)
82c621a7
     } else if(group==3){
679d9067
         srt = order(p,decreasing=FALSE)
efec27e9
     } else {
e2fe187b
         srt = 1:length(padj)
f7060c99
     }
 
e2fe187b
     valid = 1:length(padj)
     if(ncounts>0){
         np=rowSums(cbind(np0,np1))
         valid = intersect(valid,which(np>=ncounts))
efec27e9
     }
9a964a43
     srt = srt[which(srt%in%valid)][1:min(number,nrow(tb))]
679d9067
 
e2fe187b
     mat = cbind(np0,np1)
     mat = cbind(mat,nc0)
     mat = cbind(mat,nc1)
     mat = cbind(mat,tb[,coef])
     mat = cbind(mat,p)
     mat = cbind(mat,padj)
     rownames(mat) = tx
     mat = mat[srt,]
f7060c99
 
7284c919
     nm = c("+samples in group 0","+samples in group 1","counts in group 0",
e2fe187b
         "counts in group 1",colnames(tb)[coef],"pvalues","adjPvalues")
     colnames(mat) = nm
f7060c99
 
efec27e9
     if(!is.null(file)){
f7060c99
         nm = c("Taxa",nm)
         mat2 = cbind(rownames(mat),mat)
         mat2 = rbind(nm,mat2)
efec27e9
         write(t(mat2),ncolumns=ncol(mat2),file=file,sep="\t")
f7060c99
     }
ad9f1d3c
     invisible(as.data.frame(mat))
1f4053bb
 }