git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/metagenomeSeq@79408 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -4,40 +4,53 @@ Version: 1.2.1 |
4 | 4 |
Date: 2013-06-11 |
5 | 5 |
Author: Joseph Nathaniel Paulson, Mihai Pop, Hector Corrada Bravo |
6 | 6 |
Maintainer: Joseph N. Paulson <jpaulson@umiacs.umd.edu> |
7 |
-Description: metagenomeSeq is designed to determine features (be it Operational Taxanomic Unit (OTU), species, etc.) that are differentially abundant between two or more groups of multiple samples. metagenomeSeq is designed to address the effects of both normalization and under-sampling of microbial communities on disease association detection and the testing of feature correlations. |
|
7 |
+Description: metagenomeSeq is designed to determine features (be it Operational |
|
8 |
+ Taxanomic Unit (OTU), species, etc.) that are differentially abundant |
|
9 |
+ between two or more groups of multiple samples. metagenomeSeq is designed |
|
10 |
+ to address the effects of both normalization and under-sampling of |
|
11 |
+ microbial communities on disease association detection and the testing of |
|
12 |
+ feature correlations. |
|
8 | 13 |
License: Artistic-2.0 |
9 |
-Depends: R(>= 3.0), Biobase, limma, matrixStats, methods, |
|
10 |
- RColorBrewer, gplots |
|
11 |
-Suggests: annotate |
|
14 |
+Depends: |
|
15 |
+ R(>= 3.0), |
|
16 |
+ Biobase, |
|
17 |
+ limma, |
|
18 |
+ matrixStats, |
|
19 |
+ methods, |
|
20 |
+ RColorBrewer, |
|
21 |
+ gplots |
|
22 |
+Suggests: |
|
23 |
+ annotate |
|
12 | 24 |
biocViews: Bioinformatics, DifferentialExpression, Metagenomics, |
13 |
- Visualization |
|
14 |
-Collate: 'zigControl.R' |
|
15 |
- 'cumNorm.R' |
|
16 |
- 'plotOTU.R' |
|
17 |
- 'fitZig.R' |
|
18 |
- 'doCountMStep.R' |
|
19 |
- 'doZeroMStep.R' |
|
20 |
- 'doEStep.R' |
|
21 |
- 'getZ.R' |
|
22 |
- 'getPi.R' |
|
23 |
- 'getCountDensity.R' |
|
24 |
- 'getNegativeLogLikelihoods.R' |
|
25 |
- 'isItStillActive.R' |
|
26 |
- 'getEpsilon.R' |
|
27 |
- 'load_meta.R' |
|
28 |
- 'load_phenoData.R' |
|
29 |
- 'exportMat.R' |
|
30 |
- 'exportStats.R' |
|
31 |
- 'cumNormStat.R' |
|
32 |
- 'plotGenus.R' |
|
33 |
- 'aggregateM.R' |
|
34 |
- 'cumNormMat.R' |
|
35 |
- 'load_metaQ.R' |
|
36 |
- 'allClasses.R' |
|
37 |
- 'MRtable.R' |
|
38 |
- 'MRcoefs.R' |
|
39 |
- 'plotMRheatmap.R' |
|
40 |
- 'plotCorr.R' |
|
41 |
- 'MRfisher.R' |
|
42 |
- 'MRfulltable.R' |
|
25 |
+ Visualization |
|
26 |
+Collate: |
|
27 |
+ 'zigControl.R' |
|
28 |
+ 'cumNorm.R' |
|
29 |
+ 'plotOTU.R' |
|
30 |
+ 'fitZig.R' |
|
31 |
+ 'doCountMStep.R' |
|
32 |
+ 'doZeroMStep.R' |
|
33 |
+ 'doEStep.R' |
|
34 |
+ 'getZ.R' |
|
35 |
+ 'getPi.R' |
|
36 |
+ 'getCountDensity.R' |
|
37 |
+ 'getNegativeLogLikelihoods.R' |
|
38 |
+ 'isItStillActive.R' |
|
39 |
+ 'getEpsilon.R' |
|
40 |
+ 'load_meta.R' |
|
41 |
+ 'load_phenoData.R' |
|
42 |
+ 'exportMat.R' |
|
43 |
+ 'exportStats.R' |
|
44 |
+ 'cumNormStat.R' |
|
45 |
+ 'plotGenus.R' |
|
46 |
+ 'aggregateM.R' |
|
47 |
+ 'cumNormMat.R' |
|
48 |
+ 'load_metaQ.R' |
|
49 |
+ 'allClasses.R' |
|
50 |
+ 'MRtable.R' |
|
51 |
+ 'MRcoefs.R' |
|
52 |
+ 'plotMRheatmap.R' |
|
53 |
+ 'plotCorr.R' |
|
54 |
+ 'MRfisher.R' |
|
55 |
+ 'MRfulltable.R' |
|
43 | 56 |
URL: http://cbcb.umd.edu/software/metagenomeSeq |
... | ... |
@@ -1,3 +1,46 @@ |
1 |
+#' Table of top-ranked microbial marker gene from linear model fit |
|
2 |
+#' |
|
3 |
+#' Extract a table of the top-ranked features from a linear model fit. This |
|
4 |
+#' function will be updated soon to provide better flexibility similar to |
|
5 |
+#' limma's topTable. |
|
6 |
+#' |
|
7 |
+#' |
|
8 |
+#' @param obj A list containing the linear model fit produced by lmFit through |
|
9 |
+#' fitZig. |
|
10 |
+#' @param by Column number or column name specifying which coefficient or |
|
11 |
+#' contrast of the linear model is of interest. |
|
12 |
+#' @param coef Column number(s) or column name(s) specifying which coefficient |
|
13 |
+#' or contrast of the linear model to display. |
|
14 |
+#' @param number The number of bacterial features to pick out. |
|
15 |
+#' @param taxa Taxa list. |
|
16 |
+#' @param uniqueNames Number the various taxa. |
|
17 |
+#' @param adjust.method Method to adjust p-values by. Default is "FDR". Options |
|
18 |
+#' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", |
|
19 |
+#' "none". See \code{\link{p.adjust}} for more details. |
|
20 |
+#' @param group One of three choices, 0,1,2,3. 0: the sort is ordered by a |
|
21 |
+#' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw |
|
22 |
+#' coefficient fit in decreasing order. 2: the sort is ordered by the raw |
|
23 |
+#' coefficient fit in increasing order. 3: the sort is ordered by the p-value |
|
24 |
+#' of the coefficient fit in increasing order. |
|
25 |
+#' @param eff Restrict samples to have at least eff quantile effective samples. |
|
26 |
+#' @param output Name of output file, including location, to save the table. |
|
27 |
+#' @return Table of the top-ranked features determined by the linear fit's |
|
28 |
+#' coefficient. |
|
29 |
+#' @seealso \code{\link{fitZig}} \code{\link{MRtable}} |
|
30 |
+#' @examples |
|
31 |
+#' |
|
32 |
+#' data(lungData) |
|
33 |
+#' k = grep("Extraction.Control",pData(lungData)$SampleType) |
|
34 |
+#' lungTrim = lungData[,-k] |
|
35 |
+#' k = which(rowSums(MRcounts(lungTrim)>0)<10) |
|
36 |
+#' lungTrim = lungTrim[-k,] |
|
37 |
+#' cumNorm(lungTrim) |
|
38 |
+#' smokingStatus = pData(lungTrim)$SmokingStatus |
|
39 |
+#' mod = model.matrix(~smokingStatus) |
|
40 |
+#' settings = zigControl(maxit=1,verbose=FALSE) |
|
41 |
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings) |
|
42 |
+#' head(MRcoefs(fit)) |
|
43 |
+#' |
|
1 | 44 |
MRcoefs<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,eff=0,output=NULL){ |
2 | 45 |
tb = obj$fit$coefficients |
3 | 46 |
tx = as.character(taxa); |
... | ... |
@@ -43,4 +86,4 @@ MRcoefs<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,a |
43 | 86 |
} else{ |
44 | 87 |
return(as.data.frame(mat)) |
45 | 88 |
} |
46 |
-} |
|
47 | 89 |
\ No newline at end of file |
90 |
+} |
... | ... |
@@ -1,3 +1,25 @@ |
1 |
+#' Wrapper to run fisher's test on presence/absence of a feature. |
|
2 |
+#' |
|
3 |
+#' This function returns a data frame of p-values, odds ratios, lower and upper |
|
4 |
+#' confidence limits for every row of a matrix. |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @param obj A MRexperiment object with a count matrix, or a simple count |
|
8 |
+#' matrix. |
|
9 |
+#' @param cl Group comparison |
|
10 |
+#' @param mat logical indicating whether obj is a MRexperiment object or |
|
11 |
+#' matrix. Default is a MRexperiment object. |
|
12 |
+#' @return NA |
|
13 |
+#' @seealso \code{\link{cumNorm}} \code{\link{fitZig}} |
|
14 |
+#' @examples |
|
15 |
+#' |
|
16 |
+#' data(lungData) |
|
17 |
+#' k = grep("Extraction.Control",pData(lungData)$SampleType) |
|
18 |
+#' lungTrim = lungData[,-k] |
|
19 |
+#' lungTrim = lungTrim[-which(rowSums(MRcounts(lungTrim)>0)<20),] |
|
20 |
+#' res = MRfisher(lungTrim,pData(lungTrim)$SmokingStatus); |
|
21 |
+#' head(res) |
|
22 |
+#' |
|
1 | 23 |
MRfisher<-function(obj,cl,mat=FALSE){ |
2 | 24 |
if(mat==FALSE){ |
3 | 25 |
x = MRcounts(obj)>0; |
... | ... |
@@ -1,3 +1,50 @@ |
1 |
+#' Table of top microbial marker gene from linear model fit including sequence |
|
2 |
+#' information |
|
3 |
+#' |
|
4 |
+#' Extract a table of the top-ranked features from a linear model fit. This |
|
5 |
+#' function will be updated soon to provide better flexibility similar to |
|
6 |
+#' limma's topTable. This function differs from \code{link{MRcoefs}} in that it |
|
7 |
+#' provides other information about the presence or absence of features to help |
|
8 |
+#' ensure significant features called are moderately present. |
|
9 |
+#' |
|
10 |
+#' |
|
11 |
+#' @param obj A list containing the linear model fit produced by lmFit through |
|
12 |
+#' fitZig. |
|
13 |
+#' @param by Column number or column name specifying which coefficient or |
|
14 |
+#' contrast of the linear model is of interest. |
|
15 |
+#' @param coef Column number(s) or column name(s) specifying which coefficient |
|
16 |
+#' or contrast of the linear model to display. |
|
17 |
+#' @param number The number of bacterial features to pick out. |
|
18 |
+#' @param taxa Taxa list. |
|
19 |
+#' @param uniqueNames Number the various taxa. |
|
20 |
+#' @param adjust.method Method to adjust p-values by. Default is "FDR". Options |
|
21 |
+#' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", |
|
22 |
+#' "none". See \code{\link{p.adjust}} for more details. |
|
23 |
+#' @param group One of three choices, 0,1,2. 0: the sort is ordered by a |
|
24 |
+#' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw |
|
25 |
+#' coefficient fit in decreasing order. 2: the sort is ordered by the raw |
|
26 |
+#' coefficient fit in increasing order. 3: the sort is ordered by the p-value |
|
27 |
+#' of the coefficient fit in increasing order. |
|
28 |
+#' @param eff Restrict samples to have at least eff quantile effective samples. |
|
29 |
+#' @param output Name of output file, including location, to save the table. |
|
30 |
+#' @return Table of the top-ranked features determined by the linear fit's |
|
31 |
+#' coefficient. |
|
32 |
+#' @seealso \code{\link{fitZig}} \code{\link{MRcoefs}} \code{\link{MRtable}} |
|
33 |
+#' \code{\link{MRfisher}} |
|
34 |
+#' @examples |
|
35 |
+#' |
|
36 |
+#' data(lungData) |
|
37 |
+#' k = grep("Extraction.Control",pData(lungData)$SampleType) |
|
38 |
+#' lungTrim = lungData[,-k] |
|
39 |
+#' k = which(rowSums(MRcounts(lungTrim)>0)<10) |
|
40 |
+#' lungTrim = lungTrim[-k,] |
|
41 |
+#' cumNorm(lungTrim) |
|
42 |
+#' smokingStatus = pData(lungTrim)$SmokingStatus |
|
43 |
+#' mod = model.matrix(~smokingStatus) |
|
44 |
+#' settings = zigControl(maxit=1,verbose=FALSE) |
|
45 |
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings) |
|
46 |
+#' head(MRfulltable(fit)) |
|
47 |
+#' |
|
1 | 48 |
MRfulltable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,eff=0,output=NULL){ |
2 | 49 |
|
3 | 50 |
tb = obj$fit$coefficients |
... | ... |
@@ -62,4 +109,4 @@ MRfulltable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FAL |
62 | 109 |
} else{ |
63 | 110 |
return(as.data.frame(mat)) |
64 | 111 |
} |
65 |
-} |
|
66 | 112 |
\ No newline at end of file |
113 |
+} |
... | ... |
@@ -1,3 +1,48 @@ |
1 |
+#' Table of top microbial marker gene from linear model fit including sequence |
|
2 |
+#' information |
|
3 |
+#' |
|
4 |
+#' Extract a table of the top-ranked features from a linear model fit. This |
|
5 |
+#' function will be updated soon to provide better flexibility similar to |
|
6 |
+#' limma's topTable. This function differs from \code{link{MRcoefs}} in that it |
|
7 |
+#' provides other information about the presence or absence of features to help |
|
8 |
+#' ensure significant features called are moderately present. |
|
9 |
+#' |
|
10 |
+#' |
|
11 |
+#' @param obj A list containing the linear model fit produced by lmFit through |
|
12 |
+#' fitZig. |
|
13 |
+#' @param by Column number or column name specifying which coefficient or |
|
14 |
+#' contrast of the linear model is of interest. |
|
15 |
+#' @param coef Column number(s) or column name(s) specifying which coefficient |
|
16 |
+#' or contrast of the linear model to display. |
|
17 |
+#' @param number The number of bacterial features to pick out. |
|
18 |
+#' @param taxa Taxa list. |
|
19 |
+#' @param uniqueNames Number the various taxa. |
|
20 |
+#' @param adjust.method Method to adjust p-values by. Default is "FDR". Options |
|
21 |
+#' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", |
|
22 |
+#' "none". See \code{\link{p.adjust}} for more details. |
|
23 |
+#' @param group One of three choices, 0,1,2. 0: the sort is ordered by a |
|
24 |
+#' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw |
|
25 |
+#' coefficient fit in decreasing order. 2: the sort is ordered by the raw |
|
26 |
+#' coefficient fit in increasing order. 3: the sort is ordered by the p-value |
|
27 |
+#' of the coefficient fit in increasing order. |
|
28 |
+#' @param output Name of output file, including location, to save the table. |
|
29 |
+#' @return Table of the top-ranked features determined by the linear fit's |
|
30 |
+#' coefficient. |
|
31 |
+#' @seealso \code{\link{fitZig}} \code{\link{MRcoefs}} |
|
32 |
+#' @examples |
|
33 |
+#' |
|
34 |
+#' data(lungData) |
|
35 |
+#' k = grep("Extraction.Control",pData(lungData)$SampleType) |
|
36 |
+#' lungTrim = lungData[,-k] |
|
37 |
+#' k = which(rowSums(MRcounts(lungTrim)>0)<10) |
|
38 |
+#' lungTrim = lungTrim[-k,] |
|
39 |
+#' cumNorm(lungTrim) |
|
40 |
+#' smokingStatus = pData(lungTrim)$SmokingStatus |
|
41 |
+#' mod = model.matrix(~smokingStatus) |
|
42 |
+#' settings = zigControl(maxit=1,verbose=FALSE) |
|
43 |
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings) |
|
44 |
+#' head(MRtable(fit)) |
|
45 |
+#' |
|
1 | 46 |
MRtable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,output=NULL){ |
2 | 47 |
tb = obj$fit$coefficients |
3 | 48 |
tx = as.character(taxa); |
... | ... |
@@ -56,4 +101,4 @@ MRtable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,a |
56 | 101 |
} else{ |
57 | 102 |
return(as.data.frame(mat)) |
58 | 103 |
} |
59 |
-} |
|
60 | 104 |
\ No newline at end of file |
105 |
+} |
... | ... |
@@ -1,14 +1,17 @@ |
1 |
-#' Aggregates the counts to a particular classification. |
|
2 |
-#' |
|
3 |
-#' This function takes an eSet object of data at a particular level with feature information allowing |
|
4 |
-#' for aggregation of counts to a particular level. This method assumes taxa begin at the highest level and continue to the current level. |
|
5 |
-#' |
|
6 |
-#' @param obj An eSet object of count data. |
|
1 |
+#' Aggregates counts by a particular classification. |
|
2 |
+#' |
|
3 |
+#' This function takes a MRexperiment object of data at a particular level with |
|
4 |
+#' feature information allowing for aggregation of counts to a particular |
|
5 |
+#' level. This method assumes taxa begin at the highest level and continue to |
|
6 |
+#' the current level, reverse assumes taxa begin at the lowest level. |
|
7 |
+#' |
|
8 |
+#' |
|
9 |
+#' @param obj A MRexperiment object. |
|
7 | 10 |
#' @param lvl The level to go up (numeric, 1,2,3). |
11 |
+#' @param taxa A vector of taxa annotations with splits |
|
8 | 12 |
#' @param split The way character strings in taxa in the obj are split. |
9 |
-#' @return Updated eSet object with counts aggregated to the various taxanomic levels. |
|
10 |
-#' |
|
11 |
-#' @name aggregateM |
|
13 |
+#' @return Updated object with counts aggregated to the various taxanomic |
|
14 |
+#' levels. |
|
12 | 15 |
aggregateM <- |
13 | 16 |
function(obj,taxa,lvl,split=";"){ |
14 | 17 |
|
... | ... |
@@ -11,6 +11,33 @@ setMethod("[", "MRexperiment", function (x, i, j, ..., drop = FALSE) { |
11 | 11 |
obj |
12 | 12 |
}) |
13 | 13 |
|
14 |
+ |
|
15 |
+ |
|
16 |
+#' Create a MRexperiment object |
|
17 |
+#' |
|
18 |
+#' This function creates a MRexperiment object from a matrix or data frame of |
|
19 |
+#' count data. |
|
20 |
+#' |
|
21 |
+#' See \code{\link{MRexperiment-class}} and \code{eSet} (from the Biobase |
|
22 |
+#' package) for the meaning of the various slots. |
|
23 |
+#' |
|
24 |
+#' @param counts A matrix or data frame of count data. The count data is |
|
25 |
+#' representative of the number of reads annotated for a feature (be it gene, |
|
26 |
+#' OTU, species, etc). Rows should correspond to features and columns to |
|
27 |
+#' samples. |
|
28 |
+#' @param phenoData An AnnotatedDataFrame with pertinent sample information. |
|
29 |
+#' @param featureData An AnnotatedDataFrame with pertinent feature information. |
|
30 |
+#' @param libSize libSize, library size, is the total number of reads for a |
|
31 |
+#' particular sample. |
|
32 |
+#' @param normFactors normFactors, the normalization factors used in either the |
|
33 |
+#' model or as scaling factors of sample counts for each particular sample. |
|
34 |
+#' @return an object of class MRexperiment |
|
35 |
+#' @author Joseph N Paulson, jpaulson@@umiacs.umd.edu |
|
36 |
+#' @examples |
|
37 |
+#' |
|
38 |
+#' cnts = matrix(abs(rnorm(1000)),nc=10) |
|
39 |
+#' obj <- newMRexperiment(cnts) |
|
40 |
+#' |
|
14 | 41 |
newMRexperiment <- function(counts, phenoData=NULL, featureData=NULL,libSize=NULL, normFactors=NULL) { |
15 | 42 |
counts= as.matrix(counts) |
16 | 43 |
|
... | ... |
@@ -19,9 +46,46 @@ newMRexperiment <- function(counts, phenoData=NULL, featureData=NULL,libSize=NUL |
19 | 46 |
if( is.null( phenoData ) ) |
20 | 47 |
phenoData <- annotatedDataFrameFrom(counts, byrow=FALSE) |
21 | 48 |
if( is.null( libSize ) ) |
49 |
+ |
|
50 |
+ |
|
51 |
+#' Access sample depth of coverage from MRexperiment object |
|
52 |
+#' |
|
53 |
+#' The libSize vector represents the column (sample specific) sums of features, |
|
54 |
+#' i.e. the total number of reads for a sample. It is used by |
|
55 |
+#' \code{\link{fitZig}}. |
|
56 |
+#' |
|
57 |
+#' |
|
58 |
+#' @name libSize |
|
59 |
+#' @aliases libSize,MRexperiment-method libSize |
|
60 |
+#' @docType methods |
|
61 |
+#' @param obj a \code{MRexperiment} object. |
|
62 |
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu |
|
63 |
+#' @examples |
|
64 |
+#' |
|
65 |
+#' data(lungData) |
|
66 |
+#' head(libSize(lungData)) |
|
67 |
+#' |
|
22 | 68 |
libSize <- as.matrix(colSums(counts)) |
23 | 69 |
rownames(libSize) = colnames(counts) |
24 | 70 |
if( is.null( normFactors ) ){ |
71 |
+ |
|
72 |
+ |
|
73 |
+#' Access the normalization factors in a MRexperiment object |
|
74 |
+#' |
|
75 |
+#' Function to access the scaling factors, aka the normalization factors, of |
|
76 |
+#' samples in a MRexperiment object. |
|
77 |
+#' |
|
78 |
+#' |
|
79 |
+#' @name normFactors |
|
80 |
+#' @aliases normFactors,MRexperiment-method normFactors |
|
81 |
+#' @docType methods |
|
82 |
+#' @param obj a \code{MRexperiment} object. |
|
83 |
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu |
|
84 |
+#' @examples |
|
85 |
+#' |
|
86 |
+#' data(lungData) |
|
87 |
+#' head(normFactors(lungData)) |
|
88 |
+#' |
|
25 | 89 |
normFactors <- as.matrix(rep( NA_real_, length(libSize) )) |
26 | 90 |
rownames(normFactors) = rownames(libSize) |
27 | 91 |
} |
... | ... |
@@ -45,6 +109,26 @@ setValidity( "MRexperiment", function( object ) { |
45 | 109 |
TRUE |
46 | 110 |
} ) |
47 | 111 |
|
112 |
+ |
|
113 |
+ |
|
114 |
+#' Accessor for the counts slot of a MRexperiment object |
|
115 |
+#' |
|
116 |
+#' The counts slot holds the raw count data representing (along the rows) the |
|
117 |
+#' number of reads annotated for a particular feature and (along the columns) |
|
118 |
+#' the sample. |
|
119 |
+#' |
|
120 |
+#' |
|
121 |
+#' @name MRcounts |
|
122 |
+#' @aliases MRcounts,MRexperiment-method MRcounts |
|
123 |
+#' @docType methods |
|
124 |
+#' @param cnts a \code{MRexperiment} object. |
|
125 |
+#' @param norm logical indicating whether or not to return normalized counts. |
|
126 |
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu |
|
127 |
+#' @examples |
|
128 |
+#' |
|
129 |
+#' data(lungData) |
|
130 |
+#' head(MRcounts(lungData)) |
|
131 |
+#' |
|
48 | 132 |
MRcounts <- function( obj ,norm=FALSE) { |
49 | 133 |
stopifnot( is( obj, "MRexperiment" ) ) |
50 | 134 |
if(!norm){ |
... | ... |
@@ -57,6 +141,23 @@ MRcounts <- function( obj ,norm=FALSE) { |
57 | 141 |
} |
58 | 142 |
} |
59 | 143 |
|
144 |
+ |
|
145 |
+ |
|
146 |
+#' Access the posterior probabilities that results from analysis |
|
147 |
+#' |
|
148 |
+#' Accessing the posterior probabilities following a run through |
|
149 |
+#' \code{\link{fitZig}} |
|
150 |
+#' |
|
151 |
+#' |
|
152 |
+#' @name posterior.probs |
|
153 |
+#' @aliases posterior.probs,MRexperiment-method posterior.probs |
|
154 |
+#' @docType methods |
|
155 |
+#' @param obj a \code{MRexperiment} object. |
|
156 |
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu |
|
157 |
+#' @examples |
|
158 |
+#' |
|
159 |
+#' # see vignette |
|
160 |
+#' |
|
60 | 161 |
posterior.probs <- function( obj ) { |
61 | 162 |
stopifnot( is( obj, "MRexperiment" ) ) |
62 | 163 |
assayData(obj)[["z"]] |
... | ... |
@@ -74,7 +175,26 @@ libSize<-function(obj){ |
74 | 175 |
ls |
75 | 176 |
} |
76 | 177 |
|
178 |
+ |
|
179 |
+ |
|
180 |
+#' Access MRexperiment object experiment data |
|
181 |
+#' |
|
182 |
+#' The expSummary vectors represent the column (sample specific) sums of |
|
183 |
+#' features, i.e. the total number of reads for a sample, libSize and also the |
|
184 |
+#' normalization factors, normFactor. |
|
185 |
+#' |
|
186 |
+#' |
|
187 |
+#' @name expSummary |
|
188 |
+#' @aliases expSummary,MRexperiment-method expSummary |
|
189 |
+#' @docType methods |
|
190 |
+#' @param obj a \code{MRexperiment} object. |
|
191 |
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu |
|
192 |
+#' @examples |
|
193 |
+#' |
|
194 |
+#' data(mouseData) |
|
195 |
+#' expSummary(mouseData) |
|
196 |
+#' |
|
77 | 197 |
expSummary<-function(obj){ |
78 | 198 |
stopifnot( is( obj, "MRexperiment" ) ) |
79 | 199 |
pData(obj@expSummary$expSummary) |
80 |
-} |
|
81 | 200 |
\ No newline at end of file |
201 |
+} |
... | ... |
@@ -1,13 +1,19 @@ |
1 | 1 |
#' Cumulative sum scaling factors. |
2 |
-#' |
|
3 |
-#' Calculates each column's quantile and calculates the sum up to and including that quantile. |
|
4 |
-#' |
|
5 |
-#' @param jobj An eSet object. |
|
2 |
+#' |
|
3 |
+#' Calculates each column's quantile and calculates the sum up to and including |
|
4 |
+#' that quantile. |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @param obj An MRexperiment object. |
|
6 | 8 |
#' @param p The pth quantile. |
7 | 9 |
#' @return Vector of the sum up to and including a sample's pth quantile |
8 |
-#' |
|
9 |
-#' @name cumNorm |
|
10 |
-#' @seealso \code{\link{fitZig}} |
|
10 |
+#' @seealso \code{\link{fitZig}} \code{\link{cumNormStat}} |
|
11 |
+#' @examples |
|
12 |
+#' |
|
13 |
+#' data(mouseData) |
|
14 |
+#' cumNorm(mouseData) |
|
15 |
+#' head(normFactors(mouseData)) |
|
16 |
+#' |
|
11 | 17 |
cumNorm <- |
12 | 18 |
function(obj,p=cumNormStat(obj)){ |
13 | 19 |
x = MRcounts(obj) |
... | ... |
@@ -1,13 +1,19 @@ |
1 | 1 |
#' Cumulative sum scaling factors. |
2 |
-#' |
|
3 |
-#' Calculates each column's quantile and calculates the sum up to and including that quantile. |
|
4 |
-#' |
|
5 |
-#' @param jobj An eSet object. |
|
2 |
+#' |
|
3 |
+#' Calculates each column's quantile and calculates the sum up to and including |
|
4 |
+#' that quantile. |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @param obj A MRexperiment object. |
|
6 | 8 |
#' @param p The pth quantile. |
7 |
-#' @return jobj An updated eSet object with normalized counts |
|
8 |
-#' |
|
9 |
-#' @name cumNormMat |
|
9 |
+#' @return Returns a matrix normalized by scaling counts up to and including |
|
10 |
+#' the pth quantile. |
|
10 | 11 |
#' @seealso \code{\link{fitZig}} \code{\link{cumNorm}} |
12 |
+#' @examples |
|
13 |
+#' |
|
14 |
+#' data(mouseData) |
|
15 |
+#' head(cumNormMat(mouseData)) |
|
16 |
+#' |
|
11 | 17 |
cumNormMat <- |
12 | 18 |
function(obj,p= cumNormStat(obj)){ |
13 | 19 |
#################################################################################### |
... | ... |
@@ -1,12 +1,22 @@ |
1 |
-#' Cumulative normalization statistic. |
|
2 |
-#' |
|
3 |
-#' @param obj An eSet object. |
|
4 |
-#' @param pFlag Whether or not to plot the reference. |
|
5 |
-#' @param rel Relative difference of rel percent. |
|
6 |
-#' @return P-value for which to cumulative normalize. |
|
7 |
-#' |
|
8 |
-#' @name cumNormStat |
|
1 |
+#' Cumulative sum scaling percentile selection |
|
2 |
+#' |
|
3 |
+#' Calculates the percentile for which to sum counts up to and scale by. |
|
4 |
+#' |
|
5 |
+#' |
|
6 |
+#' @param obj A list with count data |
|
7 |
+#' @param pFlag Plot the median difference quantiles |
|
8 |
+#' @param rel Cutoff for the relative difference from one median difference |
|
9 |
+#' from the reference to the next |
|
10 |
+#' @param qFlag Flag to either calculate the proper percentile using a |
|
11 |
+#' step-wise or triangular approximation of the sample count distribution. |
|
12 |
+#' @param ... Applicable if pFlag == TRUE. Extra plotting parameters. |
|
13 |
+#' @return Percentile for which to scale data |
|
9 | 14 |
#' @seealso \code{\link{fitZig}} \code{\link{cumNorm}} |
15 |
+#' @examples |
|
16 |
+#' |
|
17 |
+#' data(mouseData) |
|
18 |
+#' p = round(cumNormStat(mouseData,pFlag=FALSE),digits=2) |
|
19 |
+#' |
|
10 | 20 |
cumNormStat <- |
11 | 21 |
function(obj,pFlag = FALSE,rel=.1,qFlag = TRUE, ...){ |
12 | 22 |
|
... | ... |
@@ -42,4 +52,4 @@ function(obj,pFlag = FALSE,rel=.1,qFlag = TRUE, ...){ |
42 | 52 |
x = which(abs(diff(diffr2))/diffr2[-1]>rel)[1] / length(diffr2) |
43 | 53 |
obj@expSummary$cumNormStat = x; |
44 | 54 |
return(x) |
45 |
-} |
|
46 | 55 |
\ No newline at end of file |
56 |
+} |
... | ... |
@@ -1,22 +1,26 @@ |
1 | 1 |
#' Compute the Maximization step calculation for features still active. |
2 |
-#' |
|
3 |
-#' Maximization step is solved by weighted least squares. The function also computes counts residuals. |
|
4 |
-#' |
|
5 |
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$ |
|
6 |
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) |
|
7 |
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$. |
|
8 |
-#' The log-likelihood in this extended model is |
|
9 |
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$. |
|
10 |
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$. |
|
11 |
-#' |
|
12 |
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0). |
|
2 |
+#' |
|
3 |
+#' Maximization step is solved by weighted least squares. The function also |
|
4 |
+#' computes counts residuals. |
|
5 |
+#' |
|
6 |
+#' Maximum-likelihood estimates are approximated using the EM algorithm where |
|
7 |
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the |
|
8 |
+#' zero point mass as latent indicator variables. The density is defined as |
|
9 |
+#' $f_zig(y_ij = pi_j(S_j)*f_0(y_ij) +(1-pi_j (S_j)) * |
|
10 |
+#' f_count(y_ij;mu_i,sigma_i^2)$. The log-likelihood in this extended model is |
|
11 |
+#' $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log |
|
12 |
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (s_j))$. The responsibilities are defined |
|
13 |
+#' as $z_ij = pr(delta_ij=1 | data)$. |
|
14 |
+#' |
|
15 |
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a |
|
16 |
+#' count comes from a spike distribution at 0). |
|
13 | 17 |
#' @param y Matrix (m x n) of count observations. |
14 | 18 |
#' @param mmCount Model matrix for the count distribution. |
15 |
-#' @param stillActive Boolean vector of size M, indicating whether a feature converged or not. |
|
19 |
+#' @param stillActive Boolean vector of size M, indicating whether a feature |
|
20 |
+#' converged or not. |
|
16 | 21 |
#' @param fit2 Previous fit of the count model. |
17 |
-#' @return Update matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0). |
|
18 |
-#' |
|
19 |
-#' @name doCountMStep |
|
22 |
+#' @return Update matrix (m x n) of estimate responsibilities (probabilities |
|
23 |
+#' that a count comes from a spike distribution at 0). |
|
20 | 24 |
#' @seealso \code{\link{fitZig}} |
21 | 25 |
doCountMStep <- |
22 | 26 |
function(z, y, mmCount, stillActive,fit2=NULL){ |
... | ... |
@@ -1,20 +1,22 @@ |
1 | 1 |
#' Compute the Expectation step. |
2 |
-#' |
|
3 |
-#' Estimates the responsibilities $z_{ij} = \frac{\pi_j \cdot I_{0}(y_{ij}}{\pi_j \cdot I_{0}(y_{ij} + (1-\pi_j) \cdot f_{count}(y_{ij}} |
|
4 |
-#' |
|
5 |
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$ |
|
6 |
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) |
|
7 |
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$. |
|
8 |
-#' The log-likelihood in this extended model is |
|
9 |
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$. |
|
10 |
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$. |
|
11 |
-#' |
|
2 |
+#' |
|
3 |
+#' Estimates the responsibilities $z_ij = fracpi_j cdot I_0(y_ijpi_j cdot |
|
4 |
+#' I_0(y_ij + (1-pi_j) cdot f_count(y_ij |
|
5 |
+#' |
|
6 |
+#' Maximum-likelihood estimates are approximated using the EM algorithm where |
|
7 |
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the |
|
8 |
+#' zero point mass as latent indicator variables. The density is defined as |
|
9 |
+#' $f_zig(y_ij = pi_j(S_j) cdot f_0(y_ij) +(1-pi_j (S_j))cdot |
|
10 |
+#' f_count(y_ij;mu_i,sigma_i^2)$. The log-likelihood in this extended model is |
|
11 |
+#' $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log |
|
12 |
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (sj))$. The responsibilities are defined |
|
13 |
+#' as $z_ij = pr(delta_ij=1 | data)$. |
|
14 |
+#' |
|
12 | 15 |
#' @param countResiduals Residuals from the count model. |
13 | 16 |
#' @param zeroResiduals Residuals from the zero model. |
14 | 17 |
#' @param zeroIndices Index (matrix m x n) of counts that are zero/non-zero. |
15 |
-#' @return Updated matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0). |
|
16 |
-#' |
|
17 |
-#' @name doEStep |
|
18 |
+#' @return Updated matrix (m x n) of estimate responsibilities (probabilities |
|
19 |
+#' that a count comes from a spike distribution at 0). |
|
18 | 20 |
#' @seealso \code{\link{fitZig}} |
19 | 21 |
doEStep <- |
20 | 22 |
function(countResiduals, zeroResiduals, zeroIndices) |
... | ... |
@@ -1,22 +1,25 @@ |
1 | 1 |
#' Compute the zero Maximization step. |
2 |
-#' |
|
3 |
-#' Performs Maximization step calculation for the mixture components. Uses least squares to fit the parameters of the mean of the logistic distribution. |
|
4 |
-#' $$ |
|
5 |
-#' pi_j = \sum_i^M \frac{1}{M}z_{ij} |
|
6 |
-#' $$ |
|
7 |
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$ |
|
8 |
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) |
|
9 |
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$. |
|
10 |
-#' The log-likelihood in this extended model is |
|
11 |
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$. |
|
12 |
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$. |
|
13 |
-#' |
|
14 |
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0). |
|
2 |
+#' |
|
3 |
+#' Performs Maximization step calculation for the mixture components. Uses |
|
4 |
+#' least squares to fit the parameters of the mean of the logistic |
|
5 |
+#' distribution. $$ pi_j = sum_i^M frac1Mz_ij $$ Maximum-likelihood estimates |
|
6 |
+#' are approximated using the EM algorithm where we treat mixture membership |
|
7 |
+#' $delta_ij$ = 1 if $y_ij$ is generated from the zero point mass as latent |
|
8 |
+#' indicator variables. The density is defined as $f_zig(y_ij = pi_j(S_j) cdot |
|
9 |
+#' f_0(y_ij) +(1-pi_j (S_j))cdot f_count(y_ij;mu_i,sigma_i^2)$. The |
|
10 |
+#' log-likelihood in this extended model is $(1-delta_ij) log |
|
11 |
+#' f_count(y;mu_i,sigma_i^2 )+delta_ij log pi_j(s_j)+(1-delta_ij)log (1-pi_j |
|
12 |
+#' (sj))$. The responsibilities are defined as $z_ij = pr(delta_ij=1 | data)$. |
|
13 |
+#' |
|
14 |
+#' |
|
15 |
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a |
|
16 |
+#' count comes from a spike distribution at 0). |
|
15 | 17 |
#' @param zeroIndices Index (matrix m x n) of counts that are zero/non-zero. |
16 |
-#' @param mmZero The zero model, the model matrix to account for the change in the number of OTUs observed as a linear effect of the depth of coverage. |
|
17 |
-#' @return List of the zero fit (zero mean model) coefficients, variance - scale parameter (scalar), and normalized residuals of length sum(zeroIndices). |
|
18 |
-#' |
|
19 |
-#' @name doZeroMStep |
|
18 |
+#' @param mmZero The zero model, the model matrix to account for the change in |
|
19 |
+#' the number of OTUs observed as a linear effect of the depth of coverage. |
|
20 |
+#' @return List of the zero fit (zero mean model) coefficients, variance - |
|
21 |
+#' scale parameter (scalar), and normalized residuals of length |
|
22 |
+#' sum(zeroIndices). |
|
20 | 23 |
#' @seealso \code{\link{fitZig}} |
21 | 24 |
doZeroMStep <- |
22 | 25 |
function(z, zeroIndices, mmZero) |
... | ... |
@@ -1,18 +1,18 @@ |
1 | 1 |
#' export the normalized eSet dataset as a matrix. |
2 |
-#' |
|
3 |
-#' This function allows the user to take the normalized dataset or counts and output |
|
4 |
-#' the dataset to the user's workspace as a tab-delimited file, etc. |
|
5 |
-#' |
|
6 |
-#' @param jobj An eSet object with count data. |
|
7 |
-#' @param output Output file name. |
|
2 |
+#' |
|
3 |
+#' This function allows the user to take a dataset of counts and output the |
|
4 |
+#' dataset to the user's workspace as a tab-delimited file, etc. |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @aliases exportMatrix exportMat |
|
8 |
+#' @param mat A matrix of values (normalized, or otherwise) |
|
9 |
+#' @param output Output file name |
|
8 | 10 |
#' @return NA |
9 |
-#' |
|
10 |
-#' @name export_mat |
|
11 |
-#' @aliases exportMatrix |
|
12 | 11 |
#' @seealso \code{\link{cumNorm}} |
13 | 12 |
#' @examples |
14 |
-#' export_mat(jobj,output="~/Desktop/normMatrix.tsv"); |
|
15 |
- |
|
13 |
+#' |
|
14 |
+#' # see vignette |
|
15 |
+#' |
|
16 | 16 |
exportMat <- |
17 | 17 |
function(mat,output="~/Desktop/matrix.tsv"){ |
18 | 18 |
matrix = mat; |
... | ... |
@@ -1,17 +1,19 @@ |
1 | 1 |
#' Various statistics of the count data. |
2 |
-#' |
|
3 |
-#' A matrix of values for each sample. The matrix consists of sample ids, the sample scaling factor, quantile value, and the number of number of features. |
|
4 |
-#' |
|
5 |
-#' @param obj An eSet object with count data. |
|
6 |
-#' @param p Quantile value to calculate the scaling factor and quantiles for the various samples. |
|
2 |
+#' |
|
3 |
+#' A matrix of values for each sample. The matrix consists of sample ids, the |
|
4 |
+#' sample scaling factor, quantile value, and the number of number of features. |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @param obj A MRexperiment object with count data. |
|
8 |
+#' @param p Quantile value to calculate the scaling factor and quantiles for |
|
9 |
+#' the various samples. |
|
7 | 10 |
#' @param output Output file name. |
8 | 11 |
#' @return None. |
9 |
-#' |
|
10 |
-#' @name export_stats |
|
11 | 12 |
#' @seealso \code{\link{cumNorm}} \code{\link{quantile}} |
12 | 13 |
#' @examples |
13 |
-#' export_stats(obj,p=1,output="~/Desktop/obj-stats.tsv") |
|
14 |
- |
|
14 |
+#' |
|
15 |
+#' # see vignette |
|
16 |
+#' |
|
15 | 17 |
exportStats <- |
16 | 18 |
function(obj,p= cumNormStat(obj),output="~/Desktop/res.stats.tsv"){ |
17 | 19 |
|
... | ... |
@@ -1,27 +1,41 @@ |
1 | 1 |
#' Computes the weighted fold-change estimates and t-statistics. |
2 |
-#' |
|
3 |
-#' Wrapper to actually run the Expectation-maximization algorithm and estimate $f_{count}$ fits. |
|
4 |
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$ |
|
5 |
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) |
|
6 |
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$. |
|
7 |
-#' The log-likelihood in this extended model is |
|
8 |
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$. |
|
9 |
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$. |
|
10 |
-#' |
|
11 |
-#' @param obj An eSet object with count data. |
|
2 |
+#' |
|
3 |
+#' Wrapper to actually run the Expectation-maximization algorithm and estimate |
|
4 |
+#' $f_count$ fits. Maximum-likelihood estimates are approximated using the EM |
|
5 |
+#' algorithm where we treat mixture membership $delta_ij = 1$ if $y_ij$ is |
|
6 |
+#' generated from the zero point mass as latent indicator variables. The |
|
7 |
+#' density is defined as $f_zig(y_ij = pi_j(S_j)*f_0(y_ij) +(1-pi_j (S_j)) * |
|
8 |
+#' f_count(y_ij; mu_i, sigma_i^2)$. The log-likelihood in this extended model |
|
9 |
+#' is: $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log |
|
10 |
+#' pi_j(s_j)+(1-delta_ij) log (1-pi_j (s_j))$. The responsibilities are defined |
|
11 |
+#' as $z_ij = pr(delta_ij=1 | data)$. |
|
12 |
+#' |
|
13 |
+#' |
|
14 |
+#' @param obj A MRexperiment object with count data. |
|
12 | 15 |
#' @param mod The model for the count distribution. |
13 |
-#' @param s95 A vector of size M of the scaling values to be included in the model. |
|
14 |
-#' @param zeroMod The zero model, the model to account for the change in the number of OTUs observed as a linear effect of the depth of coverage. |
|
15 |
-#' @param useS95offset Boolean, whether to include the default scaling parameters in the model or not. |
|
16 |
-#' @param control The settings for fitZig. |
|
17 |
-#' @param s The raw total counts for the various samples. |
|
18 |
-#' @return The fits, posterior probabilities, posterior probabilities used at time of convergence for each feature, ebayes (limma object) fit, among other data. |
|
19 |
-#' |
|
20 |
-#' @name fitZig |
|
16 |
+#' @param zeroMod The zero model, the model to account for the change in the |
|
17 |
+#' number of OTUs observed as a linear effect of the depth of coverage. |
|
18 |
+#' @param useS95offset Boolean, whether to include the default scaling |
|
19 |
+#' parameters in the model or not. |
|
20 |
+#' @param control The settings for fitZig. |
|
21 |
+#' @return The fits, posterior probabilities, posterior probabilities used at |
|
22 |
+#' time of convergence for each feature, ebayes (limma object) fit, among other |
|
23 |
+#' data. |
|
24 |
+#' @export |
|
21 | 25 |
#' @seealso \code{\link{cumNorm}} \code{\link{zigControl}} |
22 | 26 |
#' @examples |
23 |
-#' model = model.matrix(~1+type+log2(s95/1000+1)) |
|
24 |
-#' res = fitZig(obj = obj,mod=mod,useS95offset=FALSE) |
|
27 |
+#' |
|
28 |
+#' data(lungData) |
|
29 |
+#' k = grep("Extraction.Control",pData(lungData)$SampleType) |
|
30 |
+#' lungTrim = lungData[,-k] |
|
31 |
+#' k = which(rowSums(MRcounts(lungTrim)>0)<30) |
|
32 |
+#' cumNorm(lungTrim) |
|
33 |
+#' lungTrim = lungTrim[-k,] |
|
34 |
+#' smokingStatus = pData(lungTrim)$SmokingStatus |
|
35 |
+#' mod = model.matrix(~smokingStatus) |
|
36 |
+#' settings = zigControl(maxit=1,verbose=FALSE) |
|
37 |
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings) |
|
38 |
+#' |
|
25 | 39 |
fitZig <- |
26 | 40 |
function(obj,mod,zeroMod=NULL,useS95offset=TRUE,control=zigControl()){ |
27 | 41 |
|
... | ... |
@@ -109,4 +123,4 @@ function(obj,mod,zeroMod=NULL,useS95offset=TRUE,control=zigControl()){ |
109 | 123 |
dat = list(fit=fit$fit,countResiduals=fit$residuals, |
110 | 124 |
z=z,eb=eb,taxa=rownames(obj),counts=y,zeroMod =mmZero,stillActive=stillActive,stillActiveNLL=stillActiveNLL,zeroCoef=zeroCoef) |
111 | 125 |
return(dat) |
112 |
- } |
|
113 | 126 |
\ No newline at end of file |
127 |
+ } |
... | ... |
@@ -1,18 +1,20 @@ |
1 |
-#' Compute the value of the count density function from the count model residuals. |
|
2 |
-#' |
|
3 |
-#' Calculate density values from a normal: $f(x) = 1/(\sqrt (2 \pi ) \sigma ) e^-((x - \mu )^2/(2 \sigma^2))$. |
|
4 |
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$ |
|
5 |
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) |
|
6 |
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$. |
|
7 |
-#' The log-likelihood in this extended model is |
|
8 |
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$. |
|
9 |
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$. |
|
10 |
-#' |
|
1 |
+#' Compute the value of the count density function from the count model |
|
2 |
+#' residuals. |
|
3 |
+#' |
|
4 |
+#' Calculate density values from a normal: $f(x) = 1/(sqrt (2 pi ) sigma ) |
|
5 |
+#' e^-((x - mu )^2/(2 sigma^2))$. Maximum-likelihood estimates are |
|
6 |
+#' approximated using the EM algorithm where we treat mixture membership |
|
7 |
+#' $deta_ij$ = 1 if $y_ij$ is generated from the zero point mass as latent |
|
8 |
+#' indicator variables. The density is defined as $f_zig(y_ij = pi_j(S_j) cdot |
|
9 |
+#' f_0(y_ij) +(1-pi_j (S_j))cdot f_count(y_ij;mu_i,sigma_i^2)$. The |
|
10 |
+#' log-likelihood in this extended model is $(1-delta_ij) log |
|
11 |
+#' f_count(y;mu_i,sigma_i^2 )+delta_ij log pi_j(s_j)+(1-delta_ij)log (1-pi_j |
|
12 |
+#' (sj))$. The responsibilities are defined as $z_ij = pr(delta_ij=1 | data)$. |
|
13 |
+#' |
|
14 |
+#' |
|
11 | 15 |
#' @param residuals Residuals from the count model. |
12 | 16 |
#' @param log Whether or not we are calculating from a log-normal distribution. |
13 | 17 |
#' @return Density values from the count model residuals. |
14 |
-#' |
|
15 |
-#' @name getCountDensity |
|
16 | 18 |
#' @seealso \code{\link{fitZig}} |
17 | 19 |
getCountDensity <- |
18 | 20 |
function(residuals, log=FALSE){ |
... | ... |
@@ -1,15 +1,19 @@ |
1 |
-#' Calculate the relative difference between iterations of the negative log-likelihoods. |
|
2 |
-#' |
|
3 |
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$ |
|
4 |
-#' is generated from the zero point mass as latent indicator variables. The log-likelihood in this extended model is |
|
5 |
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$. |
|
6 |
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$. |
|
7 |
-#' |
|
1 |
+#' Calculate the relative difference between iterations of the negative |
|
2 |
+#' log-likelihoods. |
|
3 |
+#' |
|
4 |
+#' Maximum-likelihood estimates are approximated using the EM algorithm where |
|
5 |
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the |
|
6 |
+#' zero point mass as latent indicator variables. The log-likelihood in this |
|
7 |
+#' extended model is $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log |
|
8 |
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (sj))$. The responsibilities are defined |
|
9 |
+#' as $z_ij = pr(delta_ij=1 | data)$. |
|
10 |
+#' |
|
11 |
+#' |
|
8 | 12 |
#' @param nll Vector of size M with the current negative log-likelihoods. |
9 |
-#' @param nllOld Vector of size M with the previous iterations negative log-likelihoods. |
|
10 |
-#' @return Vector of size M of the relative differences between the previous and current iteration nll. |
|
11 |
-#' |
|
12 |
-#' @name getEpsilon |
|
13 |
+#' @param nllOld Vector of size M with the previous iterations negative |
|
14 |
+#' log-likelihoods. |
|
15 |
+#' @return Vector of size M of the relative differences between the previous |
|
16 |
+#' and current iteration nll. |
|
13 | 17 |
#' @seealso \code{\link{fitZig}} |
14 | 18 |
getEpsilon <- |
15 | 19 |
function(nll, nllOld){ |
... | ... |
@@ -1,16 +1,20 @@ |
1 |
-#' Calculate the negative log-likelihoods for the various features given the residuals. |
|
2 |
-#' |
|
3 |
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$ |
|
4 |
-#' is generated from the zero point mass as latent indicator variables. The log-likelihood in this extended model is |
|
5 |
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$. |
|
6 |
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data and current values)$. |
|
7 |
-#' |
|
8 |
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0). |
|
1 |
+#' Calculate the negative log-likelihoods for the various features given the |
|
2 |
+#' residuals. |
|
3 |
+#' |
|
4 |
+#' Maximum-likelihood estimates are approximated using the EM algorithm where |
|
5 |
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the |
|
6 |
+#' zero point mass as latent indicator variables. The log-likelihood in this |
|
7 |
+#' extended model is $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log |
|
8 |
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (sj))$. The responsibilities are defined |
|
9 |
+#' as $z_ij = pr(delta_ij=1 | data and current values)$. |
|
10 |
+#' |
|
11 |
+#' |
|
12 |
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a |
|
13 |
+#' count comes from a spike distribution at 0). |
|
9 | 14 |
#' @param countResiduals Residuals from the count model. |
10 | 15 |
#' @param zeroResiduals Residuals from the zero model. |
11 |
-#' @return Vector of size M of the negative log-likelihoods for the various features. |
|
12 |
-#' |
|
13 |
-#' @name getNegativeLogLikelihoods |
|
16 |
+#' @return Vector of size M of the negative log-likelihoods for the various |
|
17 |
+#' features. |
|
14 | 18 |
#' @seealso \code{\link{fitZig}} |
15 | 19 |
getNegativeLogLikelihoods <- |
16 | 20 |
function(z, countResiduals, zeroResiduals){ |
... | ... |
@@ -1,15 +1,17 @@ |
1 |
-#' Calculate the mixture proportions from the zero model / spike mass model residuals. |
|
2 |
-#' |
|
1 |
+#' Calculate the mixture proportions from the zero model / spike mass model |
|
2 |
+#' residuals. |
|
3 |
+#' |
|
3 | 4 |
#' F(x) = 1 / (1 + exp(-(x-m)/s)) (the CDF of the logistic distribution). |
4 |
-#' Provides the probability that a real-valued random variable X with a given probability distribution will be found at a value less than or equal to x. |
|
5 |
-#' The output are the mixture proportions for the samples given the residuals from the zero model. |
|
6 |
-#' |
|
5 |
+#' Provides the probability that a real-valued random variable X with a given |
|
6 |
+#' probability distribution will be found at a value less than or equal to x. |
|
7 |
+#' The output are the mixture proportions for the samples given the residuals |
|
8 |
+#' from the zero model. |
|
9 |
+#' |
|
10 |
+#' |
|
7 | 11 |
#' @param residuals Residuals from the zero model. |
8 | 12 |
#' @return Mixture proportions for each sample. |
9 |
-#' |
|
10 |
-#' @name getPi |
|
11 | 13 |
#' @seealso \code{\link{fitZig}} |
12 | 14 |
getPi <- |
13 | 15 |
function(residuals){ |
14 | 16 |
plogis(residuals) |
15 |
-} |
|
16 | 17 |
\ No newline at end of file |
18 |
+} |
... | ... |
@@ -1,13 +1,18 @@ |
1 | 1 |
#' Calculate the current Z estimate responsibilities (posterior probabilities) |
2 |
-#' |
|
3 |
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0). |
|
4 |
-#' @param zUsed Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0) that are actually used (following convergence). |
|
5 |
-#' @param stillActive A vector of size M booleans saying if a feature is still active or not. |
|
2 |
+#' |
|
3 |
+#' Calculate the current Z estimate responsibilities (posterior probabilities) |
|
4 |
+#' |
|
5 |
+#' |
|
6 |
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a |
|
7 |
+#' count comes from a spike distribution at 0). |
|
8 |
+#' @param zUsed Matrix (m x n) of estimate responsibilities (probabilities that |
|
9 |
+#' a count comes from a spike distribution at 0) that are actually used |
|
10 |
+#' (following convergence). |
|
11 |
+#' @param stillActive A vector of size M booleans saying if a feature is still |
|
12 |
+#' active or not. |
|
6 | 13 |
#' @param nll Vector of size M with the current negative log-likelihoods. |
7 | 14 |
#' @param nllUSED Vector of size M with the converged negative log-likelihoods. |
8 | 15 |
#' @return A list of updated zUsed and nllUSED. |
9 |
-#' |
|
10 |
-#' @name getZ |
|
11 | 16 |
#' @seealso \code{\link{fitZig}} |
12 | 17 |
getZ <- |
13 | 18 |
function(z,zUsed,stillActive,nll,nllUSED){ |
... | ... |
@@ -1,17 +1,19 @@ |
1 | 1 |
#' Load a count dataset associated with a study. |
2 |
-#' |
|
2 |
+#' |
|
3 | 3 |
#' Load a matrix of OTUs in a tab delimited format |
4 |
-#' |
|
4 |
+#' |
|
5 |
+#' |
|
6 |
+#' @aliases load_meta metagenomicLoader |
|
5 | 7 |
#' @param file Path and filename of the actual data file. |
8 |
+#' @param sep File delimiter. |
|
6 | 9 |
#' @return An object of count data. |
7 |
-#' |
|
8 |
-#' @name load_meta |
|
9 |
-#' @aliases metagenomicLoader |
|
10 | 10 |
#' @seealso \code{\link{load_phenoData}} |
11 | 11 |
#' @examples |
12 |
-#' obj = load_meta("~/Desktop/testFile.tsv") |
|
13 |
-load_meta <- |
|
14 |
-function(file,sep="\t") |
|
12 |
+#' |
|
13 |
+#' dataDirectory <- system.file("extdata", package="metagenomeSeq") |
|
14 |
+#' lung = load_meta(file.path(dataDirectory,"CHK_NAME.otus.count.csv")) |
|
15 |
+#' |
|
16 |
+load_meta <- function(file,sep="\t") |
|
15 | 17 |
{ |
16 | 18 |
dat2 <- read.table(file,header=FALSE,sep=sep,nrows=1,stringsAsFactors=FALSE); |
17 | 19 |
subjects <- as.character(dat2[1,-1]); |
... | ... |
@@ -1,15 +1,16 @@ |
1 | 1 |
#' Load a count dataset associated with a study set up in a Qiime format. |
2 |
-#' |
|
2 |
+#' |
|
3 | 3 |
#' Load a matrix of OTUs in Qiime's format |
4 |
-#' |
|
4 |
+#' |
|
5 |
+#' |
|
6 |
+#' @aliases load_metaQ qiimeLoader |
|
5 | 7 |
#' @param file Path and filename of the actual data file. |
6 | 8 |
#' @return An object of count data. |
7 |
-#' |
|
8 |
-#' @name load_metaQ |
|
9 |
-#' @aliases qiimeLoader |
|
10 | 9 |
#' @seealso \code{\link{load_meta}} \code{\link{load_phenoData}} |
11 | 10 |
#' @examples |
12 |
-#' obj = load_metaQ("~/Desktop/testFile.tsv") |
|
11 |
+#' |
|
12 |
+#' # see vignette |
|
13 |
+#' |
|
13 | 14 |
load_metaQ <- function(file) { |
14 | 15 |
dat2 <- read.delim(file,header=FALSE,stringsAsFactors=FALSE,nrows=1,skip=1); |
15 | 16 |
len = ncol(dat2) |
... | ... |
@@ -1,18 +1,19 @@ |
1 | 1 |
#' Load a clinical/phenotypic dataset associated with a study. |
2 |
-#' |
|
2 |
+#' |
|
3 | 3 |
#' Load a matrix of metadata associated with a study. |
4 |
-#' |
|
4 |
+#' |
|
5 |
+#' |
|
6 |
+#' @aliases load_phenoData phenoData |
|
5 | 7 |
#' @param file Path and filename of the actual clinical file. |
6 |
-#' @param tran Boolean. If the covariates are along the columns and samples along the rows, then tran should equal TRUE. |
|
8 |
+#' @param tran Boolean. If the covariates are along the columns and samples |
|
9 |
+#' along the rows, then tran should equal TRUE. |
|
7 | 10 |
#' @param sep The separator for the file. |
8 | 11 |
#' @return The metadata as a dataframe. |
9 |
-#' |
|
10 |
-#' @name load_phenoData |
|
11 |
-#' @aliases phenoData |
|
12 | 12 |
#' @seealso \code{\link{load_meta}} |
13 | 13 |
#' @examples |
14 |
-#' clin = load_phenoData("~/Desktop/testFile.tsv") |
|
15 |
- |
|
14 |
+#' |
|
15 |
+#' # see vignette |
|
16 |
+#' |
|
16 | 17 |
load_phenoData <- |
17 | 18 |
function(file,tran=FALSE,sep="\t") |
18 | 19 |
{ |
... | ... |
@@ -1,3 +1,24 @@ |
1 |
+#' Basic correlation plot function for normalized or unnormalized counts. |
|
2 |
+#' |
|
3 |
+#' This function plots a heatmap of the "n" features with greatest variance |
|
4 |
+#' across rows. |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @param obj A MRexperiment object with count data. |
|
8 |
+#' @param n The number of features to plot |
|
9 |
+#' @param log Whether or not to log transform the counts. |
|
10 |
+#' @param norm Whether or not to normalize the counts. |
|
11 |
+#' @param fun Function to calculate pair-wise relationships. Default is pearson |
|
12 |
+#' correlation |
|
13 |
+#' @param ... Additional plot arguments. |
|
14 |
+#' @return NA |
|
15 |
+#' @seealso \code{\link{cumNormMat}} |
|
16 |
+#' @examples |
|
17 |
+#' |
|
18 |
+#' data(mouseData) |
|
19 |
+#' trials = pData(mouseData)$diet |
|
20 |
+#' plotCorr(obj=mouseData,n=200,cexRow = 0.4,cexCol = 0.4,trace="none",dendrogram="none") |
|
21 |
+#' |
|
1 | 22 |
plotCorr <- function(obj,n,log=TRUE,norm=TRUE,fun=cor,...) { |
2 | 23 |
if (log == TRUE) { |
3 | 24 |
if (norm == TRUE) { |
... | ... |
@@ -1,31 +1,35 @@ |
1 | 1 |
#' Basic plot function of the raw or normalized data. |
2 |
-#' |
|
3 |
-#' This function plots the abundance of a particular OTU by class. The function uses |
|
4 |
-#' the estimated posterior probabilities to make technical zeros transparent. |
|
5 |
-#' |
|
6 |
-#' @param obj An eSet object with count data. |
|
2 |
+#' |
|
3 |
+#' This function plots the abundance of a particular OTU by class. The function |
|
4 |
+#' uses the estimated posterior probabilities to make technical zeros |
|
5 |
+#' transparent. |
|
6 |
+#' |
|
7 |
+#' |
|
8 |
+#' @aliases genusPlot plotGenus |
|
9 |
+#' @param obj An MRexperiment object with count data. |
|
7 | 10 |
#' @param otuIndex A list of the otus with the same annotation. |
8 | 11 |
#' @param classIndex A list of the samples in their respective groups. |
9 | 12 |
#' @param norm Whether or not to normalize the counts. |
10 |
-#' @param normp The value at which to scale the counts by and then log. |
|
11 | 13 |
#' @param no Which of the otuIndex to plot. |
12 | 14 |
#' @param factor Factor value for jitter |
13 | 15 |
#' @param pch Standard pch value for the plot command. |
16 |
+#' @param labs Whether to include group labels or not. (TRUE/FALSE) |
|
17 |
+#' @param xlab xlabel for the plot. |
|
18 |
+#' @param ylab ylabel for the plot. |
|
14 | 19 |
#' @param jitter Boolean to jitter the count data or not. |
15 | 20 |
#' @param ret Boolean to return the observed data that would have been plotted. |
16 | 21 |
#' @param ... Additional plot arguments. |
17 | 22 |
#' @return NA |
18 |
-#' @note \code{\link{detect}} makes use of settings. |
|
19 |
-#' |
|
20 |
-#' @name plotGenus |
|
21 |
-#' @aliases genusPlot |
|
22 | 23 |
#' @seealso \code{\link{cumNorm}} |
23 |
-#' @examples |
|
24 |
-#' classIndex=list(controls=which(type=="Control")) |
|
25 |
-#' classIndex$cases=which(type=="Case") |
|
26 |
-#' otuIndex = which(taxa == "E-coli") |
|
27 |
-#' plotGenus(obj,otu=12,classIndex,xlab="OTU log-normalized counts") |
|
28 |
- |
|
24 |
+#' @examples |
|
25 |
+#' |
|
26 |
+#' data(mouseData) |
|
27 |
+#' classIndex=list(controls=which(pData(mouseData)$diet=="BK")) |
|
28 |
+#' classIndex$cases=which(pData(mouseData)$diet=="Western") |
|
29 |
+#' otuIndex = grep("Strep",fData(mouseData)$fdata) |
|
30 |
+#' otuIndex=otuIndex[order(rowSums(MRcounts(mouseData)[otuIndex,]),decreasing=TRUE)] |
|
31 |
+#' plotGenus(mouseData,otuIndex,classIndex,no=1:2,xaxt="n",norm=FALSE,ylab="Strep normalized log(cpt)") |
|
32 |
+#' |
|
29 | 33 |
plotGenus <- |
30 | 34 |
function(obj,otuIndex,classIndex,norm=TRUE,no=1:length(otuIndex),labs=TRUE,xlab=NULL,ylab=NULL,jitter=TRUE,factor=1,pch=21,ret=FALSE,...){ |
31 | 35 |
|
... | ... |
@@ -63,4 +67,4 @@ function(obj,otuIndex,classIndex,norm=TRUE,no=1:length(otuIndex),labs=TRUE,xlab= |
63 | 67 |
} |
64 | 68 |
|
65 | 69 |
if(ret) list(x=x,y=y) |
66 |
-} |
|
67 | 70 |
\ No newline at end of file |
71 |
+} |
... | ... |
@@ -1,3 +1,23 @@ |
1 |
+#' Basic heatmap plot function for normalized counts. |
|
2 |
+#' |
|
3 |
+#' This function plots a heatmap of the "n" features with greatest variance |
|
4 |
+#' across rows. |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @param obj A MRexperiment object with count data. |
|
8 |
+#' @param n The number of features to plot |
|
9 |
+#' @param trials A vector of clinical information for. |
|
10 |
+#' @param log Whether or not to log transform the counts. |
|
11 |
+#' @param norm Whether or not to normalize the counts. |
|
12 |
+#' @param ... Additional plot arguments. |
|
13 |
+#' @return NA |
|
14 |
+#' @seealso \code{\link{cumNormMat}} |
|
15 |
+#' @examples |
|
16 |
+#' |
|
17 |
+#' data(mouseData) |
|
18 |
+#' trials = pData(mouseData)$diet |
|
19 |
+#' plotMRheatmap(obj=mouseData,n=200,trials=trials,cexRow = 0.4,cexCol = 0.4,trace="none") |
|
20 |
+#' |
|
1 | 21 |
plotMRheatmap <- function(obj,n,trials,log=TRUE,norm=TRUE,...) { |
2 | 22 |
|
3 | 23 |
if(log==TRUE){ |
... | ... |
@@ -21,4 +41,4 @@ plotMRheatmap <- function(obj,n,trials,log=TRUE,norm=TRUE,...) { |
21 | 41 |
heatmapColColors=brewer.pal(12,"Set3")[as.integer(factor(trials))]; |
22 | 42 |
heatmap.2(mat2,col=heatmapCols,ColSideColors=heatmapColColors,...); |
23 | 43 |
invisible() |
24 |
-} |
|
25 | 44 |
\ No newline at end of file |
45 |
+} |
... | ... |
@@ -1,28 +1,32 @@ |
1 | 1 |
#' Basic plot function of the raw or normalized data. |
2 |
-#' |
|
3 |
-#' This function plots the abundance of a particular OTU by class. The function uses |
|
4 |
-#' the estimated posterior probabilities to make technical zeros transparent. |
|
5 |
-#' |
|
6 |
-#' @param obj An eSet object with count data. |
|
2 |
+#' |
|
3 |
+#' This function plots the abundance of a particular OTU by class. The function |
|
4 |
+#' uses the estimated posterior probabilities to make technical zeros |
|
5 |
+#' transparent. |
|
6 |
+#' |
|
7 |
+#' |
|
8 |
+#' @param obj A MRexperiment object with count data. |
|
7 | 9 |
#' @param otu The row number/OTU to plot. |
8 | 10 |
#' @param classIndex A list of the samples in their respective groups. |
9 | 11 |
#' @param norm Whether or not to normalize the counts. |
10 |
-#' @param normp The value at which to scale the counts by and then log. |
|
11 | 12 |
#' @param factor Factor value for jitter. |
12 | 13 |
#' @param pch Standard pch value for the plot command. |
14 |
+#' @param labs Whether to include group labels or not. (TRUE/FALSE) |
|
15 |
+#' @param xlab xlabel for the plot. |
|
16 |
+#' @param ylab ylabel for the plot. |
|
13 | 17 |
#' @param jitter Boolean to jitter the count data or not. |
14 | 18 |
#' @param ret Boolean to return the observed data that would have been plotted. |
15 | 19 |
#' @param ... Additional plot arguments. |
16 | 20 |
#' @return NA |
17 |
-#' |
|
18 |
-#' @name plotOTU |
|
19 |
-#' @aliases otuplot |
|
20 | 21 |
#' @seealso \code{\link{cumNorm}} |
21 |
-#' @examples |
|
22 |
-#' classIndex=list(controls=which(type=="Control")) |
|
23 |
-#' classIndex$cases=which(type=="Case") |
|
24 |
-#' plotOTU(obj,otu=12,classIndex,xlab="OTU log-normalized counts") |
|
25 |
- |
|
22 |
+#' @examples |
|
23 |
+#' |
|
24 |
+#' data(mouseData) |
|
25 |
+#' classIndex=list(controls=which(pData(mouseData)$diet=="BK")) |
|
26 |
+#' classIndex$cases=which(pData(mouseData)$diet=="Western") |
|
27 |
+#' # you can specify whether or not to normalize, and to what level |
|
28 |
+#' plotOTU(mouseData,otu=9083,classIndex,norm=FALSE,main="9083 feature abundances") |
|
29 |
+#' |
|
26 | 30 |
plotOTU <- |
27 | 31 |
function(obj,otu,classIndex,norm=TRUE,factor=1,pch=21,labs=TRUE,xlab=NULL,ylab=NULL,jitter=TRUE,ret=FALSE,...){ |
28 | 32 |
|
... | ... |
@@ -67,4 +71,4 @@ function(obj,otu,classIndex,norm=TRUE,factor=1,pch=21,labs=TRUE,xlab=NULL,ylab=N |
67 | 71 |
|
68 | 72 |
if (ret) |
69 | 73 |
list(x=x,y=y) |
70 |
-} |
|
71 | 74 |
\ No newline at end of file |
75 |
+} |
... | ... |
@@ -11,7 +11,7 @@ |
11 | 11 |
#' @seealso \code{\link{fitZig}} \code{\link{cumNorm}} \code{\link{plotOTU}} |
12 | 12 |
#' @examples |
13 | 13 |
#' control = zigControl(tol=1e-10,maxit=10,verbose=FALSE) |
14 |
- |
|
14 |
+#' |
|
15 | 15 |
zigControl <-function(tol=1e-4,maxit=10,verbose=TRUE){ |
16 | 16 |
set <-list(tol=tol,maxit=maxit,verbose=verbose); |
17 | 17 |
return(set) |
... | ... |
@@ -2,33 +2,57 @@ |
2 | 2 |
\alias{MRcoefs} |
3 | 3 |
\title{Table of top-ranked microbial marker gene from linear model fit} |
4 | 4 |
\usage{ |
5 |
- MRcoefs(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,eff=0,output=NULL) |
|
5 |
+ MRcoefs(obj, by = 2, coef = NULL, number = 10, |
|
6 |
+ taxa = obj$taxa, uniqueNames = FALSE, |
|
7 |
+ adjust.method = "fdr", group = 0, eff = 0, |
|
8 |
+ output = NULL) |
|
6 | 9 |
} |
7 | 10 |
\arguments{ |
8 |
- \item{obj}{A list containing the linear model fit produced by lmFit through fitZig.} |
|
9 |
- |
|
10 |
- \item{by}{Column number or column name specifying which coefficient or contrast of the linear model is of interest.} |
|
11 |
- |
|
12 |
- \item{coef}{Column number(s) or column name(s) specifying which coefficient or contrast of the linear model to display.} |
|
13 |
- |
|
14 |
- \item{number}{The number of bacterial features to pick out.} |
|
15 |
- |
|
11 |
+ \item{obj}{A list containing the linear model fit |
|
12 |
+ produced by lmFit through fitZig.} |
|
13 |
+ |
|
14 |
+ \item{by}{Column number or column name specifying which |
|
15 |
+ coefficient or contrast of the linear model is of |
|
16 |
+ interest.} |
|
17 |
+ |
|
18 |
+ \item{coef}{Column number(s) or column name(s) specifying |
|
19 |
+ which coefficient or contrast of the linear model to |
|
20 |
+ display.} |
|
21 |
+ |
|
22 |
+ \item{number}{The number of bacterial features to pick |
|
23 |
+ out.} |
|
24 |
+ |
|
16 | 25 |
\item{taxa}{Taxa list.} |
17 |
- |
|
26 |
+ |
|
18 | 27 |
\item{uniqueNames}{Number the various taxa.} |
19 |
- |
|
20 |
- \item{adjust.method}{Method to adjust p-values by. Default is "FDR". Options include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none". See \code{\link{p.adjust}} for more details.} |
|
21 |
- |
|
22 |
- \item{group}{One of three choices, 0,1,2,3. 0: the sort is ordered by a decreasing absolute value coefficient fit. 1: the sort is ordered by the raw coefficient fit in decreasing order. 2: the sort is ordered by the raw coefficient fit in increasing order. 3: the sort is ordered by the p-value of the coefficient fit in increasing order.} |
|
23 |
- \item{eff}{Restrict samples to have at least eff quantile effective samples.} |
|
24 |
- |
|
25 |
- \item{output}{Name of output file, including location, to save the table.} |
|
28 |
+ |
|
29 |
+ \item{adjust.method}{Method to adjust p-values by. |
|
30 |
+ Default is "FDR". Options include "holm", "hochberg", |
|
31 |
+ "hommel", "bonferroni", "BH", "BY", "fdr", "none". See |
|
32 |
+ \code{\link{p.adjust}} for more details.} |
|
33 |
+ |
|
34 |
+ \item{group}{One of three choices, 0,1,2,3. 0: the sort |
|
35 |
+ is ordered by a decreasing absolute value coefficient |
|
36 |
+ fit. 1: the sort is ordered by the raw coefficient fit in |
|
37 |
+ decreasing order. 2: the sort is ordered by the raw |
|
38 |
+ coefficient fit in increasing order. 3: the sort is |
|
39 |
+ ordered by the p-value of the coefficient fit in |
|
40 |
+ increasing order.} |
|
41 |
+ |
|
42 |
+ \item{eff}{Restrict samples to have at least eff quantile |
|
43 |
+ effective samples.} |
|
44 |
+ |
|
45 |
+ \item{output}{Name of output file, including location, to |
|
46 |
+ save the table.} |
|
26 | 47 |
} |
27 | 48 |
\value{ |
28 |
- Table of the top-ranked features determined by the linear fit's coefficient. |
|
49 |
+ Table of the top-ranked features determined by the linear |
|
50 |
+ fit's coefficient. |
|
29 | 51 |
} |
30 | 52 |
\description{ |
31 |
- Extract a table of the top-ranked features from a linear model fit. This function will be updated soon to provide better flexibility similar to limma's topTable. |
|
53 |
+ Extract a table of the top-ranked features from a linear |
|
54 |
+ model fit. This function will be updated soon to provide |
|
55 |
+ better flexibility similar to limma's topTable. |
|
32 | 56 |
} |
33 | 57 |
\examples{ |
34 | 58 |
data(lungData) |
... | ... |
@@ -44,6 +68,6 @@ fit = fitZig(obj = lungTrim,mod=mod,control=settings) |
44 | 68 |
head(MRcoefs(fit)) |
45 | 69 |
} |
46 | 70 |
\seealso{ |
47 |
- \code{\link{fitZig}} \code{\link{MRtable}} |
|
71 |
+ \code{\link{fitZig}} \code{\link{MRtable}} |
|
48 | 72 |
} |
49 | 73 |
|
... | ... |
@@ -1,30 +1,27 @@ |
1 |
-\name{MRcounts} |
|
2 |
-\Rdversion{1.0} |
|
3 | 1 |
\docType{methods} |
4 |
-\alias{MRcounts,MRexperiment-method} |
|
2 |
+\name{MRcounts} |
|
5 | 3 |
\alias{MRcounts} |
6 |
-\title{ |
|
7 |
- Accessor for the counts slot of a MRexperiment object |
|
8 |
-} |
|
9 |
- |
|
10 |
-\description{ |
|
11 |
- The counts slot holds the raw count data representing (along the rows) the number of reads annotated for a particular feature and (along the columns) the sample. |
|
12 |
-} |
|
13 |
- |
|
4 |
+\alias{MRcounts,MRexperiment-method} |
|
5 |
+\title{Accessor for the counts slot of a MRexperiment object} |
|
14 | 6 |
\usage{ |
15 |
-\S4method{MRcounts}{MRexperiment}(cnts, norm=FALSE) |
|
7 |
+ MRcounts(obj, norm = FALSE) |
|
16 | 8 |
} |
17 |
- |
|
18 | 9 |
\arguments{ |
19 | 10 |
\item{cnts}{a \code{MRexperiment} object.} |
20 |
- \item{norm}{logical indicating whether or not to return normalized counts.} |
|
21 |
-} |
|
22 | 11 |
|
23 |
-\author{ |
|
24 |
- Joseph N. Paulson, jpaulson@umiacs.umd.edu |
|
12 |
+ \item{norm}{logical indicating whether or not to return |
|
13 |
+ normalized counts.} |
|
14 |
+} |
|
15 |
+\description{ |
|
16 |
+ The counts slot holds the raw count data representing |
|
17 |
+ (along the rows) the number of reads annotated for a |
|
18 |
+ particular feature and (along the columns) the sample. |
|
25 | 19 |
} |
26 | 20 |
\examples{ |
27 |
- data(lungData) |
|
21 |
+data(lungData) |
|
28 | 22 |
head(MRcounts(lungData)) |
29 |
-} |
|
30 |
- |
|
23 |
+} |
|
24 |
+\author{ |
|
25 |
+ Joseph N. Paulson, jpaulson@umiacs.umd.edu |
|
26 |
+} |
|
27 |
+ |
... | ... |
@@ -2,19 +2,25 @@ |
2 | 2 |
\alias{MRfisher} |
3 | 3 |
\title{Wrapper to run fisher's test on presence/absence of a feature.} |
4 | 4 |
\usage{ |
5 |
- MRfisher(obj,cl,mat=FALSE) |
|
5 |
+ MRfisher(obj, cl, mat = FALSE) |
|
6 | 6 |
} |
7 | 7 |
\arguments{ |
8 |
- \item{obj}{A MRexperiment object with a count matrix, or a simple count matrix.} |
|
8 |
+ \item{obj}{A MRexperiment object with a count matrix, or |
|
9 |
+ a simple count matrix.} |
|
10 |
+ |
|
9 | 11 |
\item{cl}{Group comparison} |
10 |
- \item{mat}{logical indicating whether obj is a MRexperiment object or matrix. Default is a MRexperiment object.} |
|
12 |
+ |
|
13 |
+ \item{mat}{logical indicating whether obj is a |
|