Browse code

updated function descriptions with roxygen2

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/metagenomeSeq@79408 bc3139a8-67e5-0310-9ffc-ced21a209358

Joseph Paulson authored on 13/08/2013 19:16:25
Showing 60 changed files

... ...
@@ -4,40 +4,53 @@ Version: 1.2.1
4 4
 Date: 2013-06-11
5 5
 Author: Joseph Nathaniel Paulson, Mihai Pop,  Hector Corrada Bravo
6 6
 Maintainer: Joseph N. Paulson <jpaulson@umiacs.umd.edu>
7
-Description: metagenomeSeq is designed to determine features (be it Operational Taxanomic Unit (OTU), species, etc.) that are differentially abundant between two or more groups of multiple samples. metagenomeSeq is designed to address the effects of both normalization and under-sampling of microbial communities on disease association detection and the testing of feature correlations.
7
+Description: metagenomeSeq is designed to determine features (be it Operational
8
+    Taxanomic Unit (OTU), species, etc.) that are differentially abundant
9
+    between two or more groups of multiple samples. metagenomeSeq is designed
10
+    to address the effects of both normalization and under-sampling of
11
+    microbial communities on disease association detection and the testing of
12
+    feature correlations.
8 13
 License: Artistic-2.0
9
-Depends: R(>= 3.0), Biobase, limma, matrixStats, methods,
10
-        RColorBrewer, gplots
11
-Suggests: annotate
14
+Depends:
15
+    R(>= 3.0),
16
+    Biobase,
17
+    limma,
18
+    matrixStats,
19
+    methods,
20
+    RColorBrewer,
21
+    gplots
22
+Suggests:
23
+    annotate
12 24
 biocViews: Bioinformatics, DifferentialExpression, Metagenomics,
13
-        Visualization
14
-Collate: 'zigControl.R' 
15
-	'cumNorm.R' 
16
-	'plotOTU.R' 
17
-	'fitZig.R'
18
-        'doCountMStep.R' 
19
-	'doZeroMStep.R' 
20
-	'doEStep.R' 
21
-	'getZ.R' 
22
-	'getPi.R'
23
-        'getCountDensity.R' 
24
-	'getNegativeLogLikelihoods.R'
25
-        'isItStillActive.R' 
26
-	'getEpsilon.R' 
27
-	'load_meta.R'
28
-        'load_phenoData.R' 
29
-	'exportMat.R' 
30
-	'exportStats.R'
31
-        'cumNormStat.R'
32
-	'plotGenus.R'
33
-	'aggregateM.R' 
34
-	'cumNormMat.R'
35
-        'load_metaQ.R' 
36
-	'allClasses.R' 
37
-	'MRtable.R' 
38
-	'MRcoefs.R'
39
-        'plotMRheatmap.R'
40
-	'plotCorr.R'
41
-	'MRfisher.R'
42
-	'MRfulltable.R'
25
+    Visualization
26
+Collate:
27
+    'zigControl.R'
28
+    'cumNorm.R'
29
+    'plotOTU.R'
30
+    'fitZig.R'
31
+    'doCountMStep.R'
32
+    'doZeroMStep.R'
33
+    'doEStep.R'
34
+    'getZ.R'
35
+    'getPi.R'
36
+    'getCountDensity.R'
37
+    'getNegativeLogLikelihoods.R'
38
+    'isItStillActive.R'
39
+    'getEpsilon.R'
40
+    'load_meta.R'
41
+    'load_phenoData.R'
42
+    'exportMat.R'
43
+    'exportStats.R'
44
+    'cumNormStat.R'
45
+    'plotGenus.R'
46
+    'aggregateM.R'
47
+    'cumNormMat.R'
48
+    'load_metaQ.R'
49
+    'allClasses.R'
50
+    'MRtable.R'
51
+    'MRcoefs.R'
52
+    'plotMRheatmap.R'
53
+    'plotCorr.R'
54
+    'MRfisher.R'
55
+    'MRfulltable.R'
43 56
 URL: http://cbcb.umd.edu/software/metagenomeSeq
... ...
@@ -1,3 +1,46 @@
1
+#' Table of top-ranked microbial marker gene from linear model fit
2
+#' 
3
+#' Extract a table of the top-ranked features from a linear model fit. This
4
+#' function will be updated soon to provide better flexibility similar to
5
+#' limma's topTable.
6
+#' 
7
+#' 
8
+#' @param obj A list containing the linear model fit produced by lmFit through
9
+#' fitZig.
10
+#' @param by Column number or column name specifying which coefficient or
11
+#' contrast of the linear model is of interest.
12
+#' @param coef Column number(s) or column name(s) specifying which coefficient
13
+#' or contrast of the linear model to display.
14
+#' @param number The number of bacterial features to pick out.
15
+#' @param taxa Taxa list.
16
+#' @param uniqueNames Number the various taxa.
17
+#' @param adjust.method Method to adjust p-values by. Default is "FDR". Options
18
+#' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr",
19
+#' "none". See \code{\link{p.adjust}} for more details.
20
+#' @param group One of three choices, 0,1,2,3. 0: the sort is ordered by a
21
+#' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw
22
+#' coefficient fit in decreasing order. 2: the sort is ordered by the raw
23
+#' coefficient fit in increasing order. 3: the sort is ordered by the p-value
24
+#' of the coefficient fit in increasing order.
25
+#' @param eff Restrict samples to have at least eff quantile effective samples.
26
+#' @param output Name of output file, including location, to save the table.
27
+#' @return Table of the top-ranked features determined by the linear fit's
28
+#' coefficient.
29
+#' @seealso \code{\link{fitZig}} \code{\link{MRtable}}
30
+#' @examples
31
+#' 
32
+#' data(lungData)
33
+#' k = grep("Extraction.Control",pData(lungData)$SampleType)
34
+#' lungTrim = lungData[,-k]
35
+#' k = which(rowSums(MRcounts(lungTrim)>0)<10)
36
+#' lungTrim = lungTrim[-k,]
37
+#' cumNorm(lungTrim)
38
+#' smokingStatus = pData(lungTrim)$SmokingStatus
39
+#' mod = model.matrix(~smokingStatus)
40
+#' settings = zigControl(maxit=1,verbose=FALSE)
41
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings)
42
+#' head(MRcoefs(fit))
43
+#' 
1 44
 MRcoefs<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,eff=0,output=NULL){
2 45
     tb = obj$fit$coefficients
3 46
     tx = as.character(taxa);
... ...
@@ -43,4 +86,4 @@ MRcoefs<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,a
43 86
     } else{
44 87
         return(as.data.frame(mat))
45 88
     }
46
-}
47 89
\ No newline at end of file
90
+}
... ...
@@ -1,3 +1,25 @@
1
+#' Wrapper to run fisher's test on presence/absence of a feature.
2
+#' 
3
+#' This function returns a data frame of p-values, odds ratios, lower and upper
4
+#' confidence limits for every row of a matrix.
5
+#' 
6
+#' 
7
+#' @param obj A MRexperiment object with a count matrix, or a simple count
8
+#' matrix.
9
+#' @param cl Group comparison
10
+#' @param mat logical indicating whether obj is a MRexperiment object or
11
+#' matrix. Default is a MRexperiment object.
12
+#' @return NA
13
+#' @seealso \code{\link{cumNorm}} \code{\link{fitZig}}
14
+#' @examples
15
+#' 
16
+#' data(lungData)
17
+#' k = grep("Extraction.Control",pData(lungData)$SampleType)
18
+#' lungTrim = lungData[,-k]
19
+#' lungTrim = lungTrim[-which(rowSums(MRcounts(lungTrim)>0)<20),]
20
+#' res = MRfisher(lungTrim,pData(lungTrim)$SmokingStatus);
21
+#' head(res)
22
+#' 
1 23
 MRfisher<-function(obj,cl,mat=FALSE){
2 24
     if(mat==FALSE){
3 25
         x = MRcounts(obj)>0;
... ...
@@ -1,3 +1,50 @@
1
+#' Table of top microbial marker gene from linear model fit including sequence
2
+#' information
3
+#' 
4
+#' Extract a table of the top-ranked features from a linear model fit. This
5
+#' function will be updated soon to provide better flexibility similar to
6
+#' limma's topTable. This function differs from \code{link{MRcoefs}} in that it
7
+#' provides other information about the presence or absence of features to help
8
+#' ensure significant features called are moderately present.
9
+#' 
10
+#' 
11
+#' @param obj A list containing the linear model fit produced by lmFit through
12
+#' fitZig.
13
+#' @param by Column number or column name specifying which coefficient or
14
+#' contrast of the linear model is of interest.
15
+#' @param coef Column number(s) or column name(s) specifying which coefficient
16
+#' or contrast of the linear model to display.
17
+#' @param number The number of bacterial features to pick out.
18
+#' @param taxa Taxa list.
19
+#' @param uniqueNames Number the various taxa.
20
+#' @param adjust.method Method to adjust p-values by. Default is "FDR". Options
21
+#' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr",
22
+#' "none". See \code{\link{p.adjust}} for more details.
23
+#' @param group One of three choices, 0,1,2. 0: the sort is ordered by a
24
+#' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw
25
+#' coefficient fit in decreasing order. 2: the sort is ordered by the raw
26
+#' coefficient fit in increasing order. 3: the sort is ordered by the p-value
27
+#' of the coefficient fit in increasing order.
28
+#' @param eff Restrict samples to have at least eff quantile effective samples.
29
+#' @param output Name of output file, including location, to save the table.
30
+#' @return Table of the top-ranked features determined by the linear fit's
31
+#' coefficient.
32
+#' @seealso \code{\link{fitZig}} \code{\link{MRcoefs}} \code{\link{MRtable}}
33
+#' \code{\link{MRfisher}}
34
+#' @examples
35
+#' 
36
+#' data(lungData)
37
+#' k = grep("Extraction.Control",pData(lungData)$SampleType)
38
+#' lungTrim = lungData[,-k]
39
+#' k = which(rowSums(MRcounts(lungTrim)>0)<10)
40
+#' lungTrim = lungTrim[-k,]
41
+#' cumNorm(lungTrim)
42
+#' smokingStatus = pData(lungTrim)$SmokingStatus
43
+#' mod = model.matrix(~smokingStatus)
44
+#' settings = zigControl(maxit=1,verbose=FALSE)
45
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings)
46
+#' head(MRfulltable(fit))
47
+#' 
1 48
 MRfulltable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,eff=0,output=NULL){
2 49
     
3 50
     tb = obj$fit$coefficients
... ...
@@ -62,4 +109,4 @@ MRfulltable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FAL
62 109
     } else{
63 110
         return(as.data.frame(mat))
64 111
     }
65
-}
66 112
\ No newline at end of file
113
+}
... ...
@@ -1,3 +1,48 @@
1
+#' Table of top microbial marker gene from linear model fit including sequence
2
+#' information
3
+#' 
4
+#' Extract a table of the top-ranked features from a linear model fit. This
5
+#' function will be updated soon to provide better flexibility similar to
6
+#' limma's topTable. This function differs from \code{link{MRcoefs}} in that it
7
+#' provides other information about the presence or absence of features to help
8
+#' ensure significant features called are moderately present.
9
+#' 
10
+#' 
11
+#' @param obj A list containing the linear model fit produced by lmFit through
12
+#' fitZig.
13
+#' @param by Column number or column name specifying which coefficient or
14
+#' contrast of the linear model is of interest.
15
+#' @param coef Column number(s) or column name(s) specifying which coefficient
16
+#' or contrast of the linear model to display.
17
+#' @param number The number of bacterial features to pick out.
18
+#' @param taxa Taxa list.
19
+#' @param uniqueNames Number the various taxa.
20
+#' @param adjust.method Method to adjust p-values by. Default is "FDR". Options
21
+#' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr",
22
+#' "none". See \code{\link{p.adjust}} for more details.
23
+#' @param group One of three choices, 0,1,2. 0: the sort is ordered by a
24
+#' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw
25
+#' coefficient fit in decreasing order. 2: the sort is ordered by the raw
26
+#' coefficient fit in increasing order. 3: the sort is ordered by the p-value
27
+#' of the coefficient fit in increasing order.
28
+#' @param output Name of output file, including location, to save the table.
29
+#' @return Table of the top-ranked features determined by the linear fit's
30
+#' coefficient.
31
+#' @seealso \code{\link{fitZig}} \code{\link{MRcoefs}}
32
+#' @examples
33
+#' 
34
+#' data(lungData)
35
+#' k = grep("Extraction.Control",pData(lungData)$SampleType)
36
+#' lungTrim = lungData[,-k]
37
+#' k = which(rowSums(MRcounts(lungTrim)>0)<10)
38
+#' lungTrim = lungTrim[-k,]
39
+#' cumNorm(lungTrim)
40
+#' smokingStatus = pData(lungTrim)$SmokingStatus
41
+#' mod = model.matrix(~smokingStatus)
42
+#' settings = zigControl(maxit=1,verbose=FALSE)
43
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings)
44
+#' head(MRtable(fit))
45
+#' 
1 46
 MRtable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,output=NULL){
2 47
     tb = obj$fit$coefficients
3 48
     tx = as.character(taxa);
... ...
@@ -56,4 +101,4 @@ MRtable<-function(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,a
56 101
     } else{
57 102
         return(as.data.frame(mat))
58 103
     }
59
-}
60 104
\ No newline at end of file
105
+}
... ...
@@ -1,14 +1,17 @@
1
-#' Aggregates the counts to a particular classification.
2
-#'
3
-#' This function takes an eSet object of data at a particular level with feature information allowing
4
-#' for aggregation of counts to a particular level. This method assumes taxa begin at the highest level and continue to the current level.
5
-#'
6
-#' @param obj An eSet object of count data.
1
+#' Aggregates counts by a particular classification.
2
+#' 
3
+#' This function takes a MRexperiment object of data at a particular level with
4
+#' feature information allowing for aggregation of counts to a particular
5
+#' level. This method assumes taxa begin at the highest level and continue to
6
+#' the current level, reverse assumes taxa begin at the lowest level.
7
+#' 
8
+#' 
9
+#' @param obj A MRexperiment object.
7 10
 #' @param lvl The level to go up (numeric, 1,2,3).
11
+#' @param taxa A vector of taxa annotations with splits
8 12
 #' @param split The way character strings in taxa in the obj are split.
9
-#' @return Updated eSet object with counts aggregated to the various taxanomic levels.
10
-#'
11
-#' @name aggregateM
13
+#' @return Updated object with counts aggregated to the various taxanomic
14
+#' levels.
12 15
 aggregateM <-
13 16
 function(obj,taxa,lvl,split=";"){
14 17
 
... ...
@@ -11,6 +11,33 @@ setMethod("[", "MRexperiment", function (x, i, j, ..., drop = FALSE) {
11 11
         obj
12 12
 })
13 13
 
14
+
15
+
16
+#' Create a MRexperiment object
17
+#' 
18
+#' This function creates a MRexperiment object from a matrix or data frame of
19
+#' count data.
20
+#' 
21
+#' See \code{\link{MRexperiment-class}} and \code{eSet} (from the Biobase
22
+#' package) for the meaning of the various slots.
23
+#' 
24
+#' @param counts A matrix or data frame of count data. The count data is
25
+#' representative of the number of reads annotated for a feature (be it gene,
26
+#' OTU, species, etc). Rows should correspond to features and columns to
27
+#' samples.
28
+#' @param phenoData An AnnotatedDataFrame with pertinent sample information.
29
+#' @param featureData An AnnotatedDataFrame with pertinent feature information.
30
+#' @param libSize libSize, library size, is the total number of reads for a
31
+#' particular sample.
32
+#' @param normFactors normFactors, the normalization factors used in either the
33
+#' model or as scaling factors of sample counts for each particular sample.
34
+#' @return an object of class MRexperiment
35
+#' @author Joseph N Paulson, jpaulson@@umiacs.umd.edu
36
+#' @examples
37
+#' 
38
+#' cnts = matrix(abs(rnorm(1000)),nc=10)
39
+#' obj <- newMRexperiment(cnts)
40
+#' 
14 41
 newMRexperiment <- function(counts, phenoData=NULL, featureData=NULL,libSize=NULL, normFactors=NULL) {
15 42
     counts= as.matrix(counts)
16 43
 
... ...
@@ -19,9 +46,46 @@ newMRexperiment <- function(counts, phenoData=NULL, featureData=NULL,libSize=NUL
19 46
     if( is.null( phenoData ) )
20 47
       phenoData   <- annotatedDataFrameFrom(counts, byrow=FALSE)
21 48
     if( is.null( libSize ) )
49
+
50
+
51
+#' Access sample depth of coverage from MRexperiment object
52
+#' 
53
+#' The libSize vector represents the column (sample specific) sums of features,
54
+#' i.e. the total number of reads for a sample. It is used by
55
+#' \code{\link{fitZig}}.
56
+#' 
57
+#' 
58
+#' @name libSize
59
+#' @aliases libSize,MRexperiment-method libSize
60
+#' @docType methods
61
+#' @param obj a \code{MRexperiment} object.
62
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu
63
+#' @examples
64
+#' 
65
+#'    data(lungData)
66
+#'    head(libSize(lungData))
67
+#' 
22 68
       libSize <- as.matrix(colSums(counts))
23 69
       rownames(libSize) = colnames(counts)
24 70
     if( is.null( normFactors ) ){
71
+
72
+
73
+#' Access the normalization factors in a MRexperiment object
74
+#' 
75
+#' Function to access the scaling factors, aka the normalization factors, of
76
+#' samples in a MRexperiment object.
77
+#' 
78
+#' 
79
+#' @name normFactors
80
+#' @aliases normFactors,MRexperiment-method normFactors
81
+#' @docType methods
82
+#' @param obj a \code{MRexperiment} object.
83
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu
84
+#' @examples
85
+#' 
86
+#'    data(lungData)
87
+#'    head(normFactors(lungData))
88
+#' 
25 89
       normFactors <- as.matrix(rep( NA_real_, length(libSize) ))
26 90
       rownames(normFactors) = rownames(libSize)
27 91
     }
... ...
@@ -45,6 +109,26 @@ setValidity( "MRexperiment", function( object ) {
45 109
     TRUE
46 110
 } )
47 111
 
112
+
113
+
114
+#' Accessor for the counts slot of a MRexperiment object
115
+#' 
116
+#' The counts slot holds the raw count data representing (along the rows) the
117
+#' number of reads annotated for a particular feature and (along the columns)
118
+#' the sample.
119
+#' 
120
+#' 
121
+#' @name MRcounts
122
+#' @aliases MRcounts,MRexperiment-method MRcounts
123
+#' @docType methods
124
+#' @param cnts a \code{MRexperiment} object.
125
+#' @param norm logical indicating whether or not to return normalized counts.
126
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu
127
+#' @examples
128
+#' 
129
+#'    data(lungData)
130
+#'    head(MRcounts(lungData))
131
+#' 
48 132
 MRcounts <- function( obj ,norm=FALSE) {
49 133
    stopifnot( is( obj, "MRexperiment" ) )
50 134
    if(!norm){
... ...
@@ -57,6 +141,23 @@ MRcounts <- function( obj ,norm=FALSE) {
57 141
    }
58 142
 }
59 143
 
144
+
145
+
146
+#' Access the posterior probabilities that results from analysis
147
+#' 
148
+#' Accessing the posterior probabilities following a run through
149
+#' \code{\link{fitZig}}
150
+#' 
151
+#' 
152
+#' @name posterior.probs
153
+#' @aliases posterior.probs,MRexperiment-method posterior.probs
154
+#' @docType methods
155
+#' @param obj a \code{MRexperiment} object.
156
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu
157
+#' @examples
158
+#' 
159
+#' # see vignette
160
+#' 
60 161
 posterior.probs <- function( obj ) {
61 162
    stopifnot( is( obj, "MRexperiment" ) )
62 163
    assayData(obj)[["z"]]
... ...
@@ -74,7 +175,26 @@ libSize<-function(obj){
74 175
    ls
75 176
 }
76 177
 
178
+
179
+
180
+#' Access MRexperiment object experiment data
181
+#' 
182
+#' The expSummary vectors represent the column (sample specific) sums of
183
+#' features, i.e. the total number of reads for a sample, libSize and also the
184
+#' normalization factors, normFactor.
185
+#' 
186
+#' 
187
+#' @name expSummary
188
+#' @aliases expSummary,MRexperiment-method expSummary
189
+#' @docType methods
190
+#' @param obj a \code{MRexperiment} object.
191
+#' @author Joseph N. Paulson, jpaulson@@umiacs.umd.edu
192
+#' @examples
193
+#' 
194
+#' data(mouseData)
195
+#' expSummary(mouseData)
196
+#' 
77 197
 expSummary<-function(obj){
78 198
   stopifnot( is( obj, "MRexperiment" ) )
79 199
   pData(obj@expSummary$expSummary)
80
-}
81 200
\ No newline at end of file
201
+}
... ...
@@ -1,13 +1,19 @@
1 1
 #' Cumulative sum scaling factors.
2
-#'
3
-#' Calculates each column's quantile and calculates the sum up to and including that quantile.
4
-#'
5
-#' @param jobj An eSet object.
2
+#' 
3
+#' Calculates each column's quantile and calculates the sum up to and including
4
+#' that quantile.
5
+#' 
6
+#' 
7
+#' @param obj An MRexperiment object.
6 8
 #' @param p The pth quantile.
7 9
 #' @return Vector of the sum up to and including a sample's pth quantile
8
-#'
9
-#' @name cumNorm
10
-#' @seealso \code{\link{fitZig}}
10
+#' @seealso \code{\link{fitZig}} \code{\link{cumNormStat}}
11
+#' @examples
12
+#' 
13
+#' data(mouseData)
14
+#' cumNorm(mouseData)
15
+#' head(normFactors(mouseData))
16
+#' 
11 17
 cumNorm <-
12 18
 function(obj,p=cumNormStat(obj)){
13 19
 	x = MRcounts(obj)
... ...
@@ -1,13 +1,19 @@
1 1
 #' Cumulative sum scaling factors.
2
-#'
3
-#' Calculates each column's quantile and calculates the sum up to and including that quantile.
4
-#'
5
-#' @param jobj An eSet object.
2
+#' 
3
+#' Calculates each column's quantile and calculates the sum up to and including
4
+#' that quantile.
5
+#' 
6
+#' 
7
+#' @param obj A MRexperiment object.
6 8
 #' @param p The pth quantile.
7
-#' @return jobj An updated eSet object with normalized counts
8
-#'
9
-#' @name cumNormMat
9
+#' @return Returns a matrix normalized by scaling counts up to and including
10
+#' the pth quantile.
10 11
 #' @seealso \code{\link{fitZig}} \code{\link{cumNorm}}
12
+#' @examples
13
+#' 
14
+#' data(mouseData)
15
+#' head(cumNormMat(mouseData))
16
+#' 
11 17
 cumNormMat <-
12 18
 function(obj,p= cumNormStat(obj)){
13 19
 ####################################################################################
... ...
@@ -1,12 +1,22 @@
1
-#' Cumulative normalization statistic.
2
-#'
3
-#' @param obj An eSet object.
4
-#' @param pFlag Whether or not to plot the reference.
5
-#' @param rel Relative difference of rel percent.
6
-#' @return P-value for which to cumulative normalize.
7
-#'
8
-#' @name cumNormStat
1
+#' Cumulative sum scaling percentile selection
2
+#' 
3
+#' Calculates the percentile for which to sum counts up to and scale by.
4
+#' 
5
+#' 
6
+#' @param obj A list with count data
7
+#' @param pFlag Plot the median difference quantiles
8
+#' @param rel Cutoff for the relative difference from one median difference
9
+#' from the reference to the next
10
+#' @param qFlag Flag to either calculate the proper percentile using a
11
+#' step-wise or triangular approximation of the sample count distribution.
12
+#' @param ... Applicable if pFlag == TRUE. Extra plotting parameters.
13
+#' @return Percentile for which to scale data
9 14
 #' @seealso \code{\link{fitZig}} \code{\link{cumNorm}}
15
+#' @examples
16
+#' 
17
+#' data(mouseData)
18
+#' p = round(cumNormStat(mouseData,pFlag=FALSE),digits=2)
19
+#' 
10 20
 cumNormStat <-
11 21
 function(obj,pFlag = FALSE,rel=.1,qFlag = TRUE, ...){
12 22
     
... ...
@@ -42,4 +52,4 @@ function(obj,pFlag = FALSE,rel=.1,qFlag = TRUE, ...){
42 52
     x = which(abs(diff(diffr2))/diffr2[-1]>rel)[1] / length(diffr2)
43 53
     obj@expSummary$cumNormStat = x;
44 54
 	return(x)
45
-}
46 55
\ No newline at end of file
56
+}
... ...
@@ -1,22 +1,26 @@
1 1
 #' Compute the Maximization step calculation for features still active.
2
-#'
3
-#' Maximization step is solved by weighted least squares. The function also computes counts residuals.
4
-#'
5
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$
6
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) 
7
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$.
8
-#' The log-likelihood in this extended model is
9
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$.
10
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$.
11
-#'
12
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0).
2
+#' 
3
+#' Maximization step is solved by weighted least squares.  The function also
4
+#' computes counts residuals.
5
+#' 
6
+#' Maximum-likelihood estimates are approximated using the EM algorithm where
7
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the
8
+#' zero point mass as latent indicator variables. The density is defined as
9
+#' $f_zig(y_ij = pi_j(S_j)*f_0(y_ij) +(1-pi_j (S_j)) *
10
+#' f_count(y_ij;mu_i,sigma_i^2)$. The log-likelihood in this extended model is
11
+#' $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log
12
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (s_j))$. The responsibilities are defined
13
+#' as $z_ij = pr(delta_ij=1 | data)$.
14
+#' 
15
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a
16
+#' count comes from a spike distribution at 0).
13 17
 #' @param y Matrix (m x n) of count observations.
14 18
 #' @param mmCount Model matrix for the count distribution.
15
-#' @param stillActive Boolean vector of size M, indicating whether a feature converged or not.
19
+#' @param stillActive Boolean vector of size M, indicating whether a feature
20
+#' converged or not.
16 21
 #' @param fit2 Previous fit of the count model.
17
-#' @return Update matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0).
18
-#'
19
-#' @name doCountMStep
22
+#' @return Update matrix (m x n) of estimate responsibilities (probabilities
23
+#' that a count comes from a spike distribution at 0).
20 24
 #' @seealso \code{\link{fitZig}}
21 25
 doCountMStep <-
22 26
 function(z, y, mmCount, stillActive,fit2=NULL){
... ...
@@ -1,20 +1,22 @@
1 1
 #' Compute the Expectation step.
2
-#'
3
-#' Estimates the responsibilities $z_{ij} = \frac{\pi_j \cdot I_{0}(y_{ij}}{\pi_j \cdot I_{0}(y_{ij} + (1-\pi_j) \cdot f_{count}(y_{ij}}
4
-#'
5
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$
6
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) 
7
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$.
8
-#' The log-likelihood in this extended model is
9
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$.
10
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$.
11
-#'
2
+#' 
3
+#' Estimates the responsibilities $z_ij = fracpi_j cdot I_0(y_ijpi_j cdot
4
+#' I_0(y_ij + (1-pi_j) cdot f_count(y_ij
5
+#' 
6
+#' Maximum-likelihood estimates are approximated using the EM algorithm where
7
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the
8
+#' zero point mass as latent indicator variables. The density is defined as
9
+#' $f_zig(y_ij = pi_j(S_j) cdot f_0(y_ij) +(1-pi_j (S_j))cdot
10
+#' f_count(y_ij;mu_i,sigma_i^2)$. The log-likelihood in this extended model is
11
+#' $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log
12
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (sj))$. The responsibilities are defined
13
+#' as $z_ij = pr(delta_ij=1 | data)$.
14
+#' 
12 15
 #' @param countResiduals Residuals from the count model.
13 16
 #' @param zeroResiduals Residuals from the zero model.
14 17
 #' @param zeroIndices Index (matrix m x n) of counts that are zero/non-zero.
15
-#' @return Updated matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0).
16
-#'
17
-#' @name doEStep
18
+#' @return Updated matrix (m x n) of estimate responsibilities (probabilities
19
+#' that a count comes from a spike distribution at 0).
18 20
 #' @seealso \code{\link{fitZig}}
19 21
 doEStep <-
20 22
 function(countResiduals,  zeroResiduals, zeroIndices)
... ...
@@ -1,22 +1,25 @@
1 1
 #' Compute the zero Maximization step.
2
-#'
3
-#' Performs Maximization step calculation for the mixture components. Uses least squares to fit the parameters of the mean of the logistic distribution.
4
-#' $$
5
-#' pi_j = \sum_i^M \frac{1}{M}z_{ij}
6
-#' $$
7
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$
8
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) 
9
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$.
10
-#' The log-likelihood in this extended model is
11
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$.
12
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$.
13
-#'
14
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0).
2
+#' 
3
+#' Performs Maximization step calculation for the mixture components. Uses
4
+#' least squares to fit the parameters of the mean of the logistic
5
+#' distribution. $$ pi_j = sum_i^M frac1Mz_ij $$ Maximum-likelihood estimates
6
+#' are approximated using the EM algorithm where we treat mixture membership
7
+#' $delta_ij$ = 1 if $y_ij$ is generated from the zero point mass as latent
8
+#' indicator variables. The density is defined as $f_zig(y_ij = pi_j(S_j) cdot
9
+#' f_0(y_ij) +(1-pi_j (S_j))cdot f_count(y_ij;mu_i,sigma_i^2)$. The
10
+#' log-likelihood in this extended model is $(1-delta_ij) log
11
+#' f_count(y;mu_i,sigma_i^2 )+delta_ij log pi_j(s_j)+(1-delta_ij)log (1-pi_j
12
+#' (sj))$. The responsibilities are defined as $z_ij = pr(delta_ij=1 | data)$.
13
+#' 
14
+#' 
15
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a
16
+#' count comes from a spike distribution at 0).
15 17
 #' @param zeroIndices Index (matrix m x n) of counts that are zero/non-zero.
16
-#' @param mmZero The zero model, the model matrix to account for the change in the number of OTUs observed as a linear effect of the depth of coverage.
17
-#' @return List of the zero fit (zero mean model) coefficients, variance - scale parameter (scalar), and normalized residuals of length sum(zeroIndices).
18
-#'
19
-#' @name doZeroMStep
18
+#' @param mmZero The zero model, the model matrix to account for the change in
19
+#' the number of OTUs observed as a linear effect of the depth of coverage.
20
+#' @return List of the zero fit (zero mean model) coefficients, variance -
21
+#' scale parameter (scalar), and normalized residuals of length
22
+#' sum(zeroIndices).
20 23
 #' @seealso \code{\link{fitZig}}
21 24
 doZeroMStep <-
22 25
 function(z, zeroIndices, mmZero)
... ...
@@ -1,18 +1,18 @@
1 1
 #' export the normalized eSet dataset as a matrix.
2
-#'
3
-#' This function allows the user to take the normalized dataset or counts and output
4
-#' the dataset to the user's workspace as a tab-delimited file, etc.
5
-#'
6
-#' @param jobj An eSet object with count data.
7
-#' @param output Output file name.
2
+#' 
3
+#' This function allows the user to take a dataset of counts and output the
4
+#' dataset to the user's workspace as a tab-delimited file, etc.
5
+#' 
6
+#' 
7
+#' @aliases exportMatrix exportMat
8
+#' @param mat A matrix of values (normalized, or otherwise)
9
+#' @param output Output file name
8 10
 #' @return NA
9
-#'
10
-#' @name export_mat
11
-#' @aliases exportMatrix
12 11
 #' @seealso \code{\link{cumNorm}}
13 12
 #' @examples
14
-#' export_mat(jobj,output="~/Desktop/normMatrix.tsv");
15
-
13
+#' 
14
+#' # see vignette
15
+#' 
16 16
 exportMat <-
17 17
 function(mat,output="~/Desktop/matrix.tsv"){
18 18
 	matrix = mat;
... ...
@@ -1,17 +1,19 @@
1 1
 #' Various statistics of the count data.
2
-#'
3
-#' A matrix of values for each sample. The matrix consists of sample ids, the sample scaling factor, quantile value, and the number of number of features.
4
-#'
5
-#' @param obj An eSet object with count data.
6
-#' @param p Quantile value to calculate the scaling factor and quantiles for the various samples.
2
+#' 
3
+#' A matrix of values for each sample. The matrix consists of sample ids, the
4
+#' sample scaling factor, quantile value, and the number of number of features.
5
+#' 
6
+#' 
7
+#' @param obj A MRexperiment object with count data.
8
+#' @param p Quantile value to calculate the scaling factor and quantiles for
9
+#' the various samples.
7 10
 #' @param output Output file name.
8 11
 #' @return None.
9
-#'
10
-#' @name export_stats
11 12
 #' @seealso \code{\link{cumNorm}} \code{\link{quantile}}
12 13
 #' @examples
13
-#' export_stats(obj,p=1,output="~/Desktop/obj-stats.tsv")
14
-
14
+#' 
15
+#' # see vignette
16
+#' 
15 17
 exportStats <-
16 18
 function(obj,p= cumNormStat(obj),output="~/Desktop/res.stats.tsv"){
17 19
 
... ...
@@ -1,27 +1,41 @@
1 1
 #' Computes the weighted fold-change estimates and t-statistics.
2
-#'
3
-#' Wrapper to actually run the Expectation-maximization algorithm and estimate $f_{count}$ fits.
4
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$
5
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) 
6
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$.
7
-#' The log-likelihood in this extended model is
8
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$.
9
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$.
10
-#'
11
-#' @param obj An eSet object with count data.
2
+#' 
3
+#' Wrapper to actually run the Expectation-maximization algorithm and estimate
4
+#' $f_count$ fits.  Maximum-likelihood estimates are approximated using the EM
5
+#' algorithm where we treat mixture membership $delta_ij = 1$ if $y_ij$ is
6
+#' generated from the zero point mass as latent indicator variables. The
7
+#' density is defined as $f_zig(y_ij = pi_j(S_j)*f_0(y_ij) +(1-pi_j (S_j)) *
8
+#' f_count(y_ij; mu_i, sigma_i^2)$. The log-likelihood in this extended model
9
+#' is: $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log
10
+#' pi_j(s_j)+(1-delta_ij) log (1-pi_j (s_j))$. The responsibilities are defined
11
+#' as $z_ij = pr(delta_ij=1 | data)$.
12
+#' 
13
+#' 
14
+#' @param obj A MRexperiment object with count data.
12 15
 #' @param mod The model for the count distribution.
13
-#' @param s95 A vector of size M of the scaling values to be included in the model.
14
-#' @param zeroMod The zero model, the model to account for the change in the number of OTUs observed as a linear effect of the depth of coverage.
15
-#' @param useS95offset Boolean, whether to include the default scaling parameters in the model or not.
16
-#' @param control The settings for fitZig. 
17
-#' @param s The raw total counts for the various samples.
18
-#' @return The fits, posterior probabilities, posterior probabilities used at time of convergence for each feature, ebayes (limma object) fit, among other data. 
19
-#'
20
-#' @name fitZig
16
+#' @param zeroMod The zero model, the model to account for the change in the
17
+#' number of OTUs observed as a linear effect of the depth of coverage.
18
+#' @param useS95offset Boolean, whether to include the default scaling
19
+#' parameters in the model or not.
20
+#' @param control The settings for fitZig.
21
+#' @return The fits, posterior probabilities, posterior probabilities used at
22
+#' time of convergence for each feature, ebayes (limma object) fit, among other
23
+#' data.
24
+#' @export
21 25
 #' @seealso \code{\link{cumNorm}} \code{\link{zigControl}}
22 26
 #' @examples
23
-#' model = model.matrix(~1+type+log2(s95/1000+1))
24
-#' res = fitZig(obj = obj,mod=mod,useS95offset=FALSE)
27
+#' 
28
+#' data(lungData)
29
+#' k = grep("Extraction.Control",pData(lungData)$SampleType)
30
+#' lungTrim = lungData[,-k]
31
+#' k = which(rowSums(MRcounts(lungTrim)>0)<30)
32
+#' cumNorm(lungTrim)
33
+#' lungTrim = lungTrim[-k,]
34
+#' smokingStatus = pData(lungTrim)$SmokingStatus
35
+#' mod = model.matrix(~smokingStatus)
36
+#' settings = zigControl(maxit=1,verbose=FALSE)
37
+#' fit = fitZig(obj = lungTrim,mod=mod,control=settings)
38
+#' 
25 39
 fitZig <-
26 40
 function(obj,mod,zeroMod=NULL,useS95offset=TRUE,control=zigControl()){
27 41
 
... ...
@@ -109,4 +123,4 @@ function(obj,mod,zeroMod=NULL,useS95offset=TRUE,control=zigControl()){
109 123
 		dat = list(fit=fit$fit,countResiduals=fit$residuals,
110 124
 				   z=z,eb=eb,taxa=rownames(obj),counts=y,zeroMod =mmZero,stillActive=stillActive,stillActiveNLL=stillActiveNLL,zeroCoef=zeroCoef)
111 125
 		return(dat)
112
-	}
113 126
\ No newline at end of file
127
+	}
... ...
@@ -1,18 +1,20 @@
1
-#' Compute the value of the count density function from the count model residuals.
2
-#'
3
-#' Calculate density values from a normal: $f(x) = 1/(\sqrt (2 \pi ) \sigma ) e^-((x - \mu )^2/(2 \sigma^2))$.
4
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$
5
-#' is generated from the zero point mass as latent indicator variables. The density is defined as $f_zig(y_{ij} = \pi_j(S_j) \cdot f_{0}(y_{ij}) 
6
-#' +(1-\pi_j (S_j))\cdot f_{count}(y_{ij};\mu_i,\sigma_i^2)$.
7
-#' The log-likelihood in this extended model is
8
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$.
9
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$.
10
-#'
1
+#' Compute the value of the count density function from the count model
2
+#' residuals.
3
+#' 
4
+#' Calculate density values from a normal: $f(x) = 1/(sqrt (2 pi ) sigma )
5
+#' e^-((x - mu )^2/(2 sigma^2))$.  Maximum-likelihood estimates are
6
+#' approximated using the EM algorithm where we treat mixture membership
7
+#' $deta_ij$ = 1 if $y_ij$ is generated from the zero point mass as latent
8
+#' indicator variables. The density is defined as $f_zig(y_ij = pi_j(S_j) cdot
9
+#' f_0(y_ij) +(1-pi_j (S_j))cdot f_count(y_ij;mu_i,sigma_i^2)$. The
10
+#' log-likelihood in this extended model is $(1-delta_ij) log
11
+#' f_count(y;mu_i,sigma_i^2 )+delta_ij log pi_j(s_j)+(1-delta_ij)log (1-pi_j
12
+#' (sj))$. The responsibilities are defined as $z_ij = pr(delta_ij=1 | data)$.
13
+#' 
14
+#' 
11 15
 #' @param residuals Residuals from the count model.
12 16
 #' @param log Whether or not we are calculating from a log-normal distribution.
13 17
 #' @return Density values from the count model residuals.
14
-#'
15
-#' @name getCountDensity
16 18
 #' @seealso \code{\link{fitZig}}
17 19
 getCountDensity <-
18 20
 function(residuals, log=FALSE){
... ...
@@ -1,15 +1,19 @@
1
-#' Calculate the relative difference between iterations of the negative log-likelihoods.
2
-#'
3
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$
4
-#' is generated from the zero point mass as latent indicator variables. The log-likelihood in this extended model is
5
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$.
6
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data)$.
7
-#'
1
+#' Calculate the relative difference between iterations of the negative
2
+#' log-likelihoods.
3
+#' 
4
+#' Maximum-likelihood estimates are approximated using the EM algorithm where
5
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the
6
+#' zero point mass as latent indicator variables. The log-likelihood in this
7
+#' extended model is $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log
8
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (sj))$. The responsibilities are defined
9
+#' as $z_ij = pr(delta_ij=1 | data)$.
10
+#' 
11
+#' 
8 12
 #' @param nll Vector of size M with the current negative log-likelihoods.
9
-#' @param nllOld Vector of size M with the previous iterations negative log-likelihoods.
10
-#' @return Vector of size M of the relative differences between the previous and current iteration nll.
11
-#'
12
-#' @name getEpsilon
13
+#' @param nllOld Vector of size M with the previous iterations negative
14
+#' log-likelihoods.
15
+#' @return Vector of size M of the relative differences between the previous
16
+#' and current iteration nll.
13 17
 #' @seealso \code{\link{fitZig}}
14 18
 getEpsilon <-
15 19
 function(nll, nllOld){
... ...
@@ -1,16 +1,20 @@
1
-#' Calculate the negative log-likelihoods for the various features given the residuals.
2
-#'
3
-#' Maximum-likelihood estimates are approximated using the EM algorithm where we treat mixture membership $\deta_{ij}$ = 1 if $y_{ij}$
4
-#' is generated from the zero point mass as latent indicator variables. The log-likelihood in this extended model is
5
-#' $(1−\delta_{ij}) \log f_{count}(y;\mu_i,\sigma_i^2 )+\delta_{ij} \log \pi_j(s_j)+(1−\delta_{ij})\log (1−\pi_j (sj))$.
6
-#' The responsibilities are defined as $z_{ij} = pr(\delta_{ij}=1 | data and current  values)$.
7
-#'
8
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0).
1
+#' Calculate the negative log-likelihoods for the various features given the
2
+#' residuals.
3
+#' 
4
+#' Maximum-likelihood estimates are approximated using the EM algorithm where
5
+#' we treat mixture membership $delta_ij$ = 1 if $y_ij$ is generated from the
6
+#' zero point mass as latent indicator variables. The log-likelihood in this
7
+#' extended model is $(1-delta_ij) log f_count(y;mu_i,sigma_i^2 )+delta_ij log
8
+#' pi_j(s_j)+(1-delta_ij)log (1-pi_j (sj))$. The responsibilities are defined
9
+#' as $z_ij = pr(delta_ij=1 | data and current values)$.
10
+#' 
11
+#' 
12
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a
13
+#' count comes from a spike distribution at 0).
9 14
 #' @param countResiduals Residuals from the count model.
10 15
 #' @param zeroResiduals Residuals from the zero model.
11
-#' @return Vector of size M of the negative log-likelihoods for the various features.
12
-#'
13
-#' @name getNegativeLogLikelihoods
16
+#' @return Vector of size M of the negative log-likelihoods for the various
17
+#' features.
14 18
 #' @seealso \code{\link{fitZig}}
15 19
 getNegativeLogLikelihoods <-
16 20
 function(z, countResiduals, zeroResiduals){
... ...
@@ -1,15 +1,17 @@
1
-#' Calculate the mixture proportions from the zero model / spike mass model residuals.
2
-#'
1
+#' Calculate the mixture proportions from the zero model / spike mass model
2
+#' residuals.
3
+#' 
3 4
 #' F(x) = 1 / (1 + exp(-(x-m)/s)) (the CDF of the logistic distribution).
4
-#' Provides the probability that a real-valued random variable X with a given probability distribution will be found at a value less than or equal to x.
5
-#' The output are the mixture proportions for the samples given the residuals from the zero model.
6
-#'
5
+#' Provides the probability that a real-valued random variable X with a given
6
+#' probability distribution will be found at a value less than or equal to x.
7
+#' The output are the mixture proportions for the samples given the residuals
8
+#' from the zero model.
9
+#' 
10
+#' 
7 11
 #' @param residuals Residuals from the zero model.
8 12
 #' @return Mixture proportions for each sample.
9
-#'
10
-#' @name getPi
11 13
 #' @seealso \code{\link{fitZig}}
12 14
 getPi <-
13 15
 function(residuals){
14 16
 	plogis(residuals)
15
-}
16 17
\ No newline at end of file
18
+}
... ...
@@ -1,13 +1,18 @@
1 1
 #' Calculate the current Z estimate responsibilities (posterior probabilities)
2
-#'
3
-#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0).
4
-#' @param zUsed Matrix (m x n) of estimate responsibilities (probabilities that a count comes from a spike distribution at 0) that are actually used (following convergence). 
5
-#' @param stillActive A vector of size M booleans saying if a feature is still active or not.
2
+#' 
3
+#' Calculate the current Z estimate responsibilities (posterior probabilities)
4
+#' 
5
+#' 
6
+#' @param z Matrix (m x n) of estimate responsibilities (probabilities that a
7
+#' count comes from a spike distribution at 0).
8
+#' @param zUsed Matrix (m x n) of estimate responsibilities (probabilities that
9
+#' a count comes from a spike distribution at 0) that are actually used
10
+#' (following convergence).
11
+#' @param stillActive A vector of size M booleans saying if a feature is still
12
+#' active or not.
6 13
 #' @param nll Vector of size M with the current negative log-likelihoods.
7 14
 #' @param nllUSED Vector of size M with the converged negative log-likelihoods.
8 15
 #' @return A list of updated zUsed and nllUSED.
9
-#'
10
-#' @name getZ
11 16
 #' @seealso \code{\link{fitZig}}
12 17
 getZ <-
13 18
 function(z,zUsed,stillActive,nll,nllUSED){
... ...
@@ -12,7 +12,7 @@
12 12
 #'
13 13
 #' @name isItStillActive
14 14
 #' @seealso \code{\link{fitZig}}
15
-
15
+#'
16 16
 isItStillActive <-
17 17
 function(eps, tol,stillActive,stillActiveNLL,nll){
18 18
 	stillActive[stillActive]=!is.finite(eps[stillActive]) | eps[stillActive]>tol
... ...
@@ -1,17 +1,19 @@
1 1
 #' Load a count dataset associated with a study.
2
-#'
2
+#' 
3 3
 #' Load a matrix of OTUs in a tab delimited format
4
-#'
4
+#' 
5
+#' 
6
+#' @aliases load_meta metagenomicLoader
5 7
 #' @param file Path and filename of the actual data file.
8
+#' @param sep File delimiter.
6 9
 #' @return An object of count data.
7
-#'
8
-#' @name load_meta
9
-#' @aliases metagenomicLoader
10 10
 #' @seealso \code{\link{load_phenoData}}
11 11
 #' @examples
12
-#' obj = load_meta("~/Desktop/testFile.tsv")
13
-load_meta <-
14
-function(file,sep="\t")
12
+#' 
13
+#' dataDirectory <- system.file("extdata", package="metagenomeSeq")
14
+#' lung = load_meta(file.path(dataDirectory,"CHK_NAME.otus.count.csv"))
15
+#' 
16
+load_meta <- function(file,sep="\t")
15 17
 {
16 18
 	dat2 <- read.table(file,header=FALSE,sep=sep,nrows=1,stringsAsFactors=FALSE);
17 19
 	subjects <- as.character(dat2[1,-1]);
... ...
@@ -1,15 +1,16 @@
1 1
 #' Load a count dataset associated with a study set up in a Qiime format.
2
-#'
2
+#' 
3 3
 #' Load a matrix of OTUs in Qiime's format
4
-#'
4
+#' 
5
+#' 
6
+#' @aliases load_metaQ qiimeLoader
5 7
 #' @param file Path and filename of the actual data file.
6 8
 #' @return An object of count data.
7
-#'
8
-#' @name load_metaQ
9
-#' @aliases qiimeLoader
10 9
 #' @seealso \code{\link{load_meta}} \code{\link{load_phenoData}}
11 10
 #' @examples
12
-#' obj = load_metaQ("~/Desktop/testFile.tsv")
11
+#' 
12
+#' # see vignette
13
+#' 
13 14
 load_metaQ <- function(file) {	
14 15
 	dat2 <- read.delim(file,header=FALSE,stringsAsFactors=FALSE,nrows=1,skip=1);
15 16
 	len = ncol(dat2)
... ...
@@ -1,18 +1,19 @@
1 1
 #' Load a clinical/phenotypic dataset associated with a study.
2
-#'
2
+#' 
3 3
 #' Load a matrix of metadata associated with a study.
4
-#'
4
+#' 
5
+#' 
6
+#' @aliases load_phenoData phenoData
5 7
 #' @param file Path and filename of the actual clinical file.
6
-#' @param tran Boolean. If the covariates are along the columns and samples along the rows, then tran should equal TRUE.
8
+#' @param tran Boolean. If the covariates are along the columns and samples
9
+#' along the rows, then tran should equal TRUE.
7 10
 #' @param sep The separator for the file.
8 11
 #' @return The metadata as a dataframe.
9
-#'
10
-#' @name load_phenoData
11
-#' @aliases phenoData
12 12
 #' @seealso \code{\link{load_meta}}
13 13
 #' @examples
14
-#' clin = load_phenoData("~/Desktop/testFile.tsv")
15
-
14
+#' 
15
+#' # see vignette
16
+#' 
16 17
 load_phenoData <-
17 18
 function(file,tran=FALSE,sep="\t")
18 19
 {
... ...
@@ -1,3 +1,24 @@
1
+#' Basic correlation plot function for normalized or unnormalized counts.
2
+#' 
3
+#' This function plots a heatmap of the "n" features with greatest variance
4
+#' across rows.
5
+#' 
6
+#' 
7
+#' @param obj A MRexperiment object with count data.
8
+#' @param n The number of features to plot
9
+#' @param log Whether or not to log transform the counts.
10
+#' @param norm Whether or not to normalize the counts.
11
+#' @param fun Function to calculate pair-wise relationships. Default is pearson
12
+#' correlation
13
+#' @param ... Additional plot arguments.
14
+#' @return NA
15
+#' @seealso \code{\link{cumNormMat}}
16
+#' @examples
17
+#' 
18
+#' data(mouseData)
19
+#' trials = pData(mouseData)$diet
20
+#' plotCorr(obj=mouseData,n=200,cexRow = 0.4,cexCol = 0.4,trace="none",dendrogram="none")
21
+#' 
1 22
 plotCorr <- function(obj,n,log=TRUE,norm=TRUE,fun=cor,...) {
2 23
     if (log == TRUE) {
3 24
         if (norm == TRUE) {
... ...
@@ -1,31 +1,35 @@
1 1
 #' Basic plot function of the raw or normalized data.
2
-#'
3
-#' This function plots the abundance of a particular OTU by class. The function uses
4
-#' the estimated posterior probabilities to make technical zeros transparent. 
5
-#'
6
-#' @param obj An eSet object with count data.
2
+#' 
3
+#' This function plots the abundance of a particular OTU by class. The function
4
+#' uses the estimated posterior probabilities to make technical zeros
5
+#' transparent.
6
+#' 
7
+#' 
8
+#' @aliases genusPlot plotGenus
9
+#' @param obj An MRexperiment object with count data.
7 10
 #' @param otuIndex A list of the otus with the same annotation.
8 11
 #' @param classIndex A list of the samples in their respective groups.
9 12
 #' @param norm Whether or not to normalize the counts.
10
-#' @param normp The value at which to scale the counts by and then log.
11 13
 #' @param no Which of the otuIndex to plot.
12 14
 #' @param factor Factor value for jitter
13 15
 #' @param pch Standard pch value for the plot command.
16
+#' @param labs Whether to include group labels or not. (TRUE/FALSE)
17
+#' @param xlab xlabel for the plot.
18
+#' @param ylab ylabel for the plot.
14 19
 #' @param jitter Boolean to jitter the count data or not.
15 20
 #' @param ret Boolean to return the observed data that would have been plotted.
16 21
 #' @param ... Additional plot arguments.
17 22
 #' @return NA
18
-#' @note \code{\link{detect}} makes use of settings.
19
-#'
20
-#' @name plotGenus
21
-#' @aliases genusPlot
22 23
 #' @seealso \code{\link{cumNorm}}
23
-#' @examples 
24
-#' classIndex=list(controls=which(type=="Control"))
25
-#' classIndex$cases=which(type=="Case")
26
-#' otuIndex = which(taxa == "E-coli")
27
-#' plotGenus(obj,otu=12,classIndex,xlab="OTU log-normalized counts")
28
-
24
+#' @examples
25
+#' 
26
+#' data(mouseData)
27
+#' classIndex=list(controls=which(pData(mouseData)$diet=="BK"))
28
+#' classIndex$cases=which(pData(mouseData)$diet=="Western")
29
+#' otuIndex = grep("Strep",fData(mouseData)$fdata)
30
+#' otuIndex=otuIndex[order(rowSums(MRcounts(mouseData)[otuIndex,]),decreasing=TRUE)]
31
+#' plotGenus(mouseData,otuIndex,classIndex,no=1:2,xaxt="n",norm=FALSE,ylab="Strep normalized log(cpt)")
32
+#' 
29 33
 plotGenus <-
30 34
 function(obj,otuIndex,classIndex,norm=TRUE,no=1:length(otuIndex),labs=TRUE,xlab=NULL,ylab=NULL,jitter=TRUE,factor=1,pch=21,ret=FALSE,...){
31 35
 
... ...
@@ -63,4 +67,4 @@ function(obj,otuIndex,classIndex,norm=TRUE,no=1:length(otuIndex),labs=TRUE,xlab=
63 67
     }
64 68
 
65 69
 	if(ret) list(x=x,y=y)
66
-}
67 70
\ No newline at end of file
71
+}
... ...
@@ -1,3 +1,23 @@
1
+#' Basic heatmap plot function for normalized counts.
2
+#' 
3
+#' This function plots a heatmap of the "n" features with greatest variance
4
+#' across rows.
5
+#' 
6
+#' 
7
+#' @param obj A MRexperiment object with count data.
8
+#' @param n The number of features to plot
9
+#' @param trials A vector of clinical information for.
10
+#' @param log Whether or not to log transform the counts.
11
+#' @param norm Whether or not to normalize the counts.
12
+#' @param ... Additional plot arguments.
13
+#' @return NA
14
+#' @seealso \code{\link{cumNormMat}}
15
+#' @examples
16
+#' 
17
+#' data(mouseData)
18
+#' trials = pData(mouseData)$diet
19
+#' plotMRheatmap(obj=mouseData,n=200,trials=trials,cexRow = 0.4,cexCol = 0.4,trace="none")
20
+#' 
1 21
 plotMRheatmap <- function(obj,n,trials,log=TRUE,norm=TRUE,...) {
2 22
   
3 23
   if(log==TRUE){
... ...
@@ -21,4 +41,4 @@ plotMRheatmap <- function(obj,n,trials,log=TRUE,norm=TRUE,...) {
21 41
   heatmapColColors=brewer.pal(12,"Set3")[as.integer(factor(trials))];
22 42
 	heatmap.2(mat2,col=heatmapCols,ColSideColors=heatmapColColors,...);
23 43
   invisible()
24
-}
25 44
\ No newline at end of file
45
+}
... ...
@@ -1,28 +1,32 @@
1 1
 #' Basic plot function of the raw or normalized data.
2
-#'
3
-#' This function plots the abundance of a particular OTU by class. The function uses
4
-#' the estimated posterior probabilities to make technical zeros transparent. 
5
-#'
6
-#' @param obj An eSet object with count data.
2
+#' 
3
+#' This function plots the abundance of a particular OTU by class. The function
4
+#' uses the estimated posterior probabilities to make technical zeros
5
+#' transparent.
6
+#' 
7
+#' 
8
+#' @param obj A MRexperiment object with count data.
7 9
 #' @param otu The row number/OTU to plot.
8 10
 #' @param classIndex A list of the samples in their respective groups.
9 11
 #' @param norm Whether or not to normalize the counts.
10
-#' @param normp The value at which to scale the counts by and then log.
11 12
 #' @param factor Factor value for jitter.
12 13
 #' @param pch Standard pch value for the plot command.
14
+#' @param labs Whether to include group labels or not. (TRUE/FALSE)
15
+#' @param xlab xlabel for the plot.
16
+#' @param ylab ylabel for the plot.
13 17
 #' @param jitter Boolean to jitter the count data or not.
14 18
 #' @param ret Boolean to return the observed data that would have been plotted.
15 19
 #' @param ... Additional plot arguments.
16 20
 #' @return NA
17
-#'
18
-#' @name plotOTU
19
-#' @aliases otuplot
20 21
 #' @seealso \code{\link{cumNorm}}
21
-#' @examples 
22
-#' classIndex=list(controls=which(type=="Control"))
23
-#' classIndex$cases=which(type=="Case")
24
-#' plotOTU(obj,otu=12,classIndex,xlab="OTU log-normalized counts")
25
-
22
+#' @examples
23
+#' 
24
+#' data(mouseData)
25
+#' classIndex=list(controls=which(pData(mouseData)$diet=="BK"))
26
+#' classIndex$cases=which(pData(mouseData)$diet=="Western")
27
+#' # you can specify whether or not to normalize, and to what level
28
+#' plotOTU(mouseData,otu=9083,classIndex,norm=FALSE,main="9083 feature abundances")
29
+#' 
26 30
 plotOTU <-
27 31
 function(obj,otu,classIndex,norm=TRUE,factor=1,pch=21,labs=TRUE,xlab=NULL,ylab=NULL,jitter=TRUE,ret=FALSE,...){
28 32
 
... ...
@@ -67,4 +71,4 @@ function(obj,otu,classIndex,norm=TRUE,factor=1,pch=21,labs=TRUE,xlab=NULL,ylab=N
67 71
     
68 72
 	if (ret)
69 73
 		list(x=x,y=y)
70
-}
71 74
\ No newline at end of file
75
+}
... ...
@@ -11,7 +11,7 @@
11 11
 #' @seealso \code{\link{fitZig}} \code{\link{cumNorm}} \code{\link{plotOTU}}
12 12
 #' @examples
13 13
 #' control =  zigControl(tol=1e-10,maxit=10,verbose=FALSE)
14
-
14
+#'
15 15
 zigControl <-function(tol=1e-4,maxit=10,verbose=TRUE){
16 16
 	set <-list(tol=tol,maxit=maxit,verbose=verbose);
17 17
 	return(set)	
... ...
@@ -2,33 +2,57 @@
2 2
 \alias{MRcoefs}
3 3
 \title{Table of top-ranked microbial marker gene from linear model fit}
4 4
 \usage{
5
-  MRcoefs(obj,by=2,coef=NULL,number=10,taxa=obj$taxa,uniqueNames=FALSE,adjust.method="fdr",group=0,eff=0,output=NULL)
5
+  MRcoefs(obj, by = 2, coef = NULL, number = 10,
6
+    taxa = obj$taxa, uniqueNames = FALSE,
7
+    adjust.method = "fdr", group = 0, eff = 0,
8
+    output = NULL)
6 9
 }
7 10
 \arguments{
8
-  \item{obj}{A list containing the linear model fit produced by lmFit through fitZig.}
9
-
10
-  \item{by}{Column number or column name specifying which coefficient or contrast of the linear model is of interest.}
11
-  
12
-  \item{coef}{Column number(s) or column name(s) specifying which coefficient or contrast of the linear model to display.}
13
-  
14
-  \item{number}{The number of bacterial features to pick out.}
15
-  
11
+  \item{obj}{A list containing the linear model fit
12
+  produced by lmFit through fitZig.}
13
+
14
+  \item{by}{Column number or column name specifying which
15
+  coefficient or contrast of the linear model is of
16
+  interest.}
17
+
18
+  \item{coef}{Column number(s) or column name(s) specifying
19
+  which coefficient or contrast of the linear model to
20
+  display.}
21
+
22
+  \item{number}{The number of bacterial features to pick
23
+  out.}
24
+
16 25
   \item{taxa}{Taxa list.}
17
-  
26
+
18 27
   \item{uniqueNames}{Number the various taxa.}
19
-  
20
-  \item{adjust.method}{Method to adjust p-values by. Default is "FDR". Options include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY",         "fdr", "none". See \code{\link{p.adjust}} for more details.}
21
-  
22
-  \item{group}{One of three choices, 0,1,2,3. 0: the sort is ordered by a decreasing absolute value coefficient fit. 1: the sort is ordered by the raw coefficient fit in decreasing order. 2: the sort is ordered by the raw coefficient fit in increasing order. 3: the sort is ordered by the p-value of the coefficient fit in increasing order.}
23
-  \item{eff}{Restrict samples to have at least eff quantile effective samples.}
24
-  
25
-  \item{output}{Name of output file, including location, to save the table.}
28
+
29
+  \item{adjust.method}{Method to adjust p-values by.
30
+  Default is "FDR". Options include "holm", "hochberg",
31
+  "hommel", "bonferroni", "BH", "BY", "fdr", "none". See
32
+  \code{\link{p.adjust}} for more details.}
33
+
34
+  \item{group}{One of three choices, 0,1,2,3. 0: the sort
35
+  is ordered by a decreasing absolute value coefficient
36
+  fit. 1: the sort is ordered by the raw coefficient fit in
37
+  decreasing order. 2: the sort is ordered by the raw
38
+  coefficient fit in increasing order. 3: the sort is
39
+  ordered by the p-value of the coefficient fit in
40
+  increasing order.}
41
+
42
+  \item{eff}{Restrict samples to have at least eff quantile
43
+  effective samples.}
44
+
45
+  \item{output}{Name of output file, including location, to
46
+  save the table.}
26 47
 }
27 48
 \value{
28
-  Table of the top-ranked features determined by the linear fit's coefficient.
49
+  Table of the top-ranked features determined by the linear
50
+  fit's coefficient.
29 51
 }
30 52
 \description{
31
-  Extract a table of the top-ranked features from a linear model fit. This function will be updated soon to provide better flexibility similar to limma's topTable.
53
+  Extract a table of the top-ranked features from a linear
54
+  model fit. This function will be updated soon to provide
55
+  better flexibility similar to limma's topTable.
32 56
 }
33 57
 \examples{
34 58
 data(lungData)
... ...
@@ -44,6 +68,6 @@ fit = fitZig(obj = lungTrim,mod=mod,control=settings)
44 68
 head(MRcoefs(fit))
45 69
 }
46 70
 \seealso{
47
-  \code{\link{fitZig}}  \code{\link{MRtable}}
71
+  \code{\link{fitZig}} \code{\link{MRtable}}
48 72
 }
49 73
 
... ...
@@ -1,30 +1,27 @@
1
-\name{MRcounts}
2
-\Rdversion{1.0}
3 1
 \docType{methods}
4
-\alias{MRcounts,MRexperiment-method}
2
+\name{MRcounts}
5 3
 \alias{MRcounts}
6
-\title{
7
-   Accessor for the counts slot of a MRexperiment object
8
-}
9
-
10
-\description{
11
-   The counts slot holds the raw count data representing (along the rows) the number of reads annotated for a particular feature and (along the columns) the sample.
12
-}
13
-
4
+\alias{MRcounts,MRexperiment-method}
5
+\title{Accessor for the counts slot of a MRexperiment object}
14 6
 \usage{
15
-\S4method{MRcounts}{MRexperiment}(cnts, norm=FALSE)
7
+  MRcounts(obj, norm = FALSE)
16 8
 }
17
-
18 9
 \arguments{
19 10
   \item{cnts}{a \code{MRexperiment} object.}
20
-  \item{norm}{logical indicating whether or not to return normalized counts.}
21
-}
22 11
 
23
-\author{
24
-   Joseph N. Paulson, jpaulson@umiacs.umd.edu
12
+  \item{norm}{logical indicating whether or not to return
13
+  normalized counts.}
14
+}
15
+\description{
16
+  The counts slot holds the raw count data representing
17
+  (along the rows) the number of reads annotated for a
18
+  particular feature and (along the columns) the sample.
25 19
 }
26 20
 \examples{
27
-   data(lungData)
21
+data(lungData)
28 22
    head(MRcounts(lungData))
29
-}   
30
-   
23
+}
24
+\author{
25
+  Joseph N. Paulson, jpaulson@umiacs.umd.edu
26
+}
27
+
... ...
@@ -2,19 +2,25 @@
2 2
 \alias{MRfisher}
3 3
 \title{Wrapper to run fisher's test on presence/absence of a feature.}
4 4
 \usage{
5
-  MRfisher(obj,cl,mat=FALSE)
5
+  MRfisher(obj, cl, mat = FALSE)
6 6
 }
7 7
 \arguments{
8
-  \item{obj}{A MRexperiment object with a count matrix, or a simple count matrix.}
8
+  \item{obj}{A MRexperiment object with a count matrix, or
9
+  a simple count matrix.}
10
+
9 11
   \item{cl}{Group comparison}
10
-  \item{mat}{logical indicating whether obj is a MRexperiment object or matrix. Default is a MRexperiment object.}
12
+
13
+  \item{mat}{logical indicating whether obj is a