Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50951 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 18/11/2010 05:32:16
Showing1 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,121 @@
1
+\name{genotype.Illumina}
2
+\alias{genotype.Illumina}
3
+
4
+\title{
5
+	Preprocessing and genotyping of Illumina Infinium II arrays.
6
+}
7
+\description{
8
+	Preprocessing and genotyping of Illumina Infinium II arrays.
9
+}
10
+\usage{
11
+genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12
+      arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13
+      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
14
+      cdfName, copynumber=TRUE, batch, outdir=".", saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE, 
15
+      mixtureSampleSize=10^5, fitMixture=TRUE, eps =0.1, verbose = TRUE, seed = 1, 
16
+      sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000,
17
+      gender = NULL, returnParams = TRUE, badSNP = 0.7)
18
+}
19
+\arguments{
20
+  \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet
21
+    information (for required columns, refer to BeadStudio Genotyping
22
+    guide - Appendix A).}
23
+  \item{arrayNames}{character vector containing names of arrays to be
24
+    read in.  If \code{NULL}, all arrays that can be found in the
25
+    specified working directory will be read in.}
26
+  \item{ids}{vector containing ids of probes to be read in.  If
27
+    \code{NULL} all probes found on the first array are read in.}
28
+  \item{path}{character string specifying the location of files to be
29
+    read by the function}
30
+  \item{arrayInfoColNames}{(used when \code{sampleSheet} is specified)
31
+    list containing elements 'barcode', which names the column in
32
+    the \code{sampleSheet} that contains the arrayNumber/barcode number,
33
+    and 'position', which names the column containing the strip number.  In older style
34
+    sample sheets, this information is combined (usually in a column
35
+    named 'SentrixPosition') and this should be specified as
36
+    \code{list(barcode=NULL, position="SentrixPosition")}}
37
+  \item{highDensity}{logical (used when \code{sampleSheet} is
38
+    specified). If \code{TRUE}, array extensions '\_A', '\_B' in
39
+    sampleSheet are replaced with 'R01C01', 'R01C02' etc.}
40
+  \item{sep}{character string specifying separator used in .idat file
41
+    names.}
42
+  \item{fileExt}{list containing elements 'green' and 'red' which
43
+    specify the .idat file extension for the Cy3 and Cy5 channels.}
44
+  \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
45
+  \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.} 
46
+  \item{batch}{ batch variable. See details. }
47
+  \item{outdir}{character string specifying the location to store large data objects.}
48
+  \item{saveDate}{'logical'.  Should the dates from each .idat be saved
49
+    with sample information?}
50
+  \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
51
+  \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}).
52
+    Should the reference HapMap intensities be used in strip-level normalization?}
53
+  \item{mixtureSampleSize}{ Sample size to be used when fitting the mixture model.}
54
+  \item{fitMixture}{ 'logical.' Whether to fit per-array mixture model.}
55
+  \item{eps}{   Stop criteria.}
56
+  \item{verbose}{  'logical.'  Whether to print descriptive messages during processing.}
57
+  \item{seed}{ Seed to be used when sampling. Useful for reproducibility.}
58
+  \item{sns}{The sample identifiers.  If missing, the default sample names are \code{basename(filenames)}}
59
+  \item{probs}{'numeric' vector with priors for AA, AB and BB.}
60
+  \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.}
61
+  \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter
62
+  out samples.}
63
+  \item{recallMin}{Minimum number of samples for recalibration. }
64
+  \item{recallRegMin}{Minimum number of SNPs for regression.}
65
+  \item{gender}{integer vector (male = 1, female = 2) or missing,
66
+  with same length as filenames.  If missing, the gender is predicted.}
67
+  \item{returnParams}{'logical'. Return recalibrated parameters from crlmm.}
68
+  \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
69
+}
70
+
71
+\details{
72
+
73
+	For large datasets it is important to utilize the large data
74
+	support by installing and loading the ff package before calling
75
+	the \code{genotype} function. In previous versions of the
76
+	\code{crlmm} package, we used different functions for
77
+	genotyping depending on whether the ff package is loaded, namely
78
+	\code{genotype} and \code{genotype2}.  The \code{genotype}
79
+	function now handles both instances.
80
+
81
+	\code{genotype.Illumina} is a wrapper of the \code{crlmm}
82
+	function for genotyping.  Differences include (1) that the copy
83
+	number probes (if present) are also quantile-normalized and (2)
84
+	the class of object returned by this function, \code{CNSet}, is
85
+	needed for subsequent copy number estimation.  Note that the
86
+	batch variable that must be passed to this function has no
87
+	effect on the normalization or genotyping steps.  Rather,
88
+	\code{batch} is required in order to initialize a \code{CNSet}
89
+	container with the appropriate dimensions.
90
+}
91
+
92
+\value{	A \code{CNSet} instance.}
93
+\references{
94
+  Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA.
95
+  R/Bioconductor software for Illumina's Infinium whole-genome 
96
+  genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3.
97
+
98
+  Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
99
+  normalization, and genotype calls of high-density oligonucleotide SNP
100
+  array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec
101
+  22. PMID: 17189563.
102
+
103
+  Carvalho BS, Louis TA, Irizarry RA.
104
+  Quantifying uncertainty in genotype calls.
105
+  Bioinformatics. 2010 Jan 15;26(2):242-9.
106
+
107
+}
108
+\author{Matt Ritchie}
109
+\note{For large datasets, load the 'ff' package prior to genotyping --
110
+this will greatly reduce the RAM required for big jobs.  See
111
+\code{ldPath} and \code{ocSamples}.}
112
+
113
+\seealso{
114
+	\code{\link{crlmmIlluminaV2}}, 
115
+	\code{\link[oligoClasses]{ocSamples}},
116
+	\code{\link[oligoClasses]{ldOpts}}
117
+}
118
+\examples{
119
+  ##
120
+}
121
+\keyword{classif}