git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45050 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 05/03/2010 05:23:48
Showing 5 changed files

... ...
@@ -410,3 +410,6 @@ Such files arise when scanner settings contain the line <GenerateVersionTwoIdatF
410 410
 If this is modified to <GenerateVersionTwoIdatFiles>true</GenerateVersionTwoIdatFiles>, 
411 411
 then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
412 412
  * Removed AllClasses.R from Collate field in DESCRIPTION file (causes an error in build)
413
+
414
+2010-03-05 M. Ritchie - committed version 1.5.29
415
+ * crlmmIlluminaV2() now exported.  Added man page crlmmIlluminaV2.Rd
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.28
5
-Date: 2010-03-02
4
+Version: 1.5.29
5
+Date: 2010-03-05
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -61,7 +61,7 @@ importFrom(mvtnorm, dmvnorm)
61 61
 importFrom(ellipse, ellipse)
62 62
 
63 63
 exportMethods(copyNumber)
64
-export(cnOptions, crlmm, crlmmIllumina, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
64
+export(cnOptions, crlmm, crlmmIllumina, crlmmIlluminaV2, crlmmCopynumber, ellipse, readIdatFiles, snprma, getParam) 
65 65
 
66 66
 
67 67
 #############
... ...
@@ -68,7 +68,7 @@ crlmmIllumina(RG, XY, stripNorm=TRUE,
68 68
   intensities, not both.  Alternatively if \code{crlmmIllumina} has been
69 69
   run already with \code{save.it=TRUE}, the preprocessed data can be
70 70
   loaded from file by specifying \code{load.it=TRUE} and
71
-  \code{intensityFile} (\code{RG} or \code{XY} are not needed in this case).
71
+  \code{snpFile} (\code{RG} or \code{XY} are not needed in this case).
72 72
 }
73 73
 
74 74
 \references{
... ...
@@ -91,4 +91,5 @@ crlmmIllumina(RG, XY, stripNorm=TRUE,
91 91
 \examples{
92 92
 ## crlmmOut = crlmmIllumina(RG)
93 93
 }
94
+\seealso{\code{\link{readIdatFiles}}, \code{\link{crlmmIlluminaV2}}}
94 95
 \keyword{classif}
95 96
new file mode 100644
... ...
@@ -0,0 +1,123 @@
1
+\name{crlmmIlluminaV2}
2
+\alias{crlmmIlluminaV2}
3
+\title{Read and Genotype Illumina Infinium II BeadChip data with CRLMM}
4
+\description{
5
+  Implementation of the CRLMM algorithm for
6
+  data from Illumina's Infinium II BeadChips.
7
+}
8
+\usage{
9
+
10
+crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
11
+      arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
12
+      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
13
+      saveDate=FALSE, save.rg=FALSE, rgFile,
14
+      stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE,
15
+      probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
16
+      seed=1, save.ab=FALSE, snpFile, cnFile,
17
+      mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
18
+      cdfName, sns, recallMin=10, recallRegMin=1000,
19
+      returnParams=FALSE, badSNP=.7)
20
+}
21
+
22
+\arguments{
23
+  \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet
24
+    information (for required columns, refer to BeadStudio Genotyping
25
+    guide - Appendix A).}
26
+  \item{arrayNames}{character vector containing names of arrays to be
27
+    read in.  If \code{NULL}, all arrays that can be found in the
28
+    specified working directory will be read in.}
29
+  \item{ids}{vector containing ids of probes to be read in.  If
30
+    \code{NULL} all probes found on the first array are read in.}
31
+  \item{path}{character string specifying the location of files to be
32
+    read by the function}
33
+  \item{arrayInfoColNames}{(used when \code{sampleSheet} is specified)
34
+    list containing elements 'barcode' which indicates column names in
35
+    the \code{sampleSheet} which contains the arrayNumber/barcode number
36
+    and 'position' which indicates the strip number.  In older style
37
+    sample sheets, this information is combined (usually in a column
38
+    named 'SentrixPosition') and this should be specified as
39
+    \code{list(barcode=NULL, position="SentrixPosition")}}
40
+  \item{highDensity}{logical (used when \code{sampleSheet} is
41
+    specified). If \code{TRUE}, array extensions '\_A', '\_B' in
42
+    sampleSheet are replaced with 'R01C01', 'R01C02' etc.}
43
+  \item{sep}{character string specifying separator used in .idat file
44
+    names.}
45
+  \item{fileExt}{list containing elements 'green' and 'red' which
46
+    specify the .idat file extension for the Cy3 and Cy5 channels.}
47
+  \item{saveDate}{'logical'.  Should the dates from each .idat be saved
48
+    with sample information?}
49
+  \item{save.rg}{'logical'. Save RG data read in from idat files?}
50
+  \item{rgFile}{'character' specifying filename to use to save RG data.}
51
+  \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
52
+  \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}).
53
+    Should the reference HapMap intensities be used in strip-level normalization?}
54
+  \item{row.names}{'logical'. Use rownames - SNP names?}
55
+  \item{col.names}{'logical'. Use colnames - Sample names?}
56
+  \item{probs}{'numeric' vector with priors for AA, AB and BB.}
57
+  \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.}
58
+  \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter
59
+  out samples.}
60
+  \item{gender}{'integer' vector, with same length as 'filenames',
61
+    defining sex. (1 - male; 2 - female)}
62
+  \item{seed}{'integer' scalar for random number generator (used to
63
+    sample \code{mixtureSampleSize} SNPs for mixture model).}
64
+  \item{save.ab}{'logical'. Save preprocessed SNP and copy number data?}
65
+  \item{load.it}{'logical'. Load preprocessed SNP data to speed up analysis?}
66
+  \item{snpFile}{'character' with filename of preprocessed SNP data to
67
+    be saved/loaded.}
68
+  \item{cnFile}{'character' with filename of preprocessed copy number 
69
+    data to be saved.}
70
+  \item{mixtureSampleSize}{'integer'. The number of SNPs to be used
71
+    when fitting the mixture model.}
72
+  \item{eps}{Minimum change for mixture model.}
73
+  \item{verbose}{'logical'.}
74
+  \item{cdfName}{'character' defining the chip annotation (manifest) to use
75
+    ('human370v1c', 'human550v3b', 'human650v3a', 'human1mv1c',
76
+    'human370quadv3c', 'human610quadv1b', 'human660quadv1a',
77
+    'human1mduov3b', 'humanomni1quadv1b')}
78
+  \item{sns}{'character' vector with sample names to be used.}
79
+  \item{recallMin}{'integer'. Minimum number of samples for recalibration.}
80
+  \item{recallRegMin}{'integer'. Minimum number of SNPs for regression.}
81
+  \item{returnParams}{'logical'. Return recalibrated parameters.}
82
+  \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
83
+}
84
+\value{
85
+  A \code{SnpSet} object which contains
86
+  \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)}
87
+  \item{callProbability}{confidence scores 'round(-1000*log2(1-p))'}
88
+  in the \code{assayData} slot and
89
+  \item{SNPQC}{SNP Quality Scores}
90
+  \item{batchQC}{Batch Quality Scores}
91
+  along with center and scale parameters when \code{returnParams=TRUE}
92
+  in the \code{featureData} slot.
93
+}
94
+
95
+\details{
96
+  This function combines the reading of data from idat files using 
97
+  \code{readIdatFiles} and genotyping to reduce memory usage.
98
+}
99
+
100
+\references{
101
+  Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA.
102
+  R/Bioconductor software for Illumina's Infinium whole-genome 
103
+  genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3.
104
+
105
+  Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
106
+  normalization, and genotype calls of high-density oligonucleotide SNP
107
+  array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec
108
+  22. PMID: 17189563.
109
+
110
+  Carvalho BS, Louis TA, Irizarry RA. 
111
+  Quantifying uncertainty in genotype calls.
112
+  Bioinformatics. 2010 Jan 15;26(2):242-9.
113
+}
114
+
115
+\author{Matt Ritchie}
116
+
117
+\examples{
118
+## crlmmOut = crlmmIlluminaV2(samples,path=path,arrayInfoColNames=list(barcode="Chip",position="Section"),
119
+##                             saveDate=TRUE,cdfName="human370v1c",returnParams=TRUE)
120
+
121
+}
122
+\seealso{\code{\link{crlmmIllumina}}, \code{\link{readIdatFiles}}}
123
+\keyword{classif}