git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@117087 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,113 +0,0 @@ |
1 |
-\name{crlmmIlluminaV2} |
|
2 |
-\alias{crlmmIlluminaV2} |
|
3 |
-\title{Read and Genotype Illumina Infinium II BeadChip data with CRLMM} |
|
4 |
-\description{ |
|
5 |
- Implementation of the CRLMM algorithm for |
|
6 |
- data from Illumina's Infinium II BeadChips. |
|
7 |
-} |
|
8 |
-\usage{ |
|
9 |
- |
|
10 |
-crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
|
11 |
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
12 |
- highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
|
13 |
- saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, |
|
14 |
- row.names=TRUE, col.names=TRUE, probs=c(1/3, 1/3, 1/3), |
|
15 |
- DF=6, SNRMin=5, gender=NULL, seed=1, mixtureSampleSize=10^5, |
|
16 |
- eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
|
17 |
- recallRegMin=1000, returnParams=FALSE, badSNP=.7) |
|
18 |
-} |
|
19 |
- |
|
20 |
-\arguments{ |
|
21 |
- \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet |
|
22 |
- information (for required columns, refer to BeadStudio Genotyping |
|
23 |
- guide - Appendix A).} |
|
24 |
- \item{arrayNames}{character vector containing names of arrays to be |
|
25 |
- read in. If \code{NULL}, all arrays that can be found in the |
|
26 |
- specified working directory will be read in.} |
|
27 |
- \item{ids}{vector containing ids of probes to be read in. If |
|
28 |
- \code{NULL} all probes found on the first array are read in.} |
|
29 |
- \item{path}{character string specifying the location of files to be |
|
30 |
- read by the function} |
|
31 |
- \item{arrayInfoColNames}{(used when \code{sampleSheet} is specified) |
|
32 |
- list containing elements 'barcode' which indicates column names in |
|
33 |
- the \code{sampleSheet} which contains the arrayNumber/barcode number |
|
34 |
- and 'position' which indicates the strip number. In older style |
|
35 |
- sample sheets, this information is combined (usually in a column |
|
36 |
- named 'SentrixPosition') and this should be specified as |
|
37 |
- \code{list(barcode=NULL, position="SentrixPosition")}} |
|
38 |
- \item{highDensity}{logical (used when \code{sampleSheet} is |
|
39 |
- specified). If \code{TRUE}, array extensions '\_A', '\_B' in |
|
40 |
- sampleSheet are replaced with 'R01C01', 'R01C02' etc.} |
|
41 |
- \item{sep}{character string specifying separator used in .idat file |
|
42 |
- names.} |
|
43 |
-  \item{fileExt}{list containing elements 'green' and 'red' which |
|
44 |
- specify the .idat file extension for the Cy3 and Cy5 channels.} |
|
45 |
- \item{saveDate}{'logical'. Should the dates from each .idat be saved |
|
46 |
- with sample information?} |
|
47 |
- \item{stripNorm}{'logical'. Should the data be strip-level normalized?} |
|
48 |
- \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}). |
|
49 |
- Should the reference HapMap intensities be used in strip-level normalization?} |
|
50 |
- \item{row.names}{'logical'. Use rownames - SNP names?} |
|
51 |
- \item{col.names}{'logical'. Use colnames - Sample names?} |
|
52 |
- \item{probs}{'numeric' vector with priors for AA, AB and BB.} |
|
53 |
- \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.} |
|
54 |
- \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter |
|
55 |
- out samples.} |
|
56 |
- \item{gender}{'integer' vector, with same length as 'filenames', |
|
57 |
- defining sex. (1 - male; 2 - female)} |
|
58 |
- \item{seed}{'integer' scalar for random number generator (used to |
|
59 |
-    sample \code{mixtureSampleSize} SNPs for mixture model).} |
|
60 |
- \item{mixtureSampleSize}{'integer'. The number of SNP's to be used |
|
61 |
- when fitting the mixture model.} |
|
62 |
- \item{eps}{Minimum change for mixture model.} |
|
63 |
- \item{verbose}{'logical'.} |
|
64 |
- \item{cdfName}{'character' defining the chip annotation (manifest) to use |
|
65 |
-    ('human370v1c', 'human550v3b', 'human650v3a', 'human1mv1c', |
|
66 |
- 'human370quadv3c', 'human610quadv1b', 'human660quadv1a', |
|
67 |
- 'human1mduov3b', 'humanomni1quadv1b', 'humanomniexpress12v1b', 'humancytosnp12v2p1h')} |
|
68 |
- \item{sns}{'character' vector with sample names to be used.} |
|
69 |
- \item{recallMin}{'integer'. Minimum number of samples for recalibration.} |
|
70 |
- \item{recallRegMin}{'integer'. Minimum number of SNP's for regression.} |
|
71 |
- \item{returnParams}{'logical'. Return recalibrated parameters.} |
|
72 |
- \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)} |
|
73 |
-} |
|
74 |
-\value{ |
|
75 |
- A \code{SnpSet} object which contains |
|
76 |
- \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)} |
|
77 |
- \item{callProbability}{confidence scores 'round(-1000*log2(1-p))'} |
|
78 |
- in the \code{assayData} slot and |
|
79 |
- \item{SNPQC}{SNP Quality Scores} |
|
80 |
- \item{batchQC}{Batch Quality Scores} |
|
81 |
- along with center and scale parameters when \code{returnParams=TRUE} |
|
82 |
- in the \code{featureData} slot. |
|
83 |
-} |
|
84 |
- |
|
85 |
-\details{ |
|
86 |
- This function combines the reading of data from idat files using |
|
87 |
- \code{readIdatFiles} and genotyping to reduce memory usage. |
|
88 |
-} |
|
89 |
- |
|
90 |
-\references{ |
|
91 |
- Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA. |
|
92 |
- R/Bioconductor software for Illumina's Infinium whole-genome |
|
93 |
- genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3. |
|
94 |
- |
|
95 |
- Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration, |
|
96 |
- normalization, and genotype calls of high-density oligonucleotide SNP |
|
97 |
- array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec |
|
98 |
- 22. PMID: 17189563. |
|
99 |
- |
|
100 |
- Carvalho BS, Louis TA, Irizarry RA. |
|
101 |
- Quantifying uncertainty in genotype calls. |
|
102 |
- Bioinformatics. 2010 Jan 15;26(2):242-9. |
|
103 |
-} |
|
104 |
- |
|
105 |
-\author{Matt Ritchie} |
|
106 |
- |
|
107 |
-\examples{ |
|
108 |
-## crlmmOut = crlmmIlluminaV2(samples,path=path,arrayInfoColNames=list(barcode="Chip",position="Section"), |
|
109 |
-## saveDate=TRUE,cdfName="human370v1c",returnParams=TRUE) |
|
110 |
- |
|
111 |
-} |
|
112 |
-\seealso{\code{\link{crlmmIllumina}}} |
|
113 |
-\keyword{classif} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58616 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,10 +10,10 @@ |
10 | 10 |
crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
11 | 11 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
12 | 12 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
13 |
- saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, |
|
14 |
- row.names=TRUE, col.names=TRUE, probs=c(1/3, 1/3, 1/3), |
|
15 |
- DF=6, SNRMin=5, gender=NULL, seed=1, mixtureSampleSize=10^5, |
|
16 |
- eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
|
13 |
+ saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, |
|
14 |
+ row.names=TRUE, col.names=TRUE, probs=c(1/3, 1/3, 1/3), |
|
15 |
+ DF=6, SNRMin=5, gender=NULL, seed=1, mixtureSampleSize=10^5, |
|
16 |
+ eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
|
17 | 17 |
recallRegMin=1000, returnParams=FALSE, badSNP=.7) |
18 | 18 |
} |
19 | 19 |
|
... | ... |
@@ -83,13 +83,13 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
83 | 83 |
} |
84 | 84 |
|
85 | 85 |
\details{ |
86 |
- This function combines the reading of data from idat files using |
|
87 |
- \code{readIdatFiles} and genotyping to reduce memory usage. |
|
86 |
+ This function combines the reading of data from idat files using |
|
87 |
+ \code{readIdatFiles} and genotyping to reduce memory usage. |
|
88 | 88 |
} |
89 | 89 |
|
90 | 90 |
\references{ |
91 | 91 |
Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA. |
92 |
- R/Bioconductor software for Illumina's Infinium whole-genome |
|
92 |
+ R/Bioconductor software for Illumina's Infinium whole-genome |
|
93 | 93 |
genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3. |
94 | 94 |
|
95 | 95 |
Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration, |
... | ... |
@@ -97,7 +97,7 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
97 | 97 |
array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec |
98 | 98 |
22. PMID: 17189563. |
99 | 99 |
|
100 |
- Carvalho BS, Louis TA, Irizarry RA. |
|
100 |
+ Carvalho BS, Louis TA, Irizarry RA. |
|
101 | 101 |
Quantifying uncertainty in genotype calls. |
102 | 102 |
Bioinformatics. 2010 Jan 15;26(2):242-9. |
103 | 103 |
} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@57404 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -64,7 +64,7 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
64 | 64 |
\item{cdfName}{'character' defining the chip annotation (manifest) to use |
65 | 65 |
     ('human370v1c', 'human550v3b', 'human650v3a', 'human1mv1c', |
66 | 66 |
'human370quadv3c', 'human610quadv1b', 'human660quadv1a', |
67 |
- 'human1mduov3b', 'humanomni1quadv1b', 'humanomniexpress12v1b')} |
|
67 |
+ 'human1mduov3b', 'humanomni1quadv1b', 'humanomniexpress12v1b', 'humancytosnp12v2p1h')} |
|
68 | 68 |
\item{sns}{'character' vector with sample names to be used.} |
69 | 69 |
\item{recallMin}{'integer'. Minimum number of samples for recalibration.} |
70 | 70 |
\item{recallRegMin}{'integer'. Minimum number of SNP's for regression.} |
... | ... |
@@ -10,7 +10,7 @@ |
10 | 10 |
crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
11 | 11 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
12 | 12 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
13 |
- saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, outdir=".", |
|
13 |
+ saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, |
|
14 | 14 |
row.names=TRUE, col.names=TRUE, probs=c(1/3, 1/3, 1/3), |
15 | 15 |
DF=6, SNRMin=5, gender=NULL, seed=1, mixtureSampleSize=10^5, |
16 | 16 |
eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
... | ... |
@@ -47,8 +47,6 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
47 | 47 |
\item{stripNorm}{'logical'. Should the data be strip-level normalized?} |
48 | 48 |
\item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}). |
49 | 49 |
Should the reference HapMap intensities be used in strip-level normalization?} |
50 |
- \item{outdir}{character string specifying the location to store large data objects |
|
51 |
- (used when \code{ff} package is loaded)} |
|
52 | 50 |
\item{row.names}{'logical'. Use rownames - SNP names?} |
53 | 51 |
\item{col.names}{'logical'. Use colnames - Sample names?} |
54 | 52 |
\item{probs}{'numeric' vector with priors for AA, AB and BB.} |
... | ... |
@@ -10,11 +10,11 @@ |
10 | 10 |
crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
11 | 11 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
12 | 12 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
13 |
- saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
14 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
15 |
- seed=1, mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
16 |
- cdfName, sns, recallMin=10, recallRegMin=1000, |
|
17 |
- returnParams=FALSE, badSNP=.7) |
|
13 |
+ saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, outdir=".", |
|
14 |
+ row.names=TRUE, col.names=TRUE, probs=c(1/3, 1/3, 1/3), |
|
15 |
+ DF=6, SNRMin=5, gender=NULL, seed=1, mixtureSampleSize=10^5, |
|
16 |
+ eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
|
17 |
+ recallRegMin=1000, returnParams=FALSE, badSNP=.7) |
|
18 | 18 |
} |
19 | 19 |
|
20 | 20 |
\arguments{ |
... | ... |
@@ -47,6 +47,8 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
47 | 47 |
\item{stripNorm}{'logical'. Should the data be strip-level normalized?} |
48 | 48 |
\item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}). |
49 | 49 |
Should the reference HapMap intensities be used in strip-level normalization?} |
50 |
+ \item{outdir}{character string specifying the location to store large data objects |
|
51 |
+ (used when \code{ff} package is loaded)} |
|
50 | 52 |
\item{row.names}{'logical'. Use rownames - SNP names?} |
51 | 53 |
\item{col.names}{'logical'. Use colnames - Sample names?} |
52 | 54 |
\item{probs}{'numeric' vector with priors for AA, AB and BB.} |
... | ... |
@@ -12,8 +12,7 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
12 | 12 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
13 | 13 |
saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
14 | 14 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
15 |
- seed=1, save.ab=FALSE, snpFile, cnFile, |
|
16 |
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
15 |
+ seed=1, mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
17 | 16 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
18 | 17 |
returnParams=FALSE, badSNP=.7) |
19 | 18 |
} |
... | ... |
@@ -58,11 +57,6 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
58 | 57 |
defining sex. (1 - male; 2 - female)} |
59 | 58 |
\item{seed}{'integer' scalar for random number generator (used to |
60 | 59 |
     sample \code{mixtureSampleSize} SNPs for mixture model).} |
61 |
- \item{save.it}{'logical'. Save preprocessed SNP and copy number data?} |
|
62 |
- \item{snpFile}{'character' with filename of preprocessed SNP data to |
|
63 |
- be saved/loaded.} |
|
64 |
- \item{cnFile}{'character' with filename of preprocessed copy number |
|
65 |
- data to be saved.} |
|
66 | 60 |
\item{mixtureSampleSize}{'integer'. The number of SNP's to be used |
67 | 61 |
when fitting the mixture model.} |
68 | 62 |
\item{eps}{Minimum change for mixture model.} |
... | ... |
@@ -10,8 +10,7 @@ |
10 | 10 |
crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
11 | 11 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
12 | 12 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
13 |
- saveDate=FALSE, save.rg=FALSE, rgFile, |
|
14 |
- stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
13 |
+ saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
15 | 14 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
16 | 15 |
seed=1, save.ab=FALSE, snpFile, cnFile, |
17 | 16 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,120 @@ |
1 |
+\name{crlmmIlluminaV2} |
|
2 |
+\alias{crlmmIlluminaV2} |
|
3 |
+\title{Read and Genotype Illumina Infinium II BeadChip data with CRLMM} |
|
4 |
+\description{ |
|
5 |
+ Implementation of the CRLMM algorithm for |
|
6 |
+ data from Illumina's Infinium II BeadChips. |
|
7 |
+} |
|
8 |
+\usage{ |
|
9 |
+ |
|
10 |
+crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
|
11 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
12 |
+ highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
|
13 |
+ saveDate=FALSE, save.rg=FALSE, rgFile, |
|
14 |
+ stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
15 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
16 |
+ seed=1, save.ab=FALSE, snpFile, cnFile, |
|
17 |
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
18 |
+ cdfName, sns, recallMin=10, recallRegMin=1000, |
|
19 |
+ returnParams=FALSE, badSNP=.7) |
|
20 |
+} |
|
21 |
+ |
|
22 |
+\arguments{ |
|
23 |
+ \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet |
|
24 |
+ information (for required columns, refer to BeadStudio Genotyping |
|
25 |
+ guide - Appendix A).} |
|
26 |
+ \item{arrayNames}{character vector containing names of arrays to be |
|
27 |
+ read in. If \code{NULL}, all arrays that can be found in the |
|
28 |
+ specified working directory will be read in.} |
|
29 |
+ \item{ids}{vector containing ids of probes to be read in. If |
|
30 |
+ \code{NULL} all probes found on the first array are read in.} |
|
31 |
+ \item{path}{character string specifying the location of files to be |
|
32 |
+ read by the function} |
|
33 |
+ \item{arrayInfoColNames}{(used when \code{sampleSheet} is specified) |
|
34 |
+ list containing elements 'barcode' which indicates column names in |
|
35 |
+ the \code{sampleSheet} which contains the arrayNumber/barcode number |
|
36 |
+ and 'position' which indicates the strip number. In older style |
|
37 |
+ sample sheets, this information is combined (usually in a column |
|
38 |
+ named 'SentrixPosition') and this should be specified as |
|
39 |
+ \code{list(barcode=NULL, position="SentrixPosition")}} |
|
40 |
+ \item{highDensity}{logical (used when \code{sampleSheet} is |
|
41 |
+ specified). If \code{TRUE}, array extensions '\_A', '\_B' in |
|
42 |
+ sampleSheet are replaced with 'R01C01', 'R01C02' etc.} |
|
43 |
+ \item{sep}{character string specifying separator used in .idat file |
|
44 |
+ names.} |
|
45 |
+  \item{fileExt}{list containing elements 'green' and 'red' which |
|
46 |
+ specify the .idat file extension for the Cy3 and Cy5 channels.} |
|
47 |
+ \item{saveDate}{'logical'. Should the dates from each .idat be saved |
|
48 |
+ with sample information?} |
|
49 |
+ \item{stripNorm}{'logical'. Should the data be strip-level normalized?} |
|
50 |
+ \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}). |
|
51 |
+ Should the reference HapMap intensities be used in strip-level normalization?} |
|
52 |
+ \item{row.names}{'logical'. Use rownames - SNP names?} |
|
53 |
+ \item{col.names}{'logical'. Use colnames - Sample names?} |
|
54 |
+ \item{probs}{'numeric' vector with priors for AA, AB and BB.} |
|
55 |
+ \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.} |
|
56 |
+ \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter |
|
57 |
+ out samples.} |
|
58 |
+ \item{gender}{'integer' vector, with same length as 'filenames', |
|
59 |
+ defining sex. (1 - male; 2 - female)} |
|
60 |
+ \item{seed}{'integer' scalar for random number generator (used to |
|
61 |
+    sample \code{mixtureSampleSize} SNPs for mixture model).} |
|
62 |
+  \item{save.ab}{'logical'. Save preprocessed SNP and copy number data?} |
|
63 |
+ \item{snpFile}{'character' with filename of preprocessed SNP data to |
|
64 |
+ be saved/loaded.} |
|
65 |
+ \item{cnFile}{'character' with filename of preprocessed copy number |
|
66 |
+ data to be saved.} |
|
67 |
+ \item{mixtureSampleSize}{'integer'. The number of SNP's to be used |
|
68 |
+ when fitting the mixture model.} |
|
69 |
+ \item{eps}{Minimum change for mixture model.} |
|
70 |
+ \item{verbose}{'logical'.} |
|
71 |
+ \item{cdfName}{'character' defining the chip annotation (manifest) to use |
|
72 |
+    ('human370v1c', 'human550v3b', 'human650v3a', 'human1mv1c', |
|
73 |
+ 'human370quadv3c', 'human610quadv1b', 'human660quadv1a', |
|
74 |
+ 'human1mduov3b', 'humanomni1quadv1b', 'humanomniexpress12v1b')} |
|
75 |
+ \item{sns}{'character' vector with sample names to be used.} |
|
76 |
+ \item{recallMin}{'integer'. Minimum number of samples for recalibration.} |
|
77 |
+ \item{recallRegMin}{'integer'. Minimum number of SNP's for regression.} |
|
78 |
+ \item{returnParams}{'logical'. Return recalibrated parameters.} |
|
79 |
+ \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)} |
|
80 |
+} |
|
81 |
+\value{ |
|
82 |
+ A \code{SnpSet} object which contains |
|
83 |
+ \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)} |
|
84 |
+ \item{callProbability}{confidence scores 'round(-1000*log2(1-p))'} |
|
85 |
+ in the \code{assayData} slot and |
|
86 |
+ \item{SNPQC}{SNP Quality Scores} |
|
87 |
+ \item{batchQC}{Batch Quality Scores} |
|
88 |
+ along with center and scale parameters when \code{returnParams=TRUE} |
|
89 |
+ in the \code{featureData} slot. |
|
90 |
+} |
|
91 |
+ |
|
92 |
+\details{ |
|
93 |
+ This function combines the reading of data from idat files using |
|
94 |
+ \code{readIdatFiles} and genotyping to reduce memory usage. |
|
95 |
+} |
|
96 |
+ |
|
97 |
+\references{ |
|
98 |
+ Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA. |
|
99 |
+ R/Bioconductor software for Illumina's Infinium whole-genome |
|
100 |
+ genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3. |
|
101 |
+ |
|
102 |
+ Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration, |
|
103 |
+ normalization, and genotype calls of high-density oligonucleotide SNP |
|
104 |
+ array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec |
|
105 |
+ 22. PMID: 17189563. |
|
106 |
+ |
|
107 |
+ Carvalho BS, Louis TA, Irizarry RA. |
|
108 |
+ Quantifying uncertainty in genotype calls. |
|
109 |
+ Bioinformatics. 2010 Jan 15;26(2):242-9. |
|
110 |
+} |
|
111 |
+ |
|
112 |
+\author{Matt Ritchie} |
|
113 |
+ |
|
114 |
+\examples{ |
|
115 |
+## crlmmOut = crlmmIlluminaV2(samples,path=path,arrayInfoColNames=list(barcode="Chip",position="Section"), |
|
116 |
+## saveDate=TRUE,cdfName="human370v1c",returnParams=TRUE) |
|
117 |
+ |
|
118 |
+} |
|
119 |
+\seealso{\code{\link{crlmmIllumina}}} |
|
120 |
+\keyword{classif} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45126 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,123 +0,0 @@ |
1 |
-\name{crlmmIlluminaV2} |
|
2 |
-\alias{crlmmIlluminaV2} |
|
3 |
-\title{Read and Genotype Illumina Infinium II BeadChip data with CRLMM} |
|
4 |
-\description{ |
|
5 |
- Implementation of the CRLMM algorithm for |
|
6 |
- data from Illumina's Infinium II BeadChips. |
|
7 |
-} |
|
8 |
-\usage{ |
|
9 |
- |
|
10 |
-crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
|
11 |
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
12 |
- highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
|
13 |
- saveDate=FALSE, save.rg=FALSE, rgFile, |
|
14 |
- stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
15 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
16 |
- seed=1, save.ab=FALSE, snpFile, cnFile, |
|
17 |
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
18 |
- cdfName, sns, recallMin=10, recallRegMin=1000, |
|
19 |
- returnParams=FALSE, badSNP=.7) |
|
20 |
-} |
|
21 |
- |
|
22 |
-\arguments{ |
|
23 |
- \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet |
|
24 |
- information (for required columns, refer to BeadStudio Genotyping |
|
25 |
- guide - Appendix A).} |
|
26 |
- \item{arrayNames}{character vector containing names of arrays to be |
|
27 |
- read in. If \code{NULL}, all arrays that can be found in the |
|
28 |
- specified working directory will be read in.} |
|
29 |
- \item{ids}{vector containing ids of probes to be read in. If |
|
30 |
- \code{NULL} all probes found on the first array are read in.} |
|
31 |
- \item{path}{character string specifying the location of files to be |
|
32 |
- read by the function} |
|
33 |
- \item{arrayInfoColNames}{(used when \code{sampleSheet} is specified) |
|
34 |
- list containing elements 'barcode' which indicates column names in |
|
35 |
- the \code{sampleSheet} which contains the arrayNumber/barcode number |
|
36 |
- and 'position' which indicates the strip number. In older style |
|
37 |
- sample sheets, this information is combined (usually in a column |
|
38 |
- named 'SentrixPosition') and this should be specified as |
|
39 |
- \code{list(barcode=NULL, position="SentrixPosition")}} |
|
40 |
- \item{highDensity}{logical (used when \code{sampleSheet} is |
|
41 |
- specified). If \code{TRUE}, array extensions '\_A', '\_B' in |
|
42 |
- sampleSheet are replaced with 'R01C01', 'R01C02' etc.} |
|
43 |
- \item{sep}{character string specifying separator used in .idat file |
|
44 |
- names.} |
|
45 |
- \item{fileExt}{list containing elements 'Green' and 'Red' which |
|
46 |
- specify the .idat file extension for the Cy3 and Cy5 channels.} |
|
47 |
- \item{saveDate}{'logical'. Should the dates from each .idat be saved |
|
48 |
- with sample information?} |
|
49 |
- \item{save.rg}{'logical'. Save RG data read in from idat files?} |
|
50 |
- \item{rgFile}{'character' specifying filename to use to save RG data.} |
|
51 |
- \item{stripNorm}{'logical'. Should the data be strip-level normalized?} |
|
52 |
- \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}). |
|
53 |
- Should the reference HapMap intensities be used in strip-level normalization?} |
|
54 |
- \item{row.names}{'logical'. Use rownames - SNP names?} |
|
55 |
- \item{col.names}{'logical'. Use colnames - Sample names?} |
|
56 |
- \item{probs}{'numeric' vector with priors for AA, AB and BB.} |
|
57 |
- \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.} |
|
58 |
- \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter |
|
59 |
- out samples.} |
|
60 |
- \item{gender}{'integer' vector, with same length as 'filenames', |
|
61 |
- defining sex. (1 - male; 2 - female)} |
|
62 |
- \item{seed}{'integer' scalar for random number generator (used to |
|
63 |
- sample \code{mixtureSampleSize} SNPs for mixture model.} |
|
64 |
- \item{save.it}{'logical'. Save preprocessed SNP and copy number data?} |
|
65 |
- \item{load.it}{'logical'. Load preprocessed SNP data to speed up analysis?} |
|
66 |
- \item{snpFile}{'character' with filename of preprocessed SNP data to |
|
67 |
- be saved/loaded.} |
|
68 |
- \item{cnFile}{'character' with filename of preprocessed copy number |
|
69 |
- data to be saved.} |
|
70 |
- \item{mixtureSampleSize}{'integer'. The number of SNP's to be used |
|
71 |
- when fitting the mixture model.} |
|
72 |
- \item{eps}{Minimum change for mixture model.} |
|
73 |
- \item{verbose}{'logical'.} |
|
74 |
- \item{cdfName}{'character' defining the chip annotation (manifest) to use |
|
75 |
- ('human370v1c', human550v3b', 'human650v3a', 'human1mv1c', |
|
76 |
- 'human370quadv3c', 'human610quadv1b', 'human660quadv1a', |
|
77 |
- 'human1mduov3b', 'humanomni1quadv1b')} |
|
78 |
- \item{sns}{'character' vector with sample names to be used.} |
|
79 |
- \item{recallMin}{'integer'. Minimum number of samples for recalibration.} |
|
80 |
- \item{recallRegMin}{'integer'. Minimum number of SNP's for regression.} |
|
81 |
- \item{returnParams}{'logical'. Return recalibrated parameters.} |
|
82 |
- \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)} |
|
83 |
-} |
|
84 |
-\value{ |
|
85 |
- A \code{SnpSet} object which contains |
|
86 |
- \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)} |
|
87 |
- \item{callProbability}{confidence scores 'round(-1000*log2(1-p))'} |
|
88 |
- in the \code{assayData} slot and |
|
89 |
- \item{SNPQC}{SNP Quality Scores} |
|
90 |
- \item{batchQC}{Batch Quality Scores} |
|
91 |
- along with center and scale parameters when \code{returnParams=TRUE} |
|
92 |
- in the \code{featureData} slot. |
|
93 |
-} |
|
94 |
- |
|
95 |
-\details{ |
|
96 |
- This function combines the reading of data from idat files using |
|
97 |
- \code{readIdatFiles} and genotyping to reduce memory. |
|
98 |
-} |
|
99 |
- |
|
100 |
-\references{ |
|
101 |
- Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA. |
|
102 |
- R/Bioconductor software for Illumina's Infinium whole-genome |
|
103 |
- genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3. |
|
104 |
- |
|
105 |
- Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration, |
|
106 |
- normalization, and genotype calls of high-density oligonucleotide SNP |
|
107 |
- array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec |
|
108 |
- 22. PMID: 17189563. |
|
109 |
- |
|
110 |
- Carvalho BS, Louis TA, Irizarry RA. |
|
111 |
- Quantifying uncertainty in genotype calls. |
|
112 |
- Bioinformatics. 2010 Jan 15;26(2):242-9. |
|
113 |
-} |
|
114 |
- |
|
115 |
-\author{Matt Ritchie} |
|
116 |
- |
|
117 |
-\examples{ |
|
118 |
-## crlmmOut = crlmmIlluminaV2(samples,path=path,arrayInfoColNames=list(barcode="Chip",position="Section"), |
|
119 |
-## saveDate=TRUE,cdfName="human370v1c",returnParams=TRUE) |
|
120 |
- |
|
121 |
-} |
|
122 |
-\seealso{\code{\link{crlmmIllumina}}, \code{\link{readIdatFiles}}} |
|
123 |
-\keyword{classif} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45050 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,123 @@ |
1 |
+\name{crlmmIlluminaV2} |
|
2 |
+\alias{crlmmIlluminaV2} |
|
3 |
+\title{Read and Genotype Illumina Infinium II BeadChip data with CRLMM} |
|
4 |
+\description{ |
|
5 |
+ Implementation of the CRLMM algorithm for |
|
6 |
+ data from Illumina's Infinium II BeadChips. |
|
7 |
+} |
|
8 |
+\usage{ |
|
9 |
+ |
|
10 |
+crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
|
11 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
12 |
+ highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
|
13 |
+ saveDate=FALSE, save.rg=FALSE, rgFile, |
|
14 |
+ stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
15 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
16 |
+ seed=1, save.ab=FALSE, snpFile, cnFile, |
|
17 |
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
18 |
+ cdfName, sns, recallMin=10, recallRegMin=1000, |
|
19 |
+ returnParams=FALSE, badSNP=.7) |
|
20 |
+} |
|
21 |
+ |
|
22 |
+\arguments{ |
|
23 |
+ \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet |
|
24 |
+ information (for required columns, refer to BeadStudio Genotyping |
|
25 |
+ guide - Appendix A).} |
|
26 |
+ \item{arrayNames}{character vector containing names of arrays to be |
|
27 |
+ read in. If \code{NULL}, all arrays that can be found in the |
|
28 |
+ specified working directory will be read in.} |
|
29 |
+ \item{ids}{vector containing ids of probes to be read in. If |
|
30 |
+ \code{NULL} all probes found on the first array are read in.} |
|
31 |
+ \item{path}{character string specifying the location of files to be |
|
32 |
+ read by the function} |
|
33 |
+ \item{arrayInfoColNames}{(used when \code{sampleSheet} is specified) |
|
34 |
+ list containing elements 'barcode' which indicates column names in |
|
35 |
+ the \code{sampleSheet} which contains the arrayNumber/barcode number |
|
36 |
+ and 'position' which indicates the strip number. In older style |
|
37 |
+ sample sheets, this information is combined (usually in a column |
|
38 |
+ named 'SentrixPosition') and this should be specified as |
|
39 |
+ \code{list(barcode=NULL, position="SentrixPosition")}} |
|
40 |
+ \item{highDensity}{logical (used when \code{sampleSheet} is |
|
41 |
+ specified). If \code{TRUE}, array extensions '\_A', '\_B' in |
|
42 |
+ sampleSheet are replaced with 'R01C01', 'R01C02' etc.} |
|
43 |
+ \item{sep}{character string specifying separator used in .idat file |
|
44 |
+ names.} |
|
45 |
+ \item{fileExt}{list containing elements 'Green' and 'Red' which |
|
46 |
+ specify the .idat file extension for the Cy3 and Cy5 channels.} |
|
47 |
+ \item{saveDate}{'logical'. Should the dates from each .idat be saved |
|
48 |
+ with sample information?} |
|
49 |
+ \item{save.rg}{'logical'. Save RG data read in from idat files?} |
|
50 |
+ \item{rgFile}{'character' specifying filename to use to save RG data.} |
|
51 |
+ \item{stripNorm}{'logical'. Should the data be strip-level normalized?} |
|
52 |
+ \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}). |
|
53 |
+ Should the reference HapMap intensities be used in strip-level normalization?} |
|
54 |
+ \item{row.names}{'logical'. Use rownames - SNP names?} |
|
55 |
+ \item{col.names}{'logical'. Use colnames - Sample names?} |
|
56 |
+ \item{probs}{'numeric' vector with priors for AA, AB and BB.} |
|
57 |
+ \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.} |
|
58 |
+ \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter |
|
59 |
+ out samples.} |
|
60 |
+ \item{gender}{'integer' vector, with same length as 'filenames', |
|
61 |
+ defining sex. (1 - male; 2 - female)} |
|
62 |
+ \item{seed}{'integer' scalar for random number generator (used to |
|
63 |
+ sample \code{mixtureSampleSize} SNPs for mixture model.} |
|
64 |
+ \item{save.it}{'logical'. Save preprocessed SNP and copy number data?} |
|
65 |
+ \item{load.it}{'logical'. Load preprocessed SNP data to speed up analysis?} |
|
66 |
+ \item{snpFile}{'character' with filename of preprocessed SNP data to |
|
67 |
+ be saved/loaded.} |
|
68 |
+ \item{cnFile}{'character' with filename of preprocessed copy number |
|
69 |
+ data to be saved.} |
|
70 |
+ \item{mixtureSampleSize}{'integer'. The number of SNP's to be used |
|
71 |
+ when fitting the mixture model.} |
|
72 |
+ \item{eps}{Minimum change for mixture model.} |
|
73 |
+ \item{verbose}{'logical'.} |
|
74 |
+ \item{cdfName}{'character' defining the chip annotation (manifest) to use |
|
75 |
+ ('human370v1c', human550v3b', 'human650v3a', 'human1mv1c', |
|
76 |
+ 'human370quadv3c', 'human610quadv1b', 'human660quadv1a', |
|
77 |
+ 'human1mduov3b', 'humanomni1quadv1b')} |
|
78 |
+ \item{sns}{'character' vector with sample names to be used.} |
|
79 |
+ \item{recallMin}{'integer'. Minimum number of samples for recalibration.} |
|
80 |
+ \item{recallRegMin}{'integer'. Minimum number of SNP's for regression.} |
|
81 |
+ \item{returnParams}{'logical'. Return recalibrated parameters.} |
|
82 |
+ \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)} |
|
83 |
+} |
|
84 |
+\value{ |
|
85 |
+ A \code{SnpSet} object which contains |
|
86 |
+ \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)} |
|
87 |
+ \item{callProbability}{confidence scores 'round(-1000*log2(1-p))'} |
|
88 |
+ in the \code{assayData} slot and |
|
89 |
+ \item{SNPQC}{SNP Quality Scores} |
|
90 |
+ \item{batchQC}{Batch Quality Scores} |
|
91 |
+ along with center and scale parameters when \code{returnParams=TRUE} |
|
92 |
+ in the \code{featureData} slot. |
|
93 |
+} |
|
94 |
+ |
|
95 |
+\details{ |
|
96 |
+ This function combines the reading of data from idat files using |
|
97 |
+ \code{readIdatFiles} and genotyping to reduce memory. |
|
98 |
+} |
|
99 |
+ |
|
100 |
+\references{ |
|
101 |
+ Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA. |
|
102 |
+ R/Bioconductor software for Illumina's Infinium whole-genome |
|
103 |
+ genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3. |
|
104 |
+ |
|
105 |
+ Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration, |
|
106 |
+ normalization, and genotype calls of high-density oligonucleotide SNP |
|
107 |
+ array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec |
|
108 |
+ 22. PMID: 17189563. |
|
109 |
+ |
|
110 |
+ Carvalho BS, Louis TA, Irizarry RA. |
|
111 |
+ Quantifying uncertainty in genotype calls. |
|
112 |
+ Bioinformatics. 2010 Jan 15;26(2):242-9. |
|
113 |
+} |
|
114 |
+ |
|
115 |
+\author{Matt Ritchie} |
|
116 |
+ |
|
117 |
+\examples{ |
|
118 |
+## crlmmOut = crlmmIlluminaV2(samples,path=path,arrayInfoColNames=list(barcode="Chip",position="Section"), |
|
119 |
+## saveDate=TRUE,cdfName="human370v1c",returnParams=TRUE) |
|
120 |
+ |
|
121 |
+} |
|
122 |
+\seealso{\code{\link{crlmmIllumina}}, \code{\link{readIdatFiles}}} |
|
123 |
+\keyword{classif} |