Browse code

Added nopackage option for krlmm

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@117087 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 03/05/2016 23:39:54
Showing 1 changed files
... ...
@@ -1,5 +1,6 @@
1 1
 \name{genotype.Illumina}
2 2
 \alias{genotype.Illumina}
3
+\alias{crlmmIllumina}
3 4
 
4 5
 \title{
5 6
 	Preprocessing and genotyping of Illumina Infinium II arrays.
... ...
@@ -10,11 +11,20 @@
10 11
 \usage{
11 12
 genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 13
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13
-      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), XY=NULL,
14
+      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), XY=NULL, anno, genome, 
14 15
       call.method="crlmm", trueCalls=NULL, cdfName, copynumber=TRUE, batch=NULL, saveDate=FALSE, stripNorm=TRUE, 
15
-      useTarget=TRUE, quantile.method="between", mixtureSampleSize=10^5, fitMixture=TRUE,                               
16
-      eps =0.1, verbose = TRUE, seed = 1, sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, 
16
+      useTarget=TRUE, quantile.method="between", nopackage.norm="quantile", mixtureSampleSize=10^5, fitMixture=TRUE,
17
+      eps=0.1, verbose = TRUE, seed = 1, sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, 
17 18
       recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
19
+
20
+crlmmIllumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
21
+      arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
22
+      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), XY=NULL, anno, genome, 
23
+      call.method="crlmm", trueCalls=NULL, cdfName, copynumber=TRUE, batch=NULL, saveDate=FALSE, stripNorm=TRUE, 
24
+      useTarget=TRUE, quantile.method="between", nopackage.norm="quantile", mixtureSampleSize=10^5, fitMixture=TRUE,
25
+      eps=0.1, verbose = TRUE, seed = 1, sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, 
26
+      recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
27
+
18 28
 }
19 29
 \arguments{
20 30
   \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet
... ...
@@ -42,9 +52,13 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
42 52
   \item{fileExt}{list containing elements 'green' and 'red' which
43 53
     specify the .idat file extension for the Cy3 and Cy5 channels.}
44 54
   \item{XY}{\code{NChannelSet} containing X and Y intensities.}
55
+  \item{anno}{data.frame containing SNP annotation information from 
56
+    manifest and additional columns 'isSnp', 'position', 'chromosome' 
57
+    and 'featureNames'. For use when \code{cdfName}='nopackage'}
58
+  \item{genome}{character string specifying which genome is used in annotation}
45 59
   \item{call.method}{character string specifying the genotype calling algorithm to use ('crlmm' or 'krlmm').}
46 60
   \item{trueCalls}{matrix specifying known Genotype calls (can contain some NAs) for a subset of samples and features (1 - AA, 2 - AB, 3 - BB).}
47
-  \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
61
+  \item{cdfName}{annotation package (see also \code{validCdfNames}) or 'nopackage' when combined with 'krlmm', an \code{anno} data.frame and \code{genome}.}
48 62
   \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.}
49 63
   \item{batch}{ character vector indicating the batch variable. Must be
50 64
 	the same length as the number of samples. See details.}
... ...
@@ -54,6 +68,8 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
54 68
   \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}).
55 69
     Should the reference HapMap intensities be used in strip-level normalization?}
56 70
   \item{quantile.method}{character string specifying the quantile normalization method to use ('within' or 'between' channels).}
71
+  \item{nopackage.norm}{character string specifying normalization to be used when \code{cdfName}='nopackage'.
72
+    Options are 'none', 'quantile' (within channel, between array) and 'loess'.}
57 73
   \item{mixtureSampleSize}{ Sample size to be used when fitting the mixture model.}
58 74
   \item{fitMixture}{ 'logical.' Whether to fit per-array mixture model.}
59 75
   \item{eps}{   Stop criteria.}
... ...
@@ -74,23 +90,15 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
74 90
 
75 91
 \details{
76 92
 
77
-	For large datasets it is important to utilize the large data
78
-	support by installing and loading the ff package before calling
79
-	the \code{genotype} function. In previous versions of the
80
-	\code{crlmm} package, we used different functions for
81
-	genotyping depending on whether the ff package is loaded, namely
82
-	\code{genotype} and \code{genotype2}.  The \code{genotype}
83
-	function now handles both instances.
84
-
85
-	\code{genotype.Illumina} is a wrapper of the \code{crlmm}
86
-	function for genotyping.  Differences include (1) that the copy
87
-	number probes (if present) are also quantile-normalized and (2)
88
-	the class of object returned by this function, \code{CNSet}, is
89
-	needed for subsequent copy number estimation.  Note that the
90
-	batch variable (a character string) that must be passed to this
91
-	function has no effect on the normalization or genotyping steps.
92
-	Rather, \code{batch} is required in order to initialize a
93
-	\code{CNSet} container with the appropriate dimensions.
93
+	\code{genotype.Illumina} (or equivalently \code{crlmmIllumina}) 
94
+        is a wrapper of the \code{crlmm} function for genotyping.  
95
+        Differences include (1) that the copy number probes (if present) 
96
+        are also quantile-normalized and (2) the class of object returned 
97
+        by this function, \code{CNSet}, is needed for subsequent copy number 
98
+        estimation.  Note that the batch variable (a character string) has 
99
+        no effect on the normalization or genotyping steps. Rather, \code{batch} 
100
+        is required in order to initialize a \code{CNSet} container with the 
101
+        appropriate dimensions.
94 102
 
95 103
         The new 'krlmm' option is available for certain chip types. Optional 
96 104
 	argument \code{trueCalls} matrix contains known Genotype calls 
... ...
@@ -103,6 +111,15 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
103 111
 	8 clusters. This is configurable by setting up an option named 
104 112
 	"krlmm.cores", e.g. options("krlmm.cores" = 16). 
105 113
 
114
+        In general, a chip specific annotation package is required to use the 
115
+        \code{genotype.Illumina} function. If this is not available (newer chip 
116
+        types or custom chips often don't have a chip-specific package available
117
+        on Bioconductor), consider using \code{cdfName}='nopackage' and specifying 
118
+        \code{anno} and \code{genome}, which runs 'krlmm' on the samples available.
119
+        Here \code{anno} is a data.frame read in from the relevant chip-specific
120
+        manifest, which must have additional columns 'isSnp' which is a logical that
121
+        indicates whether a probe is polymorphic or not, 'position', 'chromosome' and
122
+        'featureNames' that give the location on the chromosome and SNP name.
106 123
       }
107 124
 
108 125
 \value{	A \code{SnpSuperSet} instance.}
... ...
@@ -111,6 +128,10 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
111 128
   R/Bioconductor software for Illumina's Infinium whole-genome
112 129
   genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3.
113 130
 
131
+  Liu R, Dai Z, Yeager M, Irizarry RA, Ritchie ME.
132
+  KRLMM: an adaptive genotype calling method for common and low frequency variants.
133
+  BMC Bioinformatics. 2014 May 23;15:158.
134
+
114 135
   Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
115 136
   normalization, and genotype calls of high-density oligonucleotide SNP
116 137
   array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec
... ...
@@ -119,19 +140,11 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
119 140
   Carvalho BS, Louis TA, Irizarry RA.
120 141
   Quantifying uncertainty in genotype calls.
121 142
   Bioinformatics. 2010 Jan 15;26(2):242-9.
122
-
123 143
 }
124 144
 
125 145
 \author{Matt Ritchie, Cynthia Liu, Zhiyin Dai}
126 146
 
127
-  \note{For large datasets, load the 'ff' package prior to genotyping
128
-\code{ldPath} and \code{ocSamples}.  The function
129
-\code{genotype.Illumina} supports parallelization, as the (not run)
130
-example below indicates.}
131
-
132 147
 \seealso{
133
-	\code{\link{crlmmIlluminaV2}},
134 148
 	\code{\link[oligoClasses]{ocSamples}},
135 149
 	\code{\link[oligoClasses]{ldOpts}}
136 150
 }
Browse code

Removed crlmm::: from .R files and set saveDate=FALSE in genotype.Illumina() man page to clean up Notes and Warnings

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@88085 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 28/03/2014 12:43:01
Showing 1 changed files
... ...
@@ -11,7 +11,7 @@
11 11
 genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13 13
       highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), XY=NULL,
14
-      call.method="crlmm", trueCalls=NULL, cdfName, copynumber=TRUE, batch=NULL, saveDate=TRUE, stripNorm=TRUE, 
14
+      call.method="crlmm", trueCalls=NULL, cdfName, copynumber=TRUE, batch=NULL, saveDate=FALSE, stripNorm=TRUE, 
15 15
       useTarget=TRUE, quantile.method="between", mixtureSampleSize=10^5, fitMixture=TRUE,                               
16 16
       eps =0.1, verbose = TRUE, seed = 1, sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, 
17 17
       recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
Browse code

merge with collab

* collab:
Update DESCRIPTION
bug fixed for krlmm option, documentation changes

es

# Please enter a commit message to explain why this merge is necessary,
# especially if it merges an updated upstream into a topic branch.
#
# Lines starting with '#' will be ignored, and an empty message aborts
# the commit.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@81002 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 01/10/2013 12:03:56
Showing 1 changed files
... ...
@@ -93,8 +93,16 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
93 93
 	\code{CNSet} container with the appropriate dimensions.
94 94
 
95 95
         The new 'krlmm' option is available for certain chip types. Optional 
96
-	\code{trueCalls} matrix contains known Genotype calls (1 - AA, 2 - AB, 3 - BB)
97
-	for a subset of samples and features. 
96
+	argument \code{trueCalls} matrix contains known Genotype calls 
97
+	(1 - AA, 2 - AB, 3 - BB) for a subset of samples and features. This 
98
+	will be used to compute KRLMM coefficients by calling \code{vglm} function 
99
+	from \code{VGAM} package.
100
+
101
+	The 'krlmm' method makes use of functions provided in \code{parallel} 
102
+	package to speed up the process. It by default initialises up to 
103
+	8 clusters. This is configurable by setting up an option named 
104
+	"krlmm.cores", e.g. options("krlmm.cores" = 16). 
105
+
98 106
       }
99 107
 
100 108
 \value{	A \code{SnpSuperSet} instance.}
... ...
@@ -114,7 +122,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
114 122
 
115 123
 }
116 124
 
117
-\author{Matt Ritchie}
125
+\author{Matt Ritchie, Cynthia Liu, Zhiyin Dai}
118 126
 
119 127
   \note{For large datasets, load the 'ff' package prior to genotyping
120 128
 -- this will greatly reduce the RAM required for big jobs.  See
... ...
@@ -129,6 +137,7 @@ example below indicates.}
129 137
 }
130 138
 \examples{
131 139
 \dontrun{
140
+	# example for 'crlmm' option
132 141
 	library(ff)
133 142
 	library(crlmm)
134 143
 	## to enable parallelization, set to TRUE
... ...
@@ -151,6 +160,32 @@ example below indicates.}
151 160
 				   arrayInfoColNames=arrayInfo,
152 161
 				   cdfName="human370v1c",
153 162
 				   batch=rep("1", nrow(samplesheet)))
163
+
164
+}
165
+\dontrun{
166
+	# example for 'krlmm' option
167
+	library(crlmm)
168
+	library(ff)
169
+	# line below is an optional step for krlmm to initialise 16 workers 
170
+	# options("krlmm.cores" = 16)
171
+	# read in raw X and Y intensities output by GenomeStudio's GenCall genotyping module
172
+	XY = readGenCallOutput(c("HumanOmni2-5_4v1_FinalReport_83TUSCAN.csv","HumanOmni2-5_4v1_FinalReport_88CHB-JPT.csv"),
173
+				cdfName="humanomni25quadv1b",
174
+				verbose=TRUE)
175
+	krlmmResult = genotype.Illumina(XY=XY, 
176
+		      			cdfName=ThiscdfName, 
177
+					call.method="krlmm", 
178
+					verbose=TRUE)
179
+
180
+	# example for 'krlmm' option with known genotype call for some SNPs and samples
181
+	library(VGAM)
182
+	hapmapCalls = load("hapmapCalls.rda")
183
+	# hapmapCalls should have rownames and colnames corresponding to XY featureNames and sampleNames
184
+	krlmmResult = genotype.Illumina(XY=XY,
185
+					cdfName=ThiscdfName, 
186
+					call.method="krlmm", 
187
+					trueCalls=hapmapCalls, 
188
+					verbose=TRUE)		
154 189
 }
155 190
 }
156 191
 \keyword{classif}
Browse code

fix conflict in description

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@80947 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 30/09/2013 14:07:17
Showing 1 changed files
1 1
old mode 100644
2 2
new mode 100755
... ...
@@ -11,7 +11,7 @@
11 11
 genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13 13
       highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), XY=NULL,
14
-      call.method="crlmm", cdfName, copynumber=TRUE, batch=NULL, saveDate=TRUE, stripNorm=TRUE, 
14
+      call.method="crlmm", trueCalls=NULL, cdfName, copynumber=TRUE, batch=NULL, saveDate=TRUE, stripNorm=TRUE, 
15 15
       useTarget=TRUE, quantile.method="between", mixtureSampleSize=10^5, fitMixture=TRUE,                               
16 16
       eps =0.1, verbose = TRUE, seed = 1, sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, 
17 17
       recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
... ...
@@ -43,6 +43,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
43 43
     specify the .idat file extension for the Cy3 and Cy5 channels.}
44 44
   \item{XY}{\code{NChannelSet} containing X and Y intensities.}
45 45
   \item{call.method}{character string specifying the genotype calling algorithm to use ('crlmm' or 'krlmm').}
46
+  \item{trueCalls}{matrix specifying known Genotype calls (can contain some NAs) for a subset of samples and features (1 - AA, 2 - AB, 3 - BB).}
46 47
   \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
47 48
   \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.}
48 49
   \item{batch}{ character vector indicating the batch variable. Must be
... ...
@@ -65,7 +66,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
65 66
   out samples.}
66 67
   \item{recallMin}{Minimum number of samples for recalibration. }
67 68
   \item{recallRegMin}{Minimum number of SNP's for regression.}
68
-  \item{gender}{  integer vector (  male = 1, female =2 ) or missing,
69
+  \item{gender}{  integer vector (  male = 1, female = 2 ) or missing,
69 70
   with same length as filenames.  If missing, the gender is predicted.}
70 71
   \item{returnParams}{'logical'. Return recalibrated parameters from crlmm.}
71 72
   \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
... ...
@@ -91,7 +92,9 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
91 92
 	Rather, \code{batch} is required in order to initialize a
92 93
 	\code{CNSet} container with the appropriate dimensions.
93 94
 
94
-        The new 'krlmm' option is available for certain chip types.
95
+        The new 'krlmm' option is available for certain chip types. Optional 
96
+	\code{trueCalls} matrix contains known Genotype calls (1 - AA, 2 - AB, 3 - BB)
97
+	for a subset of samples and features. 
95 98
       }
96 99
 
97 100
 \value{	A \code{SnpSuperSet} instance.}
Browse code

merging from collab

* collab:
add warning in vignette about NAs with BafLrrSetList function
Added Human Omni Express Exome 8 v1.1b as a supported chip
updated version number of package and man pages to reflect these changes
skeleton for krlmm capability added. genotype.Illumina() can now take and XY object as input
update copynumber.Rnw to use BafLrrSetList
updates to vignettes
update namespace

# Please enter a commit message to explain why this merge is necessary,
# especially if it merges an updated upstream into a topic branch.
#
# Lines starting with '#' will be ignored, and an empty message aborts
# the commit.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@79138 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 31/07/2013 01:37:34
Showing 1 changed files
... ...
@@ -10,11 +10,11 @@
10 10
 \usage{
11 11
 genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13
-      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
14
-      cdfName, copynumber=TRUE, batch, saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE,
15
-      mixtureSampleSize=10^5, fitMixture=TRUE, eps =0.1, verbose = TRUE, seed = 1,
16
-      sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000,
17
-      gender = NULL, returnParams = TRUE, badSNP = 0.7)
13
+      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), XY=NULL,
14
+      call.method="crlmm", cdfName, copynumber=TRUE, batch=NULL, saveDate=TRUE, stripNorm=TRUE, 
15
+      useTarget=TRUE, quantile.method="between", mixtureSampleSize=10^5, fitMixture=TRUE,                               
16
+      eps =0.1, verbose = TRUE, seed = 1, sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, 
17
+      recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
18 18
 }
19 19
 \arguments{
20 20
   \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet
... ...
@@ -41,6 +41,8 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
41 41
     names.}
42 42
   \item{fileExt}{list containing elements 'Green' and 'Red' which
43 43
     specify the .idat file extension for the Cy3 and Cy5 channels.}
44
+  \item{XY}{\code{NChannelSet} containing X and Y intensities.}
45
+  \item{call.method}{character string specifying the genotype calling algorithm to use ('crlmm' or 'krlmm').}
44 46
   \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
45 47
   \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.}
46 48
   \item{batch}{ character vector indicating the batch variable. Must be
... ...
@@ -50,6 +52,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
50 52
   \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
51 53
   \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}).
52 54
     Should the reference HapMap intensities be used in strip-level normalization?}
55
+  \item{quantile.method}{character string specifying the quantile normalization method to use ('within' or 'between' channels).}
53 56
   \item{mixtureSampleSize}{ Sample size to be used when fitting the mixture model.}
54 57
   \item{fitMixture}{ 'logical.' Whether to fit per-array mixture model.}
55 58
   \item{eps}{   Stop criteria.}
... ...
@@ -88,8 +91,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
88 91
 	Rather, \code{batch} is required in order to initialize a
89 92
 	\code{CNSet} container with the appropriate dimensions.
90 93
 
91
-
92
-
94
+        The new 'krlmm' option is available for certain chip types.
93 95
       }
94 96
 
95 97
 \value{	A \code{SnpSuperSet} instance.}
Browse code

Merge branch 'collab'

* collab:
remove getCluster() calls and replace with parStatus()
update man pages for crlmm and genotype.Illumina with respect to the setup for parallelization
add neededPkgs argument to ocLapply calls in crlmmGT2
bump dependency on oligoClasses
Update R/crlmm-illumina.R
contructInf, preprocessInf and genotypeInf no longer exported

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64211 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 21/03/2012 02:52:50
Showing 1 changed files
... ...
@@ -129,8 +129,10 @@ example below indicates.}
129 129
 	## to enable parallelization, set to TRUE
130 130
 	if(FALSE){
131 131
 		library(snow)
132
-		## 10 workers
133
-		setCluster(10, "SOCK")
132
+		library(doSNOW)
133
+		## with 10 workers
134
+		cl <- makeCluster(10, type="SOCK")
135
+		registerDoSNOW(cl)
134 136
 	}
135 137
 	## path to idat files
136 138
 	datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k"
Browse code

Merge branch 'collab'

* collab:
replace splitIndicesByLength with splitIndicesByNode throughout cnrma-functions.R (check that snow is loaded and getCluster is not null)
add an example to genotype.Illumina that indicates how parallelization would be enabled. The example requires local data and is not run.
change outdir in IlluminaPreprocessCN and AffyGW
Update R/crlmm-illumina.R
bump version for parallelization of genotype.Illumina
Update R/crlmm-illumina.R

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64151 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 20/03/2012 13:55:50
Showing 1 changed files
... ...
@@ -88,6 +88,8 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
88 88
 	Rather, \code{batch} is required in order to initialize a
89 89
 	\code{CNSet} container with the appropriate dimensions.
90 90
 
91
+
92
+
91 93
       }
92 94
 
93 95
 \value{	A \code{SnpSuperSet} instance.}
... ...
@@ -106,10 +108,14 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
106 108
   Bioinformatics. 2010 Jan 15;26(2):242-9.
107 109
 
108 110
 }
111
+
109 112
 \author{Matt Ritchie}
110
-\note{For large datasets, load the 'ff' package prior to genotyping --
111
-this will greatly reduce the RAM required for big jobs.  See
112
-\code{ldPath} and \code{ocSamples}.}
113
+
114
+  \note{For large datasets, load the 'ff' package prior to genotyping
115
+-- this will greatly reduce the RAM required for big jobs.  See
116
+\code{ldPath} and \code{ocSamples}.  The function
117
+\code{genotype.Illumina} supports parallelization, as the (not run)
118
+example below indicates.}
113 119
 
114 120
 \seealso{
115 121
 	\code{\link{crlmmIlluminaV2}},
... ...
@@ -117,6 +123,27 @@ this will greatly reduce the RAM required for big jobs.  See
117 123
 	\code{\link[oligoClasses]{ldOpts}}
118 124
 }
119 125
 \examples{
120
-  ##
126
+\dontrun{
127
+	library(ff)
128
+	library(crlmm)
129
+	## to enable parallelization, set to TRUE
130
+	if(FALSE){
131
+		library(snow)
132
+		## 10 workers
133
+		setCluster(10, "SOCK")
134
+	}
135
+	## path to idat files
136
+	datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k"
137
+	## read in your samplesheet
138
+	samplesheet = read.csv(file.path(datadir, "HumanHap370Duo_Sample_Map.csv"), header=TRUE, as.is=TRUE)
139
+	samplesheet <- samplesheet[-c(28:46,61:75,78:79), ]
140
+	arrayNames <- file.path(datadir, unique(samplesheet[, "SentrixPosition"]))
141
+	arrayInfo <- list(barcode=NULL, position="SentrixPosition")
142
+	cnSet <- genotype.Illumina(sampleSheet=samplesheet,
143
+				   arrayNames=arrayNames,
144
+				   arrayInfoColNames=arrayInfo,
145
+				   cdfName="human370v1c",
146
+				   batch=rep("1", nrow(samplesheet)))
147
+}
121 148
 }
122 149
 \keyword{classif}
Browse code

Merge branch 'mymac'

* mymac:
add AffyGW.pdf
update vignettes in inst/scripts
Change argument of validCEL to celfiles
Update constructInf to accommodate GenomeDataFrame class for featureData
bump version to 1.13.7
Add doRUnit.R
Add celfile-utils.Rd
Streamlne some of the Rd files
add validCEL function that checks whether all celfiles can be read
getFeatureData returns GenomeAnnotatedDataFrame
Remove imports from methods. Remove pdf of illumina_copynumber.pdf (large file) and copynumber.pdf
getFeatureDAta returns GenomeAnnotatedDataFrame
Remove separate vignette for copy number in inst/scripts. Include copynumber section in both affy and illumina pipelines.
update documentation files for genotype.Illumina, preprocessInf, and genotypeInf (cdfName added as argument. Indicate that 'batch' should be a character string)
pass cdfName to genotypeInf and preprocessInf
add unitTests and cn-functions for 'simple usage'
Combine AffyPreprocess and copynumber. Combine IlluminaPreprocess and copynumber
remove depency on ff to allow installation on my mac

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@62108 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 17/01/2012 19:13:44
Showing 1 changed files
... ...
@@ -11,8 +11,8 @@
11 11
 genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13 13
       highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
14
-      cdfName, copynumber=TRUE, batch, saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE, 
15
-      mixtureSampleSize=10^5, fitMixture=TRUE, eps =0.1, verbose = TRUE, seed = 1, 
14
+      cdfName, copynumber=TRUE, batch, saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE,
15
+      mixtureSampleSize=10^5, fitMixture=TRUE, eps =0.1, verbose = TRUE, seed = 1,
16 16
       sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000,
17 17
       gender = NULL, returnParams = TRUE, badSNP = 0.7)
18 18
 }
... ...
@@ -42,8 +42,9 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
42 42
   \item{fileExt}{list containing elements 'Green' and 'Red' which
43 43
     specify the .idat file extension for the Cy3 and Cy5 channels.}
44 44
   \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
45
-  \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.} 
46
-  \item{batch}{ batch variable. See details.}
45
+  \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.}
46
+  \item{batch}{ character vector indicating the batch variable. Must be
47
+	the same length as the number of samples. See details.}
47 48
   \item{saveDate}{'logical'.  Should the dates from each .idat be saved
48 49
     with sample information?}
49 50
   \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
... ...
@@ -82,16 +83,17 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
82 83
 	number probes (if present) are also quantile-normalized and (2)
83 84
 	the class of object returned by this function, \code{CNSet}, is
84 85
 	needed for subsequent copy number estimation.  Note that the
85
-	batch variable that must be passed to this function has no
86
-	effect on the normalization or genotyping steps.  Rather,
87
-	\code{batch} is required in order to initialize a \code{CNSet}
88
-	container with the appropriate dimensions.
89
-}
86
+	batch variable (a character string) that must be passed to this
87
+	function has no effect on the normalization or genotyping steps.
88
+	Rather, \code{batch} is required in order to initialize a
89
+	\code{CNSet} container with the appropriate dimensions.
90
+
91
+      }
90 92
 
91 93
 \value{	A \code{SnpSuperSet} instance.}
92 94
 \references{
93 95
   Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA.
94
-  R/Bioconductor software for Illumina's Infinium whole-genome 
96
+  R/Bioconductor software for Illumina's Infinium whole-genome
95 97
   genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3.
96 98
 
97 99
   Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
... ...
@@ -110,7 +112,7 @@ this will greatly reduce the RAM required for big jobs.  See
110 112
 \code{ldPath} and \code{ocSamples}.}
111 113
 
112 114
 \seealso{
113
-	\code{\link{crlmmIlluminaV2}}, 
115
+	\code{\link{crlmmIlluminaV2}},
114 116
 	\code{\link[oligoClasses]{ocSamples}},
115 117
 	\code{\link[oligoClasses]{ldOpts}}
116 118
 }
Browse code

Rewrote illumina_copynumber vignette. Add functions and documentation for constructInf, preprocessInf, and genotypeInf.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54164 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 30/03/2011 02:40:07
Showing 1 changed files
... ...
@@ -72,7 +72,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
72 72
 	For large datasets it is important to utilize the large data
73 73
 	support by installing and loading the ff package before calling
74 74
 	the \code{genotype} function. In previous versions of the
75
-	\code{crlmm} package, we useed different functions for
75
+	\code{crlmm} package, we used different functions for
76 76
 	genotyping depending on whether the ff package is loaded, namely
77 77
 	\code{genotype} and \code{genotype2}.  The \code{genotype}
78 78
 	function now handles both instances.
Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@51433 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 09/12/2010 17:33:21
Showing 1 changed files
... ...
@@ -11,7 +11,7 @@
11 11
 genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13 13
       highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
14
-      cdfName, copynumber=TRUE, batch, outdir=".", saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE, 
14
+      cdfName, copynumber=TRUE, batch, saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE, 
15 15
       mixtureSampleSize=10^5, fitMixture=TRUE, eps =0.1, verbose = TRUE, seed = 1, 
16 16
       sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000,
17 17
       gender = NULL, returnParams = TRUE, badSNP = 0.7)
... ...
@@ -43,8 +43,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
43 43
     specify the .idat file extension for the Cy3 and Cy5 channels.}
44 44
   \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
45 45
   \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.} 
46
-  \item{batch}{ batch variable. See details. }
47
-  \item{outdir}{character string specifying the location to store large data objects.}
46
+  \item{batch}{ batch variable. See details.}
48 47
   \item{saveDate}{'logical'.  Should the dates from each .idat be saved
49 48
     with sample information?}
50 49
   \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50951 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 18/11/2010 05:32:16
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,121 @@
1
+\name{genotype.Illumina}
2
+\alias{genotype.Illumina}
3
+
4
+\title{
5
+	Preprocessing and genotyping of Illumina Infinium II arrays.
6
+}
7
+\description{
8
+	Preprocessing and genotyping of Illumina Infinium II arrays.
9
+}
10
+\usage{
11
+genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12
+      arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13
+      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
14
+      cdfName, copynumber=TRUE, batch, outdir=".", saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE, 
15
+      mixtureSampleSize=10^5, fitMixture=TRUE, eps =0.1, verbose = TRUE, seed = 1, 
16
+      sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000,
17
+      gender = NULL, returnParams = TRUE, badSNP = 0.7)
18
+}
19
+\arguments{
20
+  \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet
21
+    information (for required columns, refer to BeadStudio Genotyping
22
+    guide - Appendix A).}
23
+  \item{arrayNames}{character vector containing names of arrays to be
24
+    read in.  If \code{NULL}, all arrays that can be found in the
25
+    specified working directory will be read in.}
26
+  \item{ids}{vector containing ids of probes to be read in.  If
27
+    \code{NULL} all probes found on the first array are read in.}
28
+  \item{path}{character string specifying the location of files to be
29
+    read by the function}
30
+  \item{arrayInfoColNames}{(used when \code{sampleSheet} is specified)
31
+    list containing elements 'barcode' which indicates column names in
32
+    the \code{sampleSheet} which contains the arrayNumber/barcode number
33
+    and 'position' which indicates the strip number.  In older style
34
+    sample sheets, this information is combined (usually in a column
35
+    named 'SentrixPosition') and this should be specified as
36
+    \code{list(barcode=NULL, position="SentrixPosition")}}
37
+  \item{highDensity}{logical (used when \code{sampleSheet} is
38
+    specified). If \code{TRUE}, array extensions '\_A', '\_B' in
39
+    sampleSheet are replaced with 'R01C01', 'R01C02' etc.}
40
+  \item{sep}{character string specifying separator used in .idat file
41
+    names.}
42
+  \item{fileExt}{list containing elements 'green' and 'red' which
43
+    specify the .idat file extension for the Cy3 and Cy5 channels.}
44
+  \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
45
+  \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.} 
46
+  \item{batch}{ batch variable. See details. }
47
+  \item{outdir}{character string specifying the location to store large data objects.}
48
+  \item{saveDate}{'logical'.  Should the dates from each .idat be saved
49
+    with sample information?}
50
+  \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
51
+  \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}).
52
+    Should the reference HapMap intensities be used in strip-level normalization?}
53
+  \item{mixtureSampleSize}{ Sample size to be used when fitting the mixture model.}
54
+  \item{fitMixture}{ 'logical.' Whether to fit per-array mixture model.}
55
+  \item{eps}{   Stop criteria.}
56
+  \item{verbose}{  'logical.'  Whether to print descriptive messages during processing.}
57
+  \item{seed}{ Seed to be used when sampling. Useful for reproducibility.}
58
+  \item{sns}{The sample identifiers.  If missing, the default sample names are \code{basename(filenames)}}
59
+  \item{probs}{'numeric' vector with priors for AA, AB and BB.}
60
+  \item{DF}{'integer' with number of degrees of freedom to use with t-distribution.}
61
+  \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter
62
+  out samples.}
63
+  \item{recallMin}{Minimum number of samples for recalibration. }
64
+  \item{recallRegMin}{Minimum number of SNPs for regression.}
65
+  \item{gender}{  integer vector (  male = 1, female =2 ) or missing,
66
+  with same length as filenames.  If missing, the gender is predicted.}
67
+  \item{returnParams}{'logical'. Return recalibrated parameters from crlmm.}
68
+  \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
69
+}
70
+
71
+\details{
72
+
73
+	For large datasets it is important to utilize the large data
74
+	support by installing and loading the ff package before calling
75
+	the \code{genotype} function. In previous versions of the
76
+	\code{crlmm} package, we used different functions for
77
+	genotyping depending on whether the ff package is loaded, namely
78
+	\code{genotype} and \code{genotype2}.  The \code{genotype}
79
+	function now handles both instances.
80
+
81
+	\code{genotype.Illumina} is a wrapper of the \code{crlmm}
82
+	function for genotyping.  Differences include (1) that the copy
83
+	number probes (if present) are also quantile-normalized and (2)
84
+	the class of object returned by this function, \code{CNSet}, is
85
+	needed for subsequent copy number estimation.  Note that the
86
+	batch variable that must be passed to this function has no
87
+	effect on the normalization or genotyping steps.  Rather,
88
+	\code{batch} is required in order to initialize a \code{CNSet}
89
+	container with the appropriate dimensions.
90
+}
91
+
92
+\value{	A \code{CNSet} instance.}
93
+\references{
94
+  Ritchie ME, Carvalho BS, Hetrick KN, Tavar\'{e} S, Irizarry RA.
95
+  R/Bioconductor software for Illumina's Infinium whole-genome 
96
+  genotyping BeadChips. Bioinformatics. 2009 Oct 1;25(19):2621-3.
97
+
98
+  Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
99
+  normalization, and genotype calls of high-density oligonucleotide SNP
100
+  array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec
101
+  22. PMID: 17189563.
102
+
103
+  Carvalho BS, Louis TA, Irizarry RA.
104
+  Quantifying uncertainty in genotype calls.
105
+  Bioinformatics. 2010 Jan 15;26(2):242-9.
106
+
107
+}
108
+\author{Matt Ritchie}
109
+\note{For large datasets, load the 'ff' package prior to genotyping --
110
+this will greatly reduce the RAM required for big jobs.  See
111
+\code{ldPath} and \code{ocSamples}.}
112
+
113
+\seealso{
114
+	\code{\link{crlmmIlluminaV2}}, 
115
+	\code{\link[oligoClasses]{ocSamples}},
116
+	\code{\link[oligoClasses]{ldOpts}}
117
+}
118
+\examples{
119
+  ##
120
+}
121
+\keyword{classif}