Browse code

illumina_copynumber vignette saves results to directory that depends on the R version

also, removed eol whitespace

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/branches/RELEASE_2_6/madman/Rpacks/crlmm@49105 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 30/08/2010 15:45:22
Showing1 changed files

... ...
@@ -18,9 +18,9 @@
18 18
 
19 19
 \begin{document}
20 20
 \title{Using \crlmm{} for copy number estimation and genotype calling
21
-  with Illumina platforms} 
21
+  with Illumina platforms}
22 22
 
23
-\date{\today} 
23
+\date{\today}
24 24
 
25 25
 \author{Rob Scharpf}
26 26
 \maketitle
... ...
@@ -43,15 +43,17 @@ that contains the IDAT files that will be used in our analysis.
43 43
 <<setup, echo=FALSE, results=hide>>=
44 44
 options(width=70)
45 45
 options(continue=" ")
46
-@ 
46
+@
47 47
 
48 48
 <<libraries>>=
49 49
 library(crlmm)
50 50
 crlmm:::validCdfNames()
51
-outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/crlmmVignette/release/illumina"
52
-dir.create(outdir, showWarnings=FALSE, recursive=TRUE)
51
+if(getRversion() < "2.12.0"){
52
+	rpath <- getRversion()
53
+} else rpath <- "trunk"
54
+outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/illumina_vignette", sep="")
53 55
 datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k"
54
-@ 
56
+@
55 57
 
56 58
 To perform copy number analysis on the Illumina platform, several
57 59
 steps are required.  The first step is to read in the IDAT files and
... ...
@@ -85,18 +87,18 @@ samplesheet <- samplesheet[-c(28:46,61:75,78:79), ]
85 87
 arrayNames <- file.path(datadir, unique(samplesheet[, "SentrixPosition"]))
86 88
 grnfiles = all(file.exists(paste(arrayNames, "_Grn.idat", sep="")))
87 89
 redfiles = all(file.exists(paste(arrayNames, "_Red.idat", sep="")))
88
-@ 
90
+@
89 91
 
90 92
 <<samplesToProcess2, echo=FALSE>>=
91 93
 if(!exists("crlmmResult")){
92 94
 	if(!file.exists(file.path(outdir, "crlmmResult.rda"))){
93
-		RG <- readIdatFiles(samplesheet, 
94
-				    path=dirname(arrayNames[1]), 
95
-				    arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), 
95
+		RG <- readIdatFiles(samplesheet,
96
+				    path=dirname(arrayNames[1]),
97
+				    arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"),
96 98
 				    saveDate=TRUE)
97
-		crlmmResult <- crlmmIllumina(RG=RG, 
98
-					     cdfName="human370v1c", 
99
-					     sns=pData(RG)$ID, 
99
+		crlmmResult <- crlmmIllumina(RG=RG,
100
+					     cdfName="human370v1c",
101
+					     sns=pData(RG)$ID,
100 102
 					     returnParams=TRUE,
101 103
 					     cnFile=file.path(outdir, "cnFile.rda"),
102 104
 					     snpFile=file.path(outdir, "snpFile.rda"),
... ...
@@ -105,18 +107,18 @@ if(!exists("crlmmResult")){
105 107
 		range(protocolData(crlmmResult)$ScanDate)
106 108
 		save(crlmmResult, file=file.path(outdir, "crlmmResult.rda"))
107 109
 		rm(RG); gc()
108
-	} 
110
+	}
109 111
 }
110
-@ 
112
+@
111 113
 
112 114
 <<samplesToProcess3, eval=FALSE>>=
113
-RG <- readIdatFiles(samplesheet, 
114
-		    path=dirname(arrayNames[1]), 
115
-		    arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), 
115
+RG <- readIdatFiles(samplesheet,
116
+		    path=dirname(arrayNames[1]),
117
+		    arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"),
116 118
 		    saveDate=TRUE)
117
-crlmmResult <- crlmmIllumina(RG=RG, 
118
-			     cdfName="human370v1c", 
119
-			     sns=pData(RG)$ID, 
119
+crlmmResult <- crlmmIllumina(RG=RG,
120
+			     cdfName="human370v1c",
121
+			     sns=pData(RG)$ID,
120 122
 			     returnParams=TRUE,
121 123
 			     cnFile=file.path(outdir, "cnFile.rda"),
122 124
 			     snpFile=file.path(outdir, "snpFile.rda"),
... ...
@@ -124,7 +126,7 @@ crlmmResult <- crlmmIllumina(RG=RG,
124 126
 protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate
125 127
 range(protocolData(crlmmResult)$ScanDate)
126 128
 rm(RG); gc()
127
-@ 
129
+@
128 130
 
129 131
 \noindent Finally, we load a few of the intermediate files that were
130 132
 created during the preprocessing and genotyping.
... ...
@@ -134,7 +136,7 @@ res <- get("res")
134 136
 load(file.path(outdir, "cnFile.rda"))
135 137
 cnAB <- get("cnAB")
136 138
 load(file.path(outdir, "crlmmResult.rda"))
137
-@ 
139
+@
138 140
 
139 141
 <<loadIntermediate, eval=TRUE, echo=FALSE>>=
140 142
 if(!exists("res")){
... ...
@@ -144,7 +146,7 @@ if(!exists("res")){
144 146
 	cnAB <- get("cnAB")
145 147
 	load(file.path(outdir, "crlmmResult.rda"))
146 148
 }
147
-@ 
149
+@
148 150
 
149 151
 After running the crlmm algorithm, we construct a container for
150 152
 storing the quantile normalized intensities, genotype calls, and
... ...
@@ -162,13 +164,13 @@ new.order <- order(fD$chromosome, fD$position)
162 164
 fD <- fD[new.order, ]
163 165
 aD <- constructAssayData(cnAB, res, crlmmResult, order.index=new.order)
164 166
 protocolData(crlmmResult)$batch <- vector("integer", ncol(crlmmResult))
165
-container <- new("CNSetLM", 
167
+container <- new("CNSetLM",
166 168
 		 assayData=aD,
167 169
 		 phenoData=phenoData(crlmmResult),
168 170
 		 protocolData=protocolData(crlmmResult),
169 171
 		 featureData=fD,
170 172
 		 annotation="human370v1c")
171
-@ 
173
+@
172 174
 
173 175
 <<container2,echo=FALSE>>=
174 176
 if(!file.exists(file.path(outdir, "cnSet"))){
... ...
@@ -179,14 +181,14 @@ if(!file.exists(file.path(outdir, "cnSet"))){
179 181
 	fD <- fD[new.order, ]
180 182
 	aD <- constructAssayData(cnAB, res, crlmmResult, order.index=new.order)
181 183
 	protocolData(crlmmResult)$batch <- vector("integer", ncol(crlmmResult))
182
-	container <- new("CNSetLM", 
184
+	container <- new("CNSetLM",
183 185
 			 assayData=aD,
184 186
 			 phenoData=phenoData(crlmmResult),
185 187
 			 protocolData=protocolData(crlmmResult),
186 188
 			 featureData=fD,
187 189
 			 annotation="human370v1c")
188 190
 }
189
-@ 
191
+@
190 192
 
191 193
 
192 194
 
... ...
@@ -209,11 +211,11 @@ if(!exists("cnSet")){
209 211
 		save(cnSet, file=file.path(outdir, "cnSet.rda"))
210 212
 	} else load(file.path(outdir, "cnSet.rda"))
211 213
 }
212
-@ 
214
+@
213 215
 
214 216
 <<estimateCopynumber, eval=FALSE>>=
215 217
 cnSet <- crlmmCopynumber(container, verbose=TRUE)
216
-@ 
218
+@
217 219
 
218 220
 %In the following code, we define helper functions to construct a
219 221
 %\Robject{featureData} object that chromosome and physical position
... ...
@@ -245,12 +247,12 @@ cb <- CB(cnSet)[marker.index, ]/100
245 247
 missing.index <- which(rowSums(is.na(ca))==ncol(cnSet))
246 248
 ca <- ca[-missing.index, ]
247 249
 cb <- cb[-missing.index, ]
248
-@ 
250
+@
249 251
 \noindent Negating the \Robject{isSnp} function could be used to
250 252
 extract the estimates at nonpolymorphic markers. For instance,
251 253
 <<monomorphic-accessor>>=
252 254
 cn.monomorphic <- CA(cnSet)[which(chromosome(cnSet) == 21 & !isSnp(cnSet)), ]/100
253
-@ 
255
+@
254 256
 
255 257
 At polymorphic loci, the total copy number is the sum of the number of
256 258
 copies of the A allele and the number of copies for the B allele.  At
... ...
@@ -266,7 +268,7 @@ cn.total2 <- totalCopyNumber(cnSet, i=marker.index)
266 268
 cn.total2 <- cn.total2[-missing.index, ]
267 269
 dimnames(cn.total) <- NULL
268 270
 all.equal(cn.total2, cn.total)
269
-@ 
271
+@
270 272
 
271 273
 A few simple visualizations may be helpful at this point. The first
272 274
 plot is a histogram of the signal to noise ratio of the sample -- an
... ...
@@ -275,11 +277,11 @@ statistic tends to be much higher for Illumina than for the Affymetrix
275 277
 platforms.) The second is a visualization of the total copy number
276 278
 estimates plotted versus physical position on chromosome 1 for the two
277 279
 samples with the lowest (top) and highest (bottom) signal to noise
278
-ratios.  
280
+ratios.
279 281
 
280 282
 <<snr, fig=TRUE>>=
281 283
 hist(cnSet$SNR, breaks=15)
282
-@ 
284
+@
283 285
 
284 286
 <<firstSample, fig=TRUE>>=
285 287
 low.snr <- which(cnSet$SNR == min(cnSet$SNR))
... ...
@@ -296,7 +298,7 @@ for(j in c(low.snr, high.snr)){
296 298
 }
297 299
 axis(1, at=pretty(x), labels=pretty(x/1e6))
298 300
 mtext("Mb", 1, outer=TRUE, line=2)
299
-@ 
301
+@
300 302
 
301 303
 Here's a very simple approach to handle outliers by applying a running
302 304
 median using a window of size 3.  Following outlier removal, we
... ...
@@ -318,7 +320,7 @@ for(j in c(low.snr, high.snr)){
318 320
 }
319 321
 axis(1, at=pretty(x), labels=pretty(x/1e6))
320 322
 mtext("Mb", 1, outer=TRUE, line=2)
321
-@ 
323
+@
322 324
 
323 325
 %Next we remove some of the waves using \Rpackage{limma}'s function
324 326
 %\Rfunction{loessFit}.
... ...
@@ -338,7 +340,7 @@ mtext("Mb", 1, outer=TRUE, line=2)
338 340
 %	y.smooth <- vector("list", length(y.split))
339 341
 %	for(i in seq_along(y.smooth)){
340 342
 %		fit <- loessFit(y=y.split[[i]], x= x.split[[i]], span=0.3)
341
-%		y.smooth[[i]] <- 2+fit$residuals 
343
+%		y.smooth[[i]] <- 2+fit$residuals
342 344
 %	}
343 345
 %	plot(unlist(x.split),
344 346
 %	     unlist(y.split),
... ...
@@ -347,17 +349,17 @@ mtext("Mb", 1, outer=TRUE, line=2)
347 349
 %	     unlist(y.smooth),
348 350
 %	     pch=".", ylab="copy number", xaxt="n", log="y", ylim=c(0.5,5), yaxt="n")
349 351
 %	legend("topright", bty="n", legend=paste("SNR =", round(cnSet$SNR[j], 1)))
350
-%	abline(h=2, col="grey70")	
352
+%	abline(h=2, col="grey70")
351 353
 %	if(j == high.snr){
352 354
 %		axis(1, at=pretty(x), labels=pretty(x/1e6))
353 355
 %		mtext("Mb", 1, outer=TRUE, line=2)
354 356
 %	}
355 357
 %}
356
-%@ 
358
+%@
357 359
 \section{Session information}
358 360
 <<sessionInfo, results=tex>>=
359 361
 toLatex(sessionInfo())
360
-@ 
362
+@
361 363
 
362 364
 
363 365
 \end{document}