Browse code

Finalize updates to copynumber and illumina_copynumber vignettes for v1.8.3 updates

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52837 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 16/02/2011 15:58:28
Showing 4 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 useDynLib("crlmm", .registration=TRUE)
2 2
 ## this is temporary
3
- exportPattern("^[^\\.]")
3
+## exportPattern("^[^\\.]")
4 4
 
5 5
 ##---------------------------------------------------------------------------
6 6
 ## Biobase
... ...
@@ -254,6 +254,7 @@ cn.F <- CA(cnSet, i=npx.index, j=F)
254 254
 boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE)
255 255
 @
256 256
 
257
+
257 258
 \begin{figure}[t!]
258 259
  \centering
259 260
  \includegraphics[width=0.8\textwidth]{copynumber-nonpolymorphicX}
... ...
@@ -262,6 +263,7 @@ boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE)
262 263
     across samples at a given marker on X is 1 for men and 2 for women.}
263 264
 \end{figure}
264 265
 
266
+Polymorphic markers on chromosome X:
265 267
 
266 268
 
267 269
 \begin{figure}[t!]
... ...
@@ -268,24 +268,89 @@ The following code chunk can be used to create an instance of
268 268
 \Robject{CopyNumberSet} that contains the CA + CB at polymorphic
269 269
 markers and 'CA' at nonpolymorphic markers.
270 270
 
271
+Alternatively, total copy number can be obtained by
272
+<<totalCopynumber.snps>>=
273
+ct2 <- totalCopynumber(cnSet, i=snp.index, j=1:5)
274
+stopifnot(all.equal(ct, ct2))
275
+@
276
+
277
+Missing values can arise at polymorphic when the confidence score of
278
+the genotype calls are below the threshold indicated by the threshold
279
+\texttt{GT.CONF.THR} in \Robject{crlmmCopynumber}. See
280
+\Robject{?crlmmCopynumber} for additional details.  In the following
281
+codechunk, we compute the number of samples that had confidence scores
282
+below \Sexpr{GT.CONF.THR} at loci for which ${\hat CA}$ and ${\hat CB}$
283
+are missing.
284
+
285
+<<NAs.snps>>=
286
+missing.copynumber <- which(rowSums(is.na(ct)) > 0)
287
+invisible(open(snpCallProbability(cnSet)))
288
+gt.confidence <- i2p(snpCallProbability(cnSet)[snp.index, ])
289
+n.below.threshold <- rowSums(gt.confidence < 0.95)
290
+unique(n.below.threshold[missing.copynumber])
291
+@
292
+\noindent For loci with missing copy number, the confidence scores
293
+were all below the threshold.
294
+
295
+At nonpolymorphic loci, either the \Rfunction{CA} or
296
+\Rfunction{totalCopynumber} functions can be used to obtain estimates
297
+of total copy number.
298
+
299
+<<nonpolymorphicAutosomes>>=
300
+marker.index <- which(!isSnp(cnSet) & chromosome(cnSet) < 23)
301
+ct <- CA(cnSet, i=marker.index, j=1:5)
302
+## all zeros
303
+stopifnot(all(CB(cnSet, i=marker.index, j=1:5) == 0))
304
+ct2 <- totalCopynumber(cnSet, i=marker.index, j=1:5)
305
+stopifnot(all.equal(ct, ct2))
306
+@
307
+
308
+
309
+\begin{figure}[t!]
310
+  Nonpolymorphic markers on chromosome X:
311
+  \centering
312
+<<nonpolymorphicX, fig=TRUE, width=8, height=4>>=
313
+npx.index <- which(chromosome(cnSet)==23 & !isSnp(cnSet))
314
+M <- sample(which(cnSet$gender==1), 5)
315
+F <- sample(which(cnSet$gender==2), 5)
316
+cn.M <- CA(cnSet, i=npx.index, j=M)
317
+cn.F <- CA(cnSet, i=npx.index, j=F)
318
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE)
319
+@
320
+\caption{Copy number estimates for nonpolymorphic loci on chromosome
321
+  X (5 men, 5 women).  crlmm assumes that the median copy number
322
+  across samples at a given marker on X is 1 for men and 2 for women.}
323
+\end{figure}
324
+
271 325
 
272
-<<copynumberObject>>=
273
-snp.index <- which(isSnp(cnSet))
274
-cn <- totalCopynumber(cnSet, i=snp.index, j=1:10)
275
-missing.snp <- which(rowSums(is.na(cn)) == 10) ##snps with no confidence
276
-table(chromosome(cnSet)[missing.snp])
277
-hist(i2p(snpCallProbability(cnSet)[missing.snp, 1:10]), breaks=100)
278 326
 
279
-np.index <- which(!isSnp(cnSet))
280
-ca <- CA(cnSet, i=np.index, j=1:10)
281
-cb <- CB(cnSet, i=np.index, j=1:10) ## most are missing
282 327
 
283
-cn <- totalCopynumber(cnSet, i=np.index, j=1:10)
284
-missing.np <- which(rowSums(is.na(cn)) == 10)
285
-table(chromosome(cnSet)[missing.np])
286
-table(chromosome(cnSet)[np.index])
328
+\begin{figure}[t!]
329
+ \centering
330
+<<polymorphicX, fig=TRUE, width=8, height=4>>=
331
+## copy number estimates on X for SNPs are biased towards small values.
332
+X.markers <- which(isSnp(cnSet) & chromosome(cnSet) == 23)
333
+ca.M <- CA(cnSet, i=X.markers, j=M)
334
+cb.M <- CB(cnSet, i=X.markers, j=M)
335
+ca.F <- CA(cnSet, i=X.markers, j=F)
336
+cb.F <- CB(cnSet, i=X.markers, j=F)
337
+cn.M <- ca.M+cb.M
338
+cn.F <- ca.F+cb.F
339
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col="grey60")
340
+cn2 <- totalCopynumber(cnSet, i=X.markers, j=c(M, F))
341
+stopifnot(all.equal(cbind(cn.M, cn.F), cn2))
342
+@
343
+\caption{Copy number estimates for polymorphic markers on chromosome
344
+  X. }
345
+\end{figure}
287 346
 
347
+Often it is of interest to smooth the total copy number estimates (the
348
+sum of the allele-specific copy number) as a function of the physical
349
+position.  The following code chunk can be used to create an instance
350
+of \Robject{CopyNumberSet} that contains the CA + CB at polymorphic
351
+markers and 'CA' at nonpolymorphic markers.
288 352
 
353
+<<copynumberObject>>=
289 354
 marker.index <- which(chromosome(cnSet) <= 22)## & isSnp(cnSet))
290 355
 sample.index <- 1:5 ## first five samples
291 356
 invisible(open(cnSet))
... ...
@@ -332,7 +397,6 @@ for(j in 1:5){
332 397
 }
333 398
 @
334 399
 
335
-
336 400
 \section{Smoothing marker-level estimates of total copy number}
337 401
 
338 402
 In this section, we show how the total copy number can be smoothed
... ...
@@ -3,3 +3,4 @@ rsync -avuzb --exclude '*~' --exclude '.git*' -e ssh enigma2.jhsph.edu:~/madman/
3 3
 
4 4
 
5 5
 
6
+