git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52837 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -254,6 +254,7 @@ cn.F <- CA(cnSet, i=npx.index, j=F) |
254 | 254 |
boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE) |
255 | 255 |
@ |
256 | 256 |
|
257 |
+ |
|
257 | 258 |
\begin{figure}[t!] |
258 | 259 |
\centering |
259 | 260 |
\includegraphics[width=0.8\textwidth]{copynumber-nonpolymorphicX} |
... | ... |
@@ -262,6 +263,7 @@ boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE) |
262 | 263 |
across samples at a given marker on X is 1 for men and 2 for women.} |
263 | 264 |
\end{figure} |
264 | 265 |
|
266 |
+Polymorphic markers on chromosome X: |
|
265 | 267 |
|
266 | 268 |
|
267 | 269 |
\begin{figure}[t!] |
... | ... |
@@ -268,24 +268,89 @@ The following code chunk can be used to create an instance of |
268 | 268 |
\Robject{CopyNumberSet} that contains the CA + CB at polymorphic |
269 | 269 |
markers and 'CA' at nonpolymorphic markers. |
270 | 270 |
|
271 |
+Alternatively, total copy number can be obtained by |
|
272 |
+<<totalCopynumber.snps>>= |
|
273 |
+ct2 <- totalCopynumber(cnSet, i=snp.index, j=1:5) |
|
274 |
+stopifnot(all.equal(ct, ct2)) |
|
275 |
+@ |
|
276 |
+ |
|
277 |
+Missing values can arise at polymorphic when the confidence score of |
|
278 |
+the genotype calls are below the threshold indicated by the threshold |
|
279 |
+\texttt{GT.CONF.THR} in \Robject{crlmmCopynumber}. See |
|
280 |
+\Robject{?crlmmCopynumber} for additional details. In the following |
|
281 |
+codechunk, we compute the number of samples that had confidence scores |
|
282 |
+below \Sexpr{GT.CONF.THR} at loci for which ${\hat CA}$ and ${\hat CB}$ |
|
283 |
+are missing. |
|
284 |
+ |
|
285 |
+<<NAs.snps>>= |
|
286 |
+missing.copynumber <- which(rowSums(is.na(ct)) > 0) |
|
287 |
+invisible(open(snpCallProbability(cnSet))) |
|
288 |
+gt.confidence <- i2p(snpCallProbability(cnSet)[snp.index, ]) |
|
289 |
+n.below.threshold <- rowSums(gt.confidence < 0.95) |
|
290 |
+unique(n.below.threshold[missing.copynumber]) |
|
291 |
+@ |
|
292 |
+\noindent For loci with missing copy number, the confidence scores |
|
293 |
+were all below the threshold. |
|
294 |
+ |
|
295 |
+At nonpolymorphic loci, either the \Rfunction{CA} or |
|
296 |
+\Rfunction{totalCopynumber} functions can be used to obtain estimates |
|
297 |
+of total copy number. |
|
298 |
+ |
|
299 |
+<<nonpolymorphicAutosomes>>= |
|
300 |
+marker.index <- which(!isSnp(cnSet) & chromosome(cnSet) < 23) |
|
301 |
+ct <- CA(cnSet, i=marker.index, j=1:5) |
|
302 |
+## all zeros |
|
303 |
+stopifnot(all(CB(cnSet, i=marker.index, j=1:5) == 0)) |
|
304 |
+ct2 <- totalCopynumber(cnSet, i=marker.index, j=1:5) |
|
305 |
+stopifnot(all.equal(ct, ct2)) |
|
306 |
+@ |
|
307 |
+ |
|
308 |
+ |
|
309 |
+\begin{figure}[t!] |
|
310 |
+ Nonpolymorphic markers on chromosome X: |
|
311 |
+ \centering |
|
312 |
+<<nonpolymorphicX, fig=TRUE, width=8, height=4>>= |
|
313 |
+npx.index <- which(chromosome(cnSet)==23 & !isSnp(cnSet)) |
|
314 |
+M <- sample(which(cnSet$gender==1), 5) |
|
315 |
+F <- sample(which(cnSet$gender==2), 5) |
|
316 |
+cn.M <- CA(cnSet, i=npx.index, j=M) |
|
317 |
+cn.F <- CA(cnSet, i=npx.index, j=F) |
|
318 |
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE) |
|
319 |
+@ |
|
320 |
+\caption{Copy number estimates for nonpolymorphic loci on chromosome |
|
321 |
+ X (5 men, 5 women). crlmm assumes that the median copy number |
|
322 |
+ across samples at a given marker on X is 1 for men and 2 for women.} |
|
323 |
+\end{figure} |
|
324 |
+ |
|
271 | 325 |
|
272 |
-<<copynumberObject>>= |
|
273 |
-snp.index <- which(isSnp(cnSet)) |
|
274 |
-cn <- totalCopynumber(cnSet, i=snp.index, j=1:10) |
|
275 |
-missing.snp <- which(rowSums(is.na(cn)) == 10) ##snps with no confidence |
|
276 |
-table(chromosome(cnSet)[missing.snp]) |
|
277 |
-hist(i2p(snpCallProbability(cnSet)[missing.snp, 1:10]), breaks=100) |
|
278 | 326 |
|
279 |
-np.index <- which(!isSnp(cnSet)) |
|
280 |
-ca <- CA(cnSet, i=np.index, j=1:10) |
|
281 |
-cb <- CB(cnSet, i=np.index, j=1:10) ## most are missing |
|
282 | 327 |
|
283 |
-cn <- totalCopynumber(cnSet, i=np.index, j=1:10) |
|
284 |
-missing.np <- which(rowSums(is.na(cn)) == 10) |
|
285 |
-table(chromosome(cnSet)[missing.np]) |
|
286 |
-table(chromosome(cnSet)[np.index]) |
|
328 |
+\begin{figure}[t!] |
|
329 |
+ \centering |
|
330 |
+<<polymorphicX, fig=TRUE, width=8, height=4>>= |
|
331 |
+## copy number estimates on X for SNPs are biased towards small values. |
|
332 |
+X.markers <- which(isSnp(cnSet) & chromosome(cnSet) == 23) |
|
333 |
+ca.M <- CA(cnSet, i=X.markers, j=M) |
|
334 |
+cb.M <- CB(cnSet, i=X.markers, j=M) |
|
335 |
+ca.F <- CA(cnSet, i=X.markers, j=F) |
|
336 |
+cb.F <- CB(cnSet, i=X.markers, j=F) |
|
337 |
+cn.M <- ca.M+cb.M |
|
338 |
+cn.F <- ca.F+cb.F |
|
339 |
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col="grey60") |
|
340 |
+cn2 <- totalCopynumber(cnSet, i=X.markers, j=c(M, F)) |
|
341 |
+stopifnot(all.equal(cbind(cn.M, cn.F), cn2)) |
|
342 |
+@ |
|
343 |
+\caption{Copy number estimates for polymorphic markers on chromosome |
|
344 |
+ X. } |
|
345 |
+\end{figure} |
|
287 | 346 |
|
347 |
+Often it is of interest to smooth the total copy number estimates (the |
|
348 |
+sum of the allele-specific copy number) as a function of the physical |
|
349 |
+position. The following code chunk can be used to create an instance |
|
350 |
+of \Robject{CopyNumberSet} that contains the CA + CB at polymorphic |
|
351 |
+markers and 'CA' at nonpolymorphic markers. |
|
288 | 352 |
|
353 |
+<<copynumberObject>>= |
|
289 | 354 |
marker.index <- which(chromosome(cnSet) <= 22)## & isSnp(cnSet)) |
290 | 355 |
sample.index <- 1:5 ## first five samples |
291 | 356 |
invisible(open(cnSet)) |
... | ... |
@@ -332,7 +397,6 @@ for(j in 1:5){ |
332 | 397 |
} |
333 | 398 |
@ |
334 | 399 |
|
335 |
- |
|
336 | 400 |
\section{Smoothing marker-level estimates of total copy number} |
337 | 401 |
|
338 | 402 |
In this section, we show how the total copy number can be smoothed |