inst/scripts/copynumber.Rnw
aa24f926
 %\VignetteIndexEntry{crlmm copy number Vignette}
f32a09dd
 %\VignetteDepends{crlmm, genomewidesnp6Crlmm}
 %\VignetteKeywords{crlmm, SNP 6}
 %\VignettePackage{crlmm}
 \documentclass{article}
 \usepackage{graphicx}
2ae7850e
 \usepackage{natbib}
f32a09dd
 \newcommand{\Rfunction}[1]{{\texttt{#1}}}
 \newcommand{\Rmethod}[1]{{\texttt{#1}}}
 \newcommand{\Rcode}[1]{{\texttt{#1}}}
 \newcommand{\Robject}[1]{{\texttt{#1}}}
 \newcommand{\Rpackage}[1]{{\textsf{#1}}}
 \newcommand{\Rclass}[1]{{\textit{#1}}}
 \newcommand{\oligo}{\Rpackage{oligo }}
 \newcommand{\R}{\textsf{R}}
 
 \begin{document}
2ae7850e
 \title{Copy number estimation and genotype calling with \Rpackage{crlmm}}
a6a2c12f
 \date{May, 2009}
f32a09dd
 \author{Rob Scharpf}
 \maketitle
 
 <<setup, echo=FALSE, results=hide>>=
 options(width=60)
 options(continue=" ")
 options(prompt="R> ")
 @ 
 
43b13ec9
 %\section{Estimating copy number}
f32a09dd
 
43b13ec9
 %At present, software for copy number estimation is provided only for the
 %Affymetrix 6.0 platform.  
f32a09dd
 
43b13ec9
 \begin{abstract}
   This vignette estimates copy number for HapMap samples on the
bd722b7c
   Affymetrix 6.0 platform.  See \citep{Scharpf2009} for the working
   paper.
   
43b13ec9
 \end{abstract}
f32a09dd
 
43b13ec9
 \section{Simple usage}
 
2ae7850e
 CRLMM supports the following platforms:
f32a09dd
 
2ae7850e
 <<cdfname>>=
f32a09dd
 library(crlmm)
2ae7850e
 crlmm:::validCdfNames()
f32a09dd
 @ 
2ae7850e
 
43b13ec9
 \paragraph{Preprocess and genotype.}
f32a09dd
 
43b13ec9
 Specify the coordinates of Affymetrix cel files and where to put
 intermediate files generated during the course of preprocessing / copy
 number estimation.
64463948
 
aa24f926
 <<celfiles>>=
43b13ec9
 myPath <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"
 celFiles <- list.celfiles(myPath, full.names=TRUE, pattern=".CEL")
2ae7850e
 #outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
 outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/tmp"
 dir.create(outdir)
f32a09dd
 @ 
 
 
2ae7850e
 
 \noindent The next code chunk quantile normalizes the samples to a
 target reference distribution and uses the crlmm algorithm to genotype.
 See \cite{Carvalho2007a} for details regarding the crlmm genotyping
 algorithm.
 
 <<preprocessAndGenotype, eval=FALSE>>=
 crlmmWrapper(celFiles,##[1:15], 
 	     cdfName="genomewidesnp6",
43b13ec9
 	     save.it=TRUE, 
2ae7850e
 	     load.it=FALSE,
43b13ec9
 	     intensityFile=file.path(outdir, "normalizedIntensities.rda"),
2ae7850e
 	     crlmmFile=file.path(outdir, "snpsetObject.rda"),
 	     platform="affymetrix")
f32a09dd
 @ 
 
43b13ec9
 As a result of the above wrapper, the following R objects are now created:
 
 <<>>=
 list.files(outdir)[grep("crlmmSetList", list.files(outdir))]
 @ 
 
 \paragraph{Locus- and allele-specific estimates of copy number.}
 
 Load the object for chromosome 22 and compute copy number:
f32a09dd
 
38fb1b80
 <<crlmmList-show>>=
 CHR <- 22
43b13ec9
 if(!exists("crlmmSetList")) load(file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep="")))
 show(crlmmSetList)
 if(length(crlmmSetList)==2){
 	crlmmSetList <- update(crlmmSetList, CHR=CHR)
 }
 show(crlmmSetList)
f32a09dd
 @ 
 
43b13ec9
 See the help file for \Robject{computeCopynumber} for arguments to
 \Robject{update}.  Provided that the assumption of integer copy number
 is reasonable, one can fit a hidden Markov model to the locus-level
 estimates of copy number and uncertainty.
f32a09dd
 
43b13ec9
 \paragraph{A hidden Markov model.}
f32a09dd
 
43b13ec9
 Emission probabilities for the hidden markov model can be computed from
 the copy number prediction regions based on bivariate normal scatter
 plots of the log A versus log B intensities.  (By contrast, a
 nonparamemtric segmentation would be applied to intensities on the scale
 of copy number.)
f32a09dd
 
43b13ec9
 <<emissionProbabilities>>=
 library(VanillaICE)
 copyNumberStates <- 0:5
 if(!exists("emission.cn")){
 	emission.cn <- suppressWarnings(crlmm:::computeEmission(crlmmSetList, copyNumberStates))
 	dim(emission.cn)
 }
f32a09dd
 @ 
 
2ae7850e
 *Before smoothing the estimates as a function of physical position by
 segmentation hidden Markov models, one should consider how to handle
 outliers.  In particular, samples with low signal to noise ratios are
 likely to have many copy number outliers. See suggested visualizations.
43b13ec9
 
 Initial state probabilities and transition probabilities for the HMM:
 
 <<probs>>=
 initialP <- rep(1/length(copyNumberStates), length(copyNumberStates))
 tau <- transitionProbability(chromosome=chromosome(crlmmSetList),
 			     position=position(crlmmSetList),
 			     TAUP=1e8)
 @ 
 
 The viterbi algorithm is used to identify the sequence of states that
 maximizes the likelihood:
 
 <<hmm>>=
 if(!exists("hmmPredictions")){
 hmmPredictions <- viterbi(emission=emission.cn,
 			  initialStateProbs=log(initialP), 
 			  tau=tau[, "transitionPr"],
 			  arm=tau[, "arm"], 
 			  normalIndex=3,
 			  normal2altered=0.01,
 			  altered2normal=1,
 			  altered2altered=0.001)	    
 }
 table(as.integer(hmmPredictions)-1)
 brks <- breaks(x=hmmPredictions, states=copyNumberStates, 
 	       position=tau[, "position"],
 	       chromosome=tau[, "chromosome"])
 str(brks)
 @ 
 
 \paragraph{Circular binary segmentation}
bd722b7c
 
 <<setup>>=
 library(DNAcopy)
 library(genefilter)
 ratioset <- crlmmSetList[[3]]
 meds <- rowMedians(copyNumber(ratioset), na.rm=TRUE)
 ratios <- log2(copyNumber(ratioset)) - log2(meds)
 ratioset <- new("SnpCopyNumberSet",
 		copyNumber=ratios,
 		phenoData=phenoData(ratioset),
 		featureData=featureData(ratioset),
 		annotation=annotation(ratioset))
 @ 
 
 The following code chunk (not evaluated) takes a while to run:
 
 <<cbs, eval=FALSE>>=
 CNA.object <- CNA(copyNumber(ratioset), 
 		  chromosome(ratioset),
 		  position(ratioset),
 		  data.type="logratio",
 		  sampleid=sampleNames(ratioset))
 smoothed.CNA.object <- smooth.CNA(CNA.object)
 segment.cna <- segment(smoothed.CNA.object, verbose=1)
 save(segment.cna, file=file.path(outdir, paste("segment.cna_", CHR, ".rda", sep="")))
 @ 
43b13ec9
 
 \section{Accessors}
 
 \subsection{Assay data accessors}
 
 \paragraph{\Robject{ABset}:  quantile normalized intensities}
 
 An object of class \Robject{ABset} is stored in the first element of the
 \Robject{crlmmSetList} object. The following accessors may be of use:
 
 Accessors for the quantile normalized intensities for the A allele:
 
 <<accessors>>=
 a <- A(crlmmSetList)
 dim(a)
 @ 
 
 The quantile normalized intensities for the nonpolymorphic probes are
 also stored in the 'A' assay data element.  To retrieve the quantile
 normalized intensities for the A allele only at polymorphic loci:
 
 <<A.polymorphic>>=
 a.snps <- A(crlmmSetList[snpIndex(crlmmSetList), ])
 dim(a.snps)
 @ 
 
 \noindent For the nonpolymorphic loci:
 <<A.nonpolymorphic>>=
 a.nps <- A(crlmmSetList[cnIndex(crlmmSetList), ])
 dim(a.nps)
 @ 
 
 Quantile normalized intensities for the B allele at polymorphic loci:
 
 <<B.polymorphic>>=
 b.snps <- B(crlmmSetList[snpIndex(crlmmSetList), ])
 @ 
 
 Note that NAs are recorded in the 'B' assay data element for
 nonpolymorphic loci:
 
 <<B.NAs>>=
 all(is.na(B(crlmmSetList[cnIndex(crlmmSetList), ])))
 @ 
 
 \paragraph{\Robject{SnpSet}: Genotype calls and confidence scores}
 
 Genotype calls:
 <<genotypes>>=
 genotypes <- calls(crlmmSetList)
 @ 
 Confidence scores of the genotype calls:
 <<confidenceScores>>=
 genotypeConf <- confs(crlmmSetList)
 @ 
 
 \paragraph{\Robject{CopyNumberSet}: allele-specific copy number}
 
 Allele-specific copy number at polymorphic loci:
 <<ca>>=
 ca <- CA(crlmmSetList[snpIndex(crlmmSetList), ])
 @ 
 
 Total copy number at nonpolymorphic loci:
 <<ca>>=
 cn.nonpolymorphic <- CA(crlmmSetList[cnIndex(crlmmSetList), ])
 range(cn.nonpolymorphic, na.rm=TRUE)
 median(cn.nonpolymorphic, na.rm=TRUE)
 @ 
 
 Total copy number at both polymorphic and nonpolymorphic loci:
 <<totalCopynumber>>=
 cn <- copyNumber(crlmmSetList)
 @ 
 
 \subsection{Other accessors}
 
 After running \Robject{update} on the \Robject{crlmmSetList} object,
 information on physical position and chromosome can be accessed by the
 following accessors:
 
 <<positionChromosome, eval=FALSE>>=
 xx <- position(crlmmSetList)
 yy <- chromosome(crlmmSetList)
 @ 
 
 There are many parameters computed during copy number estimation that
 are at present stored in the \Robject{featureData} slot of the
 \Robject{CopyNumberSet} element.  TO DO: Accessors for these parameters,
 as well as better containers for storing these parameters. See
 
 <<copynumberParameters>>=
 fvarLabels(crlmmSetList[[3]])
 @ 
 \section{Suggested visualizations}
 
 A histogram of the signal to noise ratio for the HapMap samples:
 
 <<plotSnr, fig=TRUE, include=FALSE>>=
 hist(crlmmSetList[[2]]$SNR, xlab="SNR", main="")
 @ 
 
 \begin{figure}
   \centering
   \includegraphics[width=0.6\textwidth]{copynumber-plotSnr}
   \caption{Signal to noise ratios for the HapMap samples.}
 \end{figure}
 
 <<snrmin, eval=TRUE, echo=FALSE>>=
5dab0fa8
 SNRmin <- 5
f32a09dd
 @ 
 
2ae7850e
 For Affymetrix 6.0, we suggest excluding samples with a signal to noise
 ratio less than \Sexpr{SNRmin}.  Adjusting by date or chemistry plate
 can be helpful for limiting the influence of batch effects.  Ideally,
 one would have 70+ files in a given batch. Here we make a table of date
 versus ancestry:
f32a09dd
 
43b13ec9
 <<specifyBatch, eval=FALSE, echo=FALSE>>=
38fb1b80
 sns <- sampleNames(crlmmResults)
f32a09dd
 sns[1]
b9c71726
 plate <- substr(basename(sns), 13, 13)
 table(plate)
4b098e36
 table(format(as.POSIXlt(protocolData(crlmmResults)[["ScanDate"]]), "%d %b %Y"), plate)
 dts <- protocolData(crlmmResults)[["ScanDate"]]
f32a09dd
 @ 
 
001bc9f5
 As all of these samples were run on the first week of March, we would
 expect that any systematic artifacts to the intensities that develop
 over time to be minimal (a best case scenario).  As this is typically
 not the case, we illustrate how one may adjust for batch using the
38fb1b80
 chemistry plate as an argument for \Robject{batch} in the
 \Robject{computeCopynumber} function.
 
2ae7850e
 *Note: the number of samples in the \Robject{CrlmmSetList} object after
b9c71726
 copy number estimation may be fewer than the number of samples in the
 \Robject{CrlmmSetList} object after preprocessing/genotyping.  This
 occurs when 1 or more samples have a signal-to-noise ratio less than
 value passed to \Robject{SNRmin}.  By default, intermediate forms of the
 data are stored in one object to ensure that each element in the
 \Robject{CrlmmSetList} have the same ordering of probes and samples.
 The object returned by \Robject{computeCopynumber} is ordered by
 chromosome and physical position (useful for downstream methods that
 smooth the copy number as a function of the physical position).  **
 
38fb1b80
 The above algorithm for estimating copy number is predicated on the
 assumption that most samples within a batch have copy number 2 at any
 given locus.  For common copy number variants, this assumption may not
45dab54b
 hold.  An additional iteration using a bias correction provides
 additional robustness to this assumption.  Set the \Robject{bias.adj}
 argument to \Robject{TRUE}:
 
 <<biasAdjustment, eval=FALSE>>=
43b13ec9
 update(crlmmSetList, CHR=22, bias.adj=TRUE)
 @ 
 
bd722b7c
 The following code chunk calculates the frequency of amplifications and
 deletions at each locus. Shaded regions above the zero line in Figure
 \ref{fig:hmm_hapmap} display the frequency of amplifications, whereas
 shaded regions below the zero line graphically display the frequency of
 hemizygous or homozygous deletions.
 
43b13ec9
 <<hmm_hapmap, fig=TRUE, include=FALSE, width=8, height=7>>=
 require(SNPchip)
 library(RColorBrewer)
 numberUp <- rowSums(hmmPredictions > 3, na.rm=TRUE)
 numberDown <- -rowSums(hmmPredictions < 3, na.rm=TRUE)
 poly.cols <- brewer.pal(7, "Accent")
 alt.brks <- brks[brks[, "state"] != "copy.number_2", ]
 op <- par(ask=FALSE)
 ylim <- c(min(numberDown)-5, max(numberUp)+5)
 xlim <- c(10*1e6, max(position(crlmmSetList)))
 plot(position(crlmmSetList), rep(0, nrow(crlmmSetList[[1]])),
      type="n", xlab="Physical position (Mb)",
      ylim=ylim,
      xlim=xlim,
      ylab="frequency", main="Chr 22",
      xaxt="n",
      xaxs="r")
 axis(1, at=pretty(xlim), labels=pretty(xlim)/1e6)
 polygon(x=c(position(crlmmSetList), rev(position(crlmmSetList))),
 	y=c(rep(0, nrow(crlmmSetList[[1]])), rev(numberUp)),
 	col=poly.cols[3], border=poly.cols[3])
 polygon(x=c(position(crlmmSetList), rev(position(crlmmSetList))),
 	y=c(rep(0, nrow(crlmmSetList[[1]])), rev(numberDown)),
 	col=poly.cols[5], border=poly.cols[5])
 ##plotCytoband(22, xlim=xlim, new=FALSE,
 ##	     label.cytoband=FALSE,
 ##	     cytoband.ycoords=c(-10, -8), xaxs="r")
 
 medLength <- round(median(alt.brks[, "nbases"]), 2)
 medMarkers <- median(alt.brks[, "nprobes"])
 sdMarkers <- round(mad(alt.brks[, "nprobes"]), 2)
 sdsLength <- round(mad(alt.brks[, "nbases"]), 2)
 legend("topright",
        bty="n",
        legend=c(paste("median length:", medLength, "(bp)"),
        paste("MAD length:", sdsLength, "(bp)"),
        paste("median # markers:", medMarkers),
        paste("MAD # markers:", sdMarkers)),
        cex=0.8, ncol=2)
 legend("topleft",
        fill=poly.cols[c(3, 5)],
        legend=c("amplifications", "deletions"), bty="n")
 par(op)
 gc()
f32a09dd
 @ 
 
43b13ec9
 \begin{figure}
   \centering
   \includegraphics[width=\textwidth]{copynumber-hmm_hapmap}
bd722b7c
   \caption{\label{fig:hmm_hapmap} The frequency of amplifications in the
     hapmap samples is displayed above the zero line.  The frequency of
     hemizygous or homozygous deletions are displayed below the zero
     line.}
43b13ec9
 \end{figure}
f32a09dd
 
bd722b7c
 \clearpage
43b13ec9
 \paragraph{One sample at a time: locus-level estimates}
f32a09dd
 
bd722b7c
 Figure \ref{fig:oneSample} plots physical position (horizontal axis)
 versus copy number (vertical axis) for the first sample.  
f32a09dd
 
fe3e3f61
 <<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>=
f32a09dd
 par(las=1)
43b13ec9
 plot(position(crlmmSetList), copyNumber(crlmmSetList)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), 
f8af2fff
      ylab="copy number", xlab="physical position (Mb)",
43b13ec9
      main=paste(sampleNames(crlmmSetList)[1], ", CHR:", unique(chromosome(crlmmSetList))))
 points(position(crlmmSetList)[cnIndex(crlmmSetList)], copyNumber(crlmmSetList)[cnIndex(crlmmSetList), 1],
38fb1b80
        pch=".", cex=2, col="lightblue")
43b13ec9
 axis(1, at=pretty(range(position(crlmmSetList))), labels=pretty(range(position(crlmmSetList)))/1e6)
f8af2fff
 @ 
38fb1b80
 
 <<idiogram, eval=FALSE, echo=FALSE>>=
f8af2fff
 require(SNPchip)
 plotCytoband(22, new=FALSE, cytoband.ycoords=c(5.8, 6.0), label.cytoband=FALSE)
f32a09dd
 @ 
 
 \begin{figure}
   \includegraphics[width=0.9\textwidth]{copynumber-oneSample}
bd722b7c
   \caption{\label{fig:oneSample} Total copy number (y-axis) for
     chromosome 22 plotted against physical position (x-axis) for one
     sample.  Estimates at nonpolymorphic loci are plotted in light
     blue.}
 \end{figure}
 
 \noindent The following code chunk plots the locus-level copy number
 estimates and overlays the predictions from the hidden markov model.
 
 <<overlayHmmPredictions, fig=TRUE, include=FALSE>>=
 ask <- FALSE
 op <- par(mfrow=c(3, 1), las=1, mar=c(1, 4, 1, 1), oma=c(3, 1, 1, 1), ask=ask)
 ##Put fit on the copy number scale
 cns <- copyNumber(crlmmSetList)
 cnState <- hmmPredictions - as.integer(1)
 xlim <- c(10*1e6, max(position(crlmmSetList)))
 cols <- brewer.pal(8, "Dark2")[1:4]
 for(j in 1:3){
 	plot(position(crlmmSetList), cnState[, j], pch=".", col=cols[2], xaxt="n", 
 	     ylab="copy number", xlab="Physical position (Mb)", type="s", lwd=2,
 	     ylim=c(0,6), xlim=xlim)
 	points(position(crlmmSetList), cns[, j], pch=".", col=cols[3])
 	lines(position(crlmmSetList), cnState[,j], lwd=2, col=cols[2])
 	axis(1, at=pretty(position(crlmmSetList)), 
 	     labels=pretty(position(crlmmSetList))/1e6)
 	abline(h=c(1,3), lty=2, col=cols[1])	
 	legend("topright", bty="n", legend=sampleNames(crlmmSetList)[j])
 	legend("topleft", lty=1, col=cols[2], legend="copy number state",
 	       bty="n", lwd=2)
 	plotCytoband(CHR, cytoband.ycoords=c(5, 5.2), new=FALSE,
 		     label.cytoband=FALSE, xlim=xlim)
 }
 par(op)
 @ 
 
 \begin{figure}
   \includegraphics[width=\textwidth]{copynumber-overlayHmmPredictions}
   \caption{\label{fig:overlayHmmPredictions} Total copy number (y-axis)
     for chromosome 22 plotted against physical position (x-axis) for
     three samples.  Estimates at nonpolymorphic loci are plotted in
     light blue. }
f32a09dd
 \end{figure}
 
bd722b7c
 \clearpage
f32a09dd
 \paragraph{One SNP at a time}
 
bd722b7c
 Scatterplots of the A and B allele intensities (log-scale) can be useful
 for assessing the diallelic genotype calls.  The following code chunk is
 displayed in Figure \ref{fig:genotypeCalls}.
f32a09dd
 
fe3e3f61
 <<genotypeCalls, fig=TRUE, width=8, height=8, include=FALSE>>=
38fb1b80
 xlim <- ylim <- c(6.5,13)
 pch <- 21
 colors <- c("red", "blue", "green3")
 cex <- 0.6
 par(mfrow=c(3,3), las=1, pty="s", ask=FALSE, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1))
 ##plot 9 at a time
43b13ec9
 indices <- split(snpIndex(crlmmSetList), rep(1:length(snpIndex(crlmmSetList)), each=9, length.out=length(snpIndex(crlmmSetList))))
bd722b7c
 ##par(ask=FALSE)
38fb1b80
 j <- 1
 for(i in indices[[j]]){
43b13ec9
 	gt <- calls(crlmmSetList)[i, ]
 	plot(crlmmSetList[i, ], 
38fb1b80
 	     pch=pch, 
 	     col=colors[gt], 
 	     bg=colors[gt], cex=cex,
 	     xlim=xlim, ylim=ylim)
 	mtext("A", 1, outer=TRUE, line=1)
 	mtext("B", 2, outer=TRUE, line=1)	
 }
bd722b7c
 @
 
 \begin{figure}
   \centering
   \includegraphics[width=0.8\textwidth]{copynumber-genotypeCalls}
   \caption{\label{fig:genotypeCalls}  Scatterplots of A versus B
     intensities.  Each panel displays a single SNP.}
 \end{figure}
 
 TODO: Plot the prediction regions for total copy number 2 and 3 for the
 first plate. Plotting symbols are the genotype calls (1=AA, 2=AB, 3=BB);
 light grey points are from other plates. One could also add the
 prediction regions for 0-4 copies, but it gets crowded.
 
38fb1b80
 
bd722b7c
 <<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE, echo=FALSE>>=
38fb1b80
 require(RColorBrewer)
43b13ec9
 library(ellipse)
38fb1b80
 greens <- brewer.pal(9, "Greens")
43b13ec9
 J <- split(1:ncol(crlmmSetList), batch(crlmmSetList))
38fb1b80
 colors <- c("red", "blue", "green3")
 cex <- 0.6
 colors <- c("blue", greens[8], "red")
 pch.col <- c("grey40", "black", "grey40")
 xlim <- ylim <- c(6.5,13)
 plotpoints <- FALSE
 lwd <- 2
43b13ec9
 ##pdf("figures/snp22plots%02d.pdf", width=600, height=600)
38fb1b80
 ask <- FALSE
 par(mfrow=c(3,3), las=1, pty="s", ask=ask, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1))
43b13ec9
 indices <- split(snpIndex(crlmmSetList), rep(1:length(snpIndex(crlmmSetList)), each=9, length.out=length(snpIndex(crlmmSetList))))
 ##for(j in seq(along=indices)[1:10]){
 j <- 1
38fb1b80
 	cat(j, "\n")
 	k <- 1
 	for(i in indices[[j]]){
43b13ec9
 		gt <- calls(crlmmSetList)[i, ]
38fb1b80
 		pch <- as.character(gt)
 		cex <- 0.9
43b13ec9
 		plot(crlmmSetList[i, ], 
38fb1b80
 		     pch=pch, 
 		     col=pch.col[gt],
 		     cex=cex,
 		     xlim=xlim, ylim=ylim,
 		     type="n")
 		if(plotpoints){
43b13ec9
 			for(b in seq(along=unique(batch(crlmmSetList)))){
 				points(crlmmSetList[i, J[[b]]], 
38fb1b80
 				       pch=pch, 
 				       col=colors[b], bg=colors[b], cex=cex,
 				       xlim=xlim, ylim=ylim)
 			}
 		}
43b13ec9
 		for(b in seq(along=unique(batch(crlmmSetList)))){
 			ellipse(crlmmSetList[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd)
38fb1b80
 		}
43b13ec9
 		##legend("bottomright", bty="n", legend=featureNames(crlmmSetList)[i])
38fb1b80
 		if(k == 1) {
 			legend("bottomleft", bty="n", fill=colors, legend=c("CEPH", "Yoruba", "Asian"))	
 			mtext("A", 1, outer=TRUE, line=1)
 			mtext("B", 2, outer=TRUE, line=0)			
 		}
 		k <- k+1
f32a09dd
 	}
38fb1b80
 @
 
43b13ec9
 %\begin{figure}
 %  \includegraphics[width=0.9\textwidth]{copynumber-predictionRegion}
 %  \caption{Prediction regions for copy number 2 for one SNP.  The
 %    plate effects are negligible for this SNP and we only plot the
 %    prediction region for the first plate.}
 %\end{figure}
38fb1b80
 
43b13ec9
 \section{Details for the copy number estimation procedure}
bd722b7c
 
 We assume a linear relationship between the normalized intensities and
 the allele-specific copy number.  SNP-specific parameters are estimated
 only from samples with a suitable confidence score for the diallelic
 genotype calls.  The default confidence threshold is 0.99, but can be
 adjusted by passing the argument \Robject{CONF.THR} to the
 \Robject{update} method.  TODO: illustrate this approach with boxplots
 of the A and B intensities stratified by genotype.
 
 <<linearModel, fig=TRUE,include=FALSE, eval=FALSE, echo=FALSE>>=
 cols <- brewer.pal(7, "Accent")[c(1, 4, 7)]
 i <- match("SNP_A-8608180", featureNames(crlmmSetList))
 B <- batch(crlmmSetList) == unique(batch(crlmmSetList))[1]
 gt.conf <- confs(crlmmSetList[i, B])
 op <- par(mfrow=c(2, 1), las=1, ask=FALSE, mar=c(3.5, 4, 0.5, 0.5), oma=c(0, 0, 2, 0))
 crlmm:::boxplot(crlmmSetList[i, B][[1]])
 par(op)
 @ 
 
 %\begin{figure}
 %  \centering
 %  \includegraphics[width=0.8\textwidth]{copynumber-linearModel}
 %
 %  \caption{\label{fig:linearModel} The SNP-specific intercept and slope
 %    for the first batch of samples.  These plots can sometimes be
 %    misleading as the genotypes estimated with low confidence do not
 %    influence the regression line.}
 %\end{figure}
e0644635
 
f32a09dd
 \section{Session information}
45dab54b
 <<sessionInfo, results=tex>>=
 toLatex(sessionInfo())
f32a09dd
 @ 
 
bd722b7c
 \section*{References}
f32a09dd
 
bd722b7c
 %\begin{bibliography}
   \bibliographystyle{plain}
   \bibliography{refs}
 %\end{bibliography}
b9c71726
 
f32a09dd
 \end{document}