Parts of the 'normal' matrix appear to be used in fit.lm4 -- the
wrapper for nonpolymorphic markers on X. Testing here will be needed
once these lines are commented.
Should snpflags and normal be part of the CNSet object an instantiated
upon initialization? I think so. snpflags need only be the same
dimension as the parameters and can be included in the
LinerModelParameter class.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48953 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -789,13 +789,12 @@ fit.lm1 <- function(idxBatch, |
789 | 789 |
|
790 | 790 |
open(object) |
791 | 791 |
open(snpflags) |
792 |
- open(normal) |
|
792 |
+## open(normal) |
|
793 | 793 |
|
794 | 794 |
corrAB <- corrBB <- corrAA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batch(object)))) |
795 | 795 |
flags <- nuA <- nuB <- phiA <- phiB <- corrAB |
796 | 796 |
|
797 |
- normal.snps <- normal[snps, ] |
|
798 |
- ##cB <- cA <- matrix(NA, length(snps), ncol(object)) |
|
797 |
+## normal.snps <- normal[snps, ] |
|
799 | 798 |
GG <- as.matrix(calls(object)[snps, ]) |
800 | 799 |
CP <- as.matrix(snpCallProbability(object)[snps, ]) |
801 | 800 |
AA <- as.matrix(A(object)[snps, ]) |
... | ... |
@@ -804,7 +803,7 @@ fit.lm1 <- function(idxBatch, |
804 | 803 |
for(k in batches){ |
805 | 804 |
zzzz <- zzzz+1 |
806 | 805 |
G <- GG[, k] |
807 |
- NORM <- normal.snps[, k] |
|
806 |
+ ##NORM <- normal.snps[, k] |
|
808 | 807 |
xx <- CP[, k] |
809 | 808 |
highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
810 | 809 |
G <- G*highConf*NORM |
... | ... |
@@ -998,7 +997,7 @@ fit.lm2 <- function(idxBatch, |
998 | 997 |
|
999 | 998 |
open(object) |
1000 | 999 |
open(snpflags) |
1001 |
- open(normal) |
|
1000 |
+## open(normal) |
|
1002 | 1001 |
|
1003 | 1002 |
## cA <- matrix(NA, length(snps), ncol(object)) |
1004 | 1003 |
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object)) |
... | ... |
@@ -1017,8 +1016,8 @@ fit.lm2 <- function(idxBatch, |
1017 | 1016 |
|
1018 | 1017 |
AA.snp <- as.matrix(A(object)[snp.ind, ]) |
1019 | 1018 |
BB.snp <- as.matrix(B(object)[snp.ind, ]) |
1020 |
- NNORM.snp <- as.matrix(normal[snp.ind, ]) |
|
1021 |
- NORM.np <- as.matrix(normal[snps, ]) |
|
1019 |
+## NNORM.snp <- as.matrix(normal[snp.ind, ]) |
|
1020 |
+## NORM.np <- as.matrix(normal[snps, ]) |
|
1022 | 1021 |
AA.np <- as.matrix(A(object)[snps, ]) |
1023 | 1022 |
GG <- as.matrix(calls(object)[snp.ind, ]) |
1024 | 1023 |
CP <- as.matrix(snpCallProbability(object)[snp.ind, ]) |
... | ... |
@@ -2896,7 +2895,6 @@ computeCN <- function(object, |
2896 | 2895 |
sns=sampleNames(object), |
2897 | 2896 |
fns=featureNames(object)[marker.index]) |
2898 | 2897 |
ocLapply(seq(along=index.strata), |
2899 |
- ##match.fun(FUN), |
|
2900 | 2898 |
FUN, |
2901 | 2899 |
marker.index=marker.index, |
2902 | 2900 |
object=obj, |
... | ... |
@@ -143,8 +143,7 @@ setMethod("CB", signature=signature(object="CNSet"), |
143 | 143 |
return(cb) |
144 | 144 |
}) |
145 | 145 |
|
146 |
-##setMethod("totalCopyNumber", signature=signature(object="CNSet"), |
|
147 |
-totalCopyNumber <- function(object, ..., verbose=TRUE, dimnames=FALSE){ |
|
146 |
+totalCopyNumber <- function(object, ...){ |
|
148 | 147 |
ca <- CA(object, ...) |
149 | 148 |
cb <- CB(object, ...) |
150 | 149 |
is.snp <- isSnp(object) |
... | ... |
@@ -160,44 +159,6 @@ totalCopyNumber <- function(object, ..., verbose=TRUE, dimnames=FALSE){ |
160 | 159 |
return(ca+cb) |
161 | 160 |
} |
162 | 161 |
|
163 |
-## |
|
164 |
-## |
|
165 |
-## |
|
166 |
-## |
|
167 |
-## if(missing(i) & !missing(j)){ |
|
168 |
-## ca <- CA(object, i, j) |
|
169 |
-## snp.index <- which(isSnp(object)) |
|
170 |
-## cn.total <- as.matrix(CA(object)[, j]) |
|
171 |
-## if(length(snp.index) > 0){ |
|
172 |
-## cb <- as.matrix(CB(object)[snp.index, j]) |
|
173 |
-## snps <- (1:nrow(cn.total))[i %in% snp.index] |
|
174 |
-## cn.total[snps, ] <- cn.total[snps, j] + cb |
|
175 |
-## } |
|
176 |
-## } |
|
177 |
-## if(!missing(i) & missing(j)){ |
|
178 |
-## snp.index <- intersect(which(isSnp(object)), i) |
|
179 |
-## cn.total <- as.matrix(CA(object)[i, ]) |
|
180 |
-## if(length(snp.index) > 0){ |
|
181 |
-## cb <- as.matrix(CB(object)[snp.index, ]) |
|
182 |
-## snps <- (1:nrow(cn.total))[i %in% snp.index] |
|
183 |
-## cn.total[snps, ] <- cn.total[snps, ] + cb |
|
184 |
-## } |
|
185 |
-## } |
|
186 |
-## if(!missing(i) & !missing(j)){ |
|
187 |
-## snp.index <- intersect(which(isSnp(object)), i) |
|
188 |
-## cn.total <- as.matrix(CA(object)[i, j]) |
|
189 |
-## if(length(snp.index) > 0){ |
|
190 |
-## cb <- as.matrix(CB(object)[snp.index, j]) |
|
191 |
-## snps <- (1:nrow(cn.total))[i %in% snp.index] |
|
192 |
-## cn.total[snps, ] <- cn.total[snps, ] + cb |
|
193 |
-## } |
|
194 |
-## } |
|
195 |
-## ##cn.total <- cn.total/100 |
|
196 |
-## dimnames(cn.total) <- NULL |
|
197 |
-## return(cn.total) |
|
198 |
-## }) |
|
199 |
- |
|
200 |
- |
|
201 | 162 |
setReplaceMethod("snpCall", c("CNSet", "ff_or_matrix"), |
202 | 163 |
function(object, ..., value){ |
203 | 164 |
assayDataElementReplace(object, "call", value) |
... | ... |
@@ -24,100 +24,122 @@ |
24 | 24 |
|
25 | 25 |
<<setup, echo=FALSE, results=hide>>= |
26 | 26 |
options(continue=" ", width=70) |
27 |
-@ |
|
27 |
+@ |
|
28 | 28 |
|
29 | 29 |
%\section{Estimating copy number} |
30 | 30 |
|
31 | 31 |
%At present, software for copy number estimation is provided only for the |
32 |
-%Affymetrix 6.0 platform. |
|
32 |
+%Affymetrix 6.0 platform. |
|
33 | 33 |
|
34 | 34 |
\begin{abstract} |
35 | 35 |
This vignette estimates copy number for HapMap samples on the |
36 | 36 |
Affymetrix 6.0 platform. See \citep{Scharpf2009} for the working |
37 | 37 |
paper. |
38 |
- |
|
38 |
+ |
|
39 | 39 |
\end{abstract} |
40 | 40 |
|
41 | 41 |
\section{Simple usage} |
42 | 42 |
|
43 |
-CRLMM supports the following platforms: |
|
43 |
+ |
|
44 | 44 |
|
45 | 45 |
<<cdfname>>= |
46 |
-library(oligoClasses) |
|
47 | 46 |
library(crlmm) |
48 |
-crlmm:::validCdfNames() |
|
47 |
+@ |
|
48 |
+ |
|
49 |
+Several genotyping platforms are currently supported. Supported |
|
50 |
+platforms must have a corresponding annotation package (installed |
|
51 |
+separately) that are listed below. |
|
52 |
+ |
|
53 |
+<<annotationPackages>>= |
|
54 |
+annotationPackages() |
|
55 |
+@ |
|
56 |
+ |
|
57 |
+Only the annotation package that end with the 'Crlmm' postfix are |
|
58 |
+supported for copy number estimation. For instance, |
|
59 |
+'pd.genomewidesnp6' and 'genomewidesnp6Crlmm' are both annotation |
|
60 |
+packages for the Affymetrix 6.0 platform, but the |
|
61 |
+'genomewidesnp6Crlmm' annotation package must be used for copy number |
|
62 |
+estimation. The annotation package is specified through the 'cdfName' |
|
63 |
+-- the identifier without the 'Crlmm' postfix. In the following code, |
|
64 |
+we specify the cdf name for Affymetrix 6.0, provide the complete path |
|
65 |
+to the CEL files, and indicate where intermediate files from the copy |
|
66 |
+number estimation are to be saved. |
|
67 |
+ |
|
68 |
+<<>>= |
|
49 | 69 |
cdfName <- "genomewidesnp6" |
50 | 70 |
pathToCels <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m" |
51 | 71 |
if(getRversion() < "2.12.0"){ |
52 | 72 |
rpath <- getRversion() |
53 | 73 |
} else rpath <- "trunk" |
54 | 74 |
outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/copynumber_vignette", sep="") |
55 |
-@ |
|
75 |
+@ |
|
56 | 76 |
|
57 |
-\paragraph{About this vignette.} This vignette was created using CEL |
|
58 |
-files that are located in a specific directory on my computer |
|
59 |
-(\Robject{pathToCels}). Long computations are saved in the output |
|
60 |
-directory \Robject{outdir}. Users should change these variables as |
|
61 |
-appropriate. The following code chunk should fail unless the above |
|
62 |
-arguments have been set appropriately. |
|
77 |
+All long computations are saved in the output directory |
|
78 |
+\Robject{outdir}. Users should change these variables as appropriate. |
|
79 |
+The following code chunk should fail unless the above arguments have |
|
80 |
+been set appropriately. |
|
63 | 81 |
|
64 | 82 |
<<checkSetup>>= |
65 |
-if(!file.exists(outdir)) stop("Please specify valid directory for storing output") |
|
83 |
+if(!file.exists(outdir)) stop("Please specify a valid directory for storing output") |
|
66 | 84 |
if(!file.exists(pathToCels)) stop("Please specify the correct path to the CEL files") |
67 |
-@ |
|
85 |
+@ |
|
68 | 86 |
|
69 | 87 |
Processed data from codechunks requiring long computations are saved |
70 |
-to disk automatically by wrapping function calls in the |
|
71 |
-\Robject{checkExists} function. After the initial batch job, |
|
72 |
-subsequent calls to \verb@Sweave@ will load saved computations from |
|
73 |
-disk and can therefore be run interactively. This may be useful as you |
|
74 |
-begin to explore some of the tools for accessing processed data and |
|
75 |
-visualizations. |
|
88 |
+to disk by wrapping function calls in the \Robject{checkExists} |
|
89 |
+function. After running this vignette as a batch job, subsequent |
|
90 |
+calls to \verb@Sweave@ will load the saved computations from disk. For |
|
91 |
+additional information, see the examples in the help file for the |
|
92 |
+\Rfunction{checkExists} function. |
|
76 | 93 |
|
77 | 94 |
\paragraph{Preprocessing and genotyping.} |
78 | 95 |
|
79 | 96 |
In the following code chunk, we provide the complete path to the |
80 | 97 |
Affymetrix CEL files and define a 'batch' variable. The |
81 |
-\Robject{batch} variable will later be useful when we estimate copy |
|
82 |
-number. We typically use the 96 well chemistry plate or the scan date |
|
83 |
-of the array as a surrogate for batch. Adjusting by date or chemistry |
|
84 |
-plate can be critical for limiting the influence of batch effects. For |
|
85 |
-the HapMap CEL files in our analysis, the CEPH (C) and Yoruban (Y) |
|
86 |
-samples were run on separate plates and can be extracted from the CEL |
|
87 |
-filename as below. |
|
98 |
+\Robject{batch} variable will be used to initialize a container for |
|
99 |
+storing the normalized intensities, the genotype calls, and the |
|
100 |
+parameter estimates for copy number. Often the chemistry plate or the |
|
101 |
+scan date of the array is a useful surrogate for batch. For the HapMap |
|
102 |
+CEL files in our analysis, the CEPH (C) and Yoruban (Y) samples were |
|
103 |
+prepared on separate plates. In the following code chunk, we extract |
|
104 |
+the population identifier from the CEL file names and use this as the |
|
105 |
+batch variable. |
|
88 | 106 |
|
89 | 107 |
<<celfiles>>= |
90 | 108 |
celFiles <- list.celfiles(pathToCels, full.names=TRUE, pattern=".CEL") |
91 |
-if(length(celFiles) < 1) stop("Please specify the appropriate path to CEL files") |
|
92 |
-## CEPH and Yoruban |
|
109 |
+celFiles <- celFiles[substr(basename(celFiles), 13, 13) %in% c("C", "Y")] |
|
93 | 110 |
batch <- substr(basename(celFiles), 13, 13) |
94 |
-celFiles <- celFiles[batch %in% c("C", "Y")] |
|
95 |
-batch <- batch[batch %in% c("C", "Y")] |
|
96 |
-stopifnot(length(batch) == length(celFiles)) |
|
97 |
-@ |
|
111 |
+@ |
|
98 | 112 |
|
99 | 113 |
The preprocessing steps for copy number estimation includes quantile |
100 |
-normalization of the polymorphic and nonpolymorphic markers and a |
|
101 |
-summarization step whereby the quantile normalized intenities are |
|
102 |
-summarized for each locus. As the markers at polymorphic loci on the |
|
103 |
-Affymetrix 6.0 platform are identical, we summarize the intensities by |
|
104 |
-the median. For the nonpolymorphic markers, no summarization step is |
|
105 |
-performs. Next, the \crlmm{} package provides a genotype call and a |
|
106 |
-confidence score at each polymorphic locus. Unless the dataset is |
|
107 |
-small (e.g., fewer than 50 samples), we suggest installing and loading |
|
108 |
-the \R{} package \Rpackage{ff}. The function \Rfunction{genotype} |
|
109 |
-checks to see whether the \Rpackage{ff} is loaded. If loaded, the |
|
110 |
-normalized intensities and genotype are stored as \Robject{ff} objects |
|
111 |
-on disk. Otherwise, the genotypes and normalized intensities are |
|
112 |
-stored in matrices. A word of caution: the \Rfunction{genotype} |
|
113 |
-function without \Rpackage{ff} requires a potentially large amount of |
|
114 |
-RAM. Therefore, we illustrate the \Robject{genotype} function using |
|
115 |
-only the first 20 CEL files. We wrap the function \Rfunction{genotype} |
|
116 |
-in the \Rfunction{checkExists} so that a user can load and inspect the |
|
117 |
-saved object. We first check that the file \Robject{cnSet.rda} does |
|
118 |
-not exist. Existence of this file implies that we have already run the |
|
119 |
-copy number estimation and, therefore, we do not need to preprocess |
|
120 |
-and genotype the files a second time. |
|
114 |
+normalization of the raw intensities for each probe and a step that |
|
115 |
+summarizes the intensities of multiple probes at a single locus. For |
|
116 |
+example, the Affymetrix 6.0 platform has 3 or 4 identical probes at |
|
117 |
+each polymorphic locus and the normalized intensities are summarized |
|
118 |
+by a median. For the nonpolymorphic markers on Affymetrix 6.0, only |
|
119 |
+one probe per locus is available and the summarization step is not |
|
120 |
+needed. |
|
121 |
+ |
|
122 |
+After preprocessing the arrays, the \crlmm{} package estimates the |
|
123 |
+genotype and provides a confidence score at each polymorphic locus. |
|
124 |
+Unless the dataset is small (e.g., fewer than 50 samples), we suggest |
|
125 |
+installing and loading the \R{} package \Rpackage{ff} to reduce the |
|
126 |
+RAM required for preprocessing and genotyping. Loading the |
|
127 |
+\Rpackage{ff} package at this point will automatically enable large |
|
128 |
+data support (LDS). |
|
129 |
+ |
|
130 |
+The function \Rfunction{genotype} checks to see whether the |
|
131 |
+\Rpackage{ff} is loaded. If loaded, the normalized intensities and |
|
132 |
+genotype are stored as \Robject{ff} objects on disk. Otherwise, the |
|
133 |
+genotypes and normalized intensities are stored in matrices. A word |
|
134 |
+of caution: the \Rfunction{genotype} function without \Rpackage{ff} |
|
135 |
+requires a potentially large amount of RAM. Therefore, we illustrate |
|
136 |
+the \Robject{genotype} function using only the first 20 CEL files. We |
|
137 |
+wrap the function \Rfunction{genotype} in the \Rfunction{checkExists} |
|
138 |
+so that a user can load and inspect the saved object. We first check |
|
139 |
+that the file \Robject{cnSet.rda} does not exist. Existence of this |
|
140 |
+file implies that we have already run the copy number estimation and, |
|
141 |
+therefore, we do not need to preprocess and genotype the files a |
|
142 |
+second time. |
|
121 | 143 |
|
122 | 144 |
<<genotype, eval=FALSE>>= |
123 | 145 |
if(!file.exists(file.path(outdir, "cnSet.assayData_matrix.rda"))){ |
... | ... |
@@ -130,7 +152,7 @@ if(!file.exists(file.path(outdir, "cnSet.assayData_matrix.rda"))){ |
130 | 152 |
class(calls(gtSet.assayData_matrix)) |
131 | 153 |
} |
132 | 154 |
##q("no") |
133 |
-@ |
|
155 |
+@ |
|
134 | 156 |
|
135 | 157 |
Next, we estimate copy number for the 20 CEL files. The copy number |
136 | 158 |
estimation step requirees a minimum number of CEL files in each batch |
... | ... |
@@ -146,7 +168,7 @@ cnSet.assayData_matrix <- checkExists("cnSet.assayData_matrix", |
146 | 168 |
.FUN=crlmmCopynumber, |
147 | 169 |
object=gtSet.assayData_matrix, |
148 | 170 |
chromosome=22) |
149 |
-@ |
|
171 |
+@ |
|
150 | 172 |
|
151 | 173 |
|
152 | 174 |
A more memory efficient approach to preprocessing and genotyping is |
... | ... |
@@ -162,7 +184,7 @@ library(ff) |
162 | 184 |
ldPath(outdir) |
163 | 185 |
ocProbesets(50000) |
164 | 186 |
ocSamples(200) |
165 |
-@ |
|
187 |
+@ |
|
166 | 188 |
|
167 | 189 |
With LDS now enabled, we preprocess and genotype 180 samples from the |
168 | 190 |
CEPH and Yoruban populations using the \R{} function |
... | ... |
@@ -170,7 +192,7 @@ CEPH and Yoruban populations using the \R{} function |
170 | 192 |
polymorphic loci (and not the copy number estimates) should instead |
171 | 193 |
use the \R{} function \Rfunction{crlmm} or \Rfunction{crlmm2}. Again, |
172 | 194 |
we wrap the call to \Rfunction{genotypeLD} in \Rfunction{checkExists} |
173 |
-so that subsequent calls to \verb@Sweave@ can be run interactively. |
|
195 |
+so that subsequent calls to \verb@Sweave@ can be run interactively. |
|
174 | 196 |
|
175 | 197 |
|
176 | 198 |
<<LDS_genotype>>= |
... | ... |
@@ -185,7 +207,7 @@ if(!file.exists(file.path(outdir, "cnSet.assayData_ff.rda"))){ |
185 | 207 |
Rprof(NULL) |
186 | 208 |
class(calls(gtSet.assayData_ff)) |
187 | 209 |
} |
188 |
-@ |
|
210 |
+@ |
|
189 | 211 |
|
190 | 212 |
The analogous function for copy number estimation follows. In |
191 | 213 |
practice, once the object \Robject{cnSet.assayData_ff} is created the |
... | ... |
@@ -204,7 +226,7 @@ Rprof(NULL) |
204 | 226 |
## unlink(file.path(outdir, "gtSet.assayData_ff.rda")) |
205 | 227 |
gc() |
206 | 228 |
q("no") |
207 |
-@ |
|
229 |
+@ |
|
208 | 230 |
|
209 | 231 |
The objects returned by the \Rfunction{genotypeLD} and |
210 | 232 |
\Rfunction{crlmmCopynumberLD} have assay data elements that are |
... | ... |
@@ -234,7 +256,7 @@ if(FALSE){ |
234 | 256 |
gt1 <- calls(cnSet.assayData_matrix)[, 1:50] |
235 | 257 |
gt2 <- calls(cnSet.assayData_ff)[, 1:50] |
236 | 258 |
all.equal(gt1, gt2) |
237 |
-@ |
|
259 |
+@ |
|
238 | 260 |
|
239 | 261 |
Note that for the Affymetrix 6.0 platform the assay data elements each |
240 | 262 |
have a row dimension corresponding to the total number of polymorphic |
... | ... |
@@ -266,7 +288,7 @@ posterior.prob <- tryCatch(i2p(snpCallProbability(cnSet.assayData_ff)), |
266 | 288 |
posterior.prob1 <- i2p(snpCallProbability(cnSet.assayData_matrix)[rows, cols]) |
267 | 289 |
posterior.prob2 <- i2p(snpCallProbability(cnSet.assayData_ff)[rows, cols]) |
268 | 290 |
all.equal(posterior.prob1, posterior.prob2) |
269 |
-@ |
|
291 |
+@ |
|
270 | 292 |
|
271 | 293 |
Next, we obtain locus-level estimates of copy number by fitting a linear |
272 | 294 |
model to each SNP. A variable named 'batch' must be indicated in the |
... | ... |
@@ -289,13 +311,13 @@ ocProbesets(75e3) |
289 | 311 |
##ff objects |
290 | 312 |
system.time(cnSet2 <- crlmmCopynumber2(cnSet2)) |
291 | 313 |
save(cnSet2, file=file.path(outdir, "cnSet2.rda")) |
292 |
-@ |
|
314 |
+@ |
|
293 | 315 |
|
294 | 316 |
<<timings, eval=FALSE>>= |
295 | 317 |
tryCatch(print(paste("Time for matrix version:", time1)), error=function(e) print("timing for CN estimation not available")) |
296 | 318 |
tryCatch(print(paste("Time for ff version:", time2)), error=function(e) print("timing for CN estimation not available")) |
297 | 319 |
rm(cnSet2); gc() |
298 |
-@ |
|
320 |
+@ |
|
299 | 321 |
|
300 | 322 |
|
301 | 323 |
\section{Accessors} |
... | ... |
@@ -307,7 +329,7 @@ visualizations using the sample object provided in the |
307 | 329 |
<<sampleset>>= |
308 | 330 |
data(sample.CNSetLM) |
309 | 331 |
x <- sample.CNSetLM |
310 |
-@ |
|
332 |
+@ |
|
311 | 333 |
|
312 | 334 |
|
313 | 335 |
\subsection{Quantile-normalized intensities} |
... | ... |
@@ -320,7 +342,7 @@ snp.index <- which(isSnp(x)) |
320 | 342 |
np.index <- which(!isSnp(x)) |
321 | 343 |
a <- (A(x))[snp.index, ] |
322 | 344 |
dim(a) |
323 |
-@ |
|
345 |
+@ |
|
324 | 346 |
|
325 | 347 |
The extra set of parentheses surrounding \Robject{A(cnSet2)} above is |
326 | 348 |
added to emphasize the appropriate order of operations. Subsetting the |
... | ... |
@@ -329,42 +351,42 @@ large datasets. |
329 | 351 |
|
330 | 352 |
<<eval=FALSE>>= |
331 | 353 |
a <- A(x[snp.index, ]) |
332 |
-@ |
|
354 |
+@ |
|
333 | 355 |
|
334 | 356 |
The quantile-normalized intensities for nonpolymorphic loci are obtained |
335 | 357 |
by: |
336 | 358 |
|
337 | 359 |
<<>>= |
338 | 360 |
npIntensities <- (A(x))[np.index, ] |
339 |
-@ |
|
361 |
+@ |
|
340 | 362 |
|
341 | 363 |
Quantile normalized intensities for the B allele at polymorphic loci: |
342 | 364 |
|
343 | 365 |
<<B.polymorphic>>= |
344 | 366 |
b.snps <- (B(x))[snp.index, ] |
345 |
-@ |
|
367 |
+@ |
|
346 | 368 |
|
347 | 369 |
Note that NAs are recorded in the 'B' assay data element for |
348 | 370 |
nonpolymorphic loci: |
349 | 371 |
|
350 | 372 |
<<B.NAs>>= |
351 | 373 |
all(is.na((B(x))[np.index, ])) |
352 |
-@ |
|
374 |
+@ |
|
353 | 375 |
|
354 | 376 |
<<clean, echo=FALSE>>= |
355 | 377 |
rm(b.snps, a, npIntensities); gc() |
356 |
-@ |
|
378 |
+@ |
|
357 | 379 |
|
358 | 380 |
\paragraph{\Robject{SnpSet}: Genotype calls and confidence scores} |
359 | 381 |
|
360 | 382 |
Genotype calls: |
361 | 383 |
<<genotypes>>= |
362 | 384 |
genotypes <- (snpCall(x))[snp.index, ] |
363 |
-@ |
|
385 |
+@ |
|
364 | 386 |
Confidence scores of the genotype calls: |
365 | 387 |
<<confidenceScores>>= |
366 | 388 |
genotypeConf <- integerScoreToProbability(snpCallProbability(x)[snp.index[1:10], ]) |
367 |
-@ |
|
389 |
+@ |
|
368 | 390 |
|
369 | 391 |
\paragraph{\Robject{CopyNumberSet}: allele-specific copy number} |
370 | 392 |
|
... | ... |
@@ -377,19 +399,19 @@ ca <- CA(x, i=snp.index) |
377 | 399 |
ca <- ACN(x, "A", i=snp.index) |
378 | 400 |
cb <- CB(x, i=snp.index) |
379 | 401 |
ct <- ca+cb |
380 |
-@ |
|
402 |
+@ |
|
381 | 403 |
|
382 | 404 |
Total copy number at nonpolymorphic loci: |
383 | 405 |
<<ca>>= |
384 | 406 |
cn.nonpolymorphic <- CA(obj, i=which(!isSnp(obj))) |
385 |
-@ |
|
407 |
+@ |
|
386 | 408 |
|
387 | 409 |
Total copy number at both polymorphic and nonpolymorphic loci: |
388 | 410 |
<<totalCopynumber>>= |
389 | 411 |
##cn <- copyNumber(x) |
390 | 412 |
cn <- totalCopyNumber(x, sample(1:nrow(x), 1e4), 1:5) |
391 | 413 |
apply(cn, 2, median, na.rm=TRUE) |
392 |
-@ |
|
414 |
+@ |
|
393 | 415 |
|
394 | 416 |
\subsection{Other accessors} |
395 | 417 |
|
... | ... |
@@ -398,15 +420,15 @@ Information on physical position and chromosome can be accessed as follows: |
398 | 420 |
<<positionChromosome>>= |
399 | 421 |
xx <- position(x) |
400 | 422 |
yy <- chromosome(x) |
401 |
-@ |
|
423 |
+@ |
|
402 | 424 |
|
403 | 425 |
Parameters from the linear model used to estimate copy number are |
404 |
-stored in the slot \Robject{lM}. |
|
426 |
+stored in the slot \Robject{lM}. |
|
405 | 427 |
|
406 | 428 |
<<copynumberParameters>>= |
407 | 429 |
names(lM(x)) |
408 | 430 |
dim(lM(x)[[1]]) |
409 |
-@ |
|
431 |
+@ |
|
410 | 432 |
|
411 | 433 |
|
412 | 434 |
|
... | ... |
@@ -418,7 +440,7 @@ A histogram of the signal to noise ratio for the HapMap samples: |
418 | 440 |
|
419 | 441 |
<<plotSnr, fig=TRUE, include=FALSE>>= |
420 | 442 |
hist(x$SNR, xlab="SNR", main="", breaks=25) |
421 |
-@ |
|
443 |
+@ |
|
422 | 444 |
|
423 | 445 |
\begin{figure} |
424 | 446 |
\centering |
... | ... |
@@ -434,23 +456,23 @@ Figure \ref{fig:oneSample} plots physical position (horizontal axis) |
434 | 456 |
versus copy number (vertical axis) for the first sample. There is less |
435 | 457 |
information to estimate copy number at nonpolymorphic loci; improvements |
436 | 458 |
to the univariate prediction regions at nonpolymorphic loci are a future |
437 |
-area of research. |
|
459 |
+area of research. |
|
438 | 460 |
|
439 | 461 |
<<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>= |
440 | 462 |
par(las=1, mar=c(4, 5, 4, 2)) |
441 |
-plot(position(x), copyNumber(x)[, 1], pch=".", |
|
442 |
- cex=2, xaxt="n", col="grey20", ylim=c(0,4), |
|
463 |
+plot(position(x), copyNumber(x)[, 1], pch=".", |
|
464 |
+ cex=2, xaxt="n", col="grey20", ylim=c(0,4), |
|
443 | 465 |
ylab="copy number", xlab="physical position (Mb)", |
444 | 466 |
main=paste(sampleNames(x)[1], ", CHR:", unique(chromosome(x)))) |
445 | 467 |
points(position(x)[!isSnp(x)], copyNumber(x)[!isSnp(x), 1], |
446 | 468 |
pch=".", cex=2, col="lightblue") |
447 | 469 |
axis(1, at=pretty(range(position(x))), labels=pretty(range(position(x)))/1e6) |
448 |
-@ |
|
470 |
+@ |
|
449 | 471 |
|
450 | 472 |
<<idiogram, eval=FALSE, echo=FALSE>>= |
451 | 473 |
require(SNPchip) |
452 | 474 |
plotCytoband(22, new=FALSE, cytoband.ycoords=c(3.8, 4), label.cytoband=FALSE) |
453 |
-@ |
|
475 |
+@ |
|
454 | 476 |
|
455 | 477 |
\begin{figure} |
456 | 478 |
\includegraphics[width=0.9\textwidth]{copynumber-oneSample} |
... | ... |
@@ -470,14 +492,14 @@ plotCytoband(22, new=FALSE, cytoband.ycoords=c(3.8, 4), label.cytoband=FALSE) |
470 | 492 |
%xlim <- c(10*1e6, max(position(cnSet2))) |
471 | 493 |
%cols <- brewer.pal(8, "Dark2")[1:4] |
472 | 494 |
%for(j in 1:3){ |
473 |
-% plot(position(cnSet2), cnState[, j], pch=".", col=cols[2], xaxt="n", |
|
495 |
+% plot(position(cnSet2), cnState[, j], pch=".", col=cols[2], xaxt="n", |
|
474 | 496 |
% ylab="copy number", xlab="Physical position (Mb)", type="s", lwd=2, |
475 | 497 |
% ylim=c(0,6), xlim=xlim) |
476 | 498 |
% points(position(cnSet2), cns[, j], pch=".", col=cols[3]) |
477 | 499 |
% lines(position(cnSet2), cnState[,j], lwd=2, col=cols[2]) |
478 |
-% axis(1, at=pretty(position(cnSet)), |
|
500 |
+% axis(1, at=pretty(position(cnSet)), |
|
479 | 501 |
% labels=pretty(position(cnSet))/1e6) |
480 |
-% abline(h=c(1,3), lty=2, col=cols[1]) |
|
502 |
+% abline(h=c(1,3), lty=2, col=cols[1]) |
|
481 | 503 |
% legend("topright", bty="n", legend=sampleNames(cnSet)[j]) |
482 | 504 |
% legend("topleft", lty=1, col=cols[2], legend="copy number state", |
483 | 505 |
% bty="n", lwd=2) |
... | ... |
@@ -485,7 +507,7 @@ plotCytoband(22, new=FALSE, cytoband.ycoords=c(3.8, 4), label.cytoband=FALSE) |
485 | 507 |
% label.cytoband=FALSE, xlim=xlim) |
486 | 508 |
%} |
487 | 509 |
%par(op) |
488 |
-%@ |
|
510 |
+%@ |
|
489 | 511 |
% |
490 | 512 |
%\begin{figure} |
491 | 513 |
% \includegraphics[width=\textwidth]{copynumber-overlayHmmPredictions} |
... | ... |
@@ -524,13 +546,13 @@ plot(i, x, copynumber=2) |
524 | 546 |
##for(i in index){ |
525 | 547 |
## gt <- calls(cnSet)[i, ] |
526 | 548 |
## if(i != 89){ |
527 |
-## myScatter(cnSet[i, ], |
|
528 |
-## pch=pch, |
|
529 |
-## col=colors[snpCall(cnSet)[i, ]], |
|
549 |
+## myScatter(cnSet[i, ], |
|
550 |
+## pch=pch, |
|
551 |
+## col=colors[snpCall(cnSet)[i, ]], |
|
530 | 552 |
## bg=colors[snpCall(cnSet)[i, ]], cex=cex, |
531 | 553 |
## xlim=xlim, ylim=ylim) |
532 | 554 |
## mtext("A", 1, outer=TRUE, line=1) |
533 |
-## mtext("B", 2, outer=TRUE, line=1) |
|
555 |
+## mtext("B", 2, outer=TRUE, line=1) |
|
534 | 556 |
## crlmm:::ellipse.CNSet(cnSet[i, ], copynumber=2, batch="C", lwd=2, col="black") |
535 | 557 |
## crlmm:::ellipse.CNSet(cnSet[i, ], copynumber=2, batch="Y", lwd=2, col="grey50") |
536 | 558 |
## } else { |
... | ... |
@@ -558,7 +580,7 @@ plot(i, x, copynumber=2) |
558 | 580 |
\section{Session information} |
559 | 581 |
<<sessionInfo, results=tex>>= |
560 | 582 |
toLatex(sessionInfo()) |
561 |
-@ |
|
583 |
+@ |
|
562 | 584 |
|
563 | 585 |
\section*{References} |
564 | 586 |
|
... | ... |
@@ -566,6 +588,6 @@ toLatex(sessionInfo()) |
566 | 588 |
\bibliographystyle{plain} |
567 | 589 |
\bibliography{refs} |
568 | 590 |
%\end{bibliography} |
569 |
- |
|
591 |
+ |
|
570 | 592 |
|
571 | 593 |
\end{document} |