git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@40309 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -163,3 +163,17 @@ is decoded and scanned |
163 | 163 |
2009-06-08 R Scharpf - committed version 1.3.4 |
164 | 164 |
|
165 | 165 |
* fixed bug in the biasAdjNP function. updated biasAdj function |
166 |
+ |
|
167 |
+2009-06-20 R Scharpf - committed version 1.3.5 |
|
168 |
+ |
|
169 |
+* created CrlmmSetList, ABset, and CopyNumberSet classes. |
|
170 |
+ - added a few methods to support these classes |
|
171 |
+ |
|
172 |
+* crlmmWrapper does normalization / genotyping and optionally splits |
|
173 |
+ the results by chromosome |
|
174 |
+ |
|
175 |
+* several changes to the copynumber vignette |
|
176 |
+ |
|
177 |
+ |
|
178 |
+ |
|
179 |
+ |
... | ... |
@@ -1,15 +1,21 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.3.4 |
|
4 |
+Version: 1.3.5 |
|
5 | 5 |
Date: 2009-06-17 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0. |
9 | 9 |
License: Artistic-2.0 |
10 |
-Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase (>= 2.5.3), mvtnorm |
|
11 |
-Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2), genomewidesnp6Crlmm (>= 1.0.2), methods, GGdata, snpMatrix |
|
10 |
+Depends: methods |
|
11 |
+Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase (>= 2.5.3), mvtnorm, oligoClasses, ellipse, methods |
|
12 |
+Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2), genomewidesnp6Crlmm (>= 1.0.2), GGdata, snpMatrix |
|
12 | 13 |
Collate: AllClasses.R |
14 |
+ AllGenerics.R |
|
15 |
+ methods-ABset.R |
|
16 |
+ methods-CopyNumberSet.R |
|
17 |
+ methods-CrlmmSetList.R |
|
18 |
+ methods-eSet.R |
|
13 | 19 |
methods-SnpSet.R |
14 | 20 |
cnrma-functions.R |
15 | 21 |
crlmm-functions.R |
... | ... |
@@ -1,10 +1,83 @@ |
1 | 1 |
useDynLib("crlmm", .registration=TRUE) |
2 |
-import(Biobase) |
|
2 |
+ |
|
3 |
+##importClassesFrom(methods, ANY, character, data.frame, "function", integer, matrix, numeric) |
|
4 |
+ |
|
5 |
+importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet, |
|
6 |
+ SnpSet, MIAME, Versioned, VersionedBiobase, Versions) |
|
7 |
+ |
|
8 |
+importClassesFrom(oligoClasses, SnpLevelSet) |
|
9 |
+ |
|
10 |
+importMethodsFrom(Biobase, annotation, "annotation<-", annotatedDataFrameFrom, |
|
11 |
+ assayData, "assayData<-", combine, |
|
12 |
+ experimentData, "experimentData<-", |
|
13 |
+ fData, featureData, "featureData<-", featureNames, |
|
14 |
+ fvarMetadata, fvarLabels, |
|
15 |
+ pData, phenoData, "phenoData<-", pubMedIds, rowMedians, sampleNames, scanDates, |
|
16 |
+ "scanDates<-", storageMode, updateObject) |
|
17 |
+ |
|
18 |
+ |
|
19 |
+##importMethodsFrom(oligoClasses, chromosome, copyNumber, position) |
|
20 |
+importFrom(oligoClasses, chromosome, copyNumber, position, calls) |
|
21 |
+ |
|
22 |
+##importMethodsFrom(methods, initialize, show) |
|
23 |
+ |
|
24 |
+##importFrom(methods, "@<-", callNextMethod, new, validObject) |
|
25 |
+ |
|
26 |
+importFrom(Biobase, assayDataElement, assayDataElementNames, |
|
27 |
+ assayDataElementReplace, assayDataNew, classVersion) |
|
28 |
+ |
|
29 |
+importFrom(graphics, abline, axis, layout, legend, mtext, par, plot, |
|
30 |
+ polygon, rect, segments, text, points) |
|
31 |
+ |
|
32 |
+importFrom(grDevices, grey) |
|
33 |
+ |
|
3 | 34 |
importFrom(affyio, read.celfile.header, read.celfile) |
35 |
+ |
|
4 | 36 |
importFrom(preprocessCore, normalize.quantiles.use.target, normalize.quantiles) |
37 |
+ |
|
5 | 38 |
importFrom(utils, data, packageDescription, setTxtProgressBar, txtProgressBar) |
39 |
+ |
|
6 | 40 |
importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd) |
41 |
+ |
|
7 | 42 |
importFrom(genefilter, rowSds) |
43 |
+ |
|
8 | 44 |
importFrom(mvtnorm, dmvnorm) |
9 |
-export("crlmm", "list.celfiles", "computeCopynumber", "cnrma", "celDates", "crlmmIllumina", "readIdatFiles", "snprma") |
|
10 |
-exportMethods("calls", "confs") |
|
45 |
+ |
|
46 |
+importFrom(ellipse, ellipse) |
|
47 |
+ |
|
48 |
+exportClasses(ABset, CopyNumberSet, CrlmmSetList) |
|
49 |
+##S3method(ellipse, CopyNumberSet) |
|
50 |
+exportMethods(calls, |
|
51 |
+ CA, |
|
52 |
+ "CA<-", |
|
53 |
+ CB, |
|
54 |
+ "CB<-", |
|
55 |
+ cnIndex, |
|
56 |
+ confs, |
|
57 |
+ plot, |
|
58 |
+ points, |
|
59 |
+ show, |
|
60 |
+ snpIndex) |
|
61 |
+ |
|
62 |
+export(celDates, |
|
63 |
+ calls, |
|
64 |
+ chromosome, |
|
65 |
+ cnrma, |
|
66 |
+ combine, |
|
67 |
+ computeCopynumber, |
|
68 |
+ copyNumber, |
|
69 |
+ crlmm, |
|
70 |
+ crlmmIllumina, |
|
71 |
+ crlmmWrapper, |
|
72 |
+ ellipse, |
|
73 |
+ computeEmission, |
|
74 |
+ list.celfiles, |
|
75 |
+ position, |
|
76 |
+ readIdatFiles, |
|
77 |
+ sampleNames, |
|
78 |
+ scanDates, |
|
79 |
+ "scanDates<-", |
|
80 |
+ snprma) |
|
81 |
+ |
|
82 |
+ |
|
83 |
+ |
... | ... |
@@ -1,2 +1,11 @@ |
1 | 1 |
## Class definition |
2 |
+setClass("ABset", contains="eSet", |
|
3 |
+ prototype = prototype( |
|
4 |
+ new("VersionedBiobase", |
|
5 |
+ versions=c(classVersion("eSet"), SnpSet="1.0.0")))) |
|
2 | 6 |
setClass("crlmmSet", contains="eSet") |
7 |
+setClass("CrlmmSetList", contains="list") |
|
8 |
+setClass("CopyNumberSet", contains="eSet", |
|
9 |
+ prototype = prototype( |
|
10 |
+ new("VersionedBiobase", |
|
11 |
+ versions=c(classVersion("eSet"), SnpSet="1.0.0")))) |
... | ... |
@@ -127,6 +127,127 @@ celDates <- function(celfiles){ |
127 | 127 |
return(celdts) |
128 | 128 |
} |
129 | 129 |
|
130 |
+predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
|
131 |
+ pkgname <- paste(cdfName, "Crlmm", sep="") |
|
132 |
+ path <- system.file("extdata", package=pkgname) |
|
133 |
+ loader("23index.rda", .crlmmPkgEnv, pkgname) |
|
134 |
+ index <- getVarInEnv("index") |
|
135 |
+ ##load(file.path(path, "23index.rda")) |
|
136 |
+ XIndex <- index[[1]] |
|
137 |
+ if(class(res) != "ABset"){ |
|
138 |
+ A <- res$A |
|
139 |
+ B <- res$B |
|
140 |
+ } else{ |
|
141 |
+ A <- res@assayData[["A"]] |
|
142 |
+ B <- res@assayData[["B"]] |
|
143 |
+ } |
|
144 |
+ XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
145 |
+ SNR <- res$SNR |
|
146 |
+ if(sum(SNR>SNRMin)==1){ |
|
147 |
+ gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
|
148 |
+ }else{ |
|
149 |
+ gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]] |
|
150 |
+ } |
|
151 |
+ return(gender) |
|
152 |
+} |
|
153 |
+ |
|
154 |
+combineIntensities <- function(res, cnrmaResult, cdfName){ |
|
155 |
+ rownames(res$B) <- rownames(res$A) <- res$gns |
|
156 |
+ colnames(res$B) <- colnames(res$A) <- res$sns |
|
157 |
+ NP <- cnrmaResult$NP |
|
158 |
+ blank <- matrix(NA, nrow(NP), ncol(NP)) |
|
159 |
+ dimnames(blank) <- dimnames(NP) |
|
160 |
+ A <- rbind(res$A, NP) |
|
161 |
+ B <- rbind(res$B, blank) |
|
162 |
+ aD <- assayDataNew("lockedEnvironment", |
|
163 |
+ A=A, |
|
164 |
+ B=B) |
|
165 |
+ ABset <- new("ABset", |
|
166 |
+ assayData=aD, |
|
167 |
+ featureData=annotatedDataFrameFrom(A, byrow=TRUE), |
|
168 |
+ phenoData=annotatedDataFrameFrom(A, byrow=FALSE), |
|
169 |
+ annotation="genomewidesnp6") |
|
170 |
+ ABset$SNR <- res$SNR |
|
171 |
+ ABset$gender <- predictGender(res=res, cdfName=cdfName) |
|
172 |
+ return(ABset) |
|
173 |
+} |
|
174 |
+ |
|
175 |
+harmonizeSnpSet <- function(crlmmResult, ABset){ |
|
176 |
+ blank <- matrix(NA, length(cnNames(ABset)), ncol(ABset)) |
|
177 |
+ rownames(blank) <- cnNames(ABset) |
|
178 |
+ colnames(blank) <- sampleNames(ABset) |
|
179 |
+ crlmmCalls <- rbind(calls(crlmmResult), blank) |
|
180 |
+ crlmmConf <- rbind(confs(crlmmResult), blank) |
|
181 |
+ fD <- as.matrix(fData(crlmmResult)) |
|
182 |
+ fD2 <- matrix(NA, nrow(blank), ncol(fD)) |
|
183 |
+ rownames(fD2) <- rownames(blank) |
|
184 |
+ fD <- rbind(fD, fD2) |
|
185 |
+ aD <- assayDataNew("lockedEnvironment", |
|
186 |
+ calls=crlmmCalls, |
|
187 |
+ callProbability=crlmmConf) |
|
188 |
+ ##Make crlmmResult the same dimension as ABset |
|
189 |
+ fD <- new("AnnotatedDataFrame", |
|
190 |
+ data=data.frame(fD), |
|
191 |
+ varMetadata=fvarMetadata(crlmmResult)) |
|
192 |
+ crlmmResult <- new("SnpSet", |
|
193 |
+ call=crlmmCalls, |
|
194 |
+ callProbability=crlmmConf, |
|
195 |
+ featureData=fD, |
|
196 |
+ phenoData=phenoData(crlmmResult), |
|
197 |
+ scanDates=scanDates(ABset), |
|
198 |
+ annotation=annotation(ABset)) |
|
199 |
+ stopifnot(all.equal(dimnames(crlmmResult), dimnames(ABset))) |
|
200 |
+ crlmmResult |
|
201 |
+} |
|
202 |
+ |
|
203 |
+harmonizeDimnamesTo <- function(object1, object2){ |
|
204 |
+ object1 <- object1[match(featureNames(object2), featureNames(object1)), ] |
|
205 |
+ object1 <- object1[, match(sampleNames(object2), sampleNames(object1))] |
|
206 |
+ stopifnot(all.equal(featureNames(object1), featureNames(object2))) |
|
207 |
+ stopifnot(all.equal(sampleNames(object1), sampleNames(object2))) |
|
208 |
+ return(object1) |
|
209 |
+} |
|
210 |
+ |
|
211 |
+crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.it, splitByChr=TRUE, ...){ |
|
212 |
+ if(!file.exists(file.path(outdir, "crlmmResult.rda"))){ |
|
213 |
+ crlmmResult <- crlmm(filenames=filenames, cdfName=cdfName, save.it=TRUE, ...) |
|
214 |
+ if(save.it) save(crlmmResult, file=file.path(outdir, "crlmmResult.rda")) |
|
215 |
+ } else { |
|
216 |
+ message("Loading crlmmResult...") |
|
217 |
+ load(file.path(outdir, "crlmmResult.rda")) |
|
218 |
+ } |
|
219 |
+ |
|
220 |
+ ##- Make crlmmResult the same dimension as ABset |
|
221 |
+ ## -Create a list object that where rows and columns can be subset using '[' methods |
|
222 |
+ if(!file.exists(file.path(outdir, "cnrmaResult.rda"))){ |
|
223 |
+ message("Quantile normalizing the copy number probes") |
|
224 |
+ cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName) |
|
225 |
+ if(save.it) save(cnrmaResult, file=file.path(outdir, "cnrmaResult.rda")) |
|
226 |
+ } else{ |
|
227 |
+ message("Loading cnrmaResult...") |
|
228 |
+ load(file.path(outdir, "cnrmaResult.rda")) |
|
229 |
+ } |
|
230 |
+## loader("intensities.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
231 |
+## res <- get("res", envir=.crlmmPkgEnv) |
|
232 |
+ load(file.path(outdir, "intensities.rda")) |
|
233 |
+ ABset <- combineIntensities(res, cnrmaResult, cdfName) |
|
234 |
+ scanDates(ABset) <- as.character(celDates(filenames)) |
|
235 |
+ crlmmResult <- harmonizeSnpSet(crlmmResult, ABset) |
|
236 |
+ stopifnot(all.equal(dimnames(crlmmResult), dimnames(ABset))) |
|
237 |
+ crlmmList <- list(ABset, |
|
238 |
+ crlmmResult) |
|
239 |
+ crlmmList <- as(crlmmList, "CrlmmSetList") |
|
240 |
+ |
|
241 |
+ if(splitByChr){ |
|
242 |
+ message("Saving by chromosome") |
|
243 |
+ splitByChromosome(crlmmList, cdfName=cdfName, outdir=outdir) |
|
244 |
+ } else{ |
|
245 |
+ message("Saving crlmmList object to ", outdir) |
|
246 |
+ save(crlmmList, file=file.path(outdir, "crlmmList.rda")) |
|
247 |
+ } |
|
248 |
+ return() |
|
249 |
+} |
|
250 |
+ |
|
130 | 251 |
cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALSE){ |
131 | 252 |
pkgname <- getCrlmmAnnotationName(cdfName) |
132 | 253 |
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
... | ... |
@@ -186,22 +307,29 @@ goodSnps <- function(phi.thr, envir, fewAA=20, fewBB=20){ |
186 | 307 |
return(flags) |
187 | 308 |
} |
188 | 309 |
|
189 |
-instantiateObjects <- function(calls, conf, NP, plate, envir, chrom, A, B, |
|
310 |
+instantiateObjects <- function(calls, conf, NP, plate, envir, |
|
311 |
+ chrom, |
|
312 |
+ A, B, |
|
190 | 313 |
gender, SNRmin=5, SNR, |
191 | 314 |
pkgname, |
192 | 315 |
locusSet){ |
193 |
- pkgname <- paste(pkgname, "Crlmm", sep="") |
|
316 |
+## pkgname <- paste(pkgname, "Crlmm", sep="") |
|
194 | 317 |
envir[["chrom"]] <- chrom |
195 |
- CHR_INDEX <- paste(chrom, "index", sep="") |
|
196 |
- fname <- paste(CHR_INDEX, ".rda", sep="") |
|
197 |
- loader(fname, .crlmmPkgEnv, pkgname) |
|
198 |
- index <- get("index", envir=.crlmmPkgEnv) |
|
199 |
-## data(list=CHR_INDEX, package="genomewidesnp6Crlmm") |
|
200 |
- A <- A[index[[1]], SNR > SNRmin] |
|
201 |
- B <- B[index[[1]], SNR > SNRmin] |
|
202 |
- calls <- calls[index[[1]], SNR > SNRmin] |
|
203 |
- conf <- conf[index[[1]], SNR > SNRmin] |
|
204 |
- NP <- NP[index[[2]], SNR > SNRmin] |
|
318 |
+## CHR_INDEX <- paste(chrom, "index", sep="") |
|
319 |
+## fname <- paste(CHR_INDEX, ".rda", sep="") |
|
320 |
+## loader(fname, .crlmmPkgEnv, pkgname) |
|
321 |
+## index <- get("index", envir=.crlmmPkgEnv) |
|
322 |
+#### data(list=CHR_INDEX, package="genomewidesnp6Crlmm") |
|
323 |
+## A <- A[index[[1]], SNR > SNRmin] |
|
324 |
+## B <- B[index[[1]], SNR > SNRmin] |
|
325 |
+## calls <- calls[index[[1]], SNR > SNRmin] |
|
326 |
+## conf <- conf[index[[1]], SNR > SNRmin] |
|
327 |
+## NP <- NP[index[[2]], SNR > SNRmin] |
|
328 |
+ A <- A[, SNR > SNRmin] |
|
329 |
+ B <- B[, SNR > SNRmin] |
|
330 |
+ calls <- calls[, SNR > SNRmin] |
|
331 |
+ conf <- conf[, SNR > SNRmin] |
|
332 |
+ NP <- NP[, SNR > SNRmin] |
|
205 | 333 |
plate <- plate[SNR > SNRmin] |
206 | 334 |
uplate <- unique(plate) |
207 | 335 |
SNR <- SNR[SNR > SNRmin] |
... | ... |
@@ -279,7 +407,7 @@ instantiateObjects <- function(calls, conf, NP, plate, envir, chrom, A, B, |
279 | 407 |
envir[["tau2B"]] <- tau2A |
280 | 408 |
envir[["sig2A"]] <- tau2A |
281 | 409 |
envir[["sig2B"]] <- tau2A |
282 |
- sig2T <- matrix(NA, nrow(NP), ncol(NP)) |
|
410 |
+ sig2T <- matrix(NA, nrow(NP), length(uplate)) |
|
283 | 411 |
envir[["sig2T"]] <- sig2T |
284 | 412 |
envir[["corr"]] <- tau2A |
285 | 413 |
envir[["corrA.BB"]] <- tau2A |
... | ... |
@@ -294,16 +422,275 @@ instantiateObjects <- function(calls, conf, NP, plate, envir, chrom, A, B, |
294 | 422 |
envir[["normalNP"]] <- normalNP |
295 | 423 |
} |
296 | 424 |
|
425 |
+computeCopynumber <- function(object, |
|
426 |
+ CHR, |
|
427 |
+ bias.adj=FALSE, |
|
428 |
+ batch, |
|
429 |
+ SNRmin=5, |
|
430 |
+ cdfName="genomewidesnp6", ...){ |
|
431 |
+ ##require(oligoClasses) |
|
432 |
+ if(missing(CHR)) stop("Must specify CHR") |
|
433 |
+ if(missing(batch)) stop("Must specify batch") |
|
434 |
+ if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples") |
|
435 |
+ ##the AB intensities |
|
436 |
+ Nset <- object[[1]] |
|
437 |
+ ##indices of polymorphic loci |
|
438 |
+ index <- snpIndex(Nset) |
|
439 |
+ ABset <- Nset[index, ] |
|
440 |
+ ##indices of nonpolymorphic loci |
|
441 |
+ NPset <- Nset[-index, ] |
|
442 |
+ ##genotypes/confidences |
|
443 |
+ snpset <- object[[2]][index,] |
|
444 |
+ ##previous version of compute copy number |
|
445 |
+ envir <- new.env() |
|
446 |
+ message("Fitting model for copy number estimation...") |
|
447 |
+ .computeCopynumber(chrom=CHR, |
|
448 |
+ A=A(ABset), |
|
449 |
+ B=B(ABset), |
|
450 |
+ calls=calls(snpset), |
|
451 |
+ conf=confs(snpset), |
|
452 |
+ NP=A(NPset), |
|
453 |
+ plate=batch, |
|
454 |
+ envir=envir, |
|
455 |
+ SNR=ABset$SNR, |
|
456 |
+ bias.adj=FALSE, |
|
457 |
+ SNRmin=SNRmin, |
|
458 |
+ ...) |
|
459 |
+ message("Organizing results...") |
|
460 |
+ locusSet <- list2locusSet(envir, ABset=ABset, NPset=NPset, CHR=CHR, cdfName=cdfName) |
|
461 |
+ if(bias.adj){ |
|
462 |
+ message("Running bias adjustment...") |
|
463 |
+ .computeCopynumber(chrom=CHR, |
|
464 |
+ A=A(ABset), |
|
465 |
+ B=B(ABset), |
|
466 |
+ calls=calls(snpset), |
|
467 |
+ conf=confs(snpset), |
|
468 |
+ NP=A(NPset), |
|
469 |
+ plate=batch, |
|
470 |
+ envir=envir, |
|
471 |
+ SNR=ABset$SNR, |
|
472 |
+ bias.adj=TRUE, |
|
473 |
+ SNRmin=SNRmin, |
|
474 |
+ ...) |
|
475 |
+ message("Organizing results...") |
|
476 |
+ locusSet <- list2locusSet(envir, ABset=ABset, NPset=NPset, CHR=CHR, cdfName=cdfName) |
|
477 |
+ ##.GlobalEnv[["locusSetAdj"]] <- locusSetAdj |
|
478 |
+ } |
|
479 |
+ return(locusSet) |
|
480 |
+} |
|
481 |
+ |
|
482 |
+##getCopyNumberEnvironment <- function(crlmmSetList, cnSet){ |
|
483 |
+## envir <- new.env() |
|
484 |
+## envir[["A"]] <- A(crlmmSetList)[snpIndex(crlmmSetList), ] |
|
485 |
+## envir[["B"]] <- A(crlmmSetList)[snpIndex(crlmmSetList), ] |
|
486 |
+## envir[["CA"]] <- CA(cnSet)[snpIndex(cnSet), ]/100 |
|
487 |
+## envir[["CB"]] <- CB(cnSet)[snpIndex(cnSet), ]/100 |
|
488 |
+## envir[["NP"]] <- A(crlmmSetList)[cnIndex(crlmmSetList), ] |
|
489 |
+## envir[["calls"]] <- calls(crlmmSetList)[snpIndex(crlmmSetList), ] |
|
490 |
+## envir[["chrom"]] <- unique(chromosome(cnSet)) |
|
491 |
+## envir[["cnvs"]] <- cnNames(crlmmSetList) |
|
492 |
+## envir[["conf"]] <- conf(crlmmSetList) |
|
493 |
+## envir[["corr"]] <- fData(cnSet)$corr |
|
494 |
+## envir[["corrA.BB"]] <- fData(cnSet)$corrA.BB |
|
495 |
+## envir[["corrB.AA"]] <- fData(cnSet)$corrB.AA |
|
496 |
+## envir[["CT"]] <- CA(cnSet)[cnIndex(cnSet), ]/100 |
|
497 |
+## ##envir[["CT.sds"]] <- |
|
498 |
+## ##envir[["GT.A"]] |
|
499 |
+## ##envir[["GT.B"]] |
|
500 |
+## ##envir[["index"]] |
|
501 |
+## ##envir[["muA"]] |
|
502 |
+## ##envir[["muB"]] |
|
503 |
+## ##envir[["normal"]] |
|
504 |
+## ##envir[["normalNP"]] |
|
505 |
+## ##envir[["npflags"]] |
|
506 |
+## ##envir[["Ns"]] |
|
507 |
+## nuA <- fData(cnSet)[, grep("nuA", fvarLabels(cnSet))] |
|
508 |
+## nuB <- fData(cnSet)[, grep("nuB", fvarLabels(cnSet))] |
|
509 |
+## phiA <- fData(cnSet)[, grep("phiA", fvarLabels(cnSet))] |
|
510 |
+## phiB <- fData(cnSet)[, grep("phiB", fvarLabels(cnSet))] |
|
511 |
+## envir[["nuA"]] <- nuA[snpIndex(cnSet), ] |
|
512 |
+## envir[["nuB"]] <- nuB[snpIndex(cnSet), ] |
|
513 |
+## envir[["phiA"]] <- phiA[snpIndex(cnSet), ] |
|
514 |
+## envir[["phiB"]] <- phiB[snpIndex(cnSet), ] |
|
515 |
+## envir[["nuT"]] <- nuA[cnIndex(cnSet), ] |
|
516 |
+## envir[["phiT"]] <- phiA[cnIndex(cnSet), ] |
|
517 |
+## envir[["plate"]] <- cnSet$batch |
|
518 |
+## |
|
519 |
+##} |
|
520 |
+ |
|
521 |
+updateNuPhi <- function(crlmmSetList, cnSet){ |
|
522 |
+ ##TODO: remove the use of environments. |
|
523 |
+ ##repopulate the environment |
|
524 |
+ crlmmSetList <- crlmmSetList[, match(sampleNames(cnSet), sampleNames(crlmmSetList))] |
|
525 |
+ crlmmSetList <- crlmmSetList[match(featureNames(cnSet), featureNames(crlmmSetList)), ] |
|
526 |
+ ##envir <- getCopyNumberEnvironment(crlmmSetList, cnSet) |
|
527 |
+ Nset <- crlmmSetList[[1]] |
|
528 |
+ ##indices of polymorphic loci |
|
529 |
+ index <- snpIndex(Nset) |
|
530 |
+ ABset <- Nset[index, ] |
|
531 |
+ ##indices of nonpolymorphic loci |
|
532 |
+ NPset <- Nset[-index, ] |
|
533 |
+ ##genotypes/confidences |
|
534 |
+ snpset <- crlmmSetList[[2]][index,] |
|
535 |
+ ##previous version of compute copy number |
|
536 |
+ envir <- new.env() |
|
537 |
+ message("Running bias adjustment...") |
|
538 |
+## .computeCopynumber(chrom=CHR, |
|
539 |
+## A=A(ABset), |
|
540 |
+## B=B(ABset), |
|
541 |
+## calls=calls(snpset), |
|
542 |
+## conf=confs(snpset), |
|
543 |
+## NP=A(NPset), |
|
544 |
+## plate=batch, |
|
545 |
+## envir=envir, |
|
546 |
+## SNR=ABset$SNR, |
|
547 |
+## bias.adj=TRUE, |
|
548 |
+## SNRmin=SNRmin, |
|
549 |
+## ...) |
|
550 |
+} |
|
551 |
+ |
|
552 |
+list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){ |
|
553 |
+ if(missing(CHR)) stop("Must specify chromosome") |
|
554 |
+ pkgname <- paste(cdfName, "Crlmm", sep="") |
|
555 |
+ path <- system.file("extdata", package=pkgname) |
|
556 |
+ loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
557 |
+ cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
|
558 |
+ loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
559 |
+ snpProbes <- get("snpProbes", envir=.crlmmPkgEnv) |
|
560 |
+ ##require(oligoClasses) || stop("oligoClasses package not available") |
|
561 |
+ if(length(ls(envir)) == 0) stop("environment empty") |
|
562 |
+ batch <- envir[["plate"]] |
|
563 |
+ |
|
564 |
+ ##SNP copy number |
|
565 |
+ CA <- envir[["CA"]] |
|
566 |
+ dimnames(CA) <- list(envir[["snps"]], envir[["sns"]]) |
|
567 |
+ CB <- envir[["CB"]] |
|
568 |
+ dimnames(CB) <- dimnames(CA) |
|
569 |
+ |
|
570 |
+ ##NP copy number |
|
571 |
+ CT <- envir[["CT"]] |
|
572 |
+ rownames(CT) <- envir[["cnvs"]] |
|
573 |
+ colnames(CT) <- envir[["sns"]] |
|
574 |
+ sig2T <- envir[["sig2T"]] |
|
575 |
+ rownames(sig2T) <- rownames(CT) |
|
576 |
+ nuT <- envir[["nuT"]] |
|
577 |
+ colnames(nuT) <- paste("nuT", unique(batch), sep="_") |
|
578 |
+ ##nuA <- rbind(nuA, nuT) |
|
579 |
+ ##nuB <- rbind(nuA, blank) |
|
580 |
+ phiT <- envir[["phiT"]] |
|
581 |
+ colnames(phiT) <- paste("phiT", unique(batch), sep="_") |
|
582 |
+ naMatrix <- matrix(NA, nrow(CT), ncol(CT)) |
|
583 |
+ dimnames(naMatrix) <- dimnames(CT) |
|
584 |
+ |
|
585 |
+ CA <- rbind(CA, CT) |
|
586 |
+ CB <- rbind(CB, naMatrix) |
|
587 |
+ |
|
588 |
+ ##SNP parameters |
|
589 |
+ tau2A <- envir[["tau2A"]] |
|
590 |
+ colnames(tau2A) <- paste("tau2A", unique(batch), sep="_") |
|
591 |
+ tau2B <- envir[["tau2B"]] |
|
592 |
+ colnames(tau2B) <- paste("tau2B", unique(batch), sep="_") |
|
593 |
+ sig2A <- envir[["sig2A"]] |
|
594 |
+ colnames(sig2A) <- paste("sig2A", unique(batch), sep="_") |
|
595 |
+ sig2B <- envir[["sig2B"]] |
|
596 |
+ colnames(sig2B) <- paste("sig2B", unique(batch), sep="_") |
|
597 |
+ nuA <- envir[["nuA"]] |
|
598 |
+ colnames(nuA) <- paste("nuA", unique(batch), sep="_") |
|
599 |
+ nuB <- envir[["nuB"]] |
|
600 |
+ colnames(nuB) <- paste("nuB", unique(batch), sep="_") |
|
601 |
+ phiA <- envir[["phiA"]] |
|
602 |
+ colnames(phiA) <- paste("phiA", unique(batch), sep="_") |
|
603 |
+ phiB <- envir[["phiB"]] |
|
604 |
+ colnames(phiB) <- paste("phiB", unique(batch), sep="_") |
|
605 |
+ corr <- envir[["corr"]] |
|
606 |
+ colnames(corr) <- paste("corr", unique(batch), sep="_") |
|
607 |
+ corrA.BB <- envir[["corrA.BB"]] |
|
608 |
+ colnames(corrA.BB) <- paste("corrA.BB", unique(batch), sep="_") |
|
609 |
+ corrB.AA <- envir[["corrB.AA"]] |
|
610 |
+ colnames(corrB.AA) <- paste("corrB.AA", unique(batch), sep="_") |
|
611 |
+ |
|
612 |
+ |
|
613 |
+ ##Combine SNP and NP parameters |
|
614 |
+ naMatrixParams <- matrix(NA, nrow(CT), length(unique(batch))) |
|
615 |
+ dimnames(naMatrixParams) <- list(rownames(CT), unique(batch)) |
|
616 |
+ tau2A <- rbind(tau2A, naMatrixParams) |
|
617 |
+ tau2B <- rbind(tau2B, naMatrixParams) |
|
618 |
+ sig2A <- rbind(sig2A, sig2T) |
|
619 |
+ sig2B <- rbind(sig2B, naMatrixParams) |
|
620 |
+ corr <- rbind(corr, naMatrixParams) |
|
621 |
+ corrA.BB <- rbind(corrA.BB, naMatrixParams) |
|
622 |
+ corrB.AA <- rbind(corrB.AA, naMatrixParams) |
|
623 |
+ nuA <- rbind(nuA, nuT) |
|
624 |
+ phiA <- rbind(phiA, phiT) |
|
625 |
+ nuB <- rbind(nuB, naMatrixParams) |
|
626 |
+ phiB <- rbind(phiB, naMatrixParams) |
|
627 |
+ rownames(tau2A) <- rownames(tau2B) <- rownames(sig2A) <- rownames(sig2B) <- rownames(CA) |
|
628 |
+ rownames(corr) <- rownames(corrA.BB) <- rownames(corrB.AA) <- rownames(CA) |
|
629 |
+ rownames(nuA) <- rownames(phiA) <- rownames(nuB) <- rownames(phiB) <- rownames(CA) |
|
630 |
+ |
|
631 |
+ ##phenodata |
|
632 |
+ phenodata <- phenoData(ABset) |
|
633 |
+ phenodata <- phenodata[match(envir[["sns"]], sampleNames(phenodata)), ] |
|
634 |
+ if(!("batch" %in% varLabels(phenodata))) phenodata$batch <- envir[["plate"]] |
|
635 |
+ |
|
636 |
+ ##Feature Data |
|
637 |
+ position.snp <- snpProbes[match(envir[["snps"]], rownames(snpProbes)), "position"] |
|
638 |
+ position.np <- cnProbes[match(envir[["cnvs"]], rownames(cnProbes)), "position"] |
|
639 |
+ position <- c(position.snp, position.np) |
|
640 |
+ if(!(identical(names(position), rownames(CA)))){ |
|
641 |
+ position <- position[match(rownames(CA), names(position))] |
|
642 |
+ } |
|
643 |
+ fd <- data.frame(cbind(CHR, |
|
644 |
+ position, |
|
645 |
+ tau2A, |
|
646 |
+ tau2B, |
|
647 |
+ sig2A, |
|
648 |
+ sig2B, |
|
649 |
+ nuA, |
|
650 |
+ nuB, |
|
651 |
+ phiA, |
|
652 |
+ phiB, |
|
653 |
+ corr, |
|
654 |
+ corrA.BB, |
|
655 |
+ corrB.AA)) |
|
656 |
+ colnames(fd)[1:2] <- c("chromosome", "position") |
|
657 |
+ rownames(fd) <- rownames(CA) |
|
658 |
+ fD <- new("AnnotatedDataFrame", |
|
659 |
+ data=fd, |
|
660 |
+ varMetadata=data.frame(labelDescription=colnames(fd))) |
|
661 |
+ assayData <- assayDataNew("lockedEnvironment", |
|
662 |
+ CA=CA, |
|
663 |
+ CB=CB) |
|
664 |
+ cnset <- new("CopyNumberSet", |
|
665 |
+ assayData=assayData, |
|
666 |
+ featureData=fD, |
|
667 |
+ phenoData=phenodata, |
|
668 |
+ annotation="genomewidesnp6") |
|
669 |
+ cnset <- thresholdCopyNumberSet(cnset) |
|
670 |
+ return(cnset) |
|
671 |
+} |
|
672 |
+ |
|
673 |
+thresholdCopyNumberSet <- function(object){ |
|
674 |
+ ca <- CA(object) |
|
675 |
+ cb <- CB(object) |
|
676 |
+ ca[ca < 5] <- 5 |
|
677 |
+ ca[ca > 500] <- 500 |
|
678 |
+ cb[cb < 5] <- 5 |
|
679 |
+ cb[cb > 500] <- 500 |
|
680 |
+ CA(object) <- ca |
|
681 |
+ CB(object) <- cb |
|
682 |
+ return(object) |
|
683 |
+} |
|
297 | 684 |
|
298 | 685 |
|
299 |
-computeCopynumber <- function(chrom, |
|
686 |
+.computeCopynumber <- function(chrom, |
|
300 | 687 |
A, |
301 | 688 |
B, |
302 | 689 |
calls, |
303 | 690 |
conf, |
304 | 691 |
NP, |
305 | 692 |
plate, |
306 |
- MIN.OBS=1, |
|
693 |
+ MIN.OBS=5, |
|
307 | 694 |
envir, |
308 | 695 |
P, |
309 | 696 |
DF.PRIOR=50, |
... | ... |
@@ -321,11 +708,18 @@ computeCopynumber <- function(chrom, |
321 | 708 |
stop("plate must the same length as the number of columns of A") |
322 | 709 |
} |
323 | 710 |
set.seed(seed) |
324 |
- if(missing(chrom)) stop("must specify chromosome") |
|
711 |
+ ##if(missing(chrom)) stop("must specify chromosome") |
|
325 | 712 |
if(length(ls(envir)) == 0) { |
326 |
- instantiateObjects(calls=calls, conf=conf, NP=NP, plate=plate, |
|
327 |
- envir=envir, chrom=chrom, A=A, B=B, |
|
328 |
- gender=gender, SNR=SNR, SNRmin=SNRmin, |
|
713 |
+ instantiateObjects(calls=calls, |
|
714 |
+ conf=conf, |
|
715 |
+ NP=NP, |
|
716 |
+ plate=plate, |
|
717 |
+ envir=envir, |
|
718 |
+ chrom=chrom, |
|
719 |
+ A=A, B=B, |
|
720 |
+ gender=gender, |
|
721 |
+ SNR=SNR, |
|
722 |
+ SNRmin=SNRmin, |
|
329 | 723 |
pkgname=cdfName) |
330 | 724 |
} |
331 | 725 |
plate <- envir[["plate"]] |
... | ... |
@@ -510,7 +904,8 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk |
510 | 904 |
CT[, plate==uplate[p]] <- matrix(as.integer(100*T), nrow(T), ncol(T)) |
511 | 905 |
|
512 | 906 |
##Variance for prediction region |
513 |
- sig2T[, plate==uplate[[p]]] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
907 |
+ ##sig2T[, plate==uplate[[p]]] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
908 |
+ sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
514 | 909 |
envir[["sig2T"]] <- sig2T |
515 | 910 |
envir[["CT"]] <- CT |
516 | 911 |
envir[["phiT"]] <- phiT |
... | ... |
@@ -653,7 +1048,7 @@ oneBatch <- function(plateIndex, |
653 | 1048 |
Ns <- envir[["Ns"]] |
654 | 1049 |
|
655 | 1050 |
##--------------------------------------------------------------------------- |
656 |
- ## Predict sufficient statistics for unobserved genotypes (plate-specific) |
|
1051 |
+ ## Impute sufficient statistics for unobserved genotypes (plate-specific) |
|
657 | 1052 |
##--------------------------------------------------------------------------- |
658 | 1053 |
index.AA <- which(Ns[, p, "AA"] >= 3) |
659 | 1054 |
index.AB <- which(Ns[, p, "AB"] >= 3) |
... | ... |
@@ -1042,11 +1437,11 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){ |
1042 | 1437 |
proportionSamplesAltered <- rowMeans(mostLikelyState != 3) |
1043 | 1438 |
}else{ |
1044 | 1439 |
##should also consider pseud |
1045 |
- browser() |
|
1440 |
+## browser() |
|
1046 | 1441 |
gender <- envir[["gender"]] |
1047 |
- tmp3 <- tmp2 |
|
1048 |
- tmp3[, gender=="male"] <- tmp2[, gender=="male"] != 2 |
|
1049 |
- tmp3[, gender=="female"] <- tmp2[, gender=="female"] != 3 |
|
1442 |
+## tmp3 <- tmp2 |
|
1443 |
+## tmp3[, gender=="male"] <- tmp2[, gender=="male"] != 2 |
|
1444 |
+## tmp3[, gender=="female"] <- tmp2[, gender=="female"] != 3 |
|
1050 | 1445 |
} |
1051 | 1446 |
##Those near 1 have NaNs for nu and phi. this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome |
1052 | 1447 |
##ii <- proportionSamplesAltered < PROP |
... | ... |
@@ -1301,7 +1696,6 @@ biasAdjNP <- function(plateIndex, envir, priorProb){ |
1301 | 1696 |
sig2T <- envir[["sig2T"]] |
1302 | 1697 |
normalNP <- normalNP[, plate==uplate[p]] |
1303 | 1698 |
NP <- NP[, plate==uplate[p]] |
1304 |
- ##sig2T <- sig2T[, plate==uplate[p]] |
|
1305 | 1699 |
sig2T <- sig2T[, p] |
1306 | 1700 |
|
1307 | 1701 |
|
... | ... |
@@ -1358,3 +1752,159 @@ biasAdjNP <- function(plateIndex, envir, priorProb){ |
1358 | 1752 |
envir[["normalNP"]] <- tmp |
1359 | 1753 |
} |
1360 | 1754 |
|
1755 |
+ |
|
1756 |
+getParams <- function(object, batch){ |
|
1757 |
+ batch <- unique(object$batch) |
|
1758 |
+ if(length(batch) > 1) stop("batch variable not unique") |
|
1759 |
+ nuA <- as.numeric(fData(object)[, match(paste("nuA", batch, sep="_"), fvarLabels(object))]) |
|
1760 |
+ nuB <- as.numeric(fData(object)[, match(paste("nuB", batch, sep="_"), fvarLabels(object))]) |
|
1761 |
+ phiA <- as.numeric(fData(object)[, match(paste("phiA", batch, sep="_"), fvarLabels(object))]) |
|
1762 |
+ phiB <- as.numeric(fData(object)[, match(paste("phiB", batch, sep="_"), fvarLabels(object))]) |
|
1763 |
+ tau2A <- as.numeric(fData(object)[, match(paste("tau2A", batch, sep="_"), fvarLabels(object))]) |
|
1764 |
+ tau2B <- as.numeric(fData(object)[, match(paste("tau2B", batch, sep="_"), fvarLabels(object))]) |
|
1765 |
+ sig2A <- as.numeric(fData(object)[, match(paste("sig2A", batch, sep="_"), fvarLabels(object))]) |
|
1766 |
+ sig2B <- as.numeric(fData(object)[, match(paste("sig2B", batch, sep="_"), fvarLabels(object))]) |
|
1767 |
+ corrA.BB <- as.numeric(fData(object)[, match(paste("corrA.BB", batch, sep="_"), fvarLabels(object))]) |
|
1768 |
+ corrB.AA <- as.numeric(fData(object)[, match(paste("corrB.AA", batch, sep="_"), fvarLabels(object))]) |
|
1769 |
+ corr <- as.numeric(fData(object)[, match(paste("corr", batch, sep="_"), fvarLabels(object))]) |
|
1770 |
+ params <- list(nuA=nuA, |
|
1771 |
+ nuB=nuB, |
|
1772 |
+ phiA=phiA, |
|
1773 |
+ phiB=phiB, |
|
1774 |
+ tau2A=tau2A, |
|
1775 |
+ tau2B=tau2B, |
|
1776 |
+ sig2A=sig2A, |
|
1777 |
+ sig2B=sig2B, |
|
1778 |
+ corrA.BB=corrA.BB, |
|
1779 |
+ corrB.AA=corrB.AA, |
|
1780 |
+ corr=corr) |
|
1781 |
+ return(params) |
|
1782 |
+} |
|
1783 |
+ |
|
1784 |
+computeEmission <- function(crlmmResults, cnset, copyNumberStates=0:5, MIN=2^3){ |
|
1785 |
+ ##threshold small nu's and phis |
|
1786 |
+ cnset <- thresholdModelParams(cnset, MIN=MIN) |
|
1787 |
+ index <- order(chromosome(cnset), position(cnset)) |
|
1788 |
+ if(any(diff(index) > 1)) stop("must be ordered by chromosome and physical position") |
|
1789 |
+ emissionProbs <- array(NA, dim=c(nrow(cnset), ncol(cnset), length(copyNumberStates))) |
|
1790 |
+ dimnames(emissionProbs) <- list(featureNames(crlmmResults), |
|
1791 |
+ sampleNames(crlmmResults), |
|
1792 |
+ paste("copy.number_", copyNumberStates, sep="")) |
|
1793 |
+ batch <- cnset$batch |
|
1794 |
+ for(i in seq(along=unique(batch))){ |
|
1795 |
+ if(i == 1) cat("Computing emission probabilities \n") |
|
1796 |
+ message("batch ", unique(batch)[i], "...") |
|
1797 |
+ emissionProbs[, batch == unique(batch)[i], ] <- .getEmission(crlmmResults, cnset, batch=unique(batch)[i], |
|
1798 |
+ copyNumberStates=copyNumberStates) |
|
1799 |
+ } |
|
1800 |
+ emissionProbs |
|
1801 |
+} |
|
1802 |
+ |
|
1803 |
+thresholdModelParams <- function(object, MIN=2^3){ |
|
1804 |
+ nuA <- fData(object)[, grep("nuA", fvarLabels(object))] |
|
1805 |
+ nuB <- fData(object)[, grep("nuB", fvarLabels(object))] |
|
1806 |
+ phiA <- fData(object)[, grep("phiA", fvarLabels(object))] |
|
1807 |
+ phiB <- fData(object)[, grep("phiB", fvarLabels(object))] |
|
1808 |
+ |
|
1809 |
+ nuA[nuA < MIN] <- MIN |
|
1810 |
+ nuB[nuB < MIN] <- MIN |
|
1811 |
+ phiA[phiA < MIN] <- MIN |
|
1812 |
+ phiB[phiB < MIN] <- MIN |
|
1813 |
+ fData(object)[, grep("nuA", fvarLabels(object))] <- nuA |
|
1814 |
+ fData(object)[, grep("nuB", fvarLabels(object))] <- nuB |
|
1815 |
+ fData(object)[, grep("phiA", fvarLabels(object))] <- phiA |
|
1816 |
+ fData(object)[, grep("phiB", fvarLabels(object))] <- phiB |
|
1817 |
+ object |
|
1818 |
+} |
|
1819 |
+ |
|
1820 |
+.getEmission <- function(crlmmResults, cnset, batch, copyNumberStates){ |
|
1821 |
+ if(length(batch) > 1) stop("batch variable not unique") |
|
1822 |
+ crlmmResults <- crlmmResults[, cnset$batch==batch] |
|
1823 |
+ cnset <- cnset[, cnset$batch == batch] |
|
1824 |
+ emissionProbs <- array(NA, dim=c(nrow(crlmmResults[[1]]), ncol(crlmmResults[[1]]), length(copyNumberStates))) |
|
1825 |
+ |
|
1826 |
+ snpset <- cnset[snpIndex(cnset), ] |
|
1827 |
+ params <- getParams(snpset, batch=batch) |
|
1828 |
+ ##attach(params) |
|
1829 |
+ corr <- params[["corr"]] |
|
1830 |
+ corrA.BB <- params[["corrA.BB"]] |
|
1831 |
+ corrB.AA <- params[["corrB.AA"]] |
|
1832 |
+ nuA <- params[["nuA"]] |
|
1833 |
+ nuB <- params[["nuB"]] |
|
1834 |
+ phiA <- params[["phiA"]] |
|
1835 |
+ phiB <- params[["phiB"]] |
|
1836 |
+ sig2A <- params[["sig2A"]] |
|
1837 |
+ sig2B <- params[["sig2B"]] |
|
1838 |
+ tau2A <- params[["tau2A"]] |
|
1839 |
+ tau2B <- params[["tau2B"]] |
|
1840 |
+ a <- as.numeric(log2(A(crlmmResults[snpIndex(crlmmResults), ]))) |
|
1841 |
+ b <- as.numeric(log2(B(crlmmResults[snpIndex(crlmmResults), ]))) |
|
1842 |
+ for(k in seq(along=copyNumberStates)){ |
|
1843 |
+ ##cat(k, " ") |
|
1844 |
+ CN <- copyNumberStates[k] |
|
1845 |
+ f.x.y <- matrix(0, nrow(snpset), ncol(snpset)) |
|
1846 |
+ for(CA in 0:CN){ |
|
1847 |
+ CB <- CN-CA |
|
1848 |
+ sigmaA <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0)) |
|
1849 |
+ sigmaB <- sqrt(tau2B*(CB==0) + sig2B*(CB > 0)) |
|
1850 |
+ if(CA == 0 & CB > 0) r <- corrA.BB |
|
1851 |
+ if(CA > 0 & CB == 0) r <- corrB.AA |
|
1852 |
+ if(CA > 0 & CB > 0) r <- corr |
|
1853 |
+ if(CA == 0 & CB == 0) r <- 0 |
|
1854 |
+ muA <- log2(nuA+CA*phiA) |
|
1855 |
+ muB <- log2(nuB+CB*phiB) |
|
1856 |
+ |
|
1857 |
+ ##might want to allow the variance to be sample-specific |
|
1858 |
+ ##TODO: |
|
1859 |
+ ## rho, sd.A, and sd.B are locus-specific |
|
1860 |
+ ## Some samples are more noisy than others. |
|
1861 |
+ ## |
|
1862 |
+ ## - scale the variances by a sample-specific estimate of the variances |
|
1863 |
+ ## var(I_A, ijp) = sigma_A_ip * sigma_A_jp |
|
1864 |
+ |
|
1865 |
+ meanA <- as.numeric(matrix(muA, nrow(snpset), ncol(snpset))) |
|
1866 |
+ meanB <- as.numeric(matrix(muB, nrow(snpset), ncol(snpset))) |
|
1867 |
+ rho <- as.numeric(matrix(r, nrow(snpset), ncol(snpset))) |
|
1868 |
+ sd.A <- as.numeric(matrix(sigmaA, nrow(snpset), ncol(snpset))) |
|
1869 |
+ sd.B <- as.numeric(matrix(sigmaB, nrow(snpset), ncol(snpset))) |
|
1870 |
+ Q.x.y <- 1/(1-rho^2)*(((a - meanA)/sd.A)^2 + ((b - meanB)/sd.B)^2 - 2*rho*((a - meanA)*(b - meanB))/(sd.A*sd.B)) |
|
1871 |
+ ##for CN states > 1, assume that any of the possible genotypes are equally likely a priori...just take the sum |
|
1872 |
+ ##for instance, for state copy number 2 there are three combinations: AA, AB, BB |
|
1873 |
+ ## -- two of the three combinations should be near zero. |
|
1874 |
+ ## TODO: copy-neutral LOH would put near-zero mass on CA > 0, CB > 0 |
|
1875 |
+ f.x.y <- f.x.y + matrix(1/(2*pi*sd.A*sd.B*sqrt(1-rho^2))*exp(-0.5*Q.x.y), nrow(snpset), ncol(snpset)) |
|
1876 |
+ } |
|
1877 |
+ emissionProbs[snpIndex(crlmmResults), , k] <- log(f.x.y) |
|
1878 |
+ } |
|
1879 |
+ |
|
1880 |
+ |
|
1881 |
+ cnset <- cnset[cnIndex(cnset), ] |
|
1882 |
+ params <- getParams(cnset, batch=batch) |
|
1883 |
+ nuA <- params[["nuA"]] |
|
1884 |
+ phiA <- params[["phiA"]] |
|
1885 |
+ sig2A <- params[["sig2A"]] |
|
1886 |
+ a <- as.numeric(log2(A(crlmmResults[cnIndex(crlmmResults), ]))) |
|
1887 |
+ |
|
1888 |
+## ids=featureNames(crlmmResults)[index] |
|
1889 |
+## aa=A(crlmmResults[cnIndex(crlmmResults), ]) |
|
1890 |
+## ii <- match(ids, rownames(aa)) |
|
1891 |
+## ii <- ii[!is.na(ii)] |
|
1892 |
+## ##putative deletion |
|
1893 |
+## log2(aa[ii, 1]) |
|
1894 |
+## M <- matrix(NA, length(ii), length(copyNumberStates)) |
|
1895 |
+## copynumber=1/phiA[ii]*(aa[ii, 1] - nuA[ii]) |
|
1896 |
+ for(k in seq(along=copyNumberStates)){ |
|
1897 |
+ CT <- copyNumberStates[k] |
|
1898 |
+ mus.matrix=matrix(log2(nuA + CT*phiA), nrow(cnset), ncol(cnset)) |
|
1899 |
+ mus <- as.numeric(matrix(log2(nuA + CT*phiA), nrow(cnset), ncol(cnset))) |
|
1900 |
+ ##Again, should make sds sample-specific |
|
1901 |
+ sds.matrix <- matrix(sqrt(sig2A), nrow(cnset), ncol(cnset)) |
|
1902 |
+ sds <- as.numeric(matrix(sqrt(sig2A), nrow(cnset), ncol(cnset))) |
|
1903 |
+ |
|
1904 |
+ tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(cnset), ncol(cnset)) |
|
1905 |
+ emissionProbs[cnIndex(crlmmResults), , k] <- log(tmp) |
|
1906 |
+ ##first samples emission probs |
|
1907 |
+## M[, k] <- log(tmp)[ii, 1] |
|
1908 |
+ } |
|
1909 |
+ emissionProbs |
|
1910 |
+} |
... | ... |
@@ -1,7 +1,5 @@ |
1 | 1 |
## Methods for crlmm |
2 | 2 |
|
3 |
-setGeneric("calls", function(x) standardGeneric("calls")) |
|
4 |
-setMethod("calls", "SnpSet", function(x) assayData(x)$call) |
|
5 | 3 |
|
6 |
-setGeneric("confs", function(x) standardGeneric("confs")) |
|
4 |
+setMethod("calls", "SnpSet", function(object) assayData(object)$call) |
|
7 | 5 |
setMethod("confs", "SnpSet", function(x) assayData(x)$callProbability) |
... | ... |
@@ -23,9 +23,12 @@ B Carvalho |
23 | 23 |
### FOR CNRMA |
24 | 24 |
##################################### |
25 | 25 |
- Bias adjustment for X chromosome |
26 |
+ |
|
26 | 27 |
- adjust nu for altered copy number (crosshyb) |
27 |
-- a class for the assay data elements and metadata -- oligoSnpSet? |
|
28 |
- - should we consider a name change -- e.g., LocusSet |
|
29 |
-- a class for the intermediate files and methods for SNP-specific plots |
|
30 |
-- uncertainty estimates for copy number. Put in cnConfidence slot |
|
28 |
+ |
|
29 |
+- should store parameters in something besides the featureData slot |
|
30 |
+ |
|
31 |
+ - Perhaps add a slot for parameters and a slot for batch. |
|
32 |
+ |
|
33 |
+ |
|
31 | 34 |
|
... | ... |
@@ -36,6 +36,15 @@ HapMap samples. |
36 | 36 |
We preprocess and genotype the samples as described in the CRLMM |
37 | 37 |
vignette. |
38 | 38 |
|
39 |
+<<test, eval=FALSE, echo=FALSE>>= |
|
40 |
+library(crlmm) |
|
41 |
+load("~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda") |
|
42 |
+ |
|
43 |
+chromosome(example.cnset)[1:5] |
|
44 |
+position(example.cnset)[1:5] |
|
45 |
+scanDates(example.cnset)[1:5] |
|
46 |
+@ |
|
47 |
+ |
|
39 | 48 |
<<requiredPackages>>= |
40 | 49 |
library(crlmm) |
41 | 50 |
library(genomewidesnp6Crlmm) |
... | ... |
@@ -49,31 +58,24 @@ celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full |
49 | 58 |
outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy" |
50 | 59 |
@ |
51 | 60 |
|
52 |
-Preprocess and genotype (for more info see the crlmm vignette). |
|
61 |
+Preprocess and genotype (for more info see the crlmm vignette): |
|
53 | 62 |
|
54 |
-<<genotype>>= |
|
55 |
-if (!exists("crlmmResult")) { |
|
56 |
- if (file.exists(file.path(outdir, "crlmmResult.rda"))){ |
|
57 |
- load(file.path(outdir, "intensities.rda")) |
|
58 |
- load(file.path(outdir, "crlmmResult.rda")) |
|
59 |
- } |
|
60 |
- else { |
|
61 |
- crlmmResult <- crlmm(celFiles, save.it=TRUE, intensityFile=file.path(outdir, "intensities.rda")) |
|
62 |
- save(crlmmResult, file=file.path(outdir, "crlmmResult.rda")) |
|
63 |
- } |
|
63 |
+<<preprocessAndGenotype>>= |
|
64 |
+crlmmFilenames <- file.path(outdir, paste("crlmmResults_", 1:24, ".rda", sep="")) |
|
65 |
+if(!all(file.exists(crlmmFilenames))){ |
|
66 |
+ message("Preprocessing and crlmm genotyping. Results are written to", outdir, "...") |
|
67 |
+ intensityFile <- file.path(outdir, "intensities.rda") |
|
68 |
+ crlmmWrapper(celFiles, outdir, save.it=TRUE, intensityFile=intensityFile) |
|
64 | 69 |
} |
65 | 70 |
@ |
66 | 71 |
|
67 |
-Quantile normalize the nonpolymorphic probes and save the results. |
|
72 |
+Load results for chromosome 22. |
|
68 | 73 |
|
69 |
-<<cnrma>>= |
|
70 |
-if(!exists("cnrmaResult")){ |
|
71 |
- if(file.exists(file.path(outdir, "cnrmaResult.rda"))) load(file.path(outdir, "cnrmaResult.rda")) |
|
72 |
- else { |
|
73 |
- cnrmaResult <- cnrma(celFiles, cdfName="genomewidesnp6") |
|
74 |
- save(cnrmaResult, file=file.path(outdir, "cnrmaResult.rda")) |
|
75 |
- } |
|
76 |
-} |
|
74 |
+<<crlmmList-show>>= |
|
75 |
+CHR <- 22 |
|
76 |
+if(!exists("crlmmResults")) load(file.path(outdir, paste("crlmmResults_", CHR, ".rda", sep=""))) |
|
77 |
+class(crlmmResults) |
|
78 |
+show(crlmmResults) |
|
77 | 79 |
@ |
78 | 80 |
|
79 | 81 |
\subsection{Copy number} |
... | ... |
@@ -89,29 +91,14 @@ We require 6 items for copy number estimation: |
89 | 91 |
\item signal to noise ratio (SNR) of the samples (J) |
90 | 92 |
\end{itemize} |
91 | 93 |
|
92 |
- These items are extracted as follows: |
|
93 |
- |
|
94 |
-<<snpAndCnSummaries>>= |
|
95 |
-A <- res$A |
|
96 |
-B <- res$B |
|
97 |
-genotypes <- crlmm:::calls(crlmmResult)##$calls |
|
98 |
-conf <- confs(crlmmResult)##$conf |
|
99 |
-gender <- crlmmResult$gender |
|
100 |
-SNR <- crlmmResult$SNR |
|
101 |
-NP <- cnrmaResult$NP |
|
102 |
-gc() |
|
103 |
-@ |
|
104 |
- |
|
105 |
-A histogram of the signal to noise ratio for the HapMap samples: |
|
94 |
+ The above items are contained in the \Robject{crlmmResults} object. A |
|
95 |
+ histogram of the signal to noise ratio for the HapMap samples: |
|
106 | 96 |
|
107 | 97 |
<<plotSnr, fig=TRUE>>= |
108 |
-hist(SNR, xlab="SNR", main="") |
|
98 |
+hist(crlmmResults[[2]]$SNR, xlab="SNR", main="") |
|
109 | 99 |
@ |
110 | 100 |
|
111 |
- |
|
112 |
-<<dates>>= |
|
113 |
-dts <- celDates(celFiles) |
|
114 |
-table(format(dts, "%d %b %Y")) |
|
101 |
+<<setSNRmin, echo=FALSE>>= |
|
115 | 102 |
SNRmin <- 5 |
116 | 103 |
@ |
117 | 104 |
|
... | ... |
@@ -122,149 +109,61 @@ chemistry plate. Ideally, one would have 70+ files in a given |
122 | 109 |
batch. Here we make a table of date versus ancestry: |
123 | 110 |
|
124 | 111 |
<<specifyBatch>>= |
125 |
-require(Biobase) |
|
126 |
-sns <- sampleNames(crlmmResult) |
|
112 |
+sns <- sampleNames(crlmmResults) |
|
127 | 113 |
sns[1] |
128 |
-plate <- substr(basename(sns), 13, 13) |
|
129 |
-table(plate) |
|
130 |
-table(format(dts, "%d %b %Y"), plate) |
|
114 |
+batch <- substr(basename(sns), 13, 13) |
|
115 |
+table(batch) |
|
116 |
+table(format(as.POSIXlt(scanDates(crlmmResults)), "%d %b %Y"), batch) |
|
131 | 117 |
@ |
132 | 118 |
|
133 | 119 |
As all of these samples were run on the first week of March, we would |
134 | 120 |
expect that any systematic artifacts to the intensities that develop |
135 | 121 |
over time to be minimal (a best case scenario). As this is typically |
136 | 122 |
not the case, we illustrate how one may adjust for batch using the |
137 |
-chemistry plate as an argument to the computeCopynumber function. |
|
138 |
- |
|
139 |
-<<chromosome22>>= |
|
140 |
-if(!exists("env")){ |
|
141 |
- CHR <- 22 |
|
142 |
- if(!file.exists(file.path(outdir, paste("env_chr", CHR, ".rda", sep="")))){ |
|
143 |
- env <- new.env() |
|
144 |
- computeCopynumber(chrom=CHR, |
|
145 |
- A=A, |
|
146 |
- B=B, |
|
147 |
- calls=genotypes, |
|
148 |
- conf=conf, |
|
149 |
- NP=NP, |
|
150 |
- plate=plate, |
|
151 |
- envir=env, |
|
152 |
- DF.PRIOR=50, |
|
153 |
- MIN.OBS=1, |
|
154 |
- SNR=SNR, |
|
155 |
- SNRmin=SNRmin) |
|
156 |
- save(env, file=file.path(outdir, paste("env_chr", CHR, ".rda", sep=""))) |
|
157 |
- } else { |
|
158 |
- load(file.path(outdir, paste("env_chr", CHR, ".rda", sep=""))) |
|
123 |
+chemistry plate as an argument for \Robject{batch} in the |
|
124 |
+\Robject{computeCopynumber} function. |
|
125 |
+ |
|
126 |
+<<computeCopynumber>>= |
|
127 |
+if(!exists("cnset")){ |
|
128 |
+ if(file.exists(file.path(outdir, paste("cnset_", CHR, ".rda", sep="")))) load(file.path(outdir, paste("cnset_", CHR, ".rda", sep=""))) |
|
129 |
+ else { |
|
130 |
+ cnset <- computeCopynumber(crlmmResults, SNRmin=5, batch=batch, CHR=CHR, cdfName="genomewidesnp6") |
|
131 |
+ save(cnset, file=file.path(outdir, paste("cnset_", CHR, ".rda", sep=""))) |
|
159 | 132 |
} |
160 | 133 |
} |
134 |
+scanDates(cnset) <- scanDates(crlmmResults) |
|
135 |
+cnset <- cnset[order(chromosome(cnset), position(cnset)), ] |
|
136 |
+crlmmResults <- crlmm:::harmonizeDimnamesTo(crlmmResults, cnset) |
|
161 | 137 |
@ |
162 | 138 |
|
163 |
-A class representation would be useful for this sort of data (TODO). |
|
164 |
-The \Rpackage{oligoSnpSet} class used below is a bit inefficient as the |
|
165 |
-assay data elements are forced to be the same size. The advantage is |
|
166 |
-that all of the assay data is bound to the meta-data. |
|
167 |
- |
|
168 |
-<<oligoSnpSet>>= |
|
169 |
-require(oligoClasses) |
|
170 |
-path <- system.file("extdata", package="genomewidesnp6Crlmm") |
|
171 |
-load(file.path(path, "snpProbes.rda")) |
|
172 |
-load(file.path(path, "cnProbes.rda")) |
|
173 |
-position <- snpProbes[match(env[["snps"]], rownames(snpProbes)), "position"] |
|
174 |
-position.np <- cnProbes[match(rownames(env[["NP"]]), rownames(cnProbes)), "position"] |
|
175 |
-CA <- env[["CA"]] |
|
176 |
-CB <- env[["CB"]] |
|
177 |
-snpCT <- CA+CB |
|
178 |
-npCT <- env[["CT"]] |
|
179 |
-CT <- rbind(snpCT, npCT) |
|
180 |
-dimnames(CT) <- list(c(env[["snps"]], rownames(env[["NP"]])), env[["sns"]]) |
|
181 |
-genotypes <- matrix(NA, nrow(CT), ncol(CT)) |
|
182 |
-dimnames(genotypes) <- dimnames(CT) |
|
183 |
-genotypes[1:length(env[["snps"]]), ] <- env[["calls"]] |
|
184 |
-polymorphic <- c(rep(1, length(env[["snps"]])), rep(0, nrow(npCT))) |
|
185 |
-fD <- new("AnnotatedDataFrame", |
|
186 |
- data=data.frame(chromosome=rep(CHR, nrow(CT)), |
|
187 |
- polymorphic=polymorphic, |
|
188 |
- position=c(position, position.np)), |
|
189 |
- varMetadata=data.frame(labelDescription=c("chromosome", |
|
190 |
- "polymorphic", |
|
191 |
- "position"))) |
|
192 |
-locusSet <- new("oligoSnpSet", |
|
193 |
- copyNumber=CT, |
|
194 |
- calls=genotypes, |
|
195 |
- featureData=fD, |
|
196 |
- phenoData=annotatedDataFrameFrom(CT, byrow=FALSE), |
|
197 |
- annotation="genomewidesnp6") |
|
139 |
+<<example.cnset, eval=FALSE, echo=FALSE>>= |
|
140 |
+example.crlmmResults <- crlmmResults[1:1000, 1:100] |
|
141 |
+example.cnset <- cnset[1:1000, 1:100] |
|
142 |
+save(example.cnset, file="~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda") |
|
143 |
+save(example.crlmmResults, file="~/madman/Rpacks/crlmm/inst/scripts/example.crlmmResults.rda") |
|
198 | 144 |
@ |
199 | 145 |
|
200 |
-Note that an indicator for the polymorphic probes was created in the |
|
201 |
-code chunk above. I use this indicator to smooth the estimates of copy |
|
202 |
-number from the nonpolymorphic and polymorphic probes separately. The |
|
203 |
-uncertainty estimates (work in progress) should reflect that the |
|
204 |
-nonpolymorphic probes have more variance and the smoothing will take |
|
205 |
-this into account. The above algorithm for estimating copy number is |
|
206 |
-predicated on the assumption that most samples within a batch have copy |
|
207 |
-number 2 at any given SNP. For common copy number variants, this |
|
208 |
-assumption may not hold. An additional iteration using a bias |
|
209 |
-correction can improve the estimates. Set the \Robject{bias.adj} |
|
210 |
-argument to \Robject{TRUE}. |
|
211 |
- |
|
212 |
-%The total copy number for all chromosomes can be extracted from a |
|
213 |
-%\emph{for} loop. |
|
214 |
- |
|
215 |
-<<remainingChromosomes, eval=FALSE, echo=FALSE>>= |
|
216 |
-require(oligoClasses) |
|
217 |
-if(!exists("env")){ |
|
218 |
-## for(CHR in c(1:21, 23)){ |
|
219 |
- for(CHR in 22){ |
|
220 |
- cat(CHR, ", ") |
|
221 |
- if(!file.exists(file.path(outdir, paste("CT_chr", CHR, ".rda", sep="")))){ |
|
222 |
- env <- new.env() |
|
223 |
- computeCopynumber(chrom=CHR, |
|
224 |
- A=A, |
|
225 |
- B=B, |
|
226 |
- calls=genotypes, |
|
227 |
- conf=conf, |
|
228 |
- NP=NP, |
|
229 |
- plate=plate, |
|
230 |
- envir=env, |
|
231 |
- DF.PRIOR=50, |
|
232 |
- MIN.OBS=1, |
|
233 |
- SNR=SNR, |
|
234 |
- SNRmin=5) |
|
235 |
- CA <- env[["CA"]] |
|
236 |
- CB <- env[["CB"]] |
|
237 |
- snpCT <- CA+CB |
|
238 |
- npCT <- env[["CT"]] |
|
239 |
- position.snp <- snpProbes[match(env[["snps"]], rownames(snpProbes)), "position"] |
|
240 |
- position.np <- cnProbes[match(rownames(env[["NP"]]), rownames(cnProbes)), "position"] |
|
241 |
- CT <- rbind(snpCT, npCT) |
|
242 |
- dimnames(CT) <- list(c(env[["snps"]], rownames(env[["NP"]])), env[["sns"]]) |
|
243 |
- genotypes <- matrix(NA, nrow(CT), ncol(CT)) |
|
244 |
- dimnames(genotypes) <- dimnames(CT) |
|
245 |
- genotypes[1:length(env[["snps"]]), ] <- env[["calls"]] |
|
246 |
- polymorphic <- c(rep(1, length(env[["snps"]])), rep(0, nrow(npCT))) |
|
247 |
- fD <- new("AnnotatedDataFrame", |
|
248 |
- data=data.frame(chromosome=rep(CHR, nrow(CT)), |
|
249 |
- polymorphic=polymorphic, |
|
250 |
- position=c(position.snp, position.np)), |
|
251 |
- varMetadata=data.frame(labelDescription=c("chromosome", |
|
252 |
- "polymorphic", |
|
253 |
- "position"))) |
|
254 |
- pD <- annotatedDataFrameFrom(CT, byrow=FALSE) |
|
255 |
- locusSet <- new("oligoSnpSet", |
|
256 |
- copyNumber=CT, |
|
257 |
- calls=genotypes, |
|
258 |
- featureData=fD, |
|
259 |
- phenoData=pD, |
|
260 |
- annotation="genomewidesnp6") |
|
261 |
- save(locusSet, file=file.path(outdir, paste("locusSet_", CHR, ".rda", sep=""))) |
|
262 |
- } else { |
|
263 |
- load(file.path(outdir, paste("CT_chr", CHR, ".rda", sep=""))) |
|
264 |
- } |
|
265 |
- rm(env); gc() |
|
266 |
- } |
|
267 |
-} |
|
146 |
+The above algorithm for estimating copy number is predicated on the |
|
147 |
+assumption that most samples within a batch have copy number 2 at any |
|
148 |
+given locus. For common copy number variants, this assumption may not |
|
149 |
+hold. An additional iteration using a bias correction can improve the |
|
150 |
+estimates. Set the \Robject{bias.adj} argument to \Robject{TRUE}. |
|
151 |
+ |
|
152 |
+<<biasAdjustment, eval=FALSE, echo=FALSE>>= |
|
153 |
+library(oligoClasses);library(genefilter) |
|
154 |
+source("~/madman/Rpacks/crlmm/R/AllClasses.R") |
|
155 |
+source("~/madman/Rpacks/crlmm/R/AllGenerics.R") |
|
156 |
+source("~/madman/Rpacks/crlmm/R/cnrma-functions.R") |
|
157 |
+source("~/madman/Rpacks/crlmm/R/methods-eSet.R") |
|
158 |
+source("~/madman/Rpacks/crlmm/R/methods-ABset.R") |
|
159 |
+source("~/madman/Rpacks/crlmm/R/methods-CrlmmSetList.R") |
|
160 |
+source("~/madman/Rpacks/crlmm/R/methods-CopyNumberSet.R") |
|
161 |
+source("~/madman/Rpacks/crlmm/R/utils.R") |
|
162 |
+.crlmmPkgEnv <- new.env() |
|
163 |
+envir2 <- envir |
|
164 |
+envir <- getCopyNumberEnvironment(crlmmSetList, cnSet) |
|
165 |
+updateNuPhi(crlmmSetList, cnSet) |
|
166 |
+##TODO |
|
268 | 167 |
@ |
269 | 168 |
|
270 | 169 |
\section{Suggested plots} |
... | ... |
@@ -275,24 +174,21 @@ Plot physical position versus copy number for the first sample. Recall |
275 | 174 |
that the copy number estimates were multiplied by 100 and stored as an |
276 | 175 |
integer. |
277 | 176 |
|
278 |
-<<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>= |
|
279 |
-require(ellipse) |
|
177 |
+<<oneSample, fig=TRUE, width=8, height=4, include=FALSE, eval=FALSE>>= |
|
280 | 178 |
par(las=1) |
281 |
-plot(position(locusSet), copyNumber(locusSet)[, 1]/100, pch=".", cex=2, xaxt="n", col="grey70", ylim=c(0,6), |
|
179 |
+plot(position(cnset), copyNumber(cnset)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), |
|
282 | 180 |
ylab="copy number", xlab="physical position (Mb)", |
283 |
- main=paste(sampleNames(locusSet)[1], ", CHR:", unique(chromosome(locusSet)))) |
|
284 |
-axis(1, at=pretty(range(position(locusSet))), labels=pretty(range(position(locusSet)))/1e6) |
|
285 |
-I <- fData(locusSet)$polymorphic==0 |
|
286 |
-points(position(locusSet)[I], copyNumber(locusSet)[I, 1]/100, pch=".", col="blue", cex=1) |
|
181 |
+ main=paste(sampleNames(cnset)[1], ", CHR:", unique(chromosome(cnset)))) |
|
182 |
+points(position(cnset)[cnIndex(cnset)], copyNumber(cnset)[cnIndex(cnset), 1], |
|
183 |
+ pch=".", cex=2, col="lightblue") |
|
184 |
+axis(1, at=pretty(range(position(cnset))), labels=pretty(range(position(cnset)))/1e6) |
|
287 | 185 |
@ |
288 |
-<<eval=FALSE>>= |
|
186 |
+ |
|
187 |
+<<idiogram, eval=FALSE, echo=FALSE>>= |
|
289 | 188 |
require(SNPchip) |
290 | 189 |
plotCytoband(22, new=FALSE, cytoband.ycoords=c(5.8, 6.0), label.cytoband=FALSE) |
291 | 190 |
@ |
292 | 191 |
|
293 |
-In this example, the estimates of copy number for the nonpolymorphic |
|
294 |
-probes appear correlated with the polymorphic probes -- a good sign. |
|
295 |
- |
|
296 | 192 |
\begin{figure} |
297 | 193 |
\includegraphics[width=0.9\textwidth]{copynumber-oneSample} |
298 | 194 |
\caption{Total copy number (y-axis) for chromosome 22 plotted |
... | ... |
@@ -301,57 +197,87 @@ probes appear correlated with the polymorphic probes -- a good sign. |
301 | 197 |
|
302 | 198 |
\paragraph{One SNP at a time} |
303 | 199 |
|
304 |
-This section needs to be cleaned up (TODO). The parameters needed for |
|
305 |
-drawing prediction regions are plate-specific. |
|
306 |
- |
|
307 |
-<<intermediateFiles>>= |
|
308 |
-tau2A <- env[["tau2A"]] |
|
309 |
-tau2B <- env[["tau2B"]] |
|
310 |
-sig2A <- env[["sig2A"]] |
|
311 |
-sig2B <- env[["sig2B"]] |
|
312 |
-nuA <- env[["nuA"]] |
|
313 |
-nuB <- env[["nuB"]] |
|
314 |
-phiA <- env[["phiA"]] |
|
315 |
-phiB <- env[["phiB"]] |
|
316 |
-corr <- env[["corr"]] |
|
317 |
-corrA.BB <- env[["corrA.BB"]] |
|
318 |
-corrB.AA <- env[["corrB.AA"]] |
|
319 |
-A <- env[["A"]] |
|
320 |
-B <- env[["B"]] |
|
321 |
-@ |
|
322 |
- |
|
323 | 200 |
Plot the prediction regions for total copy number 2 and 3 for the first |
324 | 201 |
plate. Plotting symbols are the genotype calls (1=AA, 2=AB, 3=BB); light |
325 | 202 |
grey points are from other plates. One could also add the prediction |
326 | 203 |
regions for 0-4 copies, but it gets crowded. |
327 | 204 |
|
328 |
-<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE>>= |
|
329 |
-par(las=1, pty="s") |
|
330 |
-p <- 1 ##indicates plate |
|
331 |
-J <- grep(unique(plate)[p], plate) ##sample indices for this plate |
|
332 |
-ylim <- c(6.5,13) |
|
333 |
-I <- which(phiA > 10 & phiB > 10) |
|
334 |
-i <- I[1] |
|
335 |
-##plate effects are small |
|
336 |
-log2(phiA[i, ]) |
|
337 |
-log2(phiB[i, ]) |
|
338 |
-plot(log2(A[i, ]), log2(B[i, ]), pch=as.character(genotypes[i, ]), col="grey60", cex=0.9, ylim=ylim, xlim=ylim, xlab="A", ylab="B") |
|
339 |
-points(log2(A[i, J]), log2(B[i, J]), col="black", pch=as.character(genotypes[i, J])) |
|
340 |
-for(CT in 2){ |
|
341 |
- if(CT == 2) ellipse.col <- "black" ##else ellipse.col <- "purple" |
|
342 |
- for(CA in 0:CT){ |
|
343 |
- CB <- CT-CA |
|
344 |
- A.scale <- sqrt(tau2A[i, p]*(CA==0) + sig2A[i, p]*(CA > 0)) |
|
345 |
- B.scale <- sqrt(tau2B[i, p]*(CB==0) + sig2B[i, p]*(CB > 0)) |
|
346 |
- scale <- c(A.scale, B.scale) |
|
347 |
- if(CA == 0 & CB > 0) rho <- corrA.BB[i, p] |
|
348 |
- if(CA > 0 & CB == 0) rho <- corrB.AA[i, p] |
|
349 |
- if(CA > 0 & CB > 0) rho <- corr[i, p] |
|
350 |
- lines(ellipse(x=rho, centre=c(log2(nuA[i, p]+CA*phiA[i, p]), log2(nuB[i, p]+CB*phiB[i, p])), |
|
351 |
- scale=scale), col=ellipse.col, lwd=2) |
|
205 |
+<<genotypeCalls, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>= |
|
206 |
+xlim <- ylim <- c(6.5,13) |
|
207 |
+pch <- 21 |
|
208 |
+colors <- c("red", "blue", "green3") |
|
209 |
+cex <- 0.6 |
|
210 |
+par(mfrow=c(3,3), las=1, pty="s", ask=FALSE, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1)) |
|
211 |
+##plot 9 at a time |
|
212 |
+indices <- split(snpIndex(crlmmResults), rep(1:length(snpIndex(crlmmResults)), each=9, length.out=length(snpIndex(crlmmResults)))) |
|
213 |
+par(ask=TRUE) |
|
214 |
+j <- 1 |
|
215 |
+for(i in indices[[j]]){ |
|
216 |
+ gt <- calls(crlmmResults)[i, ] |
|
217 |
+ plot(crlmmResults[i, ], |
|
218 |
+ pch=pch, |
|
219 |
+ col=colors[gt], |
|
220 |
+ bg=colors[gt], cex=cex, |
|
221 |
+ xlim=xlim, ylim=ylim) |
|
222 |
+ mtext("A", 1, outer=TRUE, line=1) |
|
223 |
+ mtext("B", 2, outer=TRUE, line=1) |
|
224 |
+} |
|
225 |
+@ |
|
226 |
+ |
|
227 |
+<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>= |
|
228 |
+require(RColorBrewer) |
|
229 |
+greens <- brewer.pal(9, "Greens") |
|
230 |
+J <- split(1:ncol(cnset), cnset$batch) |
|
231 |
+colors <- c("red", "blue", "green3") |
|
232 |
+cex <- 0.6 |
|
233 |
+colors <- c("blue", greens[8], "red") |
|
234 |
+pch.col <- c("grey40", "black", "grey40") |
|
235 |
+xlim <- ylim <- c(6.5,13) |
|
236 |
+plotpoints <- FALSE |
|
237 |
+lwd <- 2 |
|
238 |
+pdf("figures/snp22plots%02d.pdf", width=600, height=600) |
|
239 |
+ask <- FALSE |
|
240 |
+par(mfrow=c(3,3), las=1, pty="s", ask=ask, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1)) |
|
241 |
+indices <- split(snpIndex(crlmmResults), rep(1:length(snpIndex(crlmmResults)), each=9, length.out=length(snpIndex(crlmmResults)))) |
|
242 |
+for(j in seq(along=indices)[1:10]){ |
|
243 |
+ cat(j, "\n") |
|
244 |
+ k <- 1 |
|
245 |
+ for(i in indices[[j]]){ |
|
246 |
+ gt <- calls(crlmmResults)[i, ] |
|
247 |
+ pch <- as.character(gt) |
|
248 |
+ cex <- 0.9 |
|
249 |
+ plot(crlmmResults[i, ], |
|
250 |
+ pch=pch, |
|
251 |
+ col=pch.col[gt], |
|
252 |
+ cex=cex, |
|
253 |
+ xlim=xlim, ylim=ylim, |
|
254 |
+ type="n") |
|
255 |
+ if(plotpoints){ |
|
256 |
+ for(b in seq(along=unique(cnset$batch))){ |
|
257 |
+ points(crlmmResults[i, J[[b]]], |
|
258 |
+ pch=pch, |
|
259 |
+ col=colors[b], bg=colors[b], cex=cex, |
|
260 |
+ xlim=xlim, ylim=ylim) |
|
261 |
+ } |
|
262 |
+ } |
|
263 |
+ for(b in seq(along=unique(cnset$batch))){ |
|
264 |
+ ellipse(cnset[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd) |
|
265 |
+ } |
|
266 |
+ ##legend("bottomright", bty="n", legend=featureNames(crlmmResults)[i]) |
|
267 |
+ if(k == 1) { |
|
268 |
+ legend("bottomleft", bty="n", fill=colors, legend=c("CEPH", "Yoruba", "Asian")) |
|
269 |
+ mtext("A", 1, outer=TRUE, line=1) |
|
270 |
+ mtext("B", 2, outer=TRUE, line=0) |
|
271 |
+ } |
|
272 |
+ k <- k+1 |
|
352 | 273 |
} |
353 | 274 |
} |
354 |
-legend("topright", lwd=3, col="black", legend="2 copies", bty="n") |
|
275 |
+dev.off() |
|
276 |
+@ |
|
277 |
+ |
|
278 |
+<<problems, echo=FALSE, eval=FALSE>>= |
|
279 |
+##8300117, 8469401, 8706404, 2296864, 8659838, 1936806, 4198761, 8584395 |
|
280 |
+ |
|
355 | 281 |
@ |
356 | 282 |
|
357 | 283 |
\begin{figure} |
... | ... |
@@ -362,106 +288,68 @@ legend("topright", lwd=3, col="black", legend="2 copies", bty="n") |
362 | 288 |
\end{figure} |
363 | 289 |
|
364 | 290 |
|
365 |
-<<predictionRegion, eval=FALSE, echo=FALSE>>= |
|
366 |
-par(las=1, pty="s", ask=TRUE) |
|
367 |
-p <- 1 ##indicates plate |
|
368 |
-J <- grep(unique(plate)[p], plate) ##sample indices for this plate |
|
369 |
-ylim <- c(6.5,13) |
|
370 |
-I <- which(phiA > 10 & phiB > 10) |
|
371 |
-col <- c("grey0", "grey30", "grey70") |
|
372 |
-cex <- 3 |
|
373 |
-for(j in seq(along=I)){ |
|
374 |
- i <- I[j] |
|
375 |
- J1 <- grep("C", plate) ##sample indices for this plate |
|
376 |
- J2 <- grep("Y", plate) ##sample indices for this plate |
|
377 |
- J3 <- grep("A", plate) |
|
378 |
- plot(log2(A[i, ]), log2(B[i, ]), pch=21, col="grey60", type="n", cex=0.9, ylim=ylim, xlim=ylim, xlab="A", ylab="B") |
|
379 |
- points(log2(A[i, J1]), log2(B[i, J1]), col=col[1], pch=".", cex=cex) |
|
380 |
- points(log2(A[i, J2]), log2(B[i, J2]), col=col[2], pch=".", cex=cex) |
|
381 |
- points(log2(A[i, J3]), log2(B[i, J3]), col=col[3], pch=".", cex=cex) |
|
382 |
- P <- 1:3 |
|
383 |
- ellipse.col <- col |
|
384 |
- for(p in seq(along=P)){ |
|
385 |
- for(CT in 2){ |
|
386 |
- for(CA in 0:CT){ |
|
387 |
- CB <- CT-CA |
|
388 |
- A.scale <- sqrt(tau2A[i, p]*(CA==0) + sig2A[i, p]*(CA > 0)) |
|
389 |
- B.scale <- sqrt(tau2B[i, p]*(CB==0) + sig2B[i, p]*(CB > 0)) |
|
390 |
- scale <- c(A.scale, B.scale) |
|
391 |
- if(CA == 0 & CB > 0) rho <- corrA.BB[i, p] |
|
392 |
- if(CA > 0 & CB == 0) rho <- corrB.AA[i, p] |
|
393 |
- if(CA > 0 & CB > 0) rho <- corr[i, p] |
|
394 |
- lines(ellipse(x=rho, centre=c(log2(nuA[i, p]+CA*phiA[i, p]), log2(nuB[i, p]+CB*phiB[i, p])), |
|
395 |
- scale=scale), col=ellipse.col[p], lwd=2) |
|
396 |
- } |
|
397 |
- } |
|
291 |
+\section{Smoothing via a hidden Markov model} |
|
398 | 292 |
|
399 |
- } |
|
400 |
- legend("topright", lwd=3, col=ellipse.col, legend=unique(plate), bty="n") |
|
401 |
-} |
|
402 |
-@ |
|
293 |
+Here we smooth via a hidden Markov model. To facilitate comparisons |
|
294 |
+with the Birdseye HMM, the same transition probabilities are specified. |
|
403 | 295 |
|
404 |
-Look at the distribution of shifts in the predicted centers across the |
|
405 |
-plates. The biggest shifts are for SNPs that have no observations in a |
|
406 |
-subset of the plates -- need more shrinkage here. |
|
407 |
- |
|
408 |
-<<shifts, eval=FALSE, echo=FALSE>>= |
|
409 |
-shiftA <- log2(nuA+phiA) |
|
410 |
-d <- suppressWarnings(apply(shiftA, 1, function(x) diff(range(x, na.rm=TRUE)))) |
|
411 |
-hist(d) |
|
412 |
-index <- which(d > 1.5) |
|
413 |
-plate1 <- plate2 <- rep(NA, length(index)) |
|
414 |
-for(i in seq(along=index)){ |
|
415 |
- plate1[i] <- which(shiftA[index[i], ] == maxA[index[i]]) |
|
416 |
- plate2[i] <- which(shiftA[index[i], ] == minA[index[i]]) |
|
417 |
-} |
|
418 |
-par(ask=TRUE) |
|
419 |
-par(las=1) |
|
420 |
-for(j in seq(along=index)){ |
|
421 |
- i <- index[j] |
|
422 |
- J1 <- grep(plate[plate1[j]], plate) ##sample indices for this plate |
|
423 |
- J2 <- grep(plate[plate2[j]], plate) ##sample indices for this plate |
|
424 |
- ylim <- c(6.5,13) |
|
425 |
- plot(log2(A[i, ]), log2(B[i, ]), pch=as.character(genotypes[i, ]), col="grey60", cex=0.9, ylim=ylim, xlim=ylim, xlab="A", ylab="B") |
|
426 |
- points(log2(A[i, J1]), log2(B[i, J1]), col="brown", pch=as.character(genotypes[i, J1])) |
|
427 |
- points(log2(A[i, J2]), log2(B[i, J2]), col="blue", pch=as.character(genotypes[i, J2])) |
|
428 |
- CT <- 2 |
|
429 |
- P <- c(plate1[j], plate2[j]) |
|
430 |
- for(p in seq(along=P)){ |
|
431 |
- if(p == 1) ellipse.col="brown" else ellipse.col="blue" |
|
432 |
- for(CA in 0:CT){ |
|
433 |
- CB <- CT-CA |
|
434 |
- A.scale <- sqrt(tau2A[i, p]*(CA==0) + sig2A[i, p]*(CA > 0)) |
|
435 |
- B.scale <- sqrt(tau2B[i, p]*(CB==0) + sig2B[i, p]*(CB > 0)) |
|
436 |
- scale <- c(A.scale, B.scale) |
|
437 |
- if(CA == 0 & CB > 0) rho <- corrA.BB[i, p] |
|
438 |
- if(CA > 0 & CB == 0) rho <- corrB.AA[i, p] |
|
439 |
- if(CA > 0 & CB > 0) rho <- corr[i, p] |
|
440 |
- lines(ellipse(x=rho, centre=c(log2(nuA[i, p]+CA*phiA[i, p]), log2(nuB[i, p]+CB*phiB[i, p])), |
|
441 |
- scale=scale), col=ellipse.col, lwd=2) |
|