git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@41171 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -256,3 +256,11 @@ is decoded and scanned |
256 | 256 |
2009-08-04 R. Scharpf - committed version 1.3.17 |
257 | 257 |
|
258 | 258 |
* changed readIdatFiles function to check whether arrayNames is NULL |
259 |
+ |
|
260 |
+2009-08-13 R. Scharpf - committed version 1.3.18 |
|
261 |
+ |
|
262 |
+* move Biobase to Depends |
|
263 |
+* removed GGdata from suggests (problem with loading illuminaHumanv1.db) |
|
264 |
+* overhaul of vignette |
|
265 |
+* added update() method for copy number |
|
266 |
+* added documentation and error checks for crlmmWrapper |
... | ... |
@@ -1,15 +1,15 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.3.17 |
|
4 |
+Version: 1.3.18 |
|
5 | 5 |
Date: 2009-07-22 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0. |
9 | 9 |
License: Artistic-2.0 |
10 |
-Depends: methods |
|
11 |
-Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase (>= 2.5.5), mvtnorm, oligoClasses, ellipse, methods |
|
12 |
-Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), GGdata, snpMatrix, VanillaICE (>= 1.7.8) |
|
10 |
+Depends: methods, Biobase (>= 2.5.5) |
|
11 |
+Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, oligoClasses, ellipse, methods |
|
12 |
+Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, VanillaICE (>= 1.7.8), GGdata |
|
13 | 13 |
Collate: AllClasses.R |
14 | 14 |
AllGenerics.R |
15 | 15 |
methods-ABset.R |
... | ... |
@@ -30,6 +30,8 @@ importFrom(Biobase, assayDataElement, assayDataElementNames, |
30 | 30 |
importFrom(graphics, abline, axis, layout, legend, mtext, par, plot, |
31 | 31 |
polygon, rect, segments, text, points) |
32 | 32 |
|
33 |
+importFrom(stats, update) |
|
34 |
+ |
|
33 | 35 |
importFrom(grDevices, grey) |
34 | 36 |
|
35 | 37 |
importFrom(affyio, read.celfile.header, read.celfile) |
... | ... |
@@ -74,6 +76,7 @@ export(batch, |
74 | 76 |
copyNumber, |
75 | 77 |
crlmm, |
76 | 78 |
crlmmIllumina, |
79 |
+ crlmmIlluminaWrapper, |
|
77 | 80 |
crlmmWrapper, |
78 | 81 |
## ellipse, |
79 | 82 |
## computeEmission, |
... | ... |
@@ -85,7 +88,7 @@ export(batch, |
85 | 88 |
sampleNames, |
86 | 89 |
protocolData, |
87 | 90 |
"protocolData<-", |
88 |
- snprma) |
|
91 |
+ snprma, update) |
|
89 | 92 |
|
90 | 93 |
|
91 | 94 |
|
... | ... |
@@ -8,12 +8,12 @@ setGeneric("CB", function(object, ...) standardGeneric("CB")) |
8 | 8 |
setGeneric("CA<-", function(object, value) standardGeneric("CA<-")) |
9 | 9 |
setGeneric("CB<-", function(object, value) standardGeneric("CB<-")) |
10 | 10 |
##setGeneric("chromosome", function(object) standardGeneric("chromosome")) |
11 |
-setGeneric("cnIndex", function(object) standardGeneric("cnIndex")) |
|
12 |
-setGeneric("cnNames", function(object) standardGeneric("cnNames")) |
|
11 |
+setGeneric("cnIndex", function(object, ...) standardGeneric("cnIndex")) |
|
12 |
+setGeneric("cnNames", function(object, ...) standardGeneric("cnNames")) |
|
13 | 13 |
##setGeneric("copyNumber", function(object) standardGeneric("copyNumber")) |
14 | 14 |
##setGeneric("position", function(object) standardGeneric("position")) |
15 | 15 |
setGeneric(".harmonizeDimnames", function(object) standardGeneric(".harmonizeDimnames")) |
16 |
-setGeneric("snpIndex", function(object) standardGeneric("snpIndex")) |
|
17 |
-setGeneric("snpNames", function(object) standardGeneric("snpNames")) |
|
16 |
+setGeneric("snpIndex", function(object, ...) standardGeneric("snpIndex")) |
|
17 |
+setGeneric("snpNames", function(object, ...) standardGeneric("snpNames")) |
|
18 | 18 |
setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome")) |
19 | 19 |
|
... | ... |
@@ -177,9 +177,9 @@ combineIntensities <- function(res, cnrmaResult, cdfName){ |
177 | 177 |
return(ABset) |
178 | 178 |
} |
179 | 179 |
|
180 |
-harmonizeSnpSet <- function(crlmmResult, ABset){ |
|
181 |
- blank <- matrix(NA, length(cnNames(ABset)), ncol(ABset)) |
|
182 |
- rownames(blank) <- cnNames(ABset) |
|
180 |
+harmonizeSnpSet <- function(crlmmResult, ABset, cdfName){ |
|
181 |
+ blank <- matrix(NA, length(cnNames(ABset, cdfName)), ncol(ABset)) |
|
182 |
+ rownames(blank) <- cnNames(ABset, cdfName) |
|
183 | 183 |
colnames(blank) <- sampleNames(ABset) |
184 | 184 |
crlmmCalls <- rbind(calls(crlmmResult), blank) |
185 | 185 |
crlmmConf <- rbind(confs(crlmmResult), blank) |
... | ... |
@@ -215,33 +215,56 @@ harmonizeDimnamesTo <- function(object1, object2){ |
215 | 215 |
return(object1) |
216 | 216 |
} |
217 | 217 |
|
218 |
-crlmmIlluminaWrapper <- function(sampleSheet, outdir="./", cdfName, |
|
218 |
+crlmmIlluminaWrapper <- function(outdir="./", |
|
219 |
+ cdfName, |
|
219 | 220 |
save.intermediate=FALSE, |
220 |
- splitByChr=TRUE,...){ |
|
221 |
- if(file.exists(file.path(outdir, "RG.rda"))) load(file.path(outdir, "RG.rda")) |
|
222 |
- else { |
|
223 |
- RG <- readIdatFiles(sampleSheet=get("samplesheet5"), |
|
224 |
- arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
225 |
- saveDate=TRUE, path=get("path")) |
|
226 |
- J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG)) |
|
227 |
- RG <- RG[, -J] |
|
221 |
+ splitByChr=TRUE, |
|
222 |
+ intensityFile, |
|
223 |
+ ...){ ##additional arguments to readIdatFiles |
|
224 |
+ stopifnot(basename(intensityFile) == "res.rda") |
|
225 |
+ if(!file.exists(outdir)) stop(outdir, " does not exist.") |
|
226 |
+ if(!isValidCdfName(cdfName, platform="illumina")) stop(cdfName, " not supported.") |
|
227 |
+ if(file.exists(file.path(outdir, "RG.rda"))) { |
|
228 |
+ message("Loading RG.rda...") |
|
229 |
+ load(file.path(outdir, "RG.rda")) |
|
230 |
+ } else { |
|
231 |
+ message("Reading Idat files...") |
|
232 |
+ RG <- readIdatFiles(...) |
|
233 |
+ ##J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG)) |
|
234 |
+ ##RG <- RG[, -J] |
|
228 | 235 |
if(save.intermediate) save(RG, file=file.path(outdir, "RG.rda")) ##935M for 91 samples...better not to save this |
229 | 236 |
} |
230 |
- if(!file.exists(file.path(outdir, "res.rda"))){ |
|
231 |
- crlmmOut <- crlmmIllumina(RG=RG, cdfName=cdfName, |
|
232 |
- sns=pData(RG)$ID, |
|
237 |
+ if(!file.exists(intensityFile)){ |
|
238 |
+ message("Quantile normalization / genotyping...") |
|
239 |
+ crlmmOut <- crlmmIllumina(RG=RG, |
|
240 |
+ cdfName=cdfName, |
|
241 |
+ sns=sampleNames(RG), |
|
233 | 242 |
returnParams=TRUE, |
234 | 243 |
save.it=TRUE, |
235 |
- intensityFile=file.path(outdir, "res.rda")) |
|
236 |
- if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda")) |
|
244 |
+ intensityFile=intensityFile) |
|
245 |
+ if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda")) |
|
246 |
+ } else{ |
|
247 |
+ load(file.path(outdir, "crlmmOut.rda")) |
|
248 |
+ } |
|
249 |
+ message("Loading ", intensityFile, "...") |
|
250 |
+ load(intensityFile) |
|
251 |
+ if(exists("cnAB")){ |
|
252 |
+ np.A <- as.integer(cnAB$A) |
|
253 |
+ np.B <- as.integer(cnAB$B) |
|
254 |
+ np <- ifelse(np.A > np.B, np.A, np.B) |
|
255 |
+ np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
256 |
+ rownames(np) <- cnAB$gns |
|
257 |
+ colnames(np) <- cnAB$sns |
|
258 |
+ cnAB$NP <- np |
|
259 |
+ } |
|
260 |
+ sampleNames(crlmmOut) <- res$sns |
|
261 |
+ if(exists("cnAB")){ |
|
262 |
+ ABset <- combineIntensities(get("res"), cnAB, cdfName=cdfName) |
|
237 | 263 |
} else{ |
238 |
- message("Loading...") |
|
239 |
- load(file.path(outdir, "res.rda")) |
|
240 |
- load(file.path(outdir, "crlmmOut.rda")) |
|
264 |
+ ABset <- combineIntensities(get("res"), NULL, cdfName=cdfName) |
|
241 | 265 |
} |
242 |
- ABset <- combineIntensities(get("res"), NULL, cdfName=cdfName) |
|
243 |
- protocolData(ABset)[["ScanDate"]] <- as.character(pData(RG)$ScanDate) |
|
244 |
- crlmmResult <- harmonizeSnpSet(crlmmOut, ABset) |
|
266 |
+ ##protocolData(ABset)[["ScanDate"]] <- as.character(pData(RG)$ScanDate) |
|
267 |
+ crlmmResult <- harmonizeSnpSet(crlmmOut, ABset, cdfName) |
|
245 | 268 |
stopifnot(all.equal(dimnames(crlmmOut), dimnames(ABset))) |
246 | 269 |
crlmmList <- list(ABset, |
247 | 270 |
crlmmResult) |
... | ... |
@@ -255,47 +278,174 @@ crlmmIlluminaWrapper <- function(sampleSheet, outdir="./", cdfName, |
255 | 278 |
} |
256 | 279 |
message("CrlmmSetList objects saved to ", outdir) |
257 | 280 |
} |
281 |
+ |
|
282 |
+##quantileNormalize1m <- function(cel.files, |
|
283 |
+## outdir, |
|
284 |
+## pkgname, |
|
285 |
+## reference=TRUE, |
|
286 |
+## createCels=TRUE, |
|
287 |
+## verbose=TRUE, |
|
288 |
+## computeCopyNumberReference=FALSE, |
|
289 |
+## normalizeNonpolymorphic=FALSE){ |
|
290 |
+## if(computeCopyNumberReference){ |
|
291 |
+## stopifnot(file.exists("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m")) |
|
292 |
+## } else{ |
|
293 |
+## load(system.file("1m_reference_cn.rda", package="CN")) |
|
294 |
+## } |
|
295 |
+## conn <- db(get(pkgname)) |
|
296 |
+## tmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, allele, featureSet.chrom, featureSet.physical_pos", |
|
297 |
+## "FROM pmfeature, featureSet", |
|
298 |
+## "WHERE pmfeature.fsetid = featureSet.fsetid")) |
|
299 |
+## tmp[is.na(tmp$chrom), "chrom"] <- 0 |
|
300 |
+## tmp[is.na(tmp$physical_pos), "physical_pos"] <- 0 |
|
301 |
+## tmp <- tmp[order(tmp$chrom, tmp$physical_pos, tmp$man_fsetid, tmp$allele),] |
|
302 |
+## |
|
303 |
+## if(reference){ |
|
304 |
+## load(system.file("extdata", paste(pkgname, "Ref.rda", sep=""), package=pkgname)) |
|
305 |
+## } else{ |
|
306 |
+## reference <- normalize.quantiles.determine.target(readCelIntensities(celFiles, |
|
307 |
+## indices=tmp$fid)) |
|
308 |
+## if (verbose) message("normalization vector created") |
|
309 |
+## } |
|
310 |
+## reference <- sort(reference) |
|
311 |
+## message("creating empty cel files") |
|
312 |
+## out.celFiles <- file.path(outdir, paste("QN-", basename(cel.files), sep="")) |
|
313 |
+## if(createCels){ |
|
314 |
+## hh <- readCelHeader(cel.files[1]) |
|
315 |
+## message(paste("creating empty cel files in", outdir)) |
|
316 |
+## out.celFiles <- sapply(out.celFiles, function(x) suppressWarnings(createCel(x, header=hh, overwrite=TRUE))) |
|
317 |
+## } |
|
318 |
+## |
|
319 |
+## message("Quantile normalizing SNP probes to hapmap target distribution") |
|
320 |
+## for(i in 1:length(cel.files)){ |
|
321 |
+## if(i%%10==0) cat(".") |
|
322 |
+## pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i], |
|
323 |
+## indices=tmp$fid), |
|
324 |
+## reference, copy=FALSE) |
|
325 |
+## updateCel(out.celFiles[i], indices=tmp$fid, intensities=as.integer(pms)) |
|
326 |
+## } |
|
327 |
+## |
|
328 |
+#### --------------------------------------------------------------------------- |
|
329 |
+#### copy number probes |
|
330 |
+#### --------------------------------------------------------------------------- |
|
331 |
+## if(normalizeNonpolymorphic){ |
|
332 |
+## cntmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, featureSetCNV.chrom, featureSetCNV.chrom_start", |
|
333 |
+## "FROM pmfeatureCNV, featureSetCNV", |
|
334 |
+## "WHERE pmfeatureCNV.fsetid = featureSetCNV.fsetid")) |
|
335 |
+## cntmp[is.na(cntmp$chrom), "chrom"] <- 0 |
|
336 |
+## cntmp[is.na(cntmp$chrom_start), "chrom_start"] <- 0 |
|
337 |
+## cntmp <- cntmp[order(cntmp$chrom, cntmp$chrom_start, cntmp$man_fsetid), ] |
|
338 |
+## |
|
339 |
+## if(computeCopyNumberReference){ |
|
340 |
+## message("computing reference distribution for copy number probes. May take a while...") |
|
341 |
+## celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full.names=TRUE) |
|
342 |
+## reference <- computeCopyNumberReference(cel.files=celFiles, fid=cntmp$fid) |
|
343 |
+## message("finished computing CN reference distribution.") |
|
344 |
+## } |
|
345 |
+## message("Quantile normalizing copy number probes to hapmap target distribution") |
|
346 |
+## for(i in 1:length(cel.files)){ |
|
347 |
+## if(i%%10==0) cat(".") |
|
348 |
+## pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i], |
|
349 |
+## indices=cntmp$fid), |
|
350 |
+## reference, copy=FALSE) |
|
351 |
+## updateCel(out.celFiles[i], indices=cntmp$fid, intensities=as.integer(pms)) |
|
352 |
+## } |
|
353 |
+## } else { |
|
354 |
+## message("Not quantile normalizing the nonpolymorphic probes") |
|
355 |
+## } |
|
356 |
+##} |
|
357 |
+ |
|
358 |
+validCdfNames <- function(platform){ |
|
359 |
+ if(missing(platform)) stop("missing platform") |
|
360 |
+ if(!platform %in% c("illumina", "affymetrix")) |
|
361 |
+ stop("only illumina and affymetrix platforms are supported.") |
|
362 |
+ if(platform=="illumina"){ |
|
363 |
+ chipList = c("human1mv1c", # 1M |
|
364 |
+ "human370v1c", # 370CNV |
|
365 |
+ "human650v3a", # 650Y |
|
366 |
+ "human610quadv1b", # 610 quad |
|
367 |
+ "human660quadv1a", # 660 quad |
|
368 |
+ "human370quadv3c", # 370CNV quad |
|
369 |
+ "human550v3b", # 550K |
|
370 |
+ "human1mduov3b") # 1M Duo |
|
371 |
+ } |
|
372 |
+ if(platform=="affymetrix"){ |
|
373 |
+ chipList=c("genomewidesnp6") |
|
374 |
+ } |
|
375 |
+ return(chipList) |
|
376 |
+} |
|
377 |
+ |
|
378 |
+isValidCdfName <- function(cdfName, platform){ |
|
379 |
+ chipList <- validCdfNames(platform) |
|
380 |
+ if(!(cdfName %in% chipList)){ |
|
381 |
+ warning("cdfName must be one of the following: ", |
|
382 |
+ chipList) |
|
383 |
+ } |
|
384 |
+ result <- cdfName %in% chipList |
|
385 |
+ return(result) |
|
386 |
+} |
|
258 | 387 |
|
259 | 388 |
|
260 | 389 |
|
261 |
-crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", |
|
390 |
+crlmmWrapper <- function(filenames, |
|
391 |
+ cdfName="genomewidesnp6", |
|
392 |
+ load.it=FALSE, |
|
262 | 393 |
save.it=FALSE, |
263 |
- splitByChr=TRUE, ...){ |
|
264 |
- ##no visible binding for res |
|
265 |
- if(!file.exists(file.path(outdir, "crlmmResult.rda"))){ |
|
266 |
- crlmmResult <- crlmm(filenames=filenames, cdfName=cdfName, save.it=TRUE, ...) |
|
267 |
- if(save.it) save(crlmmResult, file=file.path(outdir, "crlmmResult.rda")) |
|
268 |
- } else { |
|
269 |
- message("Loading crlmmResult...") |
|
270 |
- load(file.path(outdir, "crlmmResult.rda")) |
|
394 |
+ splitByChr=TRUE, |
|
395 |
+ crlmmFile, |
|
396 |
+ intensityFile, ...){ |
|
397 |
+ outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep="")) |
|
398 |
+ if(load.it & all(file.exists(outfiles))){ |
|
399 |
+ load(outfiles[1]) |
|
400 |
+ crlmmSetList <- get("crlmmSetList") |
|
401 |
+ if(!all(sampleNames(crlmmSetList) == basename(filenames))){ |
|
402 |
+ stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))") |
|
403 |
+ } else{ |
|
404 |
+ return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...") |
|
405 |
+ } |
|
271 | 406 |
} |
272 |
- if(!file.exists(file.path(outdir, "cnrmaResult.rda"))){ |
|
273 |
- message("Quantile normalizing the copy number probes") |
|
407 |
+ if(missing(intensityFile)) stop("must specify 'intensityFile'.") |
|
408 |
+ if(missing(crlmmFile)) stop("must specify 'crlmmFile'.") |
|
409 |
+ if(load.it){ |
|
410 |
+ if(!file.exists(crlmmFile)){ |
|
411 |
+ message("load.it is TRUE, but 'crlmmFile' does not exist. Rerunning the genotype calling algorithm") |
|
412 |
+ load.it <- FALSE |
|
413 |
+ } |
|
414 |
+ } |
|
415 |
+ if(!file.exists(outdir)) stop(outdir, " does not exist.") |
|
416 |
+ if(!isValidCdfName(cdfName, platform="affymetrix")) |
|
417 |
+ stop(cdfName, " is not a valid entry. See crlmm:::validCdfNames(platform='affymetrix')") |
|
418 |
+ if(!file.exists(crlmmFile) | !load.it){ |
|
419 |
+ crlmmResult <- crlmm(filenames=filenames, |
|
420 |
+ cdfName=cdfName, |
|
421 |
+ save.it=TRUE, |
|
422 |
+ load.it=load.it, |
|
423 |
+ intensityFile=intensityFile, ...) |
|
424 |
+ message("Quantile normalizing the copy number probes...") |
|
274 | 425 |
cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName) |
275 |
- if(save.it) save(cnrmaResult, file=file.path(outdir, "cnrmaResult.rda")) |
|
276 |
- } else{ |
|
277 |
- message("Loading cnrmaResult...") |
|
278 |
- load(file.path(outdir, "cnrmaResult.rda")) |
|
426 |
+ if(save.it){ |
|
427 |
+ message("Saving crlmmResult and cnrmaResult to", crlmmFile) |
|
428 |
+ save(crlmmResult, cnrmaResult, file=crlmmFile) |
|
429 |
+ } |
|
430 |
+ } else { |
|
431 |
+ message("Loading ", crlmmFile, "...") |
|
432 |
+ load(crlmmFile) |
|
433 |
+ crlmmResult <- get("crlmmResult") |
|
434 |
+ cnrmaResult <- get("cnrmaResult") |
|
279 | 435 |
} |
280 |
- load(file.path(outdir, "intensities.rda")) |
|
436 |
+ load(intensityFile) |
|
281 | 437 |
ABset <- combineIntensities(get("res"), cnrmaResult, cdfName) |
282 | 438 |
protocolData(ABset)[["ScanDate"]] <- as.character(celDates(filenames)) |
283 |
- crlmmResult <- harmonizeSnpSet(crlmmResult, ABset) |
|
439 |
+ crlmmResult <- harmonizeSnpSet(crlmmResult, ABset, cdfName) |
|
284 | 440 |
stopifnot(all.equal(dimnames(crlmmResult), dimnames(ABset))) |
285 |
- crlmmResults <- list(ABset, |
|
286 |
- crlmmResult) |
|
287 |
- crlmmResults <- as(crlmmResults, "CrlmmSetList") |
|
288 |
- |
|
441 |
+ crlmmSetList <- as(list(ABset, crlmmResult), "CrlmmSetList") |
|
289 | 442 |
if(splitByChr){ |
290 | 443 |
message("Saving by chromosome") |
291 |
- splitByChromosome(crlmmResults, cdfName=cdfName, outdir=outdir) |
|
292 |
- } else{ |
|
293 |
- message("Saving crlmmList object to ", outdir) |
|
294 |
- save(crlmmResults, file=file.path(outdir, "crlmmResults.rda")) |
|
295 |
- } |
|
444 |
+ splitByChromosome(crlmmSetList, cdfName=cdfName, outdir=dirname(crlmmFile)) |
|
445 |
+ } |
|
296 | 446 |
if(!save.it){ |
297 | 447 |
message("Cleaning up") |
298 |
- unlink(file.path(outdir, "intensities.rda")) |
|
448 |
+ unlink(intensityFile) |
|
299 | 449 |
} |
300 | 450 |
return() |
301 | 451 |
} |
... | ... |
@@ -461,24 +611,33 @@ instantiateObjects <- function(calls, conf, NP, plate, envir, |
461 | 611 |
envir[["normalNP"]] <- normalNP |
462 | 612 |
} |
463 | 613 |
|
614 |
+setMethod("update", "CrlmmSetList", function(object, ...){ |
|
615 |
+ computeCopynumber(object, ...) |
|
616 |
+}) |
|
617 |
+ |
|
464 | 618 |
computeCopynumber <- function(object, |
465 | 619 |
CHR, |
466 | 620 |
bias.adj=FALSE, |
467 | 621 |
batch, |
468 | 622 |
SNRmin=5, |
469 |
- cdfName="genomewidesnp6", ...){ |
|
623 |
+ cdfName, ...){ |
|
624 |
+ if(missing(cdfName)) |
|
625 |
+ cdfName <- annotation(object) |
|
626 |
+ if(!isValidCdfName(cdfName, platform="affymetrix")) stop(cdfName, " not supported.") |
|
470 | 627 |
if(ncol(object) < 10) |
471 | 628 |
stop("Must have at least 10 samples in each batch to estimate model parameters....preferably closer to 90 samples per batch") |
472 |
- |
|
473 | 629 |
##require(oligoClasses) |
474 | 630 |
if(missing(CHR)) stop("Must specify CHR") |
475 | 631 |
if(CHR == 24) stop("Nothing available yet for chromosome Y") |
476 |
- if(missing(batch)) stop("Must specify batch") |
|
632 |
+ if(missing(batch)) { |
|
633 |
+ message("'batch' missing. Assuming all samples were processed together in the same batch.") |
|
634 |
+ batch <- rep("A", ncol(object)) |
|
635 |
+ } |
|
477 | 636 |
if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples") |
478 | 637 |
##the AB intensities |
479 | 638 |
Nset <- object[[1]] |
480 | 639 |
##indices of polymorphic loci |
481 |
- index <- snpIndex(Nset) |
|
640 |
+ index <- snpIndex(Nset, cdfName) |
|
482 | 641 |
ABset <- Nset[index, ] |
483 | 642 |
##indices of nonpolymorphic loci |
484 | 643 |
NPset <- Nset[-index, ] |
... | ... |
@@ -624,13 +783,14 @@ cnIllumina <- function(object, |
624 | 783 |
|
625 | 784 |
updateNuPhi <- function(crlmmSetList, cnSet){ |
626 | 785 |
##TODO: remove the use of environments. |
786 |
+ cdfName <- annotation(crlmmSetList[[1]]) |
|
627 | 787 |
##repopulate the environment |
628 | 788 |
crlmmSetList <- crlmmSetList[, match(sampleNames(cnSet), sampleNames(crlmmSetList))] |
629 | 789 |
crlmmSetList <- crlmmSetList[match(featureNames(cnSet), featureNames(crlmmSetList)), ] |
630 | 790 |
##envir <- getCopyNumberEnvironment(crlmmSetList, cnSet) |
631 | 791 |
Nset <- crlmmSetList[[1]] |
632 | 792 |
##indices of polymorphic loci |
633 |
- index <- snpIndex(Nset) |
|
793 |
+ index <- snpIndex(Nset, cdfName) |
|
634 | 794 |
ABset <- Nset[index, ] |
635 | 795 |
##indices of nonpolymorphic loci |
636 | 796 |
NPset <- Nset[-index, ] |
... | ... |
@@ -1865,23 +2025,27 @@ getParams <- function(object, batch){ |
1865 | 2025 |
return(params) |
1866 | 2026 |
} |
1867 | 2027 |
|
1868 |
-computeEmission <- function(crlmmResults, copyNumberStates=0:5, MIN=2^3, |
|
2028 |
+computeEmission <- function(object, |
|
2029 |
+ copyNumberStates=0:5, |
|
2030 |
+ MIN=2^3, |
|
1869 | 2031 |
EMIT.THR, |
1870 | 2032 |
scaleSds=TRUE){ |
2033 |
+ cdfName <- annotation(object) |
|
1871 | 2034 |
##threshold small nu's and phis |
1872 |
- cnset <- thresholdModelParams(crlmmResults[[3]], MIN=MIN) |
|
2035 |
+ cnset <- thresholdModelParams(object[[3]], MIN=MIN) |
|
1873 | 2036 |
index <- order(chromosome(cnset), position(cnset)) |
1874 | 2037 |
|
1875 | 2038 |
if(any(diff(index) > 1)) stop("must be ordered by chromosome and physical position") |
1876 | 2039 |
emissionProbs <- array(NA, dim=c(nrow(cnset), ncol(cnset), length(copyNumberStates))) |
1877 |
- dimnames(emissionProbs) <- list(featureNames(crlmmResults), |
|
1878 |
- sampleNames(crlmmResults), |
|
2040 |
+ dimnames(emissionProbs) <- list(featureNames(object), |
|
2041 |
+ sampleNames(object), |
|
1879 | 2042 |
paste("copy.number_", copyNumberStates, sep="")) |
1880 | 2043 |
b <- batch(cnset) |
1881 | 2044 |
for(i in seq(along=unique(b))){ |
1882 | 2045 |
if(i == 1) cat("Computing emission probabilities \n") |
1883 | 2046 |
message("batch ", unique(b)[i], "...") |
1884 |
- emissionProbs[, b == unique(b)[i], ] <- .getEmission(crlmmResults, cnset, batch=unique(b)[i], |
|
2047 |
+ emissionProbs[, b == unique(b)[i], ] <- .getEmission(object, cnset, |
|
2048 |
+ batch=unique(b)[i], |
|
1885 | 2049 |
copyNumberStates=copyNumberStates, |
1886 | 2050 |
scaleSds=scaleSds) |
1887 | 2051 |
} |
... | ... |
@@ -1910,18 +2074,14 @@ thresholdModelParams <- function(object, MIN=2^3){ |
1910 | 2074 |
object |
1911 | 2075 |
} |
1912 | 2076 |
|
1913 |
-.getEmission <- function(crlmmResults, cnset, batch, copyNumberStates, scaleSds=TRUE){ |
|
2077 |
+.getEmission <- function(object, cnset, batch, copyNumberStates, scaleSds=TRUE){ |
|
2078 |
+ cdfName <- annotation(object) |
|
1914 | 2079 |
if(length(batch) > 1) stop("batch variable not unique") |
1915 |
- crlmmResults <- crlmmResults[, cnset$batch==batch] |
|
2080 |
+ object <- object[, cnset$batch==batch] |
|
1916 | 2081 |
cnset <- cnset[, cnset$batch == batch] |
1917 |
- |
|
1918 |
-## a <- A(crlmmResults) |
|
1919 |
-## b <- B(crlmmResults) |
|
1920 |
-## sds.a <- apply(log2(a), 2, sd, na.rm=TRUE) |
|
1921 |
-## sds.b <- apply(log2(b), 2, sd, na.rm=TRUE) |
|
1922 | 2082 |
if(scaleSds){ |
1923 |
- a <- CA(crlmmResults) |
|
1924 |
- b <- CB(crlmmResults) |
|
2083 |
+ a <- CA(object) |
|
2084 |
+ b <- CB(object) |
|
1925 | 2085 |
sds.a <- apply(a, 2, sd, na.rm=TRUE) |
1926 | 2086 |
sds.b <- apply(b, 2, sd, na.rm=TRUE) |
1927 | 2087 |
|
... | ... |
@@ -1932,14 +2092,9 @@ thresholdModelParams <- function(object, MIN=2^3){ |
1932 | 2092 |
sds.b <- matrix(sds.b, nrow(cnset), ncol(cnset), byrow=TRUE) |
1933 | 2093 |
|
1934 | 2094 |
} else sds.a <- sds.b <- matrix(0, nrow(cnset), ncol(cnset)) |
1935 |
-## ca <- CA(cnset) |
|
1936 |
-## sds.ca <- apply(ca, 2, sd, na.rm=T) |
|
1937 |
-## sds.ca <- sds.ca/median(sds.ca) |
|
1938 |
-## sds.scale <- sds/median(sds) #scale snp-specific variance by measure of the relative sample noise |
|
1939 |
- |
|
1940 |
- emissionProbs <- array(NA, dim=c(nrow(crlmmResults[[1]]), |
|
1941 |
- ncol(crlmmResults[[1]]), length(copyNumberStates))) |
|
1942 |
- snpset <- cnset[snpIndex(cnset), ] |
|
2095 |
+ emissionProbs <- array(NA, dim=c(nrow(object[[1]]), |
|
2096 |
+ ncol(object[[1]]), length(copyNumberStates))) |
|
2097 |
+ snpset <- cnset[snpIndex(cnset, annotation(cnset)), ] |
|
1943 | 2098 |
params <- getParams(snpset, batch=batch) |
1944 | 2099 |
##attach(params) |
1945 | 2100 |
corr <- params[["corr"]] |
... | ... |
@@ -1953,8 +2108,8 @@ thresholdModelParams <- function(object, MIN=2^3){ |
1953 | 2108 |
sig2B <- params[["sig2B"]] |
1954 | 2109 |
tau2A <- params[["tau2A"]] |
1955 | 2110 |
tau2B <- params[["tau2B"]] |
1956 |
- a <- as.numeric(log2(A(crlmmResults[snpIndex(crlmmResults), ]))) |
|
1957 |
- b <- as.numeric(log2(B(crlmmResults[snpIndex(crlmmResults), ]))) |
|
2111 |
+ a <- as.numeric(log2(A(object[snpIndex(object), ]))) |
|
2112 |
+ b <- as.numeric(log2(B(object[snpIndex(object), ]))) |
|
1958 | 2113 |
for(k in seq(along=copyNumberStates)){ |
1959 | 2114 |
##cat(k, " ") |
1960 | 2115 |
CN <- copyNumberStates[k] |
... | ... |
@@ -1972,8 +2127,8 @@ thresholdModelParams <- function(object, MIN=2^3){ |
1972 | 2127 |
|
1973 | 2128 |
sigmaA <- matrix(sigmaA, nrow=length(sigmaA), ncol=ncol(cnset), byrow=FALSE) |
1974 | 2129 |
sigmaB <- matrix(sigmaB, nrow=length(sigmaB), ncol=ncol(cnset), byrow=FALSE) |
1975 |
- sigmaA <- sigmaA+sds.a[snpIndex(crlmmResults), ] |
|
1976 |
- sigmaB <- sigmaB+sds.b[snpIndex(crlmmResults), ] |
|
2130 |
+ sigmaA <- sigmaA+sds.a[snpIndex(object), ] |
|
2131 |
+ sigmaB <- sigmaB+sds.b[snpIndex(object), ] |
|
1977 | 2132 |
|
1978 | 2133 |
##might want to allow the variance to be sample-specific |
1979 | 2134 |
##TODO: |
... | ... |
@@ -1995,21 +2150,20 @@ thresholdModelParams <- function(object, MIN=2^3){ |
1995 | 2150 |
## TODO: copy-neutral LOH would put near-zero mass on CA > 0, CB > 0 |
1996 | 2151 |
f.x.y <- f.x.y + matrix(1/(2*pi*sd.A*sd.B*sqrt(1-rho^2))*exp(-0.5*Q.x.y), nrow(snpset), ncol(snpset)) |
1997 | 2152 |
} |
1998 |
- emissionProbs[snpIndex(crlmmResults), , k] <- log(f.x.y) |
|
2153 |
+ emissionProbs[snpIndex(object), , k] <- log(f.x.y) |
|
1999 | 2154 |
} |
2000 | 2155 |
|
2001 |
- |
|
2002 | 2156 |
##************************************************** |
2003 | 2157 |
## |
2004 | 2158 |
##Emission probabilities for nonpolymorphic probes |
2005 | 2159 |
## |
2006 | 2160 |
##************************************************** |
2007 |
- cnset <- cnset[cnIndex(cnset), ] |
|
2161 |
+ cnset <- cnset[cnIndex(cnset, annotation(cnset)), ] |
|
2008 | 2162 |
params <- getParams(cnset, batch=batch) |
2009 | 2163 |
nuA <- params[["nuA"]] |
2010 | 2164 |
phiA <- params[["phiA"]] |
2011 | 2165 |
sig2A <- params[["sig2A"]] |
2012 |
- a <- as.numeric(log2(A(crlmmResults[cnIndex(crlmmResults), ]))) |
|
2166 |
+ a <- as.numeric(log2(A(object[cnIndex(object), ]))) |
|
2013 | 2167 |
for(k in seq(along=copyNumberStates)){ |
2014 | 2168 |
CT <- copyNumberStates[k] |
2015 | 2169 |
mus.matrix=matrix(log2(nuA + CT*phiA), nrow(cnset), ncol(cnset)) |
... | ... |
@@ -2017,10 +2171,10 @@ thresholdModelParams <- function(object, MIN=2^3){ |
2017 | 2171 |
##Again, should make sds sample-specific |
2018 | 2172 |
sds.matrix <- matrix(sqrt(sig2A), nrow(cnset), ncol(cnset)) |
2019 | 2173 |
|
2020 |
- sds.matrix <- sds.matrix + sds.a[cnIndex(crlmmResults), ] |
|
2174 |
+ sds.matrix <- sds.matrix + sds.a[cnIndex(object), ] |
|
2021 | 2175 |
sds <- as.numeric(sds.matrix) |
2022 | 2176 |
suppressWarnings(tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(cnset), ncol(cnset))) |
2023 |
- emissionProbs[cnIndex(crlmmResults), , k] <- log(tmp) |
|
2177 |
+ emissionProbs[cnIndex(object), , k] <- log(tmp) |
|
2024 | 2178 |
} |
2025 | 2179 |
emissionProbs |
2026 | 2180 |
} |
... | ... |
@@ -6,7 +6,6 @@ crlmm <- function(filenames, row.names=TRUE, col.names=TRUE, |
6 | 6 |
returnParams=FALSE, badSNP=.7){ |
7 | 7 |
if ((load.it | save.it) & missing(intensityFile)) |
8 | 8 |
stop("'intensityFile' is missing, and you chose either load.it or save.it") |
9 |
- |
|
10 | 9 |
if (missing(sns)) sns <- basename(filenames) |
11 | 10 |
if (!missing(intensityFile)) |
12 | 11 |
if (load.it & !file.exists(intensityFile)){ |
... | ... |
@@ -3,9 +3,15 @@ |
3 | 3 |
# or to use the optional 'Path' column in sampleSheet |
4 | 4 |
# - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet |
5 | 5 |
|
6 |
-readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
|
6 |
+readIdatFiles <- function(sampleSheet=NULL, |
|
7 |
+ arrayNames=NULL, |
|
8 |
+ ids=NULL, |
|
9 |
+ path=".", |
|
7 | 10 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
8 |
- highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) { |
|
11 |
+ highDensity=FALSE, |
|
12 |
+ sep="_", |
|
13 |
+ fileExt=list(green="Grn.idat", red="Red.idat"), |
|
14 |
+ saveDate=FALSE) { |
|
9 | 15 |
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
10 | 16 |
if(!is.null(arrayNames)){ |
11 | 17 |
##arrayNames=NULL |
... | ... |
@@ -27,6 +33,7 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
27 | 33 |
} |
28 | 34 |
} |
29 | 35 |
pd = new("AnnotatedDataFrame", data = sampleSheet) |
36 |
+ sampleNames(pd) <- arrayNames |
|
30 | 37 |
} |
31 | 38 |
if(is.null(arrayNames)) { |
32 | 39 |
arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
... | ... |
@@ -88,13 +95,18 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
88 | 95 |
colnames(tmpmat) = sampleSheet$Sample_ID |
89 | 96 |
}else colnames(tmpmat) = arrayNames |
90 | 97 |
RG <- new("NChannelSet", |
91 |
- R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat, |
|
92 |
- Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1], |
|
93 |
- phenoData=pd, storage.mode="environment") |
|
98 |
+ R=tmpmat, |
|
99 |
+ G=tmpmat, |
|
100 |
+ Rnb=tmpmat, |
|
101 |
+ Gnb=tmpmat, |
|
102 |
+ Rse=tmpmat, |
|
103 |
+ Gse=tmpmat, |
|
104 |
+ annotation=headerInfo$Manifest[1], |
|
105 |
+ phenoData=pd, |
|
106 |
+ storage.mode="environment") |
|
94 | 107 |
rm(tmpmat) |
95 | 108 |
gc() |
96 | 109 |
} |
97 |
- |
|
98 | 110 |
if(length(ids)==length(idsG)) { |
99 | 111 |
if(sum(ids==idsG)==nprobes) { |
100 | 112 |
RG@assayData$G[,i] = G$Quants[, "Mean"] |
... | ... |
@@ -394,6 +406,8 @@ readBPM <- function(bpmFile){ |
394 | 406 |
} |
395 | 407 |
|
396 | 408 |
|
409 |
+ |
|
410 |
+ |
|
397 | 411 |
RGtoXY = function(RG, chipType, verbose=TRUE) { |
398 | 412 |
chipList = c("human1mv1c", # 1M |
399 | 413 |
"human370v1c", # 370CNV |
... | ... |
@@ -402,11 +416,10 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
402 | 416 |
"human660quadv1a", # 660 quad |
403 | 417 |
"human370quadv3c", # 370CNV quad |
404 | 418 |
"human550v3b", # 550K |
405 |
- "human1mduov3b") # 1M Duo |
|
406 |
- if(missing(chipType)) |
|
407 |
- chipType = match.arg(annotation(RG), chipList) |
|
408 |
- else |
|
409 |
- chipType = match.arg(chipType, chipList) |
|
419 |
+ "human1mduov3b") # 1M Duo |
|
420 |
+ if(missing(chipType)){ |
|
421 |
+ chipType = match.arg(annotation(RG), chipList) |
|
422 |
+ } else chipType = match.arg(chipType, chipList) |
|
410 | 423 |
|
411 | 424 |
pkgname <- getCrlmmAnnotationName(chipType) |
412 | 425 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
... | ... |
@@ -558,8 +571,17 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
558 | 571 |
} |
559 | 572 |
|
560 | 573 |
|
561 |
-preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=0.1, verbose=TRUE, seed=1, |
|
562 |
- cdfName, sns, stripNorm=TRUE, useTarget=TRUE, save.it=FALSE, intensityFile) { |
|
574 |
+preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
|
575 |
+ fitMixture=TRUE, |
|
576 |
+ eps=0.1, |
|
577 |
+ verbose=TRUE, |
|
578 |
+ seed=1, |
|
579 |
+ cdfName, |
|
580 |
+ sns, |
|
581 |
+ stripNorm=TRUE, |
|
582 |
+ useTarget=TRUE, |
|
583 |
+ save.it=FALSE, |
|
584 |
+ intensityFile) { |
|
563 | 585 |
if(stripNorm) |
564 | 586 |
XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose) |
565 | 587 |
|
... | ... |
@@ -35,13 +35,18 @@ setMethod("[", "CrlmmSetList", function(x, i, j, ..., drop = FALSE){ |
35 | 35 |
##}) |
36 | 36 |
|
37 | 37 |
setMethod("A", "CrlmmSetList", function(object) A(object[[1]])) |
38 |
+ |
|
39 |
+setMethod("annotation", "CrlmmSetList", function(object) annotation(object[[1]])) |
|
40 |
+ |
|
38 | 41 |
setMethod("B", "CrlmmSetList", function(object) B(object[[1]])) |
39 | 42 |
|
40 | 43 |
setMethod("CA", "CrlmmSetList", function(object, ...) CA(object[[3]], ...)) |
41 | 44 |
setMethod("CB", "CrlmmSetList", function(object, ...) CB(object[[3]], ...)) |
42 | 45 |
|
43 | 46 |
setMethod("calls", "CrlmmSetList", function(object) calls(object[[2]])) |
44 |
-setMethod("cnIndex", "CrlmmSetList", function(object) match(cnNames(object[[1]]), featureNames(object))) |
|
47 |
+setMethod("cnIndex", "CrlmmSetList", function(object, ...) { |
|
48 |
+ match(cnNames(object[[1]], annotation(object)), featureNames(object)) |
|
49 |
+}) |
|
45 | 50 |
|
46 | 51 |
setMethod("copyNumber", "CrlmmSetList", function(object) copyNumber(object[[3]])) |
47 | 52 |
|
... | ... |
@@ -92,11 +97,23 @@ setMethod("points", signature(x="CrlmmSetList"), |
92 | 97 |
setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]])) |
93 | 98 |
|
94 | 99 |
setMethod("show", "CrlmmSetList", function(object){ |
95 |
- for(i in seq(along=object)) show(object[[i]]) |
|
100 |
+ ##for(i in seq(along=object)) show(object[[i]]) |
|
101 |
+ cat("\n Elements in CrlmmSetList object: \n") |
|
102 |
+ cat("\n") |
|
103 |
+ for(i in 1:length(object)){ |
|
104 |
+ cat("class: ", class(object[[i]]), "\n") |
|
105 |
+ cat("assayData elements: ", ls(assayData(object[[i]])), "\n") |
|
106 |
+ cat("Dimensions: ", dim(object[[i]])) |
|
107 |
+ cat("\n \n") |
|
108 |
+ } |
|
109 |
+## cat("\n") |
|
110 |
+## cat("Dimensions:\n") |
|
111 |
+## print(dims(object)) |
|
96 | 112 |
}) |
97 | 113 |
|
98 | 114 |
setMethod("chromosome", "CrlmmSetList", function(object) chromosome(object[[3]])) |
99 | 115 |
setMethod("position", "CrlmmSetList", function(object) position(object[[3]])) |
116 |
+setMethod("confs", "CrlmmSetList", function(object) confs(object[[2]])) |
|
100 | 117 |
|
101 | 118 |
|
102 | 119 |
setMethod(".harmonizeDimnames", "CrlmmSetList", function(object){ |
... | ... |
@@ -126,7 +143,9 @@ setMethod("$", "CrlmmSetList", function(x, name) { |
126 | 143 |
}) |
127 | 144 |
|
128 | 145 |
|
129 |
-setMethod("snpIndex", "CrlmmSetList", function(object) match(snpNames(object[[1]]), featureNames(object))) |
|
146 |
+setMethod("snpIndex", "CrlmmSetList", function(object, ...){ |
|
147 |
+ match(snpNames(object[[1]], annotation(object)), featureNames(object)) |
|
148 |
+}) |
|
130 | 149 |
setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir){ |
131 | 150 |
path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep="")) |
132 | 151 |
load(file.path(path, "snpProbes.rda")) |
... | ... |
@@ -139,8 +158,9 @@ setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir) |
139 | 158 |
cnps <- rownames(cnProbes)[cnProbes[, k] == CHR] |
140 | 159 |
index <- c(match(snps, featureNames(object)), |
141 | 160 |
match(cnps, featureNames(object))) |
142 |
- crlmmResults <- object[index, ] |
|
143 |
- save(crlmmResults, file=file.path(outdir, paste("crlmmResults_", CHR, ".rda", sep=""))) |
|
161 |
+ index <- index[!is.na(index)] |
|
162 |
+ crlmmSetList <- object[index, ] |
|
163 |
+ save(crlmmSetList, file=file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep=""))) |
|
144 | 164 |
} |
145 | 165 |
}) |
146 | 166 |
|
... | ... |
@@ -1,5 +1,20 @@ |
1 |
-setMethod("cnIndex", "eSet", function(object) match(cnNames(object), featureNames(object))) |
|
2 |
-setMethod("cnNames", "eSet", function(object) featureNames(object)[grep("CN_", featureNames(object))]) |
|
1 |
+setMethod("cnIndex", "eSet", function(object, cdfName) match(cnNames(object, cdfName), featureNames(object))) |
|
2 |
+setMethod("cnNames", "eSet", function(object, cdfName) { |
|
3 |
+ path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep="")) |
|
4 |
+ load(file.path(path, "cnProbes.rda")) |
|
5 |
+ cnps <- rownames(cnProbes) |
|
6 |
+ cnps <- cnps[cnps %in% featureNames(object)] |
|
7 |
+ featureNames(object)[match(cnps, featureNames(object))] |
|
8 |
+ ##featureNames(object)[grep("CN_", featureNames(object))]) |
|
9 |
+ }) |
|
10 |
+setMethod("snpIndex", "eSet", function(object, cdfName) match(snpNames(object, cdfName), featureNames(object))) |
|
11 |
+setMethod("snpNames", "eSet", function(object, cdfName){ |
|
12 |
+ path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep="")) |
|
13 |
+ load(file.path(path, "snpProbes.rda")) |
|
14 |
+ snps <- rownames(snpProbes) |
|
15 |
+ snps <- snps[snps %in% featureNames(object)] |
|
16 |
+ featureNames(object)[match(snps, featureNames(object))] |
|
17 |
+ }) |
|
3 | 18 |
##setMethod("combine", signature=signature(x="eSet", y="eSet"), |
4 | 19 |
## function(x, y, ...){ |
5 | 20 |
## ##Check that both x and y are valid objects |
... | ... |
@@ -25,7 +40,6 @@ setMethod("cnNames", "eSet", function(object) featureNames(object)[grep("CN_", f |
25 | 40 |
## phenoData(x) <- pd |
26 | 41 |
## x |
27 | 42 |
## }) |
28 |
-setMethod("snpIndex", "eSet", function(object) match(snpNames(object), featureNames(object))) |
|
29 |
-setMethod("snpNames", "eSet", function(object) featureNames(object)[grep("SNP_", featureNames(object))]) |
|
43 |
+ |
|
30 | 44 |
|
31 | 45 |
|
... | ... |
@@ -25,74 +25,235 @@ options(continue=" ") |
25 | 25 |
options(prompt="R> ") |
26 | 26 |
@ |
27 | 27 |
|
28 |
-\section{Estimating copy number} |
|
28 |
+%\section{Estimating copy number} |
|
29 | 29 |
|
30 |
-At present, software for copy number estimation is provided only for the |
|
31 |
-Affymetrix 6.0 platform. This vignette estimates copy number for the |
|
32 |
-HapMap samples. |
|
30 |
+%At present, software for copy number estimation is provided only for the |
|
31 |
+%Affymetrix 6.0 platform. |
|
33 | 32 |
|
34 |
-\subsection{Preprocess and genotype the samples} |
|
33 |
+\begin{abstract} |
|
34 |
+ This vignette estimates copy number for HapMap samples on the |
|
35 |
+ Affymetrix 6.0 platform. |
|
36 |
+\end{abstract} |
|
35 | 37 |
|
36 |
-We preprocess and genotype the samples as described in the CRLMM |
|
37 |
-vignette. |
|
38 |
+\section{Simple usage} |
|
39 |
+ |
|
40 |
+The following packages are required: |
|
38 | 41 |
|
39 | 42 |
<<requiredPackages>>= |
40 | 43 |
library(crlmm) |
41 | 44 |
library(genomewidesnp6Crlmm) |
42 |
-library(Biobase) |
|
43 | 45 |
@ |
46 |
+\paragraph{Preprocess and genotype.} |
|
44 | 47 |
|
45 |
-Specify the complete path for the CEL files and a directory in which to |
|
46 |
-store intermediate files: |
|
47 |
- |
|
48 |
+Specify the coordinates of Affymetrix cel files and where to put |
|
49 |
+intermediate files generated during the course of preprocessing / copy |
|
50 |
+number estimation. |
|
48 | 51 |
|
49 | 52 |
<<celfiles>>= |
50 |
-celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", |
|
51 |
- full.names=TRUE, pattern=".CEL") |
|
53 |
+myPath <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m" |
|
54 |
+celFiles <- list.celfiles(myPath, full.names=TRUE, pattern=".CEL") |
|
52 | 55 |
outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy" |
53 | 56 |
@ |
54 | 57 |
|
55 |
-Preprocess and genotype (for more info see the crlmm vignette): |
|
58 |
+\noindent Preprocess and genotype (for more info see the crlmm vignette): |
|
56 | 59 |
|
57 | 60 |
<<preprocessAndGenotype>>= |
58 |
-crlmmFilenames <- file.path(outdir, paste("crlmmResults_", 1:24, ".rda", sep="")) |
|
59 |
-if(!all(file.exists(crlmmFilenames))){ |
|
60 |
- message("Preprocessing and crlmm genotyping. Results are written to", outdir, "...") |
|
61 |
- intensityFile <- file.path(outdir, "intensities.rda") |
|
62 |
- crlmmWrapper(celFiles, outdir, save.it=TRUE, intensityFile=intensityFile) |
|
63 |
-} |
|
61 |
+crlmmWrapper(celFiles[1:15], |
|
62 |
+ save.it=TRUE, |
|
63 |
+ load.it=TRUE, |
|
64 |
+ intensityFile=file.path(outdir, "normalizedIntensities.rda"), |
|
65 |
+ crlmmFile=file.path(outdir, "snpsetObject.rda")) |
|
64 | 66 |
@ |
65 | 67 |
|
66 |
-Load results for chromosome 22. |
|
68 |
+As a result of the above wrapper, the following R objects are now created: |
|
69 |
+ |
|
70 |
+<<>>= |
|
71 |
+list.files(outdir)[grep("crlmmSetList", list.files(outdir))] |
|
72 |
+@ |
|
73 |
+ |
|
74 |
+\paragraph{Locus- and allele-specific estimates of copy number.} |
|
75 |
+ |
|
76 |
+Load the object for chromosome 22 and compute copy number: |
|
67 | 77 |
|
68 | 78 |
<<crlmmList-show>>= |
69 | 79 |
CHR <- 22 |
70 |
-if(!exists("crlmmResults")) load(file.path(outdir, paste("crlmmResults_", CHR, ".rda", sep=""))) |
|
71 |
-class(crlmmResults) |
|
72 |
-show(crlmmResults) |
|
80 |
+if(!exists("crlmmSetList")) load(file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep=""))) |
|
81 |
+show(crlmmSetList) |
|
82 |
+if(length(crlmmSetList)==2){ |
|
83 |
+ crlmmSetList <- update(crlmmSetList, CHR=CHR) |
|
84 |
+} |
|
85 |
+show(crlmmSetList) |
|
73 | 86 |
@ |
74 | 87 |
|
75 |
-\subsection{Copy number} |
|
88 |
+See the help file for \Robject{computeCopynumber} for arguments to |
|
89 |
+\Robject{update}. Provided that the assumption of integer copy number |
|
90 |
+is reasonable, one can fit a hidden Markov model to the locus-level |
|
91 |
+estimates of copy number and uncertainty. |
|
76 | 92 |
|
77 |
-We require 6 items for copy number estimation: |
|
93 |
+\paragraph{A hidden Markov model.} |
|
78 | 94 |
|
79 |
-\begin{itemize} |
|
80 |
- \item quantile-normalized A intensities (I1 x J) |
|
81 |
- \item quantile-normalized B intensities (I1 x J) |
|
82 |
- \item quantile-normalized intensities from nonpolymorphic (NP) probes (I2 x J) |
|
83 |
- \item genotype calls (I1 x J) |
|
84 |
- \item confidence scores of the genotype calls (I1 x J) |
|
85 |
- \item signal to noise ratio (SNR) of the samples (J) |
|
86 |
- \end{itemize} |
|
87 |
- |
|
88 |
- The above items are contained in the \Robject{crlmmResults} object. A |
|
89 |
- histogram of the signal to noise ratio for the HapMap samples: |
|
95 |
+Emission probabilities for the hidden markov model can be computed from |
|
96 |
+the copy number prediction regions based on bivariate normal scatter |
|
97 |
+plots of the log A versus log B intensities. (By contrast, a |
|
98 |
+nonparamemtric segmentation would be applied to intensities on the scale |
|
99 |
+of copy number.) |
|
90 | 100 |
|
91 |
-<<plotSnr, fig=TRUE>>= |
|
92 |
-hist(crlmmResults[[2]]$SNR, xlab="SNR", main="") |
|
101 |
+<<emissionProbabilities>>= |
|
102 |
+library(VanillaICE) |
|
103 |
+copyNumberStates <- 0:5 |
|
104 |
+if(!exists("emission.cn")){ |
|
105 |
+ emission.cn <- suppressWarnings(crlmm:::computeEmission(crlmmSetList, copyNumberStates)) |
|
106 |
+ dim(emission.cn) |
|
107 |
+} |
|
93 | 108 |
@ |
94 | 109 |
|
95 |
-<<setSNRmin, echo=FALSE>>= |
|
110 |
+*Warning: more can be done than currently implemented to protect against |
|
111 |
+outliers. In addition, improved estimates of uncertainty for the copy |
|
112 |
+number prediction regions will also help.* |
|
113 |
+ |
|
114 |
+Initial state probabilities and transition probabilities for the HMM: |
|
115 |
+ |
|
116 |
+<<probs>>= |
|
117 |
+initialP <- rep(1/length(copyNumberStates), length(copyNumberStates)) |
|
118 |
+tau <- transitionProbability(chromosome=chromosome(crlmmSetList), |
|
119 |
+ position=position(crlmmSetList), |
|
120 |
+ TAUP=1e8) |
|
121 |
+@ |
|
122 |
+ |
|
123 |
+The viterbi algorithm is used to identify the sequence of states that |
|
124 |
+maximizes the likelihood: |
|
125 |
+ |
|
126 |
+<<hmm>>= |
|
127 |
+if(!exists("hmmPredictions")){ |
|
128 |
+hmmPredictions <- viterbi(emission=emission.cn, |
|
129 |
+ initialStateProbs=log(initialP), |
|
130 |
+ tau=tau[, "transitionPr"], |
|
131 |
+ arm=tau[, "arm"], |
|
132 |
+ normalIndex=3, |
|
133 |
+ normal2altered=0.01, |
|
134 |
+ altered2normal=1, |
|
135 |
+ altered2altered=0.001) |
|
136 |
+} |
|
137 |
+table(as.integer(hmmPredictions)-1) |
|
138 |
+brks <- breaks(x=hmmPredictions, states=copyNumberStates, |
|
139 |
+ position=tau[, "position"], |
|
140 |
+ chromosome=tau[, "chromosome"]) |
|
141 |
+str(brks) |
|
142 |
+@ |
|
143 |
+ |
|
144 |
+\paragraph{Circular binary segmentation} |
|
145 |
+TO DO. |
|
146 |
+ |
|
147 |
+\section{Accessors} |
|
148 |
+ |
|
149 |
+\subsection{Assay data accessors} |
|
150 |
+ |
|
151 |
+\paragraph{\Robject{ABset}: quantile normalized intensities} |
|
152 |
+ |
|
153 |
+An object of class \Robject{ABset} is stored in the first element of the |
|
154 |
+\Robject{crlmmSetList} object. The following accessors may be of use: |
|
155 |
+ |
|
156 |
+Accessors for the quantile normalized intensities for the A allele: |
|
157 |
+ |
|
158 |
+<<accessors>>= |
|
159 |
+a <- A(crlmmSetList) |
|
160 |
+dim(a) |
|
161 |
+@ |
|
162 |
+ |
|
163 |
+The quantile normalized intensities for the nonpolymorphic probes are |
|
164 |
+also stored in the 'A' assay data element. To retrieve the quantile |
|
165 |
+normalized intensities for the A allele only at polymorphic loci: |
|
166 |
+ |
|
167 |
+<<A.polymorphic>>= |
|
168 |
+a.snps <- A(crlmmSetList[snpIndex(crlmmSetList), ]) |
|
169 |
+dim(a.snps) |
|
170 |
+@ |
|
171 |
+ |
|
172 |
+\noindent For the nonpolymorphic loci: |
|
173 |
+<<A.nonpolymorphic>>= |
|
174 |
+a.nps <- A(crlmmSetList[cnIndex(crlmmSetList), ]) |
|
175 |
+dim(a.nps) |
|
176 |
+@ |
|
177 |
+ |
|
178 |
+Quantile normalized intensities for the B allele at polymorphic loci: |
|
179 |
+ |
|
180 |
+<<B.polymorphic>>= |
|
181 |
+b.snps <- B(crlmmSetList[snpIndex(crlmmSetList), ]) |
|
182 |
+@ |
|
183 |
+ |
|
184 |
+Note that NAs are recorded in the 'B' assay data element for |
|
185 |
+nonpolymorphic loci: |
|
186 |
+ |
|
187 |
+<<B.NAs>>= |
|
188 |
+all(is.na(B(crlmmSetList[cnIndex(crlmmSetList), ]))) |
|
189 |
+@ |
|
190 |
+ |
|
191 |
+\paragraph{\Robject{SnpSet}: Genotype calls and confidence scores} |
|
192 |
+ |
|
193 |
+Genotype calls: |
|
194 |
+<<genotypes>>= |
|
195 |
+genotypes <- calls(crlmmSetList) |
|
196 |
+@ |
|
197 |
+Confidence scores of the genotype calls: |
|
198 |
+<<confidenceScores>>= |
|
199 |
+genotypeConf <- confs(crlmmSetList) |
|
200 |
+@ |
|
201 |
+ |
|
202 |
+\paragraph{\Robject{CopyNumberSet}: allele-specific copy number} |
|
203 |
+ |
|
204 |
+Allele-specific copy number at polymorphic loci: |
|
205 |
+<<ca>>= |
|
206 |
+ca <- CA(crlmmSetList[snpIndex(crlmmSetList), ]) |
|
207 |
+@ |
|
208 |
+ |
|
209 |
+Total copy number at nonpolymorphic loci: |
|
210 |
+<<ca>>= |
|
211 |
+cn.nonpolymorphic <- CA(crlmmSetList[cnIndex(crlmmSetList), ]) |
|
212 |
+range(cn.nonpolymorphic, na.rm=TRUE) |
|
213 |
+median(cn.nonpolymorphic, na.rm=TRUE) |
|
214 |
+@ |
|
215 |
+ |
|
216 |
+Total copy number at both polymorphic and nonpolymorphic loci: |
|
217 |
+<<totalCopynumber>>= |
|
218 |
+cn <- copyNumber(crlmmSetList) |
|
219 |
+@ |
|
220 |
+ |
|
221 |
+\subsection{Other accessors} |
|
222 |
+ |
|
223 |
+After running \Robject{update} on the \Robject{crlmmSetList} object, |
|
224 |
+information on physical position and chromosome can be accessed by the |
|
225 |
+following accessors: |
|
226 |
+ |
|
227 |
+<<positionChromosome, eval=FALSE>>= |
|
228 |
+xx <- position(crlmmSetList) |
|
229 |
+yy <- chromosome(crlmmSetList) |
|
230 |
+@ |
|
231 |
+ |
|
232 |
+There are many parameters computed during copy number estimation that |
|
233 |
+are at present stored in the \Robject{featureData} slot of the |
|
234 |
+\Robject{CopyNumberSet} element. TO DO: Accessors for these parameters, |
|
235 |
+as well as better containers for storing these parameters. See |
|
236 |
+ |
|
237 |
+<<copynumberParameters>>= |
|
238 |
+fvarLabels(crlmmSetList[[3]]) |
|
239 |
+@ |
|
240 |
+ |
|
241 |
+ |
|
242 |
+\section{Suggested visualizations} |
|
243 |
+ |
|
244 |
+A histogram of the signal to noise ratio for the HapMap samples: |
|
245 |
+ |
|
246 |
+<<plotSnr, fig=TRUE, include=FALSE>>= |
|
247 |
+hist(crlmmSetList[[2]]$SNR, xlab="SNR", main="") |
|
248 |
+@ |
|
249 |
+ |
|
250 |
+\begin{figure} |
|
251 |
+ \centering |
|
252 |
+ \includegraphics[width=0.6\textwidth]{copynumber-plotSnr} |
|
253 |
+ \caption{Signal to noise ratios for the HapMap samples.} |
|
254 |
+\end{figure} |
|
255 |
+ |
|
256 |
+<<snrmin, eval=TRUE, echo=FALSE>>= |
|
96 | 257 |
SNRmin <- 5 |
97 | 258 |
@ |
98 | 259 |
|
... | ... |
@@ -102,7 +263,7 @@ quantile-normalized intensities, we suggest adjusting for date or |
102 | 263 |
chemistry plate. Ideally, one would have 70+ files in a given |
103 | 264 |
batch. Here we make a table of date versus ancestry: |
104 | 265 |
|
105 |
-<<specifyBatch>>= |
|
266 |
+<<specifyBatch, eval=FALSE, echo=FALSE>>= |
|
106 | 267 |
sns <- sampleNames(crlmmResults) |
107 | 268 |
sns[1] |
108 | 269 |
plate <- substr(basename(sns), 13, 13) |
... | ... |
@@ -118,14 +279,6 @@ not the case, we illustrate how one may adjust for batch using the |
118 | 279 |
chemistry plate as an argument for \Robject{batch} in the |
119 | 280 |
\Robject{computeCopynumber} function. |
120 | 281 |
|
121 |
-<<computeCopynumber>>= |
|
122 |
-if(!exists("crlmmResults2")){ |
|
123 |
- crlmmResults2 <- crlmmResults |
|
124 |
- crlmmResults <- computeCopynumber(crlmmResults, SNRmin=5, batch=plate, CHR=CHR, cdfName="genomewidesnp6") |
|
125 |
- protocolData(crlmmResults[[3]])[["ScanDate"]] <- protocolData(crlmmResults[[3]])[["ScanDate"]] |
|
126 |
-} |
|
127 |
-@ |
|
128 |
- |
|
129 | 282 |
**Note: the number of samples in the \Robject{CrlmmSetList} object after |
130 | 283 |
copy number estimation may be fewer than the number of samples in the |
131 | 284 |
\Robject{CrlmmSetList} object after preprocessing/genotyping. This |
... | ... |
@@ -137,20 +290,6 @@ The object returned by \Robject{computeCopynumber} is ordered by |
137 | 290 |
chromosome and physical position (useful for downstream methods that |
138 | 291 |
smooth the copy number as a function of the physical position). ** |
139 | 292 |
|
140 |
-<<dimnamesMayDiffer>>= |
|
141 |
-identical(featureNames(crlmmResults), featureNames(crlmmResults2)) ##not identical |
|
142 |
-identical(sampleNames(crlmmResults), sampleNames(crlmmResults2)) |
|
143 |
-gc() |
|
144 |
-@ |
|
145 |
- |
|
146 |
- |
|
147 |
-<<example.cnset, eval=FALSE, echo=FALSE>>= |
|
148 |
-example.crlmmResults <- crlmmResults[1:1000, 1:100] |
|
149 |
-##example.cnset <- cnset[1:1000, 1:100] |
|
150 |
-##save(example.cnset, file="~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda") |
|
151 |
-save(example.crlmmResults, file="~/madman/Rpacks/crlmm/inst/scripts/example.crlmmResults.rda") |
|
152 |
-@ |
|
153 |
- |
|
154 | 293 |
The above algorithm for estimating copy number is predicated on the |
155 | 294 |
assumption that most samples within a batch have copy number 2 at any |
156 | 295 |
given locus. For common copy number variants, this assumption may not |
... | ... |
@@ -159,13 +298,63 @@ additional robustness to this assumption. Set the \Robject{bias.adj} |
159 | 298 |
argument to \Robject{TRUE}: |
160 | 299 |
|
161 | 300 |
<<biasAdjustment, eval=FALSE>>= |
162 |
-crlmmResults[[3]] <- computeCopynumber(crlmmResults, SNRmin=5, batch=plate, CHR=CHR, |
|
163 |
- cdfName="genomewidesnp6", bias.adj=TRUE) |
|
301 |
+update(crlmmSetList, CHR=22, bias.adj=TRUE) |
|
302 |
+@ |
|
303 |
+ |
|
304 |
+Calculate the frequency of amplifications and deletions at each locus. |
|
305 |
+<<hmm_hapmap, fig=TRUE, include=FALSE, width=8, height=7>>= |
|
306 |
+require(SNPchip) |
|
307 |
+library(RColorBrewer) |
|
308 |
+numberUp <- rowSums(hmmPredictions > 3, na.rm=TRUE) |
|
309 |
+numberDown <- -rowSums(hmmPredictions < 3, na.rm=TRUE) |
|
310 |
+poly.cols <- brewer.pal(7, "Accent") |
|
311 |
+alt.brks <- brks[brks[, "state"] != "copy.number_2", ] |
|
312 |
+op <- par(ask=FALSE) |
|
313 |
+ylim <- c(min(numberDown)-5, max(numberUp)+5) |
|
314 |
+xlim <- c(10*1e6, max(position(crlmmSetList))) |
|
315 |
+plot(position(crlmmSetList), rep(0, nrow(crlmmSetList[[1]])), |
|
316 |
+ type="n", xlab="Physical position (Mb)", |
|
317 |
+ ylim=ylim, |
|
318 |
+ xlim=xlim, |
|
319 |
+ ylab="frequency", main="Chr 22", |
|
320 |
+ xaxt="n", |
|
321 |
+ xaxs="r") |
|
322 |
+axis(1, at=pretty(xlim), labels=pretty(xlim)/1e6) |
|
323 |
+polygon(x=c(position(crlmmSetList), rev(position(crlmmSetList))), |
|
324 |
+ y=c(rep(0, nrow(crlmmSetList[[1]])), rev(numberUp)), |
|
325 |
+ col=poly.cols[3], border=poly.cols[3]) |
|
326 |
+polygon(x=c(position(crlmmSetList), rev(position(crlmmSetList))), |
|
327 |
+ y=c(rep(0, nrow(crlmmSetList[[1]])), rev(numberDown)), |
|
328 |
+ col=poly.cols[5], border=poly.cols[5]) |
|
329 |
+##plotCytoband(22, xlim=xlim, new=FALSE, |
|
330 |
+## label.cytoband=FALSE, |
|
331 |
+## cytoband.ycoords=c(-10, -8), xaxs="r") |
|
332 |
+ |
|
333 |
+medLength <- round(median(alt.brks[, "nbases"]), 2) |
|
334 |
+medMarkers <- median(alt.brks[, "nprobes"]) |
|
335 |
+sdMarkers <- round(mad(alt.brks[, "nprobes"]), 2) |
|
336 |
+sdsLength <- round(mad(alt.brks[, "nbases"]), 2) |
|
337 |
+legend("topright", |
|
338 |
+ bty="n", |
|
339 |
+ legend=c(paste("median length:", medLength, "(bp)"), |
|
340 |
+ paste("MAD length:", sdsLength, "(bp)"), |
|
341 |
+ paste("median # markers:", medMarkers), |
|
342 |
+ paste("MAD # markers:", sdMarkers)), |
|
343 |
+ cex=0.8, ncol=2) |
|
344 |
+legend("topleft", |
|
345 |
+ fill=poly.cols[c(3, 5)], |
|
346 |
+ legend=c("amplifications", "deletions"), bty="n") |
|
347 |
+par(op) |
|
348 |
+gc() |
|
164 | 349 |
@ |
165 | 350 |
|
166 |
-\section{Suggested plots} |
|
351 |
+\begin{figure} |
|
352 |
+ \centering |
|
353 |
+ \includegraphics[width=\textwidth]{copynumber-hmm_hapmap} |
|
354 |
+ \caption{} |
|
355 |
+\end{figure} |
|
167 | 356 |
|
168 |
-\paragraph{One sample at a time} |
|
357 |
+\paragraph{One sample at a time: locus-level estimates} |
|
169 | 358 |
|
170 | 359 |
Plot physical position versus copy number for the first sample. Recall |
171 | 360 |
that the copy number estimates were multiplied by 100 and stored as an |
... | ... |
@@ -173,12 +362,12 @@ integer. |
173 | 362 |
|
174 | 363 |
<<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>= |
175 | 364 |
par(las=1) |
176 |
-plot(position(crlmmResults), copyNumber(crlmmResults)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), |
|
365 |
+plot(position(crlmmSetList), copyNumber(crlmmSetList)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), |
|
177 | 366 |
ylab="copy number", xlab="physical position (Mb)", |
178 |
- main=paste(sampleNames(crlmmResults)[1], ", CHR:", unique(chromosome(crlmmResults)))) |
|
179 |
-points(position(crlmmResults)[cnIndex(crlmmResults)], copyNumber(crlmmResults)[cnIndex(crlmmResults), 1], |
|
367 |
+ main=paste(sampleNames(crlmmSetList)[1], ", CHR:", unique(chromosome(crlmmSetList)))) |
|
368 |
+points(position(crlmmSetList)[cnIndex(crlmmSetList)], copyNumber(crlmmSetList)[cnIndex(crlmmSetList), 1], |
|
180 | 369 |
pch=".", cex=2, col="lightblue") |
181 |
-axis(1, at=pretty(range(position(crlmmResults))), labels=pretty(range(position(crlmmResults)))/1e6) |
|
370 |
+axis(1, at=pretty(range(position(crlmmSetList))), labels=pretty(range(position(crlmmSetList)))/1e6) |
|
182 | 371 |
@ |
183 | 372 |
|
184 | 373 |
<<idiogram, eval=FALSE, echo=FALSE>>= |
... | ... |
@@ -189,7 +378,8 @@ plotCytoband(22, new=FALSE, cytoband.ycoords=c(5.8, 6.0), label.cytoband=FALSE) |
189 | 378 |
\begin{figure} |
190 | 379 |
\includegraphics[width=0.9\textwidth]{copynumber-oneSample} |
191 | 380 |
\caption{Total copy number (y-axis) for chromosome 22 plotted |
192 |
- against physical position (x-axis) for one sample. } |
|
381 |
+ against physical position (x-axis) for one sample. Estimates at |
|
382 |
+ nonpolymorphic loci are plotted in light blue.} |
|
193 | 383 |
\end{figure} |
194 | 384 |
|
195 | 385 |
\paragraph{One SNP at a time} |
... | ... |
@@ -206,12 +396,12 @@ colors <- c("red", "blue", "green3") |
206 | 396 |
cex <- 0.6 |
207 | 397 |
par(mfrow=c(3,3), las=1, pty="s", ask=FALSE, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1)) |
208 | 398 |
##plot 9 at a time |
209 |
-indices <- split(snpIndex(crlmmResults), rep(1:length(snpIndex(crlmmResults)), each=9, length.out=length(snpIndex(crlmmResults)))) |
|
210 |
-par(ask=TRUE) |
|
399 |
+indices <- split(snpIndex(crlmmSetList), rep(1:length(snpIndex(crlmmSetList)), each=9, length.out=length(snpIndex(crlmmSetList)))) |
|
400 |
+par(ask=FALSE) |
|
211 | 401 |
j <- 1 |
212 | 402 |
for(i in indices[[j]]){ |
213 |
- gt <- calls(crlmmResults)[i, ] |
|
214 |
- plot(crlmmResults[i, ], |
|
403 |
+ gt <- calls(crlmmSetList)[i, ] |
|
404 |
+ plot(crlmmSetList[i, ], |
|
215 | 405 |
pch=pch, |
216 | 406 |
col=colors[gt], |
217 | 407 |
bg=colors[gt], cex=cex, |
... | ... |
@@ -221,10 +411,11 @@ for(i in indices[[j]]){ |
221 | 411 |
} |
222 | 412 |
@ |
223 | 413 |
|
224 |
-<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE>>= |
|
414 |
+<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>= |
|
225 | 415 |
require(RColorBrewer) |
416 |
+library(ellipse) |
|
226 | 417 |
greens <- brewer.pal(9, "Greens") |
227 |
-J <- split(1:ncol(crlmmResults), batch(crlmmResults)) |
|
418 |
+J <- split(1:ncol(crlmmSetList), batch(crlmmSetList)) |
|
228 | 419 |
colors <- c("red", "blue", "green3") |
229 | 420 |
cex <- 0.6 |
230 | 421 |
colors <- c("blue", greens[8], "red") |
... | ... |
@@ -232,35 +423,36 @@ pch.col <- c("grey40", "black", "grey40") |
232 | 423 |
xlim <- ylim <- c(6.5,13) |
233 | 424 |
plotpoints <- FALSE |
234 | 425 |
lwd <- 2 |
235 |
-pdf("figures/snp22plots%02d.pdf", width=600, height=600) |
|
426 |
+##pdf("figures/snp22plots%02d.pdf", width=600, height=600) |
|
236 | 427 |
ask <- FALSE |
237 | 428 |
par(mfrow=c(3,3), las=1, pty="s", ask=ask, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1)) |
238 |
-indices <- split(snpIndex(crlmmResults), rep(1:length(snpIndex(crlmmResults)), each=9, length.out=length(snpIndex(crlmmResults)))) |
|
239 |
-for(j in seq(along=indices)[1:10]){ |
|
429 |
+indices <- split(snpIndex(crlmmSetList), rep(1:length(snpIndex(crlmmSetList)), each=9, length.out=length(snpIndex(crlmmSetList)))) |
|
430 |
+##for(j in seq(along=indices)[1:10]){ |
|
431 |
+j <- 1 |
|
240 | 432 |
cat(j, "\n") |
241 | 433 |
k <- 1 |
242 | 434 |
for(i in indices[[j]]){ |
243 |
- gt <- calls(crlmmResults)[i, ] |
|
435 |
+ gt <- calls(crlmmSetList)[i, ] |
|
244 | 436 |
pch <- as.character(gt) |
245 | 437 |
cex <- 0.9 |
246 |
- plot(crlmmResults[i, ], |
|
438 |
+ plot(crlmmSetList[i, ], |
|
247 | 439 |
pch=pch, |
248 | 440 |
col=pch.col[gt], |
249 | 441 |
cex=cex, |
250 | 442 |
xlim=xlim, ylim=ylim, |
251 | 443 |
type="n") |
252 | 444 |
if(plotpoints){ |
253 |
- for(b in seq(along=unique(batch(crlmmResults)))){ |
|
254 |
- points(crlmmResults[i, J[[b]]], |
|
445 |
+ for(b in seq(along=unique(batch(crlmmSetList)))){ |
|
446 |
+ points(crlmmSetList[i, J[[b]]], |
|
255 | 447 |
pch=pch, |
256 | 448 |
col=colors[b], bg=colors[b], cex=cex, |
257 | 449 |
xlim=xlim, ylim=ylim) |
258 | 450 |
} |
259 | 451 |
} |
260 |
- for(b in seq(along=unique(batch(crlmmResults)))){ |
|
261 |
- ellipse(crlmmResults[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd) |
|
452 |
+ for(b in seq(along=unique(batch(crlmmSetList)))){ |
|
453 |
+ ellipse(crlmmSetList[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd) |
|
262 | 454 |
} |
263 |
- ##legend("bottomright", bty="n", legend=featureNames(crlmmResults)[i]) |
|
455 |
+ ##legend("bottomright", bty="n", legend=featureNames(crlmmSetList)[i]) |
|
264 | 456 |
if(k == 1) { |
265 | 457 |
legend("bottomleft", bty="n", fill=colors, legend=c("CEPH", "Yoruba", "Asian")) |
266 | 458 |
mtext("A", 1, outer=TRUE, line=1) |
... | ... |
@@ -268,76 +460,17 @@ for(j in seq(along=indices)[1:10]){ |
268 | 460 |
} |
269 | 461 |
k <- k+1 |
270 | 462 |
} |
271 |
-} |
|
272 |
-dev.off() |
|
273 | 463 |
@ |
274 | 464 |
|
465 |
+%\begin{figure} |
|
466 |
+% \includegraphics[width=0.9\textwidth]{copynumber-predictionRegion} |
|
467 |
+% \caption{Prediction regions for copy number 2 for one SNP. The |
|
468 |
+% plate effects are negligible for this SNP and we only plot the |
|
469 |
+% prediction region for the first plate.} |
|
470 |
+%\end{figure} |
|
275 | 471 |
|
276 |
- |
|
277 |
-\begin{figure} |
|
278 |
- \includegraphics[width=0.9\textwidth]{copynumber-predictionRegion} |
|
279 |
- \caption{Prediction regions for copy number 2 for one SNP. The |
|
280 |
- plate effects are negligible for this SNP and we only plot the |
|
281 |
- prediction region for the first plate.} |
|
282 |
-\end{figure} |
|
283 |
- |
|
284 |
- |
|
285 |
-\section{Smoothing via a hidden Markov model} |
|
286 |
- |
|
287 |
-Here we smooth via a hidden Markov model. To facilitate comparisons |
|
288 |
-with the Birdseye HMM, the same transition probabilities are specified. |
|
289 |
- |
|
290 |
-<<segment>>= |
|
291 |
-library(VanillaICE) |
|
292 |
-copyNumberStates <- 0:5 |
|
293 |
-##crlmmResults <- crlmm:::thresholdModelParams(crlmmResults) |
|
294 |
-if(!exists("fit")){ |
|
295 |
- if(file.exists(file.path(outdir, paste("fit_", CHR, ".rda", sep="")))) load(file.path(outdir, paste("fit_", CHR, ".rda", sep=""))) |
|
296 |
- else { |
|
297 |
- emission.cn <- crlmm:::computeEmission(crlmmResults, copyNumberStates) |
|
298 |
- tau <- transitionProbability(chromosome=chromosome(crlmmResults), |
|
299 |
- position=position(crlmmResults), |
|
300 |
- TAUP=1e8) |
|
301 |
- initialP <- rep(1/length(copyNumberStates), length(copyNumberStates)) |
|
302 |
- fit <- viterbi(initialStateProbs=log(initialP), |
|
303 |
- emission=emission.cn, |
|
304 |
- tau=tau[, "transitionPr"], |
|
305 |
- arm=tau[, "arm"], |
|
306 |
- normalIndex=3, |
|
307 |
- normal2altered=0.005, |
|
308 |
- altered2normal=0.5, |
|
309 |
- altered2altered=0.0025) |
|
310 |
- brks <- breaks(x=fit, states=copyNumberStates, position=tau[, "position"], |
|
311 |
- chromosome=tau[, "chromosome"]) |
|
312 |
- } |
|
313 |
-} |
|
314 |
-@ |
|
315 |
- |
|
316 |
-<<visualizations, eval=FALSE>>= |
|
317 |
-dir.create("figures") |
|
318 |
-par(las=1, mfrow=c(1,1), ask=FALSE) |
|
319 |
-bmp("figures/chr22plots%02d.png", width=800, height=500) |
|
320 |
-for(j in 1:ncol(crlmmResults)){ |
|
321 |
- cat(j, " ") |
|
322 |
- ##A region that we would miss if we only looked at the polymorphic probes...is it real? |
|
323 |
- ##index <- which(position(crlmmResults) > 21.2*1e6 & position(crlmmResults) < 21.6*1e6 & mns < 1) |
|
324 |
- ##fit <- fit[index,] |
|
325 |
- index <- 1:nrow(fit) |
|
326 |
- plot(0:1, type="n", xlim=range(position(crlmmResults)[index]), |
|
327 |
- xaxt="n", ylim=c(-0.5, 6), |
|
328 |
- ylab="copy number", xlab="physical position (Mb)", |
|
329 |
- main=paste(sampleNames(crlmmResults)[j], ", CHR:", unique(chromosome(crlmmResults))), |
|
330 |
- lwd=2, |
|
331 |
- col="blue") |
|
332 |
- points(position(crlmmResults)[index], copyNumber(crlmmResults)[index, j], pch=".", cex=2, col="grey60") |
|
333 |
- lines(position(crlmmResults)[index], fit[index, j]-1, type="s", lwd=2, col="blue") |
|
334 |
- abline(h=0:4, col="grey60") |
|
335 |
- axis(1, at=pretty(range(position(crlmmResults)[index])), labels=pretty(range(position(crlmmResults)[index]))/1e6) |
|
336 |
-} |
|
337 |
-dev.off() |
|
338 |
-@ |
|
339 |
- |
|
340 |
- |
|
472 |
+\section{Details for the copy number estimation procedure} |
|
473 |
+TO DO. |
|
341 | 474 |
|
342 | 475 |
\section{Session information} |
343 | 476 |
<<sessionInfo, results=tex>>= |
345 | 478 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,57 @@ |
1 |
+\name{crlmmIlluminaWrapper} |
|
2 |
+\Rdversion{1.1} |
|
3 |
+\alias{crlmmIlluminaWrapper} |
|
4 |
+\title{ |
|
5 |
+ Reads idat files, quantile normalizes the raw intensities, and |
|
6 |
+ provides diallelic genotype calls at polymorphic loci. |
|
7 |
+} |
|
8 |
+\description{ |
|
9 |
+ This function is a wrapper for utilities used to preprocess and |
|
10 |
+ genotype files from the illumina platform. See Section see also. |
|
11 |
+} |
|
12 |
+\usage{ |
|
13 |
+crlmmIlluminaWrapper(outdir = "./", cdfName, save.intermediate = FALSE, splitByChr = TRUE, intensityFile, ...) |
|
14 |
+} |
|
15 |
+\arguments{ |
|
16 |
+ \item{outdir}{Character string. The complete path to the directory |
|
17 |
+ where output will be saved.} |
|
18 |
+ \item{cdfName}{Character string. Annotation package. See |
|
19 |
+ crlmm:::validCdfNames() for supported packages. |
|
20 |
+ } |
|
21 |
+ \item{save.intermediate}{Logical. Whether to save intermediate |
|
22 |
+ files. This is used primarily for debugging. |
|
23 |
+ } |
|
24 |
+ \item{splitByChr}{Logical. Whether to split output files by |
|
25 |
+ chromosome. This is recommended for datasets with 50 or more samples. |
|
26 |
+ } |
|
27 |
+ \item{intensityFile}{Character string. Name of intermediate file for storing |
|
28 |
+ quantile normalized intensities. This is always saved regardless of |
|
29 |
+ the value of save.intermediate. |
|
30 |
+ } |
|
31 |
+ \item{\dots}{Additional arguments to readIdatFiles. |
|
32 |
+ } |
|
33 |
+} |
|
34 |
+\details{ |
|
35 |
+} |
|
36 |
+\value{ |
|
37 |
+ An object of class \code{CrlmmSetList}. This object will have the |
|
38 |
+ following two elements: |
|
39 |
+ |
|
40 |
+ - an object of class \code{ABset} |
|
41 |
+ - an object of class \code{SnpSet} |
|
42 |
+ |
|
43 |
+} |
|
44 |
+\author{ |
|
45 |
+ Rob Scharpf |
|
46 |
+} |
|
47 |
+\note{ |
|
48 |
+} |
|
49 |
+\seealso{ |
|
50 |
+ \code{\linkS4class{CrlmmSetList}} |
|
51 |
+ \code{\linkS4class{ABset}} |
|
52 |
+ \code{\link[Biobase]{SnpSet}} |
|
53 |
+} |
|
54 |
+\examples{ |
|
55 |
+} |
|
56 |
+\keyword{manip} |
|
57 |
+ |