Browse code

several updates to vignette, documentation, and copy number for illumina.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@41171 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 13/08/2009 19:47:13
Showing 14 changed files

... ...
@@ -256,3 +256,11 @@ is decoded and scanned
256 256
 2009-08-04 R. Scharpf - committed version 1.3.17
257 257
 
258 258
 * changed readIdatFiles function to check whether arrayNames is NULL
259
+
260
+2009-08-13 R. Scharpf - committed version 1.3.18
261
+
262
+* move Biobase to Depends
263
+* removed GGdata from suggests (problem with loading illuminaHumanv1.db)
264
+* overhaul of vignette
265
+* added update() method for copy number
266
+* added documentation and error checks for crlmmWrapper
... ...
@@ -1,15 +1,15 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.17
4
+Version: 1.3.18
5 5
 Date: 2009-07-22
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0.
9 9
 License: Artistic-2.0
10
-Depends: methods
11
-Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase (>= 2.5.5), mvtnorm, oligoClasses, ellipse, methods
12
-Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), GGdata, snpMatrix, VanillaICE (>= 1.7.8)
10
+Depends: methods, Biobase (>= 2.5.5)
11
+Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, oligoClasses, ellipse, methods
12
+Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, VanillaICE (>= 1.7.8), GGdata
13 13
 Collate: AllClasses.R
14 14
 	 AllGenerics.R
15 15
 	 methods-ABset.R
... ...
@@ -30,6 +30,8 @@ importFrom(Biobase, assayDataElement, assayDataElementNames,
30 30
 importFrom(graphics, abline, axis, layout, legend, mtext, par, plot,
31 31
            polygon, rect, segments, text, points)
32 32
 
33
+importFrom(stats, update)
34
+
33 35
 importFrom(grDevices, grey)
34 36
 
35 37
 importFrom(affyio, read.celfile.header, read.celfile)
... ...
@@ -74,6 +76,7 @@ export(batch,
74 76
        copyNumber,
75 77
        crlmm,
76 78
        crlmmIllumina,
79
+       crlmmIlluminaWrapper,
77 80
        crlmmWrapper,
78 81
 ##       ellipse,
79 82
 ##       computeEmission,
... ...
@@ -85,7 +88,7 @@ export(batch,
85 88
        sampleNames,
86 89
        protocolData,
87 90
        "protocolData<-",
88
-       snprma)
91
+       snprma, update)
89 92
 
90 93
 
91 94
 
... ...
@@ -8,12 +8,12 @@ setGeneric("CB", function(object, ...) standardGeneric("CB"))
8 8
 setGeneric("CA<-", function(object, value) standardGeneric("CA<-"))
9 9
 setGeneric("CB<-", function(object, value) standardGeneric("CB<-"))
10 10
 ##setGeneric("chromosome", function(object) standardGeneric("chromosome"))
11
-setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
12
-setGeneric("cnNames", function(object) standardGeneric("cnNames"))
11
+setGeneric("cnIndex", function(object, ...) standardGeneric("cnIndex"))
12
+setGeneric("cnNames", function(object, ...) standardGeneric("cnNames"))
13 13
 ##setGeneric("copyNumber", function(object) standardGeneric("copyNumber"))
14 14
 ##setGeneric("position", function(object) standardGeneric("position"))
15 15
 setGeneric(".harmonizeDimnames", function(object) standardGeneric(".harmonizeDimnames"))
16
-setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
17
-setGeneric("snpNames", function(object) standardGeneric("snpNames"))
16
+setGeneric("snpIndex", function(object, ...) standardGeneric("snpIndex"))
17
+setGeneric("snpNames", function(object, ...) standardGeneric("snpNames"))
18 18
 setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
19 19
 
... ...
@@ -177,9 +177,9 @@ combineIntensities <- function(res, cnrmaResult, cdfName){
177 177
 	return(ABset)
178 178
 }
179 179
 
180
-harmonizeSnpSet <- function(crlmmResult, ABset){
181
-	blank <- matrix(NA, length(cnNames(ABset)), ncol(ABset))
182
-	rownames(blank) <- cnNames(ABset)
180
+harmonizeSnpSet <- function(crlmmResult, ABset, cdfName){
181
+	blank <- matrix(NA, length(cnNames(ABset, cdfName)), ncol(ABset))
182
+	rownames(blank) <- cnNames(ABset, cdfName)
183 183
 	colnames(blank) <- sampleNames(ABset)
184 184
 	crlmmCalls <- rbind(calls(crlmmResult), blank)
185 185
 	crlmmConf <- rbind(confs(crlmmResult), blank)
... ...
@@ -215,33 +215,56 @@ harmonizeDimnamesTo <- function(object1, object2){
215 215
 	return(object1)
216 216
 }
217 217
 
218
-crlmmIlluminaWrapper <- function(sampleSheet, outdir="./", cdfName,
218
+crlmmIlluminaWrapper <- function(outdir="./",
219
+				 cdfName,
219 220
 				 save.intermediate=FALSE,
220
-				 splitByChr=TRUE,...){
221
-	if(file.exists(file.path(outdir, "RG.rda"))) load(file.path(outdir, "RG.rda"))
222
-	else {
223
-		RG <- readIdatFiles(sampleSheet=get("samplesheet5"),
224
-                                    arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"),
225
-                                    saveDate=TRUE, path=get("path"))
226
-		J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG))
227
-		RG <- RG[, -J]
221
+				 splitByChr=TRUE,
222
+				 intensityFile,
223
+				 ...){  ##additional arguments to readIdatFiles
224
+	stopifnot(basename(intensityFile) == "res.rda")
225
+	if(!file.exists(outdir)) stop(outdir, " does not exist.")
226
+	if(!isValidCdfName(cdfName, platform="illumina")) stop(cdfName, " not supported.")
227
+	if(file.exists(file.path(outdir, "RG.rda"))) {
228
+		message("Loading RG.rda...")
229
+		load(file.path(outdir, "RG.rda"))
230
+	} else {
231
+		message("Reading Idat files...")		
232
+		RG <- readIdatFiles(...)
233
+		##J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG))
234
+		##RG <- RG[, -J]
228 235
 		if(save.intermediate) save(RG, file=file.path(outdir, "RG.rda"))  ##935M for 91 samples...better not to save this
229 236
 	}	
230
-	if(!file.exists(file.path(outdir, "res.rda"))){
231
-		crlmmOut <- crlmmIllumina(RG=RG, cdfName=cdfName,
232
-                                          sns=pData(RG)$ID,
237
+	if(!file.exists(intensityFile)){
238
+		message("Quantile normalization / genotyping...")				
239
+		crlmmOut <- crlmmIllumina(RG=RG,
240
+					  cdfName=cdfName,
241
+                                          sns=sampleNames(RG),
233 242
                                           returnParams=TRUE,
234 243
                                           save.it=TRUE,
235
-                                          intensityFile=file.path(outdir, "res.rda"))
236
-		if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))				
244
+                                          intensityFile=intensityFile)
245
+		if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))
246
+	} else{
247
+		load(file.path(outdir, "crlmmOut.rda"))
248
+	}
249
+	message("Loading ", intensityFile, "...")		
250
+	load(intensityFile)
251
+	if(exists("cnAB")){
252
+		np.A <- as.integer(cnAB$A)
253
+		np.B <- as.integer(cnAB$B)
254
+		np <- ifelse(np.A > np.B, np.A, np.B)
255
+		np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
256
+		rownames(np) <- cnAB$gns
257
+		colnames(np) <- cnAB$sns
258
+		cnAB$NP <- np
259
+	}
260
+	sampleNames(crlmmOut) <- res$sns	
261
+	if(exists("cnAB")){
262
+		ABset <- combineIntensities(get("res"), cnAB, cdfName=cdfName)
237 263
 	} else{
238
-		message("Loading...")		
239
-		load(file.path(outdir, "res.rda"))
240
-		load(file.path(outdir, "crlmmOut.rda"))		
264
+		ABset <- combineIntensities(get("res"), NULL, cdfName=cdfName)
241 265
 	}
242
-	ABset <- combineIntensities(get("res"), NULL, cdfName=cdfName)
243
-	protocolData(ABset)[["ScanDate"]] <- as.character(pData(RG)$ScanDate)
244
-	crlmmResult <- harmonizeSnpSet(crlmmOut, ABset)
266
+	##protocolData(ABset)[["ScanDate"]] <- as.character(pData(RG)$ScanDate)
267
+	crlmmResult <- harmonizeSnpSet(crlmmOut, ABset, cdfName)
245 268
 	stopifnot(all.equal(dimnames(crlmmOut), dimnames(ABset)))
246 269
 	crlmmList <- list(ABset,
247 270
 			  crlmmResult)
... ...
@@ -255,47 +278,174 @@ crlmmIlluminaWrapper <- function(sampleSheet, outdir="./", cdfName,
255 278
 	}
256 279
 	message("CrlmmSetList objects saved to ", outdir)
257 280
 }
281
+
282
+##quantileNormalize1m <- function(cel.files,
283
+##				outdir,
284
+##				pkgname,
285
+##				reference=TRUE,
286
+##				createCels=TRUE,
287
+##				verbose=TRUE,
288
+##				computeCopyNumberReference=FALSE,
289
+##				normalizeNonpolymorphic=FALSE){
290
+##	if(computeCopyNumberReference){
291
+##		stopifnot(file.exists("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"))
292
+##	} else{
293
+##		load(system.file("1m_reference_cn.rda", package="CN"))
294
+##	}
295
+##	conn <- db(get(pkgname))
296
+##	tmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, allele, featureSet.chrom, featureSet.physical_pos",
297
+##				      "FROM pmfeature, featureSet",
298
+##				      "WHERE pmfeature.fsetid = featureSet.fsetid"))
299
+##	tmp[is.na(tmp$chrom), "chrom"] <- 0
300
+##	tmp[is.na(tmp$physical_pos), "physical_pos"] <- 0
301
+##	tmp <- tmp[order(tmp$chrom, tmp$physical_pos, tmp$man_fsetid, tmp$allele),]
302
+##
303
+##	if(reference){
304
+##		load(system.file("extdata", paste(pkgname, "Ref.rda", sep=""), package=pkgname))
305
+##	} else{
306
+##		reference <- normalize.quantiles.determine.target(readCelIntensities(celFiles,
307
+##										     indices=tmp$fid))
308
+##		if (verbose) message("normalization vector created")
309
+##	}
310
+##	reference <- sort(reference)
311
+##	message("creating empty cel files")
312
+##	out.celFiles <- file.path(outdir, paste("QN-", basename(cel.files), sep=""))
313
+##	if(createCels){
314
+##		hh <- readCelHeader(cel.files[1])
315
+##		message(paste("creating empty cel files in", outdir))
316
+##		out.celFiles <- sapply(out.celFiles, function(x) suppressWarnings(createCel(x, header=hh, overwrite=TRUE)))
317
+##	}
318
+##
319
+##	message("Quantile normalizing SNP probes to hapmap target distribution")
320
+##	for(i in 1:length(cel.files)){
321
+##		if(i%%10==0) cat(".")
322
+##		pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i],
323
+##									 indices=tmp$fid),
324
+##						      reference, copy=FALSE)
325
+##		updateCel(out.celFiles[i], indices=tmp$fid, intensities=as.integer(pms))	  
326
+##	}
327
+##
328
+####	---------------------------------------------------------------------------
329
+####	 copy number probes
330
+####	---------------------------------------------------------------------------
331
+##	if(normalizeNonpolymorphic){
332
+##		cntmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, featureSetCNV.chrom, featureSetCNV.chrom_start",
333
+##						"FROM pmfeatureCNV, featureSetCNV",
334
+##						"WHERE pmfeatureCNV.fsetid = featureSetCNV.fsetid"))
335
+##		cntmp[is.na(cntmp$chrom), "chrom"] <- 0
336
+##		cntmp[is.na(cntmp$chrom_start), "chrom_start"] <- 0
337
+##		cntmp <- cntmp[order(cntmp$chrom, cntmp$chrom_start, cntmp$man_fsetid), ]
338
+##
339
+##		if(computeCopyNumberReference){
340
+##			message("computing reference distribution for copy number probes.  May take a while...")
341
+##			celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full.names=TRUE)
342
+##			reference <- computeCopyNumberReference(cel.files=celFiles, fid=cntmp$fid)
343
+##			message("finished computing CN reference distribution.")
344
+##		} 
345
+##		message("Quantile normalizing copy number probes to hapmap target distribution")
346
+##		for(i in 1:length(cel.files)){
347
+##			if(i%%10==0) cat(".")
348
+##			pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i],
349
+##										 indices=cntmp$fid),
350
+##							      reference, copy=FALSE)
351
+##			updateCel(out.celFiles[i], indices=cntmp$fid, intensities=as.integer(pms))	  
352
+##		}
353
+##	} else {
354
+##		message("Not quantile normalizing the nonpolymorphic probes")
355
+##	}
356
+##}
357
+
358
+validCdfNames <- function(platform){
359
+	if(missing(platform)) stop("missing platform")
360
+	if(!platform %in% c("illumina", "affymetrix"))
361
+		stop("only illumina and affymetrix platforms are supported.")
362
+	if(platform=="illumina"){
363
+		chipList = c("human1mv1c",             # 1M
364
+		"human370v1c",            # 370CNV
365
+		"human650v3a",            # 650Y
366
+		"human610quadv1b",        # 610 quad
367
+		"human660quadv1a",        # 660 quad
368
+		"human370quadv3c",        # 370CNV quad
369
+		"human550v3b",            # 550K
370
+		"human1mduov3b")          # 1M Duo
371
+	}
372
+	if(platform=="affymetrix"){
373
+		chipList=c("genomewidesnp6")
374
+	}
375
+	return(chipList)
376
+}
377
+
378
+isValidCdfName <- function(cdfName, platform){
379
+	chipList <- validCdfNames(platform)
380
+	if(!(cdfName %in% chipList)){
381
+		warning("cdfName must be one of the following: ",
382
+			chipList)
383
+	}
384
+	result <- cdfName %in% chipList
385
+	return(result)
386
+}
258 387
 	
259 388
 	
260 389
 	
261
-crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6",
390
+crlmmWrapper <- function(filenames,
391
+			 cdfName="genomewidesnp6",
392
+			 load.it=FALSE,
262 393
 			 save.it=FALSE,
263
-			 splitByChr=TRUE, ...){
264
-	##no visible binding for res
265
-	if(!file.exists(file.path(outdir, "crlmmResult.rda"))){
266
-		crlmmResult <- crlmm(filenames=filenames, cdfName=cdfName, save.it=TRUE, ...)
267
-		if(save.it) save(crlmmResult, file=file.path(outdir, "crlmmResult.rda"))
268
-	} else {
269
-		message("Loading crlmmResult...")
270
-		load(file.path(outdir, "crlmmResult.rda"))
394
+			 splitByChr=TRUE,
395
+			 crlmmFile,
396
+			 intensityFile, ...){
397
+	outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep=""))
398
+	if(load.it & all(file.exists(outfiles))){
399
+		load(outfiles[1])
400
+		crlmmSetList <- get("crlmmSetList")
401
+		if(!all(sampleNames(crlmmSetList) == basename(filenames))){
402
+			stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))")
403
+		} else{
404
+			return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...")
405
+		}
271 406
 	}
272
-	if(!file.exists(file.path(outdir, "cnrmaResult.rda"))){
273
-		message("Quantile normalizing the copy number probes")		
407
+	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
408
+	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
409
+	if(load.it){
410
+		if(!file.exists(crlmmFile)){
411
+			message("load.it is TRUE, but 'crlmmFile' does not exist.  Rerunning the genotype calling algorithm") 
412
+			load.it <- FALSE
413
+		}
414
+	}
415
+	if(!file.exists(outdir)) stop(outdir, " does not exist.")
416
+	if(!isValidCdfName(cdfName, platform="affymetrix"))
417
+		stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform='affymetrix')")
418
+	if(!file.exists(crlmmFile) | !load.it){
419
+		crlmmResult <- crlmm(filenames=filenames,
420
+				     cdfName=cdfName,
421
+				     save.it=TRUE,
422
+				     load.it=load.it,
423
+				     intensityFile=intensityFile, ...)
424
+		message("Quantile normalizing the copy number probes...")		
274 425
 		cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName)
275
-		if(save.it) save(cnrmaResult, file=file.path(outdir, "cnrmaResult.rda"))
276
-	} else{
277
-		message("Loading cnrmaResult...")		
278
-		load(file.path(outdir, "cnrmaResult.rda"))
426
+		if(save.it){
427
+			message("Saving crlmmResult and cnrmaResult to", crlmmFile)
428
+			save(crlmmResult, cnrmaResult, file=crlmmFile)
429
+		}
430
+	} else {
431
+		message("Loading ", crlmmFile, "...")
432
+		load(crlmmFile)
433
+		crlmmResult <- get("crlmmResult")
434
+		cnrmaResult <- get("cnrmaResult")
279 435
 	}
280
-	load(file.path(outdir, "intensities.rda"))
436
+	load(intensityFile)
281 437
 	ABset <- combineIntensities(get("res"), cnrmaResult, cdfName)
282 438
 	protocolData(ABset)[["ScanDate"]] <- as.character(celDates(filenames))	
283
-	crlmmResult <- harmonizeSnpSet(crlmmResult, ABset)
439
+	crlmmResult <- harmonizeSnpSet(crlmmResult, ABset, cdfName)
284 440
 	stopifnot(all.equal(dimnames(crlmmResult), dimnames(ABset)))
285
-	crlmmResults <- list(ABset,
286
-			     crlmmResult)
287
-	crlmmResults <- as(crlmmResults, "CrlmmSetList")
288
-	
441
+	crlmmSetList <- as(list(ABset, crlmmResult), "CrlmmSetList")
289 442
 	if(splitByChr){
290 443
 		message("Saving by chromosome")
291
-		splitByChromosome(crlmmResults, cdfName=cdfName, outdir=outdir)
292
-	} else{
293
-		message("Saving crlmmList object to ", outdir)
294
-		save(crlmmResults, file=file.path(outdir, "crlmmResults.rda"))
295
-	}
444
+		splitByChromosome(crlmmSetList, cdfName=cdfName, outdir=dirname(crlmmFile))
445
+	} 
296 446
 	if(!save.it){
297 447
 		message("Cleaning up")
298
-		unlink(file.path(outdir, "intensities.rda"))
448
+		unlink(intensityFile)
299 449
 	}
300 450
 	return()
301 451
 }
... ...
@@ -461,24 +611,33 @@ instantiateObjects <- function(calls, conf, NP, plate, envir,
461 611
 	envir[["normalNP"]] <- normalNP
462 612
 }
463 613
 
614
+setMethod("update", "CrlmmSetList", function(object, ...){
615
+	computeCopynumber(object, ...)
616
+})
617
+
464 618
 computeCopynumber <- function(object,
465 619
 			      CHR,
466 620
 			      bias.adj=FALSE,
467 621
 			      batch,
468 622
 			      SNRmin=5,
469
-			      cdfName="genomewidesnp6", ...){
623
+			      cdfName, ...){
624
+	if(missing(cdfName))
625
+		cdfName <- annotation(object)
626
+	if(!isValidCdfName(cdfName, platform="affymetrix")) stop(cdfName, " not supported.")	
470 627
 	if(ncol(object) < 10)
471 628
 		stop("Must have at least 10 samples in each batch to estimate model parameters....preferably closer to 90 samples per batch")
472
-
473 629
 	##require(oligoClasses)
474 630
 	if(missing(CHR)) stop("Must specify CHR")
475 631
 	if(CHR == 24) stop("Nothing available yet for chromosome Y")
476
-	if(missing(batch)) stop("Must specify batch")
632
+	if(missing(batch)) {
633
+		message("'batch' missing.  Assuming all samples were processed together in the same batch.")
634
+		batch <- rep("A", ncol(object))
635
+	}
477 636
 	if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples")
478 637
 	##the AB intensities
479 638
 	Nset <- object[[1]]
480 639
 	##indices of polymorphic loci
481
-	index <- snpIndex(Nset)
640
+	index <- snpIndex(Nset, cdfName)
482 641
 	ABset <- Nset[index, ]
483 642
 	##indices of nonpolymorphic loci
484 643
 	NPset <- Nset[-index, ]
... ...
@@ -624,13 +783,14 @@ cnIllumina <- function(object,
624 783
 
625 784
 updateNuPhi <- function(crlmmSetList, cnSet){
626 785
 	##TODO: remove the use of environments.
786
+	cdfName <- annotation(crlmmSetList[[1]])
627 787
 	##repopulate the environment
628 788
 	crlmmSetList <- crlmmSetList[, match(sampleNames(cnSet), sampleNames(crlmmSetList))]
629 789
 	crlmmSetList <- crlmmSetList[match(featureNames(cnSet), featureNames(crlmmSetList)), ]
630 790
 	##envir <- getCopyNumberEnvironment(crlmmSetList, cnSet)
631 791
 	Nset <- crlmmSetList[[1]]
632 792
 	##indices of polymorphic loci
633
-	index <- snpIndex(Nset)
793
+	index <- snpIndex(Nset, cdfName)
634 794
 	ABset <- Nset[index, ]
635 795
 	##indices of nonpolymorphic loci
636 796
 	NPset <- Nset[-index, ]
... ...
@@ -1865,23 +2025,27 @@ getParams <- function(object, batch){
1865 2025
 	return(params)
1866 2026
 }
1867 2027
 
1868
-computeEmission <- function(crlmmResults, copyNumberStates=0:5, MIN=2^3,
2028
+computeEmission <- function(object,
2029
+			    copyNumberStates=0:5,
2030
+			    MIN=2^3,
1869 2031
 			    EMIT.THR,
1870 2032
 			    scaleSds=TRUE){
2033
+	cdfName <- annotation(object)
1871 2034
 	##threshold small nu's and phis
1872
-	cnset <- thresholdModelParams(crlmmResults[[3]], MIN=MIN)
2035
+	cnset <- thresholdModelParams(object[[3]], MIN=MIN)
1873 2036
 	index <- order(chromosome(cnset), position(cnset))
1874 2037
 	
1875 2038
 	if(any(diff(index) > 1)) stop("must be ordered by chromosome and physical position")
1876 2039
 	emissionProbs <- array(NA, dim=c(nrow(cnset), ncol(cnset), length(copyNumberStates)))
1877
-	dimnames(emissionProbs) <- list(featureNames(crlmmResults),
1878
-					sampleNames(crlmmResults),
2040
+	dimnames(emissionProbs) <- list(featureNames(object),
2041
+					sampleNames(object),
1879 2042
 					paste("copy.number_", copyNumberStates, sep=""))	
1880 2043
 	b <- batch(cnset)
1881 2044
 	for(i in seq(along=unique(b))){
1882 2045
 		if(i == 1) cat("Computing emission probabilities \n")
1883 2046
 		message("batch ", unique(b)[i], "...")
1884
-		emissionProbs[, b == unique(b)[i], ] <- .getEmission(crlmmResults, cnset, batch=unique(b)[i],
2047
+		emissionProbs[, b == unique(b)[i], ] <- .getEmission(object, cnset,
2048
+				batch=unique(b)[i],
1885 2049
 				copyNumberStates=copyNumberStates,
1886 2050
 				scaleSds=scaleSds)
1887 2051
 	}
... ...
@@ -1910,18 +2074,14 @@ thresholdModelParams <- function(object, MIN=2^3){
1910 2074
 	object
1911 2075
 }
1912 2076
 
1913
-.getEmission <- function(crlmmResults, cnset, batch, copyNumberStates, scaleSds=TRUE){
2077
+.getEmission <- function(object, cnset, batch, copyNumberStates, scaleSds=TRUE){
2078
+	cdfName <- annotation(object)
1914 2079
 	if(length(batch) > 1) stop("batch variable not unique")
1915
-	crlmmResults <- crlmmResults[, cnset$batch==batch]
2080
+	object <- object[, cnset$batch==batch]
1916 2081
 	cnset <- cnset[, cnset$batch == batch]
1917
-
1918
-##	a <- A(crlmmResults)
1919
-##	b <- B(crlmmResults)	
1920
-##	sds.a <- apply(log2(a), 2, sd, na.rm=TRUE)
1921
-##	sds.b <- apply(log2(b), 2, sd, na.rm=TRUE)
1922 2082
 	if(scaleSds){
1923
-		a <- CA(crlmmResults)
1924
-		b <- CB(crlmmResults)
2083
+		a <- CA(object)
2084
+		b <- CB(object)
1925 2085
 		sds.a <- apply(a, 2, sd, na.rm=TRUE)
1926 2086
 		sds.b <- apply(b, 2, sd, na.rm=TRUE)	
1927 2087
 	
... ...
@@ -1932,14 +2092,9 @@ thresholdModelParams <- function(object, MIN=2^3){
1932 2092
 		sds.b <- matrix(sds.b, nrow(cnset), ncol(cnset), byrow=TRUE)
1933 2093
 
1934 2094
 	} else sds.a <- sds.b <- matrix(0, nrow(cnset), ncol(cnset))
1935
-##	ca <- CA(cnset)
1936
-##	sds.ca <- apply(ca, 2, sd, na.rm=T)
1937
-##	sds.ca <- sds.ca/median(sds.ca)
1938
-##	sds.scale <- sds/median(sds)  #scale snp-specific variance by measure of the relative sample noise
1939
-	
1940
-	emissionProbs <- array(NA, dim=c(nrow(crlmmResults[[1]]),
1941
-				   ncol(crlmmResults[[1]]), length(copyNumberStates)))
1942
-	snpset <- cnset[snpIndex(cnset), ]
2095
+	emissionProbs <- array(NA, dim=c(nrow(object[[1]]),
2096
+				   ncol(object[[1]]), length(copyNumberStates)))
2097
+	snpset <- cnset[snpIndex(cnset, annotation(cnset)), ]
1943 2098
 	params <- getParams(snpset, batch=batch)
1944 2099
 	##attach(params)
1945 2100
 	corr <- params[["corr"]]
... ...
@@ -1953,8 +2108,8 @@ thresholdModelParams <- function(object, MIN=2^3){
1953 2108
 	sig2B <- params[["sig2B"]]
1954 2109
 	tau2A <- params[["tau2A"]]
1955 2110
 	tau2B <- params[["tau2B"]]
1956
-	a <- as.numeric(log2(A(crlmmResults[snpIndex(crlmmResults), ])))
1957
-	b <- as.numeric(log2(B(crlmmResults[snpIndex(crlmmResults), ])))
2111
+	a <- as.numeric(log2(A(object[snpIndex(object), ])))
2112
+	b <- as.numeric(log2(B(object[snpIndex(object), ])))
1958 2113
 	for(k in seq(along=copyNumberStates)){
1959 2114
 		##cat(k, " ")
1960 2115
 		CN <- copyNumberStates[k]
... ...
@@ -1972,8 +2127,8 @@ thresholdModelParams <- function(object, MIN=2^3){
1972 2127
 
1973 2128
 			sigmaA <- matrix(sigmaA, nrow=length(sigmaA), ncol=ncol(cnset), byrow=FALSE)
1974 2129
 			sigmaB <- matrix(sigmaB, nrow=length(sigmaB), ncol=ncol(cnset), byrow=FALSE)
1975
-			sigmaA <- sigmaA+sds.a[snpIndex(crlmmResults), ]
1976
-			sigmaB <- sigmaB+sds.b[snpIndex(crlmmResults), ]			
2130
+			sigmaA <- sigmaA+sds.a[snpIndex(object), ]
2131
+			sigmaB <- sigmaB+sds.b[snpIndex(object), ]			
1977 2132
 
1978 2133
 			##might want to allow the variance to be sample-specific
1979 2134
 			##TODO:
... ...
@@ -1995,21 +2150,20 @@ thresholdModelParams <- function(object, MIN=2^3){
1995 2150
 			## TODO: copy-neutral LOH would put near-zero mass on CA > 0, CB > 0
1996 2151
 			f.x.y <- f.x.y + matrix(1/(2*pi*sd.A*sd.B*sqrt(1-rho^2))*exp(-0.5*Q.x.y), nrow(snpset), ncol(snpset))
1997 2152
 		}
1998
-		emissionProbs[snpIndex(crlmmResults), , k] <- log(f.x.y)
2153
+		emissionProbs[snpIndex(object), , k] <- log(f.x.y)
1999 2154
 	}
2000 2155
 
2001
-
2002 2156
 	##**************************************************
2003 2157
 	##
2004 2158
 	##Emission probabilities for nonpolymorphic probes
2005 2159
 	##	
2006 2160
 	##**************************************************	
2007
-	cnset <- cnset[cnIndex(cnset), ]
2161
+	cnset <- cnset[cnIndex(cnset, annotation(cnset)), ]
2008 2162
 	params <- getParams(cnset, batch=batch)
2009 2163
 	nuA <- params[["nuA"]]
2010 2164
 	phiA <- params[["phiA"]]
2011 2165
 	sig2A <- params[["sig2A"]]
2012
-	a <- as.numeric(log2(A(crlmmResults[cnIndex(crlmmResults), ])))
2166
+	a <- as.numeric(log2(A(object[cnIndex(object), ])))
2013 2167
 	for(k in seq(along=copyNumberStates)){
2014 2168
 		CT <- copyNumberStates[k]
2015 2169
 		mus.matrix=matrix(log2(nuA + CT*phiA), nrow(cnset), ncol(cnset))
... ...
@@ -2017,10 +2171,10 @@ thresholdModelParams <- function(object, MIN=2^3){
2017 2171
 		##Again, should make sds sample-specific
2018 2172
 		sds.matrix <- matrix(sqrt(sig2A), nrow(cnset), ncol(cnset))
2019 2173
 
2020
-		sds.matrix <- sds.matrix + sds.a[cnIndex(crlmmResults), ]
2174
+		sds.matrix <- sds.matrix + sds.a[cnIndex(object), ]
2021 2175
 		sds <- as.numeric(sds.matrix)
2022 2176
 		suppressWarnings(tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(cnset), ncol(cnset)))
2023
-		emissionProbs[cnIndex(crlmmResults), , k] <- log(tmp)
2177
+		emissionProbs[cnIndex(object), , k] <- log(tmp)
2024 2178
 	}
2025 2179
 	emissionProbs
2026 2180
 }
... ...
@@ -6,7 +6,6 @@ crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
6 6
                   returnParams=FALSE, badSNP=.7){
7 7
   if ((load.it | save.it) & missing(intensityFile))
8 8
 	  stop("'intensityFile' is missing, and you chose either load.it or save.it")
9
-  
10 9
   if (missing(sns)) sns <- basename(filenames)
11 10
   if (!missing(intensityFile))
12 11
     if (load.it & !file.exists(intensityFile)){
... ...
@@ -3,9 +3,15 @@
3 3
 # or to use the optional 'Path' column in sampleSheet
4 4
 # - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet
5 5
 
6
-readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
6
+readIdatFiles <- function(sampleSheet=NULL,
7
+			  arrayNames=NULL,
8
+			  ids=NULL,
9
+			  path=".",
7 10
 			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
8
-			  highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) {
11
+			  highDensity=FALSE,
12
+			  sep="_",
13
+			  fileExt=list(green="Grn.idat", red="Red.idat"),
14
+			  saveDate=FALSE) {
9 15
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
10 16
 	       if(!is.null(arrayNames)){
11 17
 		       ##arrayNames=NULL
... ...
@@ -27,6 +33,7 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
27 33
 		       }
28 34
 	       }
29 35
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
36
+	       sampleNames(pd) <- arrayNames
30 37
        }
31 38
        if(is.null(arrayNames)) {
32 39
 	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
... ...
@@ -88,13 +95,18 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
88 95
 			       colnames(tmpmat) = sampleSheet$Sample_ID
89 96
 		       }else  colnames(tmpmat) = arrayNames
90 97
 		       RG <- new("NChannelSet",
91
-				 R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat,
92
-				 Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1],
93
-				 phenoData=pd, storage.mode="environment")
98
+				 R=tmpmat,
99
+				 G=tmpmat,
100
+				 Rnb=tmpmat,
101
+				 Gnb=tmpmat,
102
+				 Rse=tmpmat,
103
+				 Gse=tmpmat,
104
+				 annotation=headerInfo$Manifest[1],
105
+				 phenoData=pd,
106
+				 storage.mode="environment")
94 107
 		       rm(tmpmat)
95 108
 		       gc()
96 109
 	       }
97
-         
98 110
 	       if(length(ids)==length(idsG)) {
99 111
 		       if(sum(ids==idsG)==nprobes) {
100 112
 			       RG@assayData$G[,i] = G$Quants[, "Mean"]
... ...
@@ -394,6 +406,8 @@ readBPM <- function(bpmFile){
394 406
 }
395 407
 
396 408
 
409
+
410
+
397 411
 RGtoXY = function(RG, chipType, verbose=TRUE) {
398 412
   chipList = c("human1mv1c",             # 1M
399 413
                "human370v1c",            # 370CNV
... ...
@@ -402,11 +416,10 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
402 416
                "human660quadv1a",        # 660 quad
403 417
                "human370quadv3c",        # 370CNV quad
404 418
                "human550v3b",            # 550K
405
-               "human1mduov3b")          # 1M Duo   
406
-  if(missing(chipType))
407
-     chipType = match.arg(annotation(RG), chipList)
408
-  else
409
-     chipType = match.arg(chipType, chipList)
419
+               "human1mduov3b")          # 1M Duo
420
+  if(missing(chipType)){
421
+	  chipType = match.arg(annotation(RG), chipList)
422
+  } else chipType = match.arg(chipType, chipList)
410 423
   
411 424
   pkgname <- getCrlmmAnnotationName(chipType)
412 425
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
... ...
@@ -558,8 +571,17 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
558 571
 }
559 572
 
560 573
 
561
-preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=0.1, verbose=TRUE, seed=1,
562
-                               cdfName, sns, stripNorm=TRUE, useTarget=TRUE, save.it=FALSE, intensityFile) {
574
+preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
575
+				fitMixture=TRUE,
576
+				eps=0.1,
577
+				verbose=TRUE,
578
+				seed=1,
579
+				cdfName,
580
+				sns,
581
+				stripNorm=TRUE,
582
+				useTarget=TRUE,
583
+				save.it=FALSE,
584
+				intensityFile) {
563 585
   if(stripNorm)
564 586
     XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
565 587
 
... ...
@@ -35,13 +35,18 @@ setMethod("[", "CrlmmSetList", function(x, i, j, ..., drop = FALSE){
35 35
 ##})
36 36
 
37 37
 setMethod("A", "CrlmmSetList", function(object) A(object[[1]]))
38
+
39
+setMethod("annotation", "CrlmmSetList", function(object) annotation(object[[1]]))
40
+
38 41
 setMethod("B", "CrlmmSetList", function(object) B(object[[1]]))
39 42
 
40 43
 setMethod("CA", "CrlmmSetList", function(object, ...) CA(object[[3]], ...))
41 44
 setMethod("CB", "CrlmmSetList", function(object, ...) CB(object[[3]], ...))
42 45
 
43 46
 setMethod("calls", "CrlmmSetList", function(object) calls(object[[2]]))
44
-setMethod("cnIndex", "CrlmmSetList", function(object) match(cnNames(object[[1]]), featureNames(object)))
47
+setMethod("cnIndex", "CrlmmSetList", function(object, ...) {
48
+	match(cnNames(object[[1]], annotation(object)), featureNames(object))
49
+})
45 50
 
46 51
 setMethod("copyNumber", "CrlmmSetList", function(object) copyNumber(object[[3]]))
47 52
 
... ...
@@ -92,11 +97,23 @@ setMethod("points", signature(x="CrlmmSetList"),
92 97
 setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]]))
93 98
 
94 99
 setMethod("show", "CrlmmSetList", function(object){
95
-	for(i in seq(along=object)) show(object[[i]])
100
+	##for(i in seq(along=object)) show(object[[i]])
101
+	cat("\n Elements in CrlmmSetList object: \n")
102
+	cat("\n")
103
+	for(i in 1:length(object)){
104
+		cat("class: ", class(object[[i]]), "\n")
105
+		cat("assayData elements: ", ls(assayData(object[[i]])), "\n")
106
+		cat("Dimensions: ", dim(object[[i]]))		
107
+		cat("\n \n")
108
+	}
109
+##	cat("\n")
110
+##	cat("Dimensions:\n")
111
+##	print(dims(object))
96 112
 })
97 113
 
98 114
 setMethod("chromosome", "CrlmmSetList", function(object) chromosome(object[[3]]))
99 115
 setMethod("position", "CrlmmSetList", function(object) position(object[[3]]))
116
+setMethod("confs", "CrlmmSetList", function(object) confs(object[[2]]))
100 117
 
101 118
 
102 119
 setMethod(".harmonizeDimnames", "CrlmmSetList", function(object){
... ...
@@ -126,7 +143,9 @@ setMethod("$", "CrlmmSetList", function(x, name) {
126 143
 })
127 144
 
128 145
 	
129
-setMethod("snpIndex", "CrlmmSetList", function(object) match(snpNames(object[[1]]), featureNames(object)))
146
+setMethod("snpIndex", "CrlmmSetList", function(object, ...){
147
+	match(snpNames(object[[1]], annotation(object)), featureNames(object))
148
+})
130 149
 setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir){
131 150
 	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))	
132 151
 	load(file.path(path, "snpProbes.rda"))
... ...
@@ -139,8 +158,9 @@ setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir)
139 158
 		cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
140 159
 		index <- c(match(snps, featureNames(object)),
141 160
 			   match(cnps, featureNames(object)))
142
-		crlmmResults <- object[index, ]
143
-		save(crlmmResults, file=file.path(outdir, paste("crlmmResults_", CHR, ".rda", sep="")))
161
+		index <- index[!is.na(index)]
162
+		crlmmSetList <- object[index, ]
163
+		save(crlmmSetList, file=file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep="")))
144 164
 	}
145 165
 })
146 166
 
... ...
@@ -1,5 +1,20 @@
1
-setMethod("cnIndex", "eSet", function(object) match(cnNames(object), featureNames(object)))
2
-setMethod("cnNames", "eSet", function(object) featureNames(object)[grep("CN_", featureNames(object))])
1
+setMethod("cnIndex", "eSet", function(object, cdfName) match(cnNames(object, cdfName), featureNames(object)))
2
+setMethod("cnNames", "eSet", function(object, cdfName) {
3
+	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
4
+	load(file.path(path, "cnProbes.rda"))
5
+	cnps <- rownames(cnProbes)
6
+	cnps <- cnps[cnps %in% featureNames(object)]
7
+	featureNames(object)[match(cnps, featureNames(object))]	
8
+	##featureNames(object)[grep("CN_", featureNames(object))])
9
+  })
10
+setMethod("snpIndex", "eSet", function(object, cdfName) match(snpNames(object, cdfName), featureNames(object)))
11
+setMethod("snpNames", "eSet", function(object, cdfName){
12
+	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
13
+	load(file.path(path, "snpProbes.rda"))
14
+	snps <- rownames(snpProbes)
15
+	snps <- snps[snps %in% featureNames(object)]
16
+	featureNames(object)[match(snps, featureNames(object))]
17
+  })
3 18
 ##setMethod("combine", signature=signature(x="eSet", y="eSet"),
4 19
 ##	  function(x, y, ...){
5 20
 ##		  ##Check that both x and y are valid objects
... ...
@@ -25,7 +40,6 @@ setMethod("cnNames", "eSet", function(object) featureNames(object)[grep("CN_", f
25 40
 ##		  phenoData(x) <- pd
26 41
 ##		  x
27 42
 ##          })
28
-setMethod("snpIndex", "eSet", function(object) match(snpNames(object), featureNames(object)))
29
-setMethod("snpNames", "eSet", function(object) featureNames(object)[grep("SNP_", featureNames(object))])
43
+
30 44
 
31 45
 
... ...
@@ -25,74 +25,235 @@ options(continue=" ")
25 25
 options(prompt="R> ")
26 26
 @ 
27 27
 
28
-\section{Estimating copy number}
28
+%\section{Estimating copy number}
29 29
 
30
-At present, software for copy number estimation is provided only for the
31
-Affymetrix 6.0 platform.  This vignette estimates copy number for the
32
-HapMap samples.
30
+%At present, software for copy number estimation is provided only for the
31
+%Affymetrix 6.0 platform.  
33 32
 
34
-\subsection{Preprocess and genotype the samples}
33
+\begin{abstract}
34
+  This vignette estimates copy number for HapMap samples on the
35
+  Affymetrix 6.0 platform.
36
+\end{abstract}
35 37
 
36
-We preprocess and genotype the samples as described in the CRLMM
37
-vignette.
38
+\section{Simple usage}
39
+
40
+The following packages are required:
38 41
 
39 42
 <<requiredPackages>>=
40 43
 library(crlmm)
41 44
 library(genomewidesnp6Crlmm)
42
-library(Biobase) 
43 45
 @ 
46
+\paragraph{Preprocess and genotype.}
44 47
 
45
-Specify the complete path for the CEL files and a directory in which to
46
-store intermediate files:
47
-
48
+Specify the coordinates of Affymetrix cel files and where to put
49
+intermediate files generated during the course of preprocessing / copy
50
+number estimation.
48 51
 
49 52
 <<celfiles>>=
50
-celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", 
51
-			  full.names=TRUE, pattern=".CEL")
53
+myPath <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"
54
+celFiles <- list.celfiles(myPath, full.names=TRUE, pattern=".CEL")
52 55
 outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
53 56
 @ 
54 57
 
55
-Preprocess and genotype (for more info see the crlmm vignette):
58
+\noindent Preprocess and genotype (for more info see the crlmm vignette):
56 59
 
57 60
 <<preprocessAndGenotype>>=
58
-crlmmFilenames <- file.path(outdir, paste("crlmmResults_", 1:24, ".rda", sep=""))
59
-if(!all(file.exists(crlmmFilenames))){
60
-	message("Preprocessing and crlmm genotyping.  Results are written to", outdir, "...")
61
-	intensityFile <- file.path(outdir, "intensities.rda")		
62
-	crlmmWrapper(celFiles, outdir, save.it=TRUE, intensityFile=intensityFile)
63
-}
61
+crlmmWrapper(celFiles[1:15], 
62
+	     save.it=TRUE, 
63
+	     load.it=TRUE,
64
+	     intensityFile=file.path(outdir, "normalizedIntensities.rda"),
65
+	     crlmmFile=file.path(outdir, "snpsetObject.rda"))
64 66
 @ 
65 67
 
66
-Load results for chromosome 22.
68
+As a result of the above wrapper, the following R objects are now created:
69
+
70
+<<>>=
71
+list.files(outdir)[grep("crlmmSetList", list.files(outdir))]
72
+@ 
73
+
74
+\paragraph{Locus- and allele-specific estimates of copy number.}
75
+
76
+Load the object for chromosome 22 and compute copy number:
67 77
 
68 78
 <<crlmmList-show>>=
69 79
 CHR <- 22
70
-if(!exists("crlmmResults")) load(file.path(outdir, paste("crlmmResults_", CHR, ".rda", sep="")))
71
-class(crlmmResults)
72
-show(crlmmResults)
80
+if(!exists("crlmmSetList")) load(file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep="")))
81
+show(crlmmSetList)
82
+if(length(crlmmSetList)==2){
83
+	crlmmSetList <- update(crlmmSetList, CHR=CHR)
84
+}
85
+show(crlmmSetList)
73 86
 @ 
74 87
 
75
-\subsection{Copy number}
88
+See the help file for \Robject{computeCopynumber} for arguments to
89
+\Robject{update}.  Provided that the assumption of integer copy number
90
+is reasonable, one can fit a hidden Markov model to the locus-level
91
+estimates of copy number and uncertainty.
76 92
 
77
-We require 6 items for copy number estimation:
93
+\paragraph{A hidden Markov model.}
78 94
 
79
-\begin{itemize}
80
-  \item quantile-normalized A intensities (I1 x J)
81
-  \item quantile-normalized B intensities (I1 x J)
82
-  \item quantile-normalized intensities from nonpolymorphic (NP) probes (I2 x J)
83
-  \item genotype calls (I1 x J)
84
-  \item confidence scores of the genotype calls  (I1 x J)
85
-  \item signal to noise ratio (SNR) of the samples (J)
86
-  \end{itemize}
87
-  
88
-  The above items are contained in the \Robject{crlmmResults} object. A
89
-  histogram of the signal to noise ratio for the HapMap samples:
95
+Emission probabilities for the hidden markov model can be computed from
96
+the copy number prediction regions based on bivariate normal scatter
97
+plots of the log A versus log B intensities.  (By contrast, a
98
+nonparamemtric segmentation would be applied to intensities on the scale
99
+of copy number.)
90 100
 
91
-<<plotSnr, fig=TRUE>>=
92
-hist(crlmmResults[[2]]$SNR, xlab="SNR", main="")
101
+<<emissionProbabilities>>=
102
+library(VanillaICE)
103
+copyNumberStates <- 0:5
104
+if(!exists("emission.cn")){
105
+	emission.cn <- suppressWarnings(crlmm:::computeEmission(crlmmSetList, copyNumberStates))
106
+	dim(emission.cn)
107
+}
93 108
 @ 
94 109
 
95
-<<setSNRmin, echo=FALSE>>=
110
+*Warning: more can be done than currently implemented to protect against
111
+outliers.  In addition, improved estimates of uncertainty for the copy
112
+number prediction regions will also help.*
113
+
114
+Initial state probabilities and transition probabilities for the HMM:
115
+
116
+<<probs>>=
117
+initialP <- rep(1/length(copyNumberStates), length(copyNumberStates))
118
+tau <- transitionProbability(chromosome=chromosome(crlmmSetList),
119
+			     position=position(crlmmSetList),
120
+			     TAUP=1e8)
121
+@ 
122
+
123
+The viterbi algorithm is used to identify the sequence of states that
124
+maximizes the likelihood:
125
+
126
+<<hmm>>=
127
+if(!exists("hmmPredictions")){
128
+hmmPredictions <- viterbi(emission=emission.cn,
129
+			  initialStateProbs=log(initialP), 
130
+			  tau=tau[, "transitionPr"],
131
+			  arm=tau[, "arm"], 
132
+			  normalIndex=3,
133
+			  normal2altered=0.01,
134
+			  altered2normal=1,
135
+			  altered2altered=0.001)	    
136
+}
137
+table(as.integer(hmmPredictions)-1)
138
+brks <- breaks(x=hmmPredictions, states=copyNumberStates, 
139
+	       position=tau[, "position"],
140
+	       chromosome=tau[, "chromosome"])
141
+str(brks)
142
+@ 
143
+
144
+\paragraph{Circular binary segmentation}
145
+TO DO.
146
+
147
+\section{Accessors}
148
+
149
+\subsection{Assay data accessors}
150
+
151
+\paragraph{\Robject{ABset}:  quantile normalized intensities}
152
+
153
+An object of class \Robject{ABset} is stored in the first element of the
154
+\Robject{crlmmSetList} object. The following accessors may be of use:
155
+
156
+Accessors for the quantile normalized intensities for the A allele:
157
+
158
+<<accessors>>=
159
+a <- A(crlmmSetList)
160
+dim(a)
161
+@ 
162
+
163
+The quantile normalized intensities for the nonpolymorphic probes are
164
+also stored in the 'A' assay data element.  To retrieve the quantile
165
+normalized intensities for the A allele only at polymorphic loci:
166
+
167
+<<A.polymorphic>>=
168
+a.snps <- A(crlmmSetList[snpIndex(crlmmSetList), ])
169
+dim(a.snps)
170
+@ 
171
+
172
+\noindent For the nonpolymorphic loci:
173
+<<A.nonpolymorphic>>=
174
+a.nps <- A(crlmmSetList[cnIndex(crlmmSetList), ])
175
+dim(a.nps)
176
+@ 
177
+
178
+Quantile normalized intensities for the B allele at polymorphic loci:
179
+
180
+<<B.polymorphic>>=
181
+b.snps <- B(crlmmSetList[snpIndex(crlmmSetList), ])
182
+@ 
183
+
184
+Note that NAs are recorded in the 'B' assay data element for
185
+nonpolymorphic loci:
186
+
187
+<<B.NAs>>=
188
+all(is.na(B(crlmmSetList[cnIndex(crlmmSetList), ])))
189
+@ 
190
+
191
+\paragraph{\Robject{SnpSet}: Genotype calls and confidence scores}
192
+
193
+Genotype calls:
194
+<<genotypes>>=
195
+genotypes <- calls(crlmmSetList)
196
+@ 
197
+Confidence scores of the genotype calls:
198
+<<confidenceScores>>=
199
+genotypeConf <- confs(crlmmSetList)
200
+@ 
201
+
202
+\paragraph{\Robject{CopyNumberSet}: allele-specific copy number}
203
+
204
+Allele-specific copy number at polymorphic loci:
205
+<<ca>>=
206
+ca <- CA(crlmmSetList[snpIndex(crlmmSetList), ])
207
+@ 
208
+
209
+Total copy number at nonpolymorphic loci:
210
+<<ca>>=
211
+cn.nonpolymorphic <- CA(crlmmSetList[cnIndex(crlmmSetList), ])
212
+range(cn.nonpolymorphic, na.rm=TRUE)
213
+median(cn.nonpolymorphic, na.rm=TRUE)
214
+@ 
215
+
216
+Total copy number at both polymorphic and nonpolymorphic loci:
217
+<<totalCopynumber>>=
218
+cn <- copyNumber(crlmmSetList)
219
+@ 
220
+
221
+\subsection{Other accessors}
222
+
223
+After running \Robject{update} on the \Robject{crlmmSetList} object,
224
+information on physical position and chromosome can be accessed by the
225
+following accessors:
226
+
227
+<<positionChromosome, eval=FALSE>>=
228
+xx <- position(crlmmSetList)
229
+yy <- chromosome(crlmmSetList)
230
+@ 
231
+
232
+There are many parameters computed during copy number estimation that
233
+are at present stored in the \Robject{featureData} slot of the
234
+\Robject{CopyNumberSet} element.  TO DO: Accessors for these parameters,
235
+as well as better containers for storing these parameters. See
236
+
237
+<<copynumberParameters>>=
238
+fvarLabels(crlmmSetList[[3]])
239
+@ 
240
+
241
+
242
+\section{Suggested visualizations}
243
+
244
+A histogram of the signal to noise ratio for the HapMap samples:
245
+
246
+<<plotSnr, fig=TRUE, include=FALSE>>=
247
+hist(crlmmSetList[[2]]$SNR, xlab="SNR", main="")
248
+@ 
249
+
250
+\begin{figure}
251
+  \centering
252
+  \includegraphics[width=0.6\textwidth]{copynumber-plotSnr}
253
+  \caption{Signal to noise ratios for the HapMap samples.}
254
+\end{figure}
255
+
256
+<<snrmin, eval=TRUE, echo=FALSE>>=
96 257
 SNRmin <- 5
97 258
 @ 
98 259
 
... ...
@@ -102,7 +263,7 @@ quantile-normalized intensities, we suggest adjusting for date or
102 263
 chemistry plate.  Ideally, one would have 70+ files in a given
103 264
 batch. Here we make a table of date versus ancestry:
104 265
 
105
-<<specifyBatch>>=
266
+<<specifyBatch, eval=FALSE, echo=FALSE>>=
106 267
 sns <- sampleNames(crlmmResults)
107 268
 sns[1]
108 269
 plate <- substr(basename(sns), 13, 13)
... ...
@@ -118,14 +279,6 @@ not the case, we illustrate how one may adjust for batch using the
118 279
 chemistry plate as an argument for \Robject{batch} in the
119 280
 \Robject{computeCopynumber} function.
120 281
 
121
-<<computeCopynumber>>=
122
-if(!exists("crlmmResults2")){
123
-	crlmmResults2 <- crlmmResults
124
-	crlmmResults <- computeCopynumber(crlmmResults, SNRmin=5, batch=plate, CHR=CHR, cdfName="genomewidesnp6")
125
-	protocolData(crlmmResults[[3]])[["ScanDate"]] <- protocolData(crlmmResults[[3]])[["ScanDate"]]
126
-}
127
-@ 
128
-
129 282
 **Note: the number of samples in the \Robject{CrlmmSetList} object after
130 283
 copy number estimation may be fewer than the number of samples in the
131 284
 \Robject{CrlmmSetList} object after preprocessing/genotyping.  This
... ...
@@ -137,20 +290,6 @@ The object returned by \Robject{computeCopynumber} is ordered by
137 290
 chromosome and physical position (useful for downstream methods that
138 291
 smooth the copy number as a function of the physical position).  **
139 292
 
140
-<<dimnamesMayDiffer>>=
141
-identical(featureNames(crlmmResults), featureNames(crlmmResults2)) ##not identical
142
-identical(sampleNames(crlmmResults), sampleNames(crlmmResults2))
143
-gc()
144
-@ 
145
-
146
-
147
-<<example.cnset, eval=FALSE, echo=FALSE>>=
148
-example.crlmmResults <- crlmmResults[1:1000, 1:100]
149
-##example.cnset <- cnset[1:1000, 1:100]
150
-##save(example.cnset, file="~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda")
151
-save(example.crlmmResults, file="~/madman/Rpacks/crlmm/inst/scripts/example.crlmmResults.rda")
152
-@ 
153
-
154 293
 The above algorithm for estimating copy number is predicated on the
155 294
 assumption that most samples within a batch have copy number 2 at any
156 295
 given locus.  For common copy number variants, this assumption may not
... ...
@@ -159,13 +298,63 @@ additional robustness to this assumption.  Set the \Robject{bias.adj}
159 298
 argument to \Robject{TRUE}:
160 299
 
161 300
 <<biasAdjustment, eval=FALSE>>=
162
-crlmmResults[[3]] <- computeCopynumber(crlmmResults, SNRmin=5, batch=plate, CHR=CHR, 
163
-				       cdfName="genomewidesnp6", bias.adj=TRUE)
301
+update(crlmmSetList, CHR=22, bias.adj=TRUE)
302
+@ 
303
+
304
+Calculate the frequency of amplifications and deletions at each locus.
305
+<<hmm_hapmap, fig=TRUE, include=FALSE, width=8, height=7>>=
306
+require(SNPchip)
307
+library(RColorBrewer)
308
+numberUp <- rowSums(hmmPredictions > 3, na.rm=TRUE)
309
+numberDown <- -rowSums(hmmPredictions < 3, na.rm=TRUE)
310
+poly.cols <- brewer.pal(7, "Accent")
311
+alt.brks <- brks[brks[, "state"] != "copy.number_2", ]
312
+op <- par(ask=FALSE)
313
+ylim <- c(min(numberDown)-5, max(numberUp)+5)
314
+xlim <- c(10*1e6, max(position(crlmmSetList)))
315
+plot(position(crlmmSetList), rep(0, nrow(crlmmSetList[[1]])),
316
+     type="n", xlab="Physical position (Mb)",
317
+     ylim=ylim,
318
+     xlim=xlim,
319
+     ylab="frequency", main="Chr 22",
320
+     xaxt="n",
321
+     xaxs="r")
322
+axis(1, at=pretty(xlim), labels=pretty(xlim)/1e6)
323
+polygon(x=c(position(crlmmSetList), rev(position(crlmmSetList))),
324
+	y=c(rep(0, nrow(crlmmSetList[[1]])), rev(numberUp)),
325
+	col=poly.cols[3], border=poly.cols[3])
326
+polygon(x=c(position(crlmmSetList), rev(position(crlmmSetList))),
327
+	y=c(rep(0, nrow(crlmmSetList[[1]])), rev(numberDown)),
328
+	col=poly.cols[5], border=poly.cols[5])
329
+##plotCytoband(22, xlim=xlim, new=FALSE,
330
+##	     label.cytoband=FALSE,
331
+##	     cytoband.ycoords=c(-10, -8), xaxs="r")
332
+
333
+medLength <- round(median(alt.brks[, "nbases"]), 2)
334
+medMarkers <- median(alt.brks[, "nprobes"])
335
+sdMarkers <- round(mad(alt.brks[, "nprobes"]), 2)
336
+sdsLength <- round(mad(alt.brks[, "nbases"]), 2)
337
+legend("topright",
338
+       bty="n",
339
+       legend=c(paste("median length:", medLength, "(bp)"),
340
+       paste("MAD length:", sdsLength, "(bp)"),
341
+       paste("median # markers:", medMarkers),
342
+       paste("MAD # markers:", sdMarkers)),
343
+       cex=0.8, ncol=2)
344
+legend("topleft",
345
+       fill=poly.cols[c(3, 5)],
346
+       legend=c("amplifications", "deletions"), bty="n")
347
+par(op)
348
+gc()
164 349
 @ 
165 350
 
166
-\section{Suggested plots}
351
+\begin{figure}
352
+  \centering
353
+  \includegraphics[width=\textwidth]{copynumber-hmm_hapmap}
354
+  \caption{}
355
+\end{figure}
167 356
 
168
-\paragraph{One sample at a time}
357
+\paragraph{One sample at a time: locus-level estimates}
169 358
 
170 359
 Plot physical position versus copy number for the first sample.  Recall
171 360
 that the copy number estimates were multiplied by 100 and stored as an
... ...
@@ -173,12 +362,12 @@ integer.
173 362
 
174 363
 <<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>=
175 364
 par(las=1)
176
-plot(position(crlmmResults), copyNumber(crlmmResults)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), 
365
+plot(position(crlmmSetList), copyNumber(crlmmSetList)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), 
177 366
      ylab="copy number", xlab="physical position (Mb)",
178
-     main=paste(sampleNames(crlmmResults)[1], ", CHR:", unique(chromosome(crlmmResults))))
179
-points(position(crlmmResults)[cnIndex(crlmmResults)], copyNumber(crlmmResults)[cnIndex(crlmmResults), 1],
367
+     main=paste(sampleNames(crlmmSetList)[1], ", CHR:", unique(chromosome(crlmmSetList))))
368
+points(position(crlmmSetList)[cnIndex(crlmmSetList)], copyNumber(crlmmSetList)[cnIndex(crlmmSetList), 1],
180 369
        pch=".", cex=2, col="lightblue")
181
-axis(1, at=pretty(range(position(crlmmResults))), labels=pretty(range(position(crlmmResults)))/1e6)
370
+axis(1, at=pretty(range(position(crlmmSetList))), labels=pretty(range(position(crlmmSetList)))/1e6)
182 371
 @ 
183 372
 
184 373
 <<idiogram, eval=FALSE, echo=FALSE>>=
... ...
@@ -189,7 +378,8 @@ plotCytoband(22, new=FALSE, cytoband.ycoords=c(5.8, 6.0), label.cytoband=FALSE)
189 378
 \begin{figure}
190 379
   \includegraphics[width=0.9\textwidth]{copynumber-oneSample}
191 380
   \caption{Total copy number (y-axis) for chromosome 22 plotted
192
-    against physical position (x-axis) for one sample.  }
381
+    against physical position (x-axis) for one sample.  Estimates at
382
+    nonpolymorphic loci are plotted in light blue.}
193 383
 \end{figure}
194 384
 
195 385
 \paragraph{One SNP at a time}
... ...
@@ -206,12 +396,12 @@ colors <- c("red", "blue", "green3")
206 396
 cex <- 0.6
207 397
 par(mfrow=c(3,3), las=1, pty="s", ask=FALSE, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1))
208 398
 ##plot 9 at a time
209
-indices <- split(snpIndex(crlmmResults), rep(1:length(snpIndex(crlmmResults)), each=9, length.out=length(snpIndex(crlmmResults))))
210
-par(ask=TRUE)
399
+indices <- split(snpIndex(crlmmSetList), rep(1:length(snpIndex(crlmmSetList)), each=9, length.out=length(snpIndex(crlmmSetList))))
400
+par(ask=FALSE)
211 401
 j <- 1
212 402
 for(i in indices[[j]]){
213
-	gt <- calls(crlmmResults)[i, ]
214
-	plot(crlmmResults[i, ], 
403
+	gt <- calls(crlmmSetList)[i, ]
404
+	plot(crlmmSetList[i, ], 
215 405
 	     pch=pch, 
216 406
 	     col=colors[gt], 
217 407
 	     bg=colors[gt], cex=cex,
... ...
@@ -221,10 +411,11 @@ for(i in indices[[j]]){
221 411
 }
222 412
 @ 
223 413
 
224
-<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE>>=
414
+<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>=
225 415
 require(RColorBrewer)
416
+library(ellipse)
226 417
 greens <- brewer.pal(9, "Greens")
227
-J <- split(1:ncol(crlmmResults), batch(crlmmResults))
418
+J <- split(1:ncol(crlmmSetList), batch(crlmmSetList))
228 419
 colors <- c("red", "blue", "green3")
229 420
 cex <- 0.6
230 421
 colors <- c("blue", greens[8], "red")
... ...
@@ -232,35 +423,36 @@ pch.col <- c("grey40", "black", "grey40")
232 423
 xlim <- ylim <- c(6.5,13)
233 424
 plotpoints <- FALSE
234 425
 lwd <- 2
235
-pdf("figures/snp22plots%02d.pdf", width=600, height=600)
426
+##pdf("figures/snp22plots%02d.pdf", width=600, height=600)
236 427
 ask <- FALSE
237 428
 par(mfrow=c(3,3), las=1, pty="s", ask=ask, mar=c(2, 2, 2, 2), oma=c(2, 2, 1, 1))
238
-indices <- split(snpIndex(crlmmResults), rep(1:length(snpIndex(crlmmResults)), each=9, length.out=length(snpIndex(crlmmResults))))
239
-for(j in seq(along=indices)[1:10]){
429
+indices <- split(snpIndex(crlmmSetList), rep(1:length(snpIndex(crlmmSetList)), each=9, length.out=length(snpIndex(crlmmSetList))))
430
+##for(j in seq(along=indices)[1:10]){
431
+j <- 1
240 432
 	cat(j, "\n")
241 433
 	k <- 1
242 434
 	for(i in indices[[j]]){
243
-		gt <- calls(crlmmResults)[i, ]
435
+		gt <- calls(crlmmSetList)[i, ]
244 436
 		pch <- as.character(gt)
245 437
 		cex <- 0.9
246
-		plot(crlmmResults[i, ], 
438
+		plot(crlmmSetList[i, ], 
247 439
 		     pch=pch, 
248 440
 		     col=pch.col[gt],
249 441
 		     cex=cex,
250 442
 		     xlim=xlim, ylim=ylim,
251 443
 		     type="n")
252 444
 		if(plotpoints){
253
-			for(b in seq(along=unique(batch(crlmmResults)))){
254
-				points(crlmmResults[i, J[[b]]], 
445
+			for(b in seq(along=unique(batch(crlmmSetList)))){
446
+				points(crlmmSetList[i, J[[b]]], 
255 447
 				       pch=pch, 
256 448
 				       col=colors[b], bg=colors[b], cex=cex,
257 449
 				       xlim=xlim, ylim=ylim)
258 450
 			}
259 451
 		}
260
-		for(b in seq(along=unique(batch(crlmmResults)))){
261
-			ellipse(crlmmResults[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd)
452
+		for(b in seq(along=unique(batch(crlmmSetList)))){
453
+			ellipse(crlmmSetList[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd)
262 454
 		}
263
-		##legend("bottomright", bty="n", legend=featureNames(crlmmResults)[i])
455
+		##legend("bottomright", bty="n", legend=featureNames(crlmmSetList)[i])
264 456
 		if(k == 1) {
265 457
 			legend("bottomleft", bty="n", fill=colors, legend=c("CEPH", "Yoruba", "Asian"))	
266 458
 			mtext("A", 1, outer=TRUE, line=1)
... ...
@@ -268,76 +460,17 @@ for(j in seq(along=indices)[1:10]){
268 460
 		}
269 461
 		k <- k+1
270 462
 	}
271
-}
272
-dev.off()
273 463
 @
274 464
 
465
+%\begin{figure}
466
+%  \includegraphics[width=0.9\textwidth]{copynumber-predictionRegion}
467
+%  \caption{Prediction regions for copy number 2 for one SNP.  The
468
+%    plate effects are negligible for this SNP and we only plot the
469
+%    prediction region for the first plate.}
470
+%\end{figure}
275 471
 
276
-
277
-\begin{figure}
278
-  \includegraphics[width=0.9\textwidth]{copynumber-predictionRegion}
279
-  \caption{Prediction regions for copy number 2 for one SNP.  The
280
-    plate effects are negligible for this SNP and we only plot the
281
-    prediction region for the first plate.}
282
-\end{figure}
283
-
284
-
285
-\section{Smoothing via a hidden Markov model}
286
-
287
-Here we smooth via a hidden Markov model.  To facilitate comparisons
288
-with the Birdseye HMM, the same transition probabilities are specified.
289
-
290
-<<segment>>=
291
-library(VanillaICE)
292
-copyNumberStates <- 0:5
293
-##crlmmResults <- crlmm:::thresholdModelParams(crlmmResults)
294
-if(!exists("fit")){
295
-	if(file.exists(file.path(outdir, paste("fit_", CHR, ".rda", sep="")))) load(file.path(outdir, paste("fit_", CHR, ".rda", sep="")))
296
-	else {
297
-		emission.cn <- crlmm:::computeEmission(crlmmResults, copyNumberStates)
298
-		tau <- transitionProbability(chromosome=chromosome(crlmmResults),
299
-					     position=position(crlmmResults),
300
-					     TAUP=1e8)
301
-		initialP <- rep(1/length(copyNumberStates), length(copyNumberStates))
302
-		fit <- viterbi(initialStateProbs=log(initialP),
303
-			       emission=emission.cn,
304
-			       tau=tau[, "transitionPr"],
305
-			       arm=tau[, "arm"], 
306
-			       normalIndex=3,
307
-			       normal2altered=0.005,
308
-			       altered2normal=0.5,
309
-			       altered2altered=0.0025)
310
-		brks <- breaks(x=fit, states=copyNumberStates, position=tau[, "position"],
311
-			       chromosome=tau[, "chromosome"])
312
-	}
313
-}
314
-@ 
315
-
316
-<<visualizations, eval=FALSE>>=
317
-dir.create("figures")
318
-par(las=1, mfrow=c(1,1), ask=FALSE)
319
-bmp("figures/chr22plots%02d.png", width=800, height=500)
320
-for(j in 1:ncol(crlmmResults)){
321
-	cat(j, " ")
322
-	##A region that we would miss if we only looked at the polymorphic probes...is it real?
323
-	##index <- which(position(crlmmResults) > 21.2*1e6 & position(crlmmResults) < 21.6*1e6 & mns < 1)
324
-	##fit <- fit[index,]
325
-	index <- 1:nrow(fit)
326
-	plot(0:1, type="n", xlim=range(position(crlmmResults)[index]),
327
-	     xaxt="n", ylim=c(-0.5, 6), 
328
-	     ylab="copy number", xlab="physical position (Mb)",
329
-	     main=paste(sampleNames(crlmmResults)[j], ", CHR:", unique(chromosome(crlmmResults))), 
330
-	     lwd=2,
331
-	     col="blue")
332
-	points(position(crlmmResults)[index], copyNumber(crlmmResults)[index, j], pch=".", cex=2, col="grey60")
333
-	lines(position(crlmmResults)[index], fit[index, j]-1, type="s", lwd=2, col="blue")
334
-	abline(h=0:4, col="grey60")
335
-	axis(1, at=pretty(range(position(crlmmResults)[index])), labels=pretty(range(position(crlmmResults)[index]))/1e6)
336
-}
337
-dev.off()
338
-@ 
339
-
340
-
472
+\section{Details for the copy number estimation procedure}
473
+TO DO.
341 474
 
342 475
 \section{Session information}
343 476
 <<sessionInfo, results=tex>>=
344 477
Binary files a/inst/scripts/copynumber.pdf and b/inst/scripts/copynumber.pdf differ
345 478
new file mode 100644
... ...
@@ -0,0 +1,57 @@
1
+\name{crlmmIlluminaWrapper}
2
+\Rdversion{1.1}
3
+\alias{crlmmIlluminaWrapper}
4
+\title{
5
+  Reads idat files, quantile normalizes the raw intensities, and
6
+  provides diallelic genotype calls at polymorphic loci.
7
+}
8
+\description{
9
+  This function is a wrapper for utilities used to preprocess and
10
+  genotype files from the illumina platform.   See Section see also.
11
+}
12
+\usage{
13
+crlmmIlluminaWrapper(outdir = "./", cdfName, save.intermediate = FALSE, splitByChr = TRUE, intensityFile, ...)
14
+}
15
+\arguments{
16
+  \item{outdir}{Character string.  The complete path to the directory
17
+    where output will be saved.}
18
+  \item{cdfName}{Character string.  Annotation package.  See
19
+    crlmm:::validCdfNames() for supported packages.
20
+  }
21
+  \item{save.intermediate}{Logical.  Whether to save intermediate
22
+    files. This is used primarily for debugging.
23
+  }
24
+  \item{splitByChr}{Logical.  Whether to split output files by
25
+    chromosome.  This is recommended for datasets with 50 or more samples.
26
+  }
27
+  \item{intensityFile}{Character string.  Name of intermediate file for storing
28
+    quantile normalized intensities.  This is always saved regardless of
29
+    the value of save.intermediate.
30
+  }
31
+  \item{\dots}{Additional arguments to readIdatFiles.
32
+  }
33
+}
34
+\details{
35
+}
36
+\value{
37
+  An object of class \code{CrlmmSetList}.  This object will have the
38
+  following two elements:
39
+
40
+  - an object of class \code{ABset}
41
+  - an object of class \code{SnpSet}
42
+  
43
+}
44
+\author{
45
+  Rob Scharpf
46
+}
47
+\note{
48
+}
49
+\seealso{
50
+  \code{\linkS4class{CrlmmSetList}}
51
+  \code{\linkS4class{ABset}}
52
+  \code{\link[Biobase]{SnpSet}}
53
+}
54
+\examples{
55
+}
56
+\keyword{manip}
57
+