Browse code

changes to crlmmWrapper. updated vignettes in inst/scripts.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@42144 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 04/10/2009 02:37:32
Showing 20 changed files

... ...
@@ -272,3 +272,13 @@ is decoded and scanned
272 272
 * labeled figures / displayed output of code chunks in the copy number vignette
273 273
 * added bibliography for copy number vignette.  Added file inst/doc/refs.bib
274 274
 * added boxplot method 
275
+
276
+2009-10-03 R. Scharpf - committed version 1.3.21
277
+
278
+* modified crlmmWrapper function
279
+* modified illumina copy number vignette (still needs debugging)
280
+* changed title of copy number vignette
281
+* added reference to the crlmm paper
282
+* changed copyNumber() method so that CA + NA = NA, CB + NA = NA (previously had CA+NA=CA, but this can result in a lot of zeros, depending on the genotype)
283
+* new method: addFeatureAnnotation
284
+* support for snp5.0
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.20
5
-Date: 2009-09-22
4
+Version: 1.3.21
5
+Date: 2009-09-29
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0.
... ...
@@ -18,7 +18,7 @@ importMethodsFrom(Biobase, annotation, "annotation<-", annotatedDataFrameFrom,
18 18
 
19 19
 
20 20
 ##importMethodsFrom(oligoClasses, chromosome, copyNumber, position)
21
-importFrom(oligoClasses, chromosome, copyNumber, position, calls)
21
+importMethodsFrom(oligoClasses, chromosome, copyNumber, position, calls)
22 22
 
23 23
 ##importMethodsFrom(methods, initialize, show)
24 24
 
... ...
@@ -54,6 +54,8 @@ exportClasses(ABset, CopyNumberSet, CrlmmSetList)
54 54
 ##S3method(ellipse, CopyNumberSet)
55 55
 exportMethods("[", ##"[[",
56 56
 	      "$", A, B,
57
+	      "A<-", "B<-",
58
+	      addFeatureAnnotation,
57 59
 	      calls,
58 60
 	      CA,
59 61
 	      "CA<-",
... ...
@@ -1,5 +1,8 @@
1 1
 setGeneric("A", function(object) standardGeneric("A"))
2 2
 setGeneric("B", function(object) standardGeneric("B"))
3
+setGeneric("A<-", function(object, value) standardGeneric("A<-"))
4
+setGeneric("addFeatureAnnotation", function(object, ...) standardGeneric("addFeatureAnnotation"))
5
+setGeneric("B<-", function(object, value) standardGeneric("B<-"))
3 6
 setGeneric("batch", function(object) standardGeneric("batch"))
4 7
 ##setGeneric("calls", function(x) standardGeneric("calls"))
5 8
 setGeneric("confs", function(object) standardGeneric("confs"))
... ...
@@ -215,185 +215,6 @@ harmonizeDimnamesTo <- function(object1, object2){
215 215
 	return(object1)
216 216
 }
217 217
 
218
-##crlmmIlluminaWrapper <- function(filenames,
219
-##				 cdfName,
220
-##				 load.it=FALSE,
221
-##				 save.it=FALSE,
222
-##				 splitByChr=TRUE,
223
-##				 crlmmFile,
224
-##				 intensityFile, ...){
225
-####				 outdir="./",
226
-####				 cdfName,
227
-####				 save.intermediate=FALSE,
228
-####				 splitByChr=TRUE,
229
-####				 intensityFile,
230
-####				 ...){  ##additional arguments to readIdatFiles
231
-####	stopifnot(basename(intensityFile) == "res.rda")
232
-##	if(!file.exists(outdir)) stop(outdir, " does not exist.")
233
-##	if(!isValidCdfName(cdfName, platform="illumina")) stop(cdfName, " not supported.")
234
-##	if(file.exists(file.path(outdir, "RG.rda"))) {
235
-##		message("Loading RG.rda...")
236
-##		load(file.path(outdir, "RG.rda"))
237
-##	} else {
238
-##		message("Reading Idat files...")		
239
-##		RG <- readIdatFiles(...)
240
-##		##J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG))
241
-##		##RG <- RG[, -J]
242
-##		if(save.intermediate) save(RG, file=file.path(outdir, "RG.rda"))  ##935M for 91 samples...better not to save this
243
-##	}	
244
-##	if(!file.exists(intensityFile)){
245
-##		message("Quantile normalization / genotyping...")				
246
-##		crlmmOut <- crlmmIllumina(RG=RG,
247
-##					  cdfName=cdfName,
248
-##                                          sns=sampleNames(RG),
249
-##                                          returnParams=TRUE,
250
-##                                          save.it=TRUE,
251
-##                                          intensityFile=intensityFile)
252
-##		if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))
253
-##	} else{
254
-##		load(file.path(outdir, "crlmmOut.rda"))
255
-##	}
256
-##	message("Loading ", intensityFile, "...")		
257
-##	load(intensityFile)
258
-##	if(exists("cnAB")){
259
-##		np.A <- as.integer(cnAB$A)
260
-##		np.B <- as.integer(cnAB$B)
261
-##		np <- ifelse(np.A > np.B, np.A, np.B)
262
-##		np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
263
-##		rownames(np) <- cnAB$gns
264
-##		colnames(np) <- cnAB$sns
265
-##		cnAB$NP <- np
266
-##	}
267
-##	sampleNames(crlmmOut) <- res$sns	
268
-##	if(exists("cnAB")){
269
-##		ABset <- combineIntensities(get("res"), cnAB, cdfName=cdfName)
270
-##	} else{
271
-##		ABset <- combineIntensities(get("res"), NULL, cdfName=cdfName)
272
-##	}
273
-##	##protocolData(ABset)[["ScanDate"]] <- as.character(pData(RG)$ScanDate)
274
-##	crlmmResult <- harmonizeSnpSet(crlmmOut, ABset, cdfName)
275
-##	stopifnot(all.equal(dimnames(crlmmOut), dimnames(ABset)))
276
-##	crlmmList <- list(ABset,
277
-##			  crlmmResult)
278
-##	crlmmList <- as(crlmmList, "CrlmmSetList")
279
-##	if(splitByChr){
280
-##		message("Saving by chromosome")
281
-##		splitByChromosome(crlmmList, cdfName=cdfName, outdir=outdir)
282
-##	} else{
283
-##		message("Saving crlmmList object to ", outdir)
284
-##		save(crlmmList, file=file.path(outdir, "crlmmList.rda"))
285
-##	}
286
-##	message("CrlmmSetList objects saved to ", outdir)
287
-##}
288
-
289
-##quantileNormalize1m <- function(cel.files,
290
-##				outdir,
291
-##				pkgname,
292
-##				reference=TRUE,
293
-##				createCels=TRUE,
294
-##				verbose=TRUE,
295
-##				computeCopyNumberReference=FALSE,
296
-##				normalizeNonpolymorphic=FALSE){
297
-##	if(computeCopyNumberReference){
298
-##		stopifnot(file.exists("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"))
299
-##	} else{
300
-##		load(system.file("1m_reference_cn.rda", package="CN"))
301
-##	}
302
-##	conn <- db(get(pkgname))
303
-##	tmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, allele, featureSet.chrom, featureSet.physical_pos",
304
-##				      "FROM pmfeature, featureSet",
305
-##				      "WHERE pmfeature.fsetid = featureSet.fsetid"))
306
-##	tmp[is.na(tmp$chrom), "chrom"] <- 0
307
-##	tmp[is.na(tmp$physical_pos), "physical_pos"] <- 0
308
-##	tmp <- tmp[order(tmp$chrom, tmp$physical_pos, tmp$man_fsetid, tmp$allele),]
309
-##
310
-##	if(reference){
311
-##		load(system.file("extdata", paste(pkgname, "Ref.rda", sep=""), package=pkgname))
312
-##	} else{
313
-##		reference <- normalize.quantiles.determine.target(readCelIntensities(celFiles,
314
-##										     indices=tmp$fid))
315
-##		if (verbose) message("normalization vector created")
316
-##	}
317
-##	reference <- sort(reference)
318
-##	message("creating empty cel files")
319
-##	out.celFiles <- file.path(outdir, paste("QN-", basename(cel.files), sep=""))
320
-##	if(createCels){
321
-##		hh <- readCelHeader(cel.files[1])
322
-##		message(paste("creating empty cel files in", outdir))
323
-##		out.celFiles <- sapply(out.celFiles, function(x) suppressWarnings(createCel(x, header=hh, overwrite=TRUE)))
324
-##	}
325
-##
326
-##	message("Quantile normalizing SNP probes to hapmap target distribution")
327
-##	for(i in 1:length(cel.files)){
328
-##		if(i%%10==0) cat(".")
329
-##		pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i],
330
-##									 indices=tmp$fid),
331
-##						      reference, copy=FALSE)
332
-##		updateCel(out.celFiles[i], indices=tmp$fid, intensities=as.integer(pms))	  
333
-##	}
334
-##
335
-####	---------------------------------------------------------------------------
336
-####	 copy number probes
337
-####	---------------------------------------------------------------------------
338
-##	if(normalizeNonpolymorphic){
339
-##		cntmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, featureSetCNV.chrom, featureSetCNV.chrom_start",
340
-##						"FROM pmfeatureCNV, featureSetCNV",
341
-##						"WHERE pmfeatureCNV.fsetid = featureSetCNV.fsetid"))
342
-##		cntmp[is.na(cntmp$chrom), "chrom"] <- 0
343
-##		cntmp[is.na(cntmp$chrom_start), "chrom_start"] <- 0
344
-##		cntmp <- cntmp[order(cntmp$chrom, cntmp$chrom_start, cntmp$man_fsetid), ]
345
-##
346
-##		if(computeCopyNumberReference){
347
-##			message("computing reference distribution for copy number probes.  May take a while...")
348
-##			celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full.names=TRUE)
349
-##			reference <- computeCopyNumberReference(cel.files=celFiles, fid=cntmp$fid)
350
-##			message("finished computing CN reference distribution.")
351
-##		} 
352
-##		message("Quantile normalizing copy number probes to hapmap target distribution")
353
-##		for(i in 1:length(cel.files)){
354
-##			if(i%%10==0) cat(".")
355
-##			pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i],
356
-##										 indices=cntmp$fid),
357
-##							      reference, copy=FALSE)
358
-##			updateCel(out.celFiles[i], indices=cntmp$fid, intensities=as.integer(pms))	  
359
-##		}
360
-##	} else {
361
-##		message("Not quantile normalizing the nonpolymorphic probes")
362
-##	}
363
-##}
364
-
365
-validCdfNames <- function(platform){
366
-	if(missing(platform)) stop("missing platform")
367
-	if(!platform %in% c("illumina", "affymetrix"))
368
-		stop("only illumina and affymetrix platforms are supported.")
369
-	if(platform=="illumina"){
370
-		chipList = c("human1mv1c",             # 1M
371
-		"human370v1c",            # 370CNV
372
-		"human650v3a",            # 650Y
373
-		"human610quadv1b",        # 610 quad
374
-		"human660quadv1a",        # 660 quad
375
-		"human370quadv3c",        # 370CNV quad
376
-		"human550v3b",            # 550K
377
-		"human1mduov3b")          # 1M Duo
378
-	}
379
-	if(platform=="affymetrix"){
380
-		chipList=c("genomewidesnp6")
381
-	}
382
-	return(chipList)
383
-}
384
-
385
-isValidCdfName <- function(cdfName, platform){
386
-	chipList <- validCdfNames(platform)
387
-	if(!(cdfName %in% chipList)){
388
-		warning("cdfName must be one of the following: ",
389
-			chipList)
390
-	}
391
-	result <- cdfName %in% chipList
392
-	return(result)
393
-}
394
-	
395
-	
396
-	
397 218
 crlmmWrapper <- function(filenames,
398 219
 			 cdfName="genomewidesnp6",
399 220
 			 load.it=FALSE,
... ...
@@ -404,12 +225,19 @@ crlmmWrapper <- function(filenames,
404 225
 			 rgFile,
405 226
 			 platform=c("affymetrix", "illumina")[1],
406 227
 			 ...){
407
-	if(!(platform %in% c("affymetrix", "illumina")))
228
+	if(!(platform %in% c("affymetrix", "illumina"))){
408 229
 		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
230
+	} else {
231
+		message("Checking whether annotation package for the ", platform, " platform is available")
232
+	}
409 233
 	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
410 234
 	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
411 235
 	if(platform == "illumina"){
412 236
 		if(missing(rgFile)) stop("must specify 'rgFile'.")
237
+		if(!load.it){
238
+			RG <- readIdatFiles(...)
239
+			if(save.it) save(RG, file=rgFile)
240
+		}
413 241
 		if(load.it & !file.exists(rgFile)){
414 242
 			message("load.it is TRUE, bug rgFile not present.  Attempting to read the idatFiles.")
415 243
 			RG <- readIdatFiles(...)
... ...
@@ -456,6 +284,7 @@ crlmmWrapper <- function(filenames,
456 284
 			}
457 285
 		} else {
458 286
 			message("Loading ", crlmmFile, "...")
287
+			load(intensityFile)				
459 288
 			load(crlmmFile)
460 289
 			crlmmResult <- get("crlmmResult")
461 290
 			cnrmaResult <- get("cnrmaResult")
... ...
@@ -463,23 +292,38 @@ crlmmWrapper <- function(filenames,
463 292
 	}
464 293
 	if(platform == "illumina"){
465 294
 		if(!file.exists(crlmmFile) | !load.it){		
466
-			crlmmOut <- crlmmIllumina(RG=RG,
467
-						  cdfName=cdfName,
468
-						  sns=sampleNames(RG),
469
-						  returnParams=TRUE,
470
-						  save.it=TRUE,
471
-						  intensityFile=intensityFile)
472
-			if(save.it) save(crlmmOut, file=crlmmFile)
295
+			crlmmResult <- crlmmIllumina(RG=RG,
296
+						     cdfName=cdfName,
297
+						     sns=sampleNames(RG),
298
+						     returnParams=TRUE,
299
+						     save.it=TRUE,
300
+						     intensityFile=intensityFile)
301
+			if(save.it) save(crlmmResult, file=crlmmFile)
473 302
 		} else {
474 303
 			message("Loading ", crlmmFile, "...")
475 304
 			load(crlmmFile)
476 305
 			crlmmResult <- get("crlmmResult")
477
-			if(exists("cnAB")){
478
-				cnrmaResult <- get("cnAB")
479
-			} else cnrmaResult <- NULL
480 306
 		}
481 307
 	}
482 308
 	load(intensityFile)
309
+	if(platform=="illumina"){
310
+		if(exists("cnAB")){
311
+			np.A <- as.integer(cnAB$A)
312
+			np.B <- as.integer(cnAB$B)
313
+			np <- ifelse(np.A > np.B, np.A, np.B)
314
+			np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
315
+			rownames(np) <- cnAB$gns
316
+			colnames(np) <- cnAB$sns
317
+			cnAB$NP <- np
318
+			sampleNames(crlmmResult) <- res$sns				
319
+			cnrmaResult <- get("cnAB")
320
+		} else cnrmaResult <- NULL
321
+	}
322
+	if(platform=="affymetrix"){
323
+		if(exists("cnrmaResult")){
324
+			cnrmaResult <- get("cnrmaResult")
325
+		} else cnrmaResult <- NULL
326
+	}
483 327
 	ABset <- combineIntensities(get("res"), cnrmaResult, cdfName)
484 328
 	if(platform=="affymetrix") protocolData(ABset)[["ScanDate"]] <- as.character(celDates(filenames))	
485 329
 	crlmmResult <- harmonizeSnpSet(crlmmResult, ABset, cdfName)
... ...
@@ -496,9 +340,52 @@ crlmmWrapper <- function(filenames,
496 340
 	return()
497 341
 }
498 342
 
343
+validCdfNames <- function(platform){
344
+	if(!missing(platform)){
345
+		if(!platform %in% c("illumina", "affymetrix"))
346
+			stop("only illumina and affymetrix platforms are supported.")
347
+		if(platform=="illumina"){
348
+			chipList = c("human1mv1c",             # 1M
349
+			"human370v1c",            # 370CNV
350
+			"human650v3a",            # 650Y
351
+			"human610quadv1b",        # 610 quad
352
+			"human660quadv1a",        # 660 quad
353
+			"human370quadv3c",        # 370CNV quad
354
+			"human550v3b",            # 550K
355
+			"human1mduov3b")          # 1M Duo
356
+		}
357
+		if(platform=="affymetrix"){
358
+			chipList=c("genomewidesnp6", "genomewidesnp5")
359
+		}
360
+	} else{
361
+		chipList <- list()
362
+		chipList$affymetrix <- c("genomewidesnp6","genomewidesnp5")
363
+		chipList$illumina <- c("human370v1c",
364
+				       "human370quadv3c",
365
+				       "human550v3b",
366
+				       "human650v3a",
367
+				       "human610quadv1b",
368
+				       "human660quadv1a",
369
+				       "human1mduov3b")
370
+	}
371
+	return(chipList)
372
+}
499 373
 
500
-
501
-cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALSE){
374
+isValidCdfName <- function(cdfName, platform){
375
+	chipList <- validCdfNames(platform)
376
+	if(!(cdfName %in% chipList)){
377
+		warning("cdfName must be one of the following: ",
378
+			chipList)
379
+	}
380
+	result <- cdfName %in% chipList
381
+	return(result)
382
+}
383
+	
384
+	
385
+	
386
+# steps: quantile normalize hapmap: create 1m_reference_cn.rda object
387
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE){
388
+	if(missing(cdfName)) stop("must specify cdfName")
502 389
 	pkgname <- getCrlmmAnnotationName(cdfName)
503 390
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
504 391
 	if (missing(sns)) sns <- basename(filenames)
... ...
@@ -513,8 +400,14 @@ cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALS
513 400
 		message("Processing ", length(filenames), " files.")
514 401
 		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
515 402
 	}
516
-        loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
403
+	if(cdfName=="genomewidesnp6"){
404
+		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
405
+	}
406
+	if(cdfName=="genomewidesnp5"){
407
+		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
408
+	}
517 409
 	reference <- getVarInEnv("reference")
410
+	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
518 411
 	for(i in seq(along=filenames)){
519 412
 		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
520 413
 		x <- log2(y[idx2])
... ...
@@ -528,6 +421,7 @@ cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALS
528 421
 	dimnames(NP) <- list(names(fid), sns)
529 422
 	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
530 423
 	res3 <- list(NP=NP, SKW=SKW)
424
+	cat("\n")
531 425
 	return(res3)
532 426
 }
533 427
 
... ...
@@ -664,11 +558,12 @@ computeCopynumber <- function(object,
664 558
 			      bias.adj=FALSE,
665 559
 			      batch,
666 560
 			      SNRmin=5,
667
-			      cdfName, ...){
561
+			      cdfName,
562
+			      platform=c("affymetrix", "illumina")[1], ...){
668 563
 	if(class(object) != "CrlmmSetList") stop("object must be of class ClrmmSetList")
669 564
 	if(missing(cdfName))
670 565
 		cdfName <- annotation(object)
671
-	if(!isValidCdfName(cdfName, platform="affymetrix")) stop(cdfName, " not supported.")	
566
+	if(!isValidCdfName(cdfName, platform=platform)) stop(cdfName, " not supported.")	
672 567
 	if(ncol(object) < 10)
673 568
 		stop("Must have at least 10 samples in each batch to estimate model parameters....preferably closer to 90 samples per batch")
674 569
 	##require(oligoClasses)
... ...
@@ -691,7 +586,6 @@ computeCopynumber <- function(object,
691 586
 	##previous version of compute copy number
692 587
 	envir <- new.env()
693 588
 	message("Fitting model for copy number estimation...")
694
-
695 589
 	.computeCopynumber(chrom=CHR,
696 590
 			   A=A(ABset),
697 591
 			   B=B(ABset),
... ...
@@ -705,7 +599,6 @@ computeCopynumber <- function(object,
705 599
 			   SNRmin=SNRmin,
706 600
 			   cdfName=cdfName,
707 601
 			   ...)
708
-
709 602
 	if(bias.adj){
710 603
 		message("Running bias adjustment...")
711 604
 		.computeCopynumber(chrom=CHR,
... ...
@@ -991,6 +884,8 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
991 884
 	return(cnset)
992 885
 }
993 886
 
887
+
888
+
994 889
 thresholdCopyNumberSet <- function(object){
995 890
 	ca <- CA(object)
996 891
 	cb <- CB(object)
... ...
@@ -1795,195 +1690,6 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
1795 1690
 }
1796 1691
 
1797 1692
 
1798
-posteriorNonpolymorphic <- function(plateIndex, envir, priorProb, cnStates=0:6){
1799
-	p <- plateIndex
1800
-	CHR <- envir[["chrom"]]
1801
-	if(missing(priorProb)) priorProb <- rep(1/length(cnStates), length(cnStates)) ##uniform	
1802
-	plate <- envir[["plate"]]
1803
-	uplate <- envir[["plate"]]
1804
-	NP <- envir[["NP"]][, plate==uplate[p]]
1805
-	nuT <- envir[["nuT"]][, p]
1806
-	phiT <- envir[["phiT"]][, p]
1807
-	sig2T <- envir[["sig2T"]][, p]
1808
-	##Assuming background variance for np probes is the same on the log-scale
1809
-	emit <- array(NA, dim=c(nrow(NP), ncol(NP), length(cnStates)))##SNPs x sample x 'truth'
1810
-	lT <- log2(NP)
1811
-	sds <- sqrt(sig2T)
1812
-	counter <- 1##state counter	
1813
-	for(CT in cnStates){
1814
-		cat(".")
1815
-		if(CHR == 23) browser()
1816
-		means <- suppressWarnings(log2(nuT + CT*phiT))
1817
-		emit[, , counter] <- dnorm(lT, mean=means, sd=sds)
1818
-		counter <- counter+1
1819
-	}
1820
-	for(j in seq(along=cnStates)){
1821
-		emit[, , j] <- priorProb[j]*emit[, , j]
1822
-	}
1823
-	homDel <- emit[, , 1]
1824
-	hemDel <- emit[, , 2]
1825
-	norm <- emit[, , 3]
1826
-	amp <- emit[, , 4]
1827
-	amp4 <- emit[, , 5]
1828
-	amp5 <- emit[, , 6]
1829
-	amp6 <- emit[, , 7]
1830
-	total <- homDel+hemDel+norm+amp+amp4+amp5+amp6
1831
-	weights <- array(NA, dim=c(nrow(NP), ncol(NP), length(cnStates)))
1832
-	weights[, , 1] <- homDel/total
1833
-	weights[, , 2] <- hemDel/total
1834
-	weights[, , 3] <- norm/total
1835
-	weights[, , 4] <- amp/total
1836
-	weights[, , 5] <- amp4/total
1837
-	weights[, , 6] <- amp5/total
1838
-	weights[, , 7] <- amp6/total
1839
-	##posterior mode
1840
-	posteriorMode <- apply(weights, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1841
-	posteriorMode <- posteriorMode-1
1842
-	##sns <- envir[["sns"]]
1843
-	##colnames(posteriorMode) <- sns
1844
-	##envir[["np.posteriorMode"]] <- posteriorMode
1845
-	##envir[["np.weights"]] <- weights
1846
-	posteriorMeans <- 0*homDel/total + 1*hemDel/total + 2*norm/total + 3*amp/total + 4*amp4/total + 5*amp5/total + 6*amp6/total
1847
-	##colnames(posteriorMeans) <- sns
1848
-	##envir[["np.posteriorMeans"]] <- posteriorMeans
1849
-	return(posteriorMode)
1850
-}
1851
-
1852
-posteriorWrapper <- function(envir){
1853
-	snp.PM <- matrix(NA, length(envir[["snps"]]), length(envir[["sns"]]))
1854
-	np.PM <- matrix(NA, length(envir[["cnvs"]]), length(envir[["sns"]]))
1855
-	plate <- envir[["plate"]]
1856
-	uplate <- envir[["uplate"]]
1857
-	for(p in seq(along=uplate)){
1858
-		tmp <- expectedC(plateIndex=p, envir=envir)
1859
-		snp.PM[, plate==uplate[p]] <- tmp
1860
-		##snp.pm <- env[["posteriorMode"]]
1861
-		##trace(posteriorNonpolymorphic, browser)
1862
-		tmp <- posteriorNonpolymorphic(plateIndex=p, envir=envir)
1863
-		np.PM[, plate==uplate[p]] <- tmp##env[["np.posteriorMode"]]
1864
-		##pMode <- rbind(snp.pm, np.pm)
1865
-		##rownames(pMode) <- c(env[["snps"]], env[["cnvs"]])
1866
-		##dn <- dimnames(pMode)
1867
-		##pMode <- matrix(as.integer(pMode), nrow(pMode), ncol(pMode))
1868
-	}
1869
-	PM <- rbind(snp.PM, np.PM)
1870
-	PM <- matrix(as.integer(PM), nrow(PM), ncol(PM))
1871
-	dns <- list(c(envir[["snps"]], envir[["cnvs"]]), envir[["sns"]])
1872
-	dimnames(PM) <- dns
1873
-	return(PM)
1874
-}
1875
-
1876
-
1877
-
1878
-
1879
-
1880
-##for polymorphic probes
1881
-expectedC <- function(plateIndex, envir, priorProb, cnStates=0:6){
1882
-	p <- plateIndex
1883
-	CHR <- envir[["chrom"]]
1884
-	if(missing(priorProb)) priorProb <- rep(1/length(cnStates), length(cnStates)) ##uniform	
1885
-	plate <- envir[["plate"]]
1886
-	uplate <- envir[["uplate"]]
1887
-	A <- envir[["A"]]
1888
-	B <- envir[["B"]]
1889
-	A <- A[, plate==uplate[p]]
1890
-	B <- B[, plate==uplate[p]]
1891
-	calls <- envir[["calls"]]	
1892
-	calls <- calls[, plate==unique(plate)[p]]
1893
-	probA <- sqrt(rowMeans(calls == 1, na.rm=TRUE))
1894
-	probB <- sqrt(rowMeans(calls == 3, na.rm=TRUE))
1895
-	sig2A <- envir[["sig2A"]]
1896
-	sig2B <- envir[["sig2B"]]
1897
-	tau2A <- envir[["tau2A"]]
1898
-	tau2B <- envir[["tau2B"]]
1899
-	corrA.BB <- envir[["corrA.BB"]]
1900
-	corrB.AA <- envir[["corrB.AA"]]
1901
-	corr <- envir[["corr"]]
1902
-	nuA <- envir[["nuA"]]
1903
-	nuB <- envir[["nuB"]]
1904
-	phiA <- envir[["phiA"]]
1905
-	phiB <- envir[["phiB"]]
1906
-	emit <- array(NA, dim=c(nrow(A), ncol(A), 28))##SNPs x sample x 'truth'
1907
-	##AAAA, AAAB, AABB, ABBB, BBBB
1908
-	##AAAAA, AAAAB, AAABB, AABBB, ABBBB, BBBBB
1909
-	##AAAAAA, AAAAAB, AAAABB, AAABBB, AABBBB, ABBBBB, BBBBBB
1910
-	lA <- log2(A)
1911
-	lB <- log2(B)	
1912
-	X <- cbind(lA, lB)	
1913
-	counter <- 1##state counter
1914
-	for(CT in cnStates){
1915
-		cat(".")
1916
-		for(CA in 0:CT){
1917
-			CB <- CT-CA
1918
-			A.scale <- sqrt(tau2A[, p]*(CA==0) + sig2A[, p]*(CA > 0))
1919
-			B.scale <- sqrt(tau2B[, p]*(CB==0) + sig2B[, p]*(CB > 0))
1920
-			scale <- c(A.scale, B.scale)
1921
-			if(CA == 0 & CB == 0) rho <- 0
1922
-			if(CA == 0 & CB > 0) rho <- corrA.BB[, p]
1923
-			if(CA > 0 & CB == 0) rho <- corrB.AA[, p]
1924
-			if(CA > 0 & CB > 0) rho <- corr[, p]
1925
-			if(CHR == 23) browser()
1926
-			means <- cbind(suppressWarnings(log2(nuA[, p]+CA*phiA[, p])), suppressWarnings(log2(nuB[, p]+CB*phiB[, p])))
1927
-			covs <- rho*A.scale*B.scale
1928
-			A.scale2 <- A.scale^2
1929
-			B.scale2 <- B.scale^2			
1930
-			##ensure positive definite			
1931
-			##Sigma <- as.matrix(nearPD(matrix(c(A.scale^2, covs,
1932
-			##covs, B.scale^2), 2, 2))[[1]])
1933
-			m <- 1##snp counter				
1934
-			for(i in 1:nrow(A)){
1935
-				Sigma <- matrix(c(A.scale2[i], covs[i], covs[i], B.scale2[i]), 2,2)
1936
-				xx <- matrix(X[i, ], ncol=2)
1937
-				tmp <- dmvnorm(xx, mean=means[i, ], sigma=Sigma) 				
1938
-				##Using HWE: P(CA=ca, CB=cb|CT=c)				
1939
-				ptmp <- (probA[i]^CA)*(probB[i]^CB)*tmp
1940
-				emit[m, , counter] <- ptmp
1941
-				m <- m+1				
1942
-			}
1943
-			counter <- counter+1			
1944
-		}
1945
-	}
1946
-	##priorProb=P(CT=c)
1947
-	homDel <- priorProb[1]*emit[, , 1]
1948
-	hemDel <- priorProb[2]*emit[, , c(2, 3)] # + priorProb[3]*emit[, c(4, 5, 6)] + priorProb[4]*emit[, c(7:10)]
1949
-	norm <- priorProb[3]*emit[, , 4:6]
1950
-	amp <- priorProb[4]*emit[, , 7:10]
1951
-	amp4 <- priorProb[5]*emit[, , 11:15]
1952
-	amp5 <- priorProb[6]*emit[, , 16:21]
1953
-	amp6 <- priorProb[7]*emit[, , 22:28]	
1954
-	##sum over the different combinations within each copy number state
1955
-	hemDel <- apply(hemDel, c(1,2), sum)
1956
-	norm <- apply(norm, c(1, 2), sum)
1957
-	amp <- apply(amp, c(1,2), sum)
1958
-	amp4 <- apply(amp4, c(1,2), sum)
1959
-	amp5 <- apply(amp5, c(1,2), sum)
1960
-	amp6 <- apply(amp6, c(1,2), sum)
1961
-	total <- homDel+hemDel+norm+amp+amp4+amp5+amp6
1962
-	weights <- array(NA, dim=c(nrow(homDel), ncol(A), 7))
1963
-	weights[, , 1] <- homDel/total
1964
-	weights[, , 2] <- hemDel/total
1965
-	weights[, , 3] <- norm/total
1966
-	weights[, , 4] <- amp/total
1967
-	weights[, , 5] <- amp4/total
1968
-	weights[, , 6] <- amp5/total
1969
-	weights[, , 7] <- amp6/total
1970
-	##posterior mode
1971
-	posteriorMode <- apply(weights, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1972
-	posteriorMode <- posteriorMode-1
1973
-	##This is for one plate.  Need to instantiate a much bigger
1974
-	##object in the environment
1975
-	
1976
-	##envir[["posteriorMode"]] <- posteriorMode
1977
-	##weights <- list(homDel/total, hemDel/total, norm/total, amp/total, amp4/total, amp5/total, amp6/total)
1978
-	##names(weights) <- c(cnStates)
1979
-	##envir[["weights"]] <- weights
1980
-	posteriorMeans <- 0*homDel/total + 1*hemDel/total + 2*norm/total + 3*amp/total + 4*amp4/total + 5*amp5/total + 6*amp6/total
1981
-	##sns <- envir[["sns"]]
1982
-	##colnames(posteriorMeans) <- sns
1983
-	##envir[["posteriorMeans"]] <- posteriorMeans
1984
-	return(posteriorMode)
1985
-}
1986
-
1987 1693
 biasAdjNP <- function(plateIndex, envir, priorProb){
1988 1694
 	p <- plateIndex
1989 1695
 	normalNP <- envir[["normalNP"]]
... ...
@@ -2079,7 +1785,6 @@ computeEmission <- function(object,
2079 1785
 	##threshold small nu's and phis
2080 1786
 	cnset <- thresholdModelParams(object[[3]], MIN=MIN)
2081 1787
 	index <- order(chromosome(cnset), position(cnset))
2082
-	
2083 1788
 	if(any(diff(index) > 1)) stop("must be ordered by chromosome and physical position")
2084 1789
 	emissionProbs <- array(NA, dim=c(nrow(cnset), ncol(cnset), length(copyNumberStates)))
2085 1790
 	dimnames(emissionProbs) <- list(featureNames(object),
... ...
@@ -2223,3 +1928,15 @@ thresholdModelParams <- function(object, MIN=2^3){
2223 1928
 	}
2224 1929
 	emissionProbs
2225 1930
 }
1931
+
1932
+setMethod("update", "character", function(object, ...){
1933
+	crlmmFile <- object
1934
+	for(i in seq(along=crlmmFile)){
1935
+		cat("Processing ", clrmmFile[i], "...\n")
1936
+		load(crlmmFile[i])
1937
+		crlmmSetList <- get("crlmmSetList")
1938
+		crlmmSetList <- update(crlmmSetList, ...)
1939
+		save(crlmmSetList, file=crlmmFile[i])
1940
+		rm(crlmmSetList); gc();
1941
+	}
1942
+})
... ...
@@ -12,6 +12,14 @@ readIdatFiles <- function(sampleSheet=NULL,
12 12
 			  sep="_",
13 13
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
14 14
 			  saveDate=FALSE) {
15
+       if(is.null(arrayNames)) {
16
+	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
17
+	       if(!is.null(sampleSheet)) {
18
+		       sampleSheet=NULL
19
+		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
20
+	       }
21
+	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
22
+       }	
15 23
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
16 24
 	       if(!is.null(arrayNames)){
17 25
 		       ##arrayNames=NULL
... ...
@@ -35,14 +43,6 @@ readIdatFiles <- function(sampleSheet=NULL,
35 43
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
36 44
 	       sampleNames(pd) <- arrayNames
37 45
        }
38
-       if(is.null(arrayNames)) {
39
-	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
40
-	       if(!is.null(sampleSheet)) {
41
-		       sampleSheet=NULL
42
-		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
43
-	       }
44
-	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
45
-       }
46 46
        narrays = length(arrayNames)
47 47
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
48 48
        redfiles = paste(arrayNames, fileExt$red, sep=sep)
... ...
@@ -50,9 +50,10 @@ readIdatFiles <- function(sampleSheet=NULL,
50 50
 	       stop("Cannot find .idat files")
51 51
        if(length(grnfiles)!=length(redfiles))
52 52
 	       stop("Cannot find matching .idat files")
53
-       if(!all(c(redfiles,grnfiles) %in% dir(path=path)))
53
+       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
54 54
 	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
55 55
 		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
56
+       }
56 57
        grnidats = file.path(path, grnfiles)
57 58
        redidats = file.path(path, redfiles)
58 59
        headerInfo = list(nProbes = rep(NA, narrays),
... ...
@@ -5,7 +5,14 @@ setValidity("ABset", function(object) {
5 5
 })
6 6
 setMethod("A", "ABset", function(object) assayData(object)[["A"]])
7 7
 setMethod("B", "ABset", function(object) assayData(object)[["B"]])
8
-
8
+setReplaceMethod("A", signature(object="ABset", value="matrix"),
9
+		 function(object, value){
10
+			 assayDataElementReplace(object, "A", value)			
11
+		 })
12
+setReplaceMethod("B", signature(object="ABset", value="matrix"),
13
+		 function(object, value){
14
+			 assayDataElementReplace(object, "B", value)			
15
+		 })
9 16
 
10 17
 
11 18
 
... ...
@@ -6,12 +6,14 @@ setValidity("CopyNumberSet", function(object) {
6 6
 ##may want to allow thresholding here (... arg)
7 7
 setMethod("CA", "CopyNumberSet", function(object, ...) assayData(object)[["CA"]]/100)
8 8
 setMethod("CB", "CopyNumberSet", function(object, ...) assayData(object)[["CB"]]/100)
9
-setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
10
-		 function(object, value) assayDataElementReplace(object, "CB", value))
9
+
11 10
 setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"),
12
-		 function(object, value) assayDataElementReplace(object, "CA", value))
11
+		 function(object, value) assayDataElementReplace(object, "CA", value*100))
12
+setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
13
+		 function(object, value) assayDataElementReplace(object, "CB", value*100))
14
+
15
+
13 16
 
14
-setMethod("chromosome", "CopyNumberSet", function(object) fData(object)$chromosome)
15 17
 
16 18
 setMethod("batch", "CopyNumberSet", function(object){
17 19
 	if("batch" %in% varLabels(object)){
... ...
@@ -23,19 +25,23 @@ setMethod("batch", "CopyNumberSet", function(object){
23 25
 })
24 26
 
25 27
 setMethod("copyNumber", "CopyNumberSet", function(object){
28
+	require(paste(annotation(object), "Crlmm", sep=""), character.only=TRUE) || stop(paste("Annotation package ", annotation(object), "Crlmm not available", sep=""))
26 29
 	##ensure that 2 + NA = 2 by replacing NA's with zero
30
+	##the above results in copy number 0, 1, or 2 depending on the genotype....safer just to drop
27 31
 	CA <- CA(object)
28 32
 	CB <- CB(object)
29
-	nas <- is.na(CA) & is.na(CB)
30
-	CA[is.na(CA)] <- 0
31
-	CB[is.na(CB)] <- 0
33
+	##nas <- is.na(CA) & is.na(CB)
34
+	##CA[is.na(CA)] <- 0
35
+	##CB[is.na(CB)] <- 0
32 36
 	CN <- CA + CB
37
+	##For nonpolymorphic probes, CA is the total copy number
38
+	CN[cnIndex(object, annotation(object)), ] <- CA(object)[cnIndex(object, annotation(object)), ]
33 39
 	##if both CA and CB are NA, report NA
34
-	CN[nas] <- NA
40
+	##CN[nas] <- NA
35 41
 	CN
36 42
 })
37 43
 
38
-setMethod("position", "CopyNumberSet", function(object) fData(object)$position)
44
+
39 45
 
40 46
 ##setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
41 47
 ellipse.CopyNumberSet <- function(x, copynumber, ...){
... ...
@@ -43,7 +49,15 @@ ellipse.CopyNumberSet <- function(x, copynumber, ...){
43 49
 	##fittedOrder <- unique(sapply(basename(celFiles), function(x) strsplit(x, "_")[[1]][2]))
44 50
 	##index <- match(plates, fittedOrder)
45 51
 	if(nrow(x) > 1) stop("only 1 snp at a time")
46
-	batch <- unique(x$batch)
52
+	##batch <- unique(x$batch)
53
+	args <- list(...)
54
+	if(!"batch" %in% names(args)){
55
+		jj <- match("batch", varLabels(x))
56
+		if(length(jj) < 1) stop("batch not in varLabels")
57
+		batch <- unique(pData(x)[, jj])
58
+	} else{
59
+		batch <- unique(args$batch)
60
+	}
47 61
 	if(length(batch) > 1) stop("batch variable not unique")
48 62
 	nuA <- as.numeric(fData(x)[, match(paste("nuA", batch, sep="_"), fvarLabels(x))])
49 63
 	nuB <- as.numeric(fData(x)[, match(paste("nuB", batch, sep="_"), fvarLabels(x))])	
... ...
@@ -65,6 +79,7 @@ ellipse.CopyNumberSet <- function(x, copynumber, ...){
65 79
 			if(CA == 0 & CB > 0) rho <- corrA.BB
66 80
 			if(CA > 0 & CB == 0) rho <- corrB.AA
67 81
 			if(CA > 0 & CB > 0) rho <- corr
82
+			if(CA == 0 & CB == 0) rho <- 0
68 83
 			lines(ellipse(x=rho, centre=c(log2(nuA+CA*phiA),
69 84
 					     log2(nuB+CB*phiB)),
70 85
 				      scale=scale), ...)
... ...
@@ -48,13 +48,78 @@ setMethod(".harmonizeDimnames", "CrlmmSetList", function(object){
48 48
 })
49 49
 
50 50
 setMethod("A", "CrlmmSetList", function(object) A(object[[1]]))
51
+
52
+setMethod("addFeatureAnnotation", "CrlmmSetList", function(object, CHR){
53
+	if(missing(CHR)) stop("Must specificy chromosome")
54
+	cdfName <- annotation(object)
55
+	pkgname <- paste(cdfName, "Crlmm", sep="")	
56
+	path <- system.file("extdata", package=pkgname)
57
+	loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
58
+	cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
59
+	loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
60
+	snpProbes <- get("snpProbes", envir=.crlmmPkgEnv)	
61
+
62
+	##Feature Data
63
+	snps <- featureNames(object)[snpIndex(object)]
64
+	nps <- featureNames(object)[cnIndex(object)]
65
+	position.snp <- snpProbes[match(snps, rownames(snpProbes)), "position"]
66
+	names(position.snp) <- snps
67
+	position.np <- cnProbes[match(nps, rownames(cnProbes)), "position"]
68
+	names(position.np) <- nps
69
+	
70
+	position <- c(position.snp, position.np)
71
+	position <- position[match(featureNames(object), names(position))]
72
+	stopifnot(identical(names(position), featureNames(object)))
73
+	if(sum(duplicated(names(position))) > 0){
74
+		warning("Removing rows with NA identifiers...")
75
+		##RS: fix this
76
+		I <- which(!is.na(names(position)))
77
+	}  else I <- seq(along=names(position))
78
+	fd <- data.frame(cbind(CHR,
79
+			       position[I]))
80
+	colnames(fd) <- c("chromosome", "position")
81
+	rownames(fd) <- featureNames(object)
82
+	fD <- new("AnnotatedDataFrame",
83
+		  data=fd,
84
+		  varMetadata=data.frame(labelDescription=colnames(fd)))
85
+	return(fD)
86
+})
87
+
51 88
 setMethod("annotation", "CrlmmSetList", function(object) annotation(object[[1]]))
52 89
 setMethod("B", "CrlmmSetList", function(object) B(object[[1]]))
53 90
 setMethod("batch", "CrlmmSetList", function(object) batch(object[[3]]))
54 91
 setMethod("CA", "CrlmmSetList", function(object, ...) CA(object[[3]], ...))
55 92
 setMethod("CB", "CrlmmSetList", function(object, ...) CB(object[[3]], ...))
56 93
 setMethod("calls", "CrlmmSetList", function(object) calls(object[[2]]))
57
-setMethod("chromosome", "CrlmmSetList", function(object) chromosome(object[[3]]))
94
+setMethod("chromosome", "CrlmmSetList", function(object){
95
+	chr <- NULL
96
+	for(i  in 1:length(object)){
97
+		if(length(fvarLabels(object[[i]])) > 0){
98
+			if("chromosome" %in% fvarLabels(object[[i]])){
99
+				chr <- chromosome(object[[i]])
100
+				break()
101
+			}
102
+		}
103
+	}
104
+	if(is.null(chr)) warning("fvarLabel 'chromosome' not in any element of the CrlmmSetList object")	
105
+	return(chr)
106
+	##chromosome(object[[3]])
107
+	})
108
+
109
+setMethod("position", "CrlmmSetList", function(object){
110
+	pos <- NULL
111
+	for(i  in 1:length(object)){
112
+		if(length(fvarLabels(object[[i]])) > 0){
113
+			if("position" %in% fvarLabels(object[[i]])){
114
+				pos <- position(object[[i]])
115
+				break()
116
+			}
117
+		} else next()
118
+	}
119
+	if(is.null(pos)) warning("fvarLabel 'position' not in any element of the CrlmmSetList object")
120
+	return(pos)
121
+	})
122
+
58 123
 setMethod("cnIndex", "CrlmmSetList", function(object, ...) {
59 124
 	match(cnNames(object[[1]], annotation(object)), featureNames(object))
60 125
 })
... ...
@@ -94,7 +159,6 @@ setMethod("points", signature(x="CrlmmSetList"),
94 159
 		  B <- log2(B(x))
95 160
 		  points(A, B, ...)
96 161
 	  })
97
-setMethod("position", "CrlmmSetList", function(object) position(object[[3]]))
98 162
 setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]]))
99 163
 setMethod("show", "CrlmmSetList", function(object){
100 164
 	cat("\n Elements in CrlmmSetList object: \n")
... ...
@@ -130,6 +194,29 @@ setMethod("update", "CrlmmSetList", function(object, ...){
130 194
 	computeCopynumber(object, ...)
131 195
 })
132 196
 
197
+setReplaceMethod("CA", signature(object="CrlmmSetList", value="matrix"),
198
+		 function(object, value){
199
+			 CA(object[[3]]) <- value
200
+			 object
201
+		 })
202
+setReplaceMethod("CB", signature(object="CrlmmSetList", value="matrix"),
203
+		 function(object, value){
204
+			 CB(object[[3]]) <- value
205
+			 object
206
+			 })
207
+
208
+setReplaceMethod("A", signature(object="CrlmmSetList", value="matrix"),
209
+		 function(object, value){
210
+			 A(object[[1]]) <- value
211
+			 object
212
+		 })
213
+setReplaceMethod("B", signature(object="CrlmmSetList", value="matrix"),
214
+		 function(object, value){
215
+			 B(object[[1]]) <- value
216
+			 object
217
+		 })
218
+
219
+
133 220
 
134 221
 setMethod("boxplot", "CrlmmSetList", function(x, ...){
135 222
 ##boxplot.CrlmmSetList <- function(x, ...){
... ...
@@ -138,7 +225,7 @@ setMethod("boxplot", "CrlmmSetList", function(x, ...){
138 225
 	A1 <- A(x)
139 226
 	B1 <- B(x)
140 227
 	Alist <- split(A1, genotypes)
141
-	Alist <- rev(Alist)
228
+	Alist <- as(rev(Alist), "data.frame")
142 229
 	Blist <- split(B1, genotypes)
143 230
 	ylim <- range(unlist(Alist))
144 231
 	boxplot(Alist, xaxt="n", ylab=expression(I[A]), 
... ...
@@ -15,6 +15,8 @@ setMethod("snpNames", "eSet", function(object, cdfName){
15 15
 	snps <- snps[snps %in% featureNames(object)]
16 16
 	featureNames(object)[match(snps, featureNames(object))]
17 17
   })
18
+setMethod("chromosome", "eSet", function(object) fData(object)$chromosome)
19
+setMethod("position", "eSet", function(object) fData(object)$position)
18 20
 ##setMethod("combine", signature=signature(x="eSet", y="eSet"),
19 21
 ##	  function(x, y, ...){
20 22
 ##		  ##Check that both x and y are valid objects
... ...
@@ -1,90 +1,85 @@
1 1
 snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
2
-  if (missing(sns)) sns <- basename(filenames)
3
-  ##ADD CHECK TO SEE IF LOADED
4
-  if (missing(cdfName))
5
-    cdfName <- read.celfile.header(filenames[1])$cdfName
6
-##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7
-  pkgname <- getCrlmmAnnotationName(cdfName)
8
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
11
-    message(strwrap(msg))
12
-    stop("Package ", pkgname, " could not be found.")
13
-    rm(suggCall, msg)
14
-  }
15
-  
16
-  if(verbose) message("Loading annotations and mixture model parameters.")
17
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
18
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
19
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
20
-##  data(preprocStuff, genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
21
-  autosomeIndex <- getVarInEnv("autosomeIndex")
22
-  pnsa <- getVarInEnv("pnsa")
23
-  pnsb <- getVarInEnv("pnsb")
24
-  fid <- getVarInEnv("fid")
25
-  reference <- getVarInEnv("reference")
26
-  aIndex <- getVarInEnv("aIndex")
27
-  bIndex <- getVarInEnv("bIndex")
28
-  SMEDIAN <- getVarInEnv("SMEDIAN")
29
-  theKnots <- getVarInEnv("theKnots")
30
-  gns <- getVarInEnv("gns")
2
+	if (missing(sns)) sns <- basename(filenames)
3
+	##ADD CHECK TO SEE IF LOADED
4
+	if (missing(cdfName))
5
+		cdfName <- read.celfile.header(filenames[1])$cdfName
6
+	##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7
+	pkgname <- getCrlmmAnnotationName(cdfName)
8
+	if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
+		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
+		msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
11
+		message(strwrap(msg))
12
+		stop("Package ", pkgname, " could not be found.")
13
+		rm(suggCall, msg)
14
+	}
15
+	if(verbose) message("Loading annotations and mixture model parameters.")
16
+	loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
17
+	loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
18
+	loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
19
+	autosomeIndex <- getVarInEnv("autosomeIndex")
20
+	pnsa <- getVarInEnv("pnsa")
21
+	pnsb <- getVarInEnv("pnsb")
22
+	fid <- getVarInEnv("fid")
23
+	reference <- getVarInEnv("reference")
24
+	aIndex <- getVarInEnv("aIndex")
25
+	bIndex <- getVarInEnv("bIndex")
26
+	SMEDIAN <- getVarInEnv("SMEDIAN")
27
+	theKnots <- getVarInEnv("theKnots")
28
+	gns <- getVarInEnv("gns")
31 29
 
32
-  ##We will read each cel file, summarize, and run EM one by one
33
-  ##We will save parameters of EM to use later
34
-  mixtureParams <- matrix(0, 4, length(filenames))
35
-  SNR <- vector("numeric", length(filenames))
36
-  SKW <- vector("numeric", length(filenames))
37
-
38
-  ## This is the sample for the fitting of splines
39
-  ## BC: I like better the idea of the user passing the seed,
40
-  ##     because this might intefere with other analyses
41
-  ##     (like what happened to GCRMA)
42
-  set.seed(seed)
43
-  
44
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
30
+	##We will read each cel file, summarize, and run EM one by one
31
+	##We will save parameters of EM to use later
32
+	mixtureParams <- matrix(0, 4, length(filenames))
33
+	SNR <- vector("numeric", length(filenames))
34
+	SKW <- vector("numeric", length(filenames))
45 35
 
46
-  ##S will hold (A+B)/2 and M will hold A-B
47
-  ##NOTE: We actually dont need to save S. Only for pics etc...
48
-  ##f is the correction. we save to avoid recomputing
49
-  A <- matrix(as.integer(0), length(pnsa), length(filenames))
50
-  B <- matrix(as.integer(0), length(pnsb), length(filenames))
36
+	## This is the sample for the fitting of splines
37
+	## BC: I like better the idea of the user passing the seed,
38
+	##     because this might intefere with other analyses
39
+	##     (like what happened to GCRMA)
40
+	set.seed(seed)
41
+	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
42
+	##S will hold (A+B)/2 and M will hold A-B
43
+	##NOTE: We actually dont need to save S. Only for pics etc...
44
+	##f is the correction. we save to avoid recomputing
45
+	A <- matrix(as.integer(0), length(pnsa), length(filenames))
46
+	B <- matrix(as.integer(0), length(pnsb), length(filenames))
51 47
   
52
-  if(verbose){
53
-    message("Processing ", length(filenames), " files.")
54
-    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
55
-  }
56
-  ##We start looping throug cel files
57
-  idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
58
-  for(i in seq(along=filenames)){
59
-    y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
60
-    x <- log2(y[idx2])
61
-    SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
62
-    rm(x)
63
-    y <- normalize.quantiles.use.target(y, target=reference)
64
-    A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
65
-    B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
66
-    
67
-    ##Now to fit the EM
68
-    if(fitMixture){
69
-      S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
70
-      M <- log2(A[idx, i])-log2(B[idx, i])
48
+	if(verbose){
49
+		message("Processing ", length(filenames), " files.")
50
+		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
51
+	}
52
+	##We start looping throug cel files
53
+	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
54
+	for(i in seq(along=filenames)){
55
+		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
56
+		x <- log2(y[idx2])
57
+		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
58
+		rm(x)
59
+		y <- normalize.quantiles.use.target(y, target=reference)
60
+		A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
61
+		B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
62
+		##Now to fit the EM
63
+		if(fitMixture){
64
+			S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
65
+			M <- log2(A[idx, i])-log2(B[idx, i])
71 66
       
72
-      ##we need to test the choice of eps.. it is not the max diff between funcs
73
-      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
74
-      
75
-      mixtureParams[, i] <- tmp[["coef"]]
76
-      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
77
-    }
78
-    if (verbose)
79
-      if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
80
-      else cat(".")
81
-  }
82
-  if (verbose)
83
-    if (getRversion() > '2.7.0') close(pb)
84
-    else cat("\n")
85
-  if (!fitMixture) SNR <- mixtureParams <- NA
86
-  ## gns comes from preprocStuff.rda
87
-  list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
67
+			##we need to test the choice of eps.. it is not the max diff between funcs
68
+			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
69
+			
70
+			mixtureParams[, i] <- tmp[["coef"]]
71
+			SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
72
+		}
73
+		if (verbose)
74
+			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
75
+			else cat(".")
76
+	}
77
+	if (verbose)
78
+		if (getRversion() > '2.7.0') close(pb)
79
+		else cat("\n")
80
+	if (!fitMixture) SNR <- mixtureParams <- NA
81
+	## gns comes from preprocStuff.rda
82
+	list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
88 83
 }
89 84
 
90 85
 fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
... ...
@@ -51,7 +51,7 @@ B Carvalho
51 51
     copy-neutral ROH:  (1-epsilon)/2 AA,  epsilon AB,  (1-epsilon)/2
52 52
     BB
53 53
 
54
-  o add caConfidence and cbConfidence slots (but what about the correlation)
54
+  o Define a class that contains settings for genotyping/copynumber estimation
55 55
 
56 56
 
57 57
 
... ...
@@ -4,7 +4,7 @@
4 4
 %\VignettePackage{crlmm}
5 5
 \documentclass{article}
6 6
 \usepackage{graphicx}
7
-\usepackage[authoryear,round,numbers]{natbib}
7
+\usepackage{natbib}
8 8
 \newcommand{\Rfunction}[1]{{\texttt{#1}}}
9 9
 \newcommand{\Rmethod}[1]{{\texttt{#1}}}
10 10
 \newcommand{\Rcode}[1]{{\texttt{#1}}}
... ...
@@ -15,7 +15,7 @@
15 15
 \newcommand{\R}{\textsf{R}}
16 16
 
17 17
 \begin{document}
18
-\title{Copy number estimation}
18
+\title{Copy number estimation and genotype calling with \Rpackage{crlmm}}
19 19
 \date{May, 2009}
20 20
 \author{Rob Scharpf}
21 21
 \maketitle
... ...
@@ -40,12 +40,13 @@ options(prompt="R> ")
40 40
 
41 41
 \section{Simple usage}
42 42
 
43
-The following packages are required:
43
+CRLMM supports the following platforms:
44 44
 
45
-<<requiredPackages>>=
45
+<<cdfname>>=
46 46
 library(crlmm)
47
-library(genomewidesnp6Crlmm)
47
+crlmm:::validCdfNames()
48 48
 @ 
49
+
49 50
 \paragraph{Preprocess and genotype.}
50 51
 
51 52
 Specify the coordinates of Affymetrix cel files and where to put
... ...
@@ -55,17 +56,26 @@ number estimation.
55 56
 <<celfiles>>=
56 57
 myPath <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"
57 58
 celFiles <- list.celfiles(myPath, full.names=TRUE, pattern=".CEL")
58
-outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
59
+#outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
60
+outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/tmp"
61
+dir.create(outdir)
59 62
 @ 
60 63
 
61
-\noindent Preprocess and genotype (for more info see the crlmm vignette):
62 64
 
63
-<<preprocessAndGenotype>>=
64
-crlmmWrapper(celFiles[1:15], 
65
+
66
+\noindent The next code chunk quantile normalizes the samples to a
67
+target reference distribution and uses the crlmm algorithm to genotype.
68
+See \cite{Carvalho2007a} for details regarding the crlmm genotyping
69
+algorithm.
70
+
71
+<<preprocessAndGenotype, eval=FALSE>>=
72
+crlmmWrapper(celFiles,##[1:15], 
73
+	     cdfName="genomewidesnp6",
65 74
 	     save.it=TRUE, 
66
-	     load.it=TRUE,
75
+	     load.it=FALSE,
67 76
 	     intensityFile=file.path(outdir, "normalizedIntensities.rda"),
68
-	     crlmmFile=file.path(outdir, "snpsetObject.rda"))
77
+	     crlmmFile=file.path(outdir, "snpsetObject.rda"),
78
+	     platform="affymetrix")
69 79
 @ 
70 80
 
71 81
 As a result of the above wrapper, the following R objects are now created:
... ...
@@ -110,9 +120,10 @@ if(!exists("emission.cn")){
110 120
 }
111 121
 @ 
112 122
 
113
-*Warning: more can be done than currently implemented to protect against
114
-outliers.  In addition, improved estimates of uncertainty for the copy
115
-number prediction regions will also help.*
123
+*Before smoothing the estimates as a function of physical position by
124
+segmentation hidden Markov models, one should consider how to handle
125
+outliers.  In particular, samples with low signal to noise ratios are
126
+likely to have many copy number outliers. See suggested visualizations.
116 127
 
117 128
 Initial state probabilities and transition probabilities for the HMM:
118 129
 
... ...
@@ -265,8 +276,6 @@ as well as better containers for storing these parameters. See
265 276
 <<copynumberParameters>>=
266 277
 fvarLabels(crlmmSetList[[3]])
267 278
 @ 
268
-
269
-
270 279
 \section{Suggested visualizations}
271 280
 
272 281
 A histogram of the signal to noise ratio for the HapMap samples:
... ...
@@ -285,11 +294,11 @@ hist(crlmmSetList[[2]]$SNR, xlab="SNR", main="")
285 294
 SNRmin <- 5
286 295
 @ 
287 296
 
288
-We suggest excluding samples with a signal to noise ratio less than
289
-\Sexpr{SNRmin}.  As batch effects can be very large in the
290
-quantile-normalized intensities, we suggest adjusting for date or
291
-chemistry plate.  Ideally, one would have 70+ files in a given
292
-batch. Here we make a table of date versus ancestry:
297
+For Affymetrix 6.0, we suggest excluding samples with a signal to noise
298
+ratio less than \Sexpr{SNRmin}.  Adjusting by date or chemistry plate
299
+can be helpful for limiting the influence of batch effects.  Ideally,
300
+one would have 70+ files in a given batch. Here we make a table of date
301
+versus ancestry:
293 302
 
294 303
 <<specifyBatch, eval=FALSE, echo=FALSE>>=
295 304
 sns <- sampleNames(crlmmResults)
... ...
@@ -307,7 +316,7 @@ not the case, we illustrate how one may adjust for batch using the
307 316
 chemistry plate as an argument for \Robject{batch} in the
308 317
 \Robject{computeCopynumber} function.
309 318
 
310
-**Note: the number of samples in the \Robject{CrlmmSetList} object after
319
+*Note: the number of samples in the \Robject{CrlmmSetList} object after
311 320
 copy number estimation may be fewer than the number of samples in the
312 321
 \Robject{CrlmmSetList} object after preprocessing/genotyping.  This
313 322
 occurs when 1 or more samples have a signal-to-noise ratio less than
... ...
@@ -1,8 +1,32 @@
1
+%\VignetteIndexEntry{crlmm copy number Vignette for Illumina}
2
+%\VignetteDepends{crlmm}
3
+%\VignetteKeywords{crlmm, illumina}
4
+%\VignettePackage{crlmm}
5
+\documentclass{article}
6
+\usepackage{graphicx}
7
+\usepackage{natbib}
8
+\newcommand{\Rfunction}[1]{{\texttt{#1}}}
9
+\newcommand{\Rmethod}[1]{{\texttt{#1}}}
10
+\newcommand{\Rcode}[1]{{\texttt{#1}}}
11
+\newcommand{\Robject}[1]{{\texttt{#1}}}
12
+\newcommand{\Rpackage}[1]{{\textsf{#1}}}
13
+\newcommand{\Rclass}[1]{{\textit{#1}}}
14
+\newcommand{\oligo}{\Rpackage{oligo }}
15
+\newcommand{\R}{\textsf{R}}
16
+
17
+\begin{document}
18
+\title{Copy number estimation and genotype calling with \Rpackage{crlmm}}
19
+\date{Oct, 2009}
20
+\author{Rob Scharpf}
21
+\maketitle
22
+
23
+<<setup, echo=FALSE, results=hide>>=
24
+options(width=60)
25
+options(continue=" ")
26
+options(prompt="R> ")
27
+@ 
28
+
1 29
 <<>>=
2
-# start R from /home/bst/other/mritchie/R/R-300309/bin/R
3
-# as this has current crlmm and region package installed
4
-# if you want to ue your own R, the current region package is at
5
-# /thumper/ctsa/snpmicroarray/illumina/crlmm/370k/human370v1cCrlmm/
6 30
 library(Biobase)
7 31
 library(crlmm)
8 32
 setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k")
... ...
@@ -11,71 +35,37 @@ setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k")
11 35
 <<readIdat>>=
12 36
 samplesheet5 = read.csv("HumanHap370Duo_Sample_Map.csv", header=TRUE, as.is=TRUE)[-c(28:46,61:75,78:79),]
13 37
 if(!exists("RG")){
14
-	## remove samples which I don't have .idats for
15
-	## subset further to allow for quicker testing
16
-	##samplesheet5 = samplesheet5[1:5,]
17
-	## read in the .idats
18 38
 	RG <- readIdatFiles(samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE)
19 39
 }
20 40
 @ 
21 41
 
22
-Get dates for each array - scan date is likely to be most meaningful
42
+Alternatively, arguments to the readIdatFiles can be passed through the
43
+{\tt ...} argument of the \R{} function \Robject{crlmmWrapper}.
23 44
 
24
-<<dates>>=
25
-pd <- pData(RG)
26
-scandatetime = strptime(as.character(pd$ScanDate), "%m/%d/%Y %H:%M:%S %p")
27
-decodedatetime = strptime(as.character(pd$DecodeDate), "%m/%d/%Y %H:%M:%S %p")
28
-table(format(scandatetime, "%d %b %Y"))
29
-table(format(scandatetime, "%b"))
45
+<<wrapper>>=
46
+crlmmWrapper(sampleSheet=samplesheet5,
47
+	     arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), 
48
+	     saveDate=TRUE,
49
+	     cdfName=cdfName,
50
+	     load.it=FALSE,
51
+	     save.it=TRUE,
52
+	     intensityFile=file.path(platedir, "normalizedIntensities.rda"),
53
+	     crlmmFile=file.path(platedir, "snpsetObject.rda"),
54
+	     rgFile=file.path(platedir, "rgFile.rda"),
55
+	     platform="illumina")
30 56
 @ 
31 57
 
32 58
 Note: the code below is run for testing purposes only. None of these
33 59
 functions are exported, so would not routinely be run directly by the
34
-user.  A typical analysis would involve runnning readIdatFiles()
35
-followed by crlmmIllumina()
36
-
37
-<<crlmm>>=
38
-if(!exists("res.rda")){
39
-	outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/illumina/HumanCNV370-Duo"	
40
-	if(!file.exists(file.path(outdir, "res.rda"))){
41
-		crlmmOut <- crlmmIllumina(RG=RG, cdfName="human370v1c", sns=pData(RG)$ID, returnParams=TRUE, save.it=TRUE, intensityFile=file.path(outdir, "res.rda"))
42
-		save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))				
43
-	} else{
44
-		message("Loading...")		
45
-		load(file.path(outdir, "res.rda"))
46
-		load(file.path(outdir, "crlmmOut.rda"))		
47
-	}
48
-}
49
-@
60
+user.  A typical analysis would involve runnning crlmmWrapper() followed
61
+by update() to obtain copy number.
50 62
 
51
-TODO: The code below needs to be udpated for the Illumina platform.
52 63
 
53 64
 <<SNR>>=
54 65
 hist(crlmmOut$SNR) ##approx. 5-fold higher than what we see in Affy!
55 66
 @ 
56 67
 
57
-<<>>=
58
-##cnAB was assigned to the global environment
59
-NP = (cnAB$A+cnAB$B)/2 # average normalized A and B intensities
60
-colnames(NP) <- colnames(crlmm:::calls(crlmmOut))
61
-chr22 <- computeCopynumber(chrom=22, A=res[["A"]], B=res[["B"]], 
62
-			   calls=crlmm:::calls(crlmmOut),
63
-			   conf=confs(crlmmOut), 
64
-			   plate=rep(1, ncol(NP)),
65
-			   NP=NP, 
66
-			   cdfName="human370v1c", 
67
-			   SNR=res[["SNR"]], 
68
-			   envir=cnenv)
69
-@ 
70
-
71
-<<>>=
72
-copyA=cnenv[["CA"]]
73
-copyB=cnenv[["CB"]]
74
-copyT=(copyA + copyB)/100
75
-copyT[copyT > 6] <- 6
76
-copyT[copyT < 0] <- 0
77
-plot(copyT[, 1], pch=".")
78
-@ 
68
+\end{document}
79 69
 
80 70
 
81 71
 
... ...
@@ -4,5 +4,52 @@
4 4
   title = {A multilevel model to address batch effects in copy number estimation
5 5
 	using SNP arrays},
6 6
   month = {May},
7
-  year = {2009}
7
+  year = {2009},
8
+  url={http://www.bepress.com/cgi/viewcontent.cgi?article=1193&context=jhubiostat}
9
+}
10
+
11
+@ARTICLE{Carvalho2007a,
12
+  author = {Benilton Carvalho and Henrik Bengtsson and Terence P Speed and Rafael
13
+	A Irizarry},
14
+  title = {Exploration, normalization, and genotype calls of high-density oligonucleotide
15
+	SNP array data.},
16
+  journal = {Biostatistics},
17
+  year = {2007},
18
+  volume = {8},
19
+  pages = {485--499},
20
+  number = {2},
21
+  month = {Apr},
22
+  abstract = {In most microarray technologies, a number of critical steps are required
23
+	to convert raw intensity measurements into the data relied upon by
24
+	data analysts, biologists, and clinicians. These data manipulations,
25
+	referred to as preprocessing, can influence the quality of the ultimate
26
+	measurements. In the last few years, the high-throughput measurement
27
+	of gene expression is the most popular application of microarray
28
+	technology. For this application, various groups have demonstrated
29
+	that the use of modern statistical methodology can substantially
30
+	improve accuracy and precision of the gene expression measurements,
31
+	relative to ad hoc procedures introduced by designers and manufacturers
32
+	of the technology. Currently, other applications of microarrays are
33
+	becoming more and more popular. In this paper, we describe a preprocessing
34
+	methodology for a technology designed for the identification of DNA
35
+	sequence variants in specific genes or regions of the human genome
36
+	that are associated with phenotypes of interest such as disease.
37
+	In particular, we describe a methodology useful for preprocessing
38
+	Affymetrix single-nucleotide polymorphism chips and obtaining genotype
39
+	calls with the preprocessed data. We demonstrate how our procedure
40
+	improves existing approaches using data from 3 relatively large studies
41
+	including the one in which large numbers of independent calls are
42
+	available. The proposed methods are implemented in the package oligo
43
+	available from Bioconductor.},
44
+  doi = {10.1093/biostatistics/kxl042},
45
+  institution = {Department of Biostatistics, Johns Hopkins University, Baltimore,
46
+	MD 21205, USA.},
47
+  keywords = {Algorithms; Alleles; Data Interpretation, Statistical; Genotype; Humans;
48
+	Oligonucleotide Array Sequence Analysis; Oligonucleotides; Polymorphism,
49
+	Single Nucleotide},
50
+  owner = {rscharpf},
51
+  pii = {kxl042},
52
+  pmid = {17189563},
53
+  timestamp = {2008.08.07},
54
+  url = {http://dx.doi.org/10.1093/biostatistics/kxl042}
8