Browse code

changes to crlmmWrapper. updated vignettes in inst/scripts.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@42144 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 04/10/2009 02:37:32
Showing20 changed files

... ...
@@ -272,3 +272,13 @@ is decoded and scanned
272 272
 * labeled figures / displayed output of code chunks in the copy number vignette
273 273
 * added bibliography for copy number vignette.  Added file inst/doc/refs.bib
274 274
 * added boxplot method 
275
+
276
+2009-10-03 R. Scharpf - committed version 1.3.21
277
+
278
+* modified crlmmWrapper function
279
+* modified illumina copy number vignette (still needs debugging)
280
+* changed title of copy number vignette
281
+* added reference to the crlmm paper
282
+* changed copyNumber() method so that CA + NA = NA, CB + NA = NA (previously had CA+NA=CA, but this can result in a lot of zeros, depending on the genotype)
283
+* new method: addFeatureAnnotation
284
+* support for snp5.0
... ...
@@ -1,8 +1,8 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.20
5
-Date: 2009-09-22
4
+Version: 1.3.21
5
+Date: 2009-09-29
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0.
... ...
@@ -18,7 +18,7 @@ importMethodsFrom(Biobase, annotation, "annotation<-", annotatedDataFrameFrom,
18 18
 
19 19
 
20 20
 ##importMethodsFrom(oligoClasses, chromosome, copyNumber, position)
21
-importFrom(oligoClasses, chromosome, copyNumber, position, calls)
21
+importMethodsFrom(oligoClasses, chromosome, copyNumber, position, calls)
22 22
 
23 23
 ##importMethodsFrom(methods, initialize, show)
24 24
 
... ...
@@ -54,6 +54,8 @@ exportClasses(ABset, CopyNumberSet, CrlmmSetList)
54 54
 ##S3method(ellipse, CopyNumberSet)
55 55
 exportMethods("[", ##"[[",
56 56
 	      "$", A, B,
57
+	      "A<-", "B<-",
58
+	      addFeatureAnnotation,
57 59
 	      calls,
58 60
 	      CA,
59 61
 	      "CA<-",
... ...
@@ -1,5 +1,8 @@
1 1
 setGeneric("A", function(object) standardGeneric("A"))
2 2
 setGeneric("B", function(object) standardGeneric("B"))
3
+setGeneric("A<-", function(object, value) standardGeneric("A<-"))
4
+setGeneric("addFeatureAnnotation", function(object, ...) standardGeneric("addFeatureAnnotation"))
5
+setGeneric("B<-", function(object, value) standardGeneric("B<-"))
3 6
 setGeneric("batch", function(object) standardGeneric("batch"))
4 7
 ##setGeneric("calls", function(x) standardGeneric("calls"))
5 8
 setGeneric("confs", function(object) standardGeneric("confs"))
... ...
@@ -215,185 +215,6 @@ harmonizeDimnamesTo <- function(object1, object2){
215 215
 	return(object1)
216 216
 }
217 217
 
218
-##crlmmIlluminaWrapper <- function(filenames,
219
-##				 cdfName,
220
-##				 load.it=FALSE,
221
-##				 save.it=FALSE,
222
-##				 splitByChr=TRUE,
223
-##				 crlmmFile,
224
-##				 intensityFile, ...){
225
-####				 outdir="./",
226
-####				 cdfName,
227
-####				 save.intermediate=FALSE,
228
-####				 splitByChr=TRUE,
229
-####				 intensityFile,
230
-####				 ...){  ##additional arguments to readIdatFiles
231
-####	stopifnot(basename(intensityFile) == "res.rda")
232
-##	if(!file.exists(outdir)) stop(outdir, " does not exist.")
233
-##	if(!isValidCdfName(cdfName, platform="illumina")) stop(cdfName, " not supported.")
234
-##	if(file.exists(file.path(outdir, "RG.rda"))) {
235
-##		message("Loading RG.rda...")
236
-##		load(file.path(outdir, "RG.rda"))
237
-##	} else {
238
-##		message("Reading Idat files...")		
239
-##		RG <- readIdatFiles(...)
240
-##		##J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG))
241
-##		##RG <- RG[, -J]
242
-##		if(save.intermediate) save(RG, file=file.path(outdir, "RG.rda"))  ##935M for 91 samples...better not to save this
243
-##	}	
244
-##	if(!file.exists(intensityFile)){
245
-##		message("Quantile normalization / genotyping...")				
246
-##		crlmmOut <- crlmmIllumina(RG=RG,
247
-##					  cdfName=cdfName,
248
-##                                          sns=sampleNames(RG),
249
-##                                          returnParams=TRUE,
250
-##                                          save.it=TRUE,
251
-##                                          intensityFile=intensityFile)
252
-##		if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))
253
-##	} else{
254
-##		load(file.path(outdir, "crlmmOut.rda"))
255
-##	}
256
-##	message("Loading ", intensityFile, "...")		
257
-##	load(intensityFile)
258
-##	if(exists("cnAB")){
259
-##		np.A <- as.integer(cnAB$A)
260
-##		np.B <- as.integer(cnAB$B)
261
-##		np <- ifelse(np.A > np.B, np.A, np.B)
262
-##		np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
263
-##		rownames(np) <- cnAB$gns
264
-##		colnames(np) <- cnAB$sns
265
-##		cnAB$NP <- np
266
-##	}
267
-##	sampleNames(crlmmOut) <- res$sns	
268
-##	if(exists("cnAB")){
269
-##		ABset <- combineIntensities(get("res"), cnAB, cdfName=cdfName)
270
-##	} else{
271
-##		ABset <- combineIntensities(get("res"), NULL, cdfName=cdfName)
272
-##	}
273
-##	##protocolData(ABset)[["ScanDate"]] <- as.character(pData(RG)$ScanDate)
274
-##	crlmmResult <- harmonizeSnpSet(crlmmOut, ABset, cdfName)
275
-##	stopifnot(all.equal(dimnames(crlmmOut), dimnames(ABset)))
276
-##	crlmmList <- list(ABset,
277
-##			  crlmmResult)
278
-##	crlmmList <- as(crlmmList, "CrlmmSetList")
279
-##	if(splitByChr){
280
-##		message("Saving by chromosome")
281
-##		splitByChromosome(crlmmList, cdfName=cdfName, outdir=outdir)
282
-##	} else{
283
-##		message("Saving crlmmList object to ", outdir)
284
-##		save(crlmmList, file=file.path(outdir, "crlmmList.rda"))
285
-##	}
286
-##	message("CrlmmSetList objects saved to ", outdir)
287
-##}
288
-
289
-##quantileNormalize1m <- function(cel.files,
290
-##				outdir,
291
-##				pkgname,
292
-##				reference=TRUE,
293
-##				createCels=TRUE,
294
-##				verbose=TRUE,
295
-##				computeCopyNumberReference=FALSE,
296
-##				normalizeNonpolymorphic=FALSE){
297
-##	if(computeCopyNumberReference){
298
-##		stopifnot(file.exists("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"))
299
-##	} else{
300
-##		load(system.file("1m_reference_cn.rda", package="CN"))
301
-##	}
302
-##	conn <- db(get(pkgname))
303
-##	tmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, allele, featureSet.chrom, featureSet.physical_pos",
304
-##				      "FROM pmfeature, featureSet",
305
-##				      "WHERE pmfeature.fsetid = featureSet.fsetid"))
306
-##	tmp[is.na(tmp$chrom), "chrom"] <- 0
307
-##	tmp[is.na(tmp$physical_pos), "physical_pos"] <- 0
308
-##	tmp <- tmp[order(tmp$chrom, tmp$physical_pos, tmp$man_fsetid, tmp$allele),]
309
-##
310
-##	if(reference){
311
-##		load(system.file("extdata", paste(pkgname, "Ref.rda", sep=""), package=pkgname))
312
-##	} else{
313
-##		reference <- normalize.quantiles.determine.target(readCelIntensities(celFiles,
314
-##										     indices=tmp$fid))
315
-##		if (verbose) message("normalization vector created")
316
-##	}
317
-##	reference <- sort(reference)
318
-##	message("creating empty cel files")
319
-##	out.celFiles <- file.path(outdir, paste("QN-", basename(cel.files), sep=""))
320
-##	if(createCels){
321
-##		hh <- readCelHeader(cel.files[1])
322
-##		message(paste("creating empty cel files in", outdir))
323
-##		out.celFiles <- sapply(out.celFiles, function(x) suppressWarnings(createCel(x, header=hh, overwrite=TRUE)))
324
-##	}
325
-##
326
-##	message("Quantile normalizing SNP probes to hapmap target distribution")
327
-##	for(i in 1:length(cel.files)){
328
-##		if(i%%10==0) cat(".")
329
-##		pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i],
330
-##									 indices=tmp$fid),
331
-##						      reference, copy=FALSE)
332
-##		updateCel(out.celFiles[i], indices=tmp$fid, intensities=as.integer(pms))	  
333
-##	}
334
-##
335
-####	---------------------------------------------------------------------------
336
-####	 copy number probes
337
-####	---------------------------------------------------------------------------
338
-##	if(normalizeNonpolymorphic){
339
-##		cntmp <- dbGetQuery(conn, paste("SELECT fid, man_fsetid, featureSetCNV.chrom, featureSetCNV.chrom_start",
340
-##						"FROM pmfeatureCNV, featureSetCNV",
341
-##						"WHERE pmfeatureCNV.fsetid = featureSetCNV.fsetid"))
342
-##		cntmp[is.na(cntmp$chrom), "chrom"] <- 0
343
-##		cntmp[is.na(cntmp$chrom_start), "chrom_start"] <- 0
344
-##		cntmp <- cntmp[order(cntmp$chrom, cntmp$chrom_start, cntmp$man_fsetid), ]
345
-##
346
-##		if(computeCopyNumberReference){
347
-##			message("computing reference distribution for copy number probes.  May take a while...")
348
-##			celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full.names=TRUE)
349
-##			reference <- computeCopyNumberReference(cel.files=celFiles, fid=cntmp$fid)
350
-##			message("finished computing CN reference distribution.")
351
-##		} 
352
-##		message("Quantile normalizing copy number probes to hapmap target distribution")
353
-##		for(i in 1:length(cel.files)){
354
-##			if(i%%10==0) cat(".")
355
-##			pms <- normalize.quantiles.use.target(readCelIntensities(cel.files[i],
356
-##										 indices=cntmp$fid),
357
-##							      reference, copy=FALSE)
358
-##			updateCel(out.celFiles[i], indices=cntmp$fid, intensities=as.integer(pms))	  
359
-##		}
360
-##	} else {
361
-##		message("Not quantile normalizing the nonpolymorphic probes")
362
-##	}
363
-##}
364
-
365
-validCdfNames <- function(platform){
366
-	if(missing(platform)) stop("missing platform")
367
-	if(!platform %in% c("illumina", "affymetrix"))
368
-		stop("only illumina and affymetrix platforms are supported.")
369
-	if(platform=="illumina"){
370
-		chipList = c("human1mv1c",             # 1M
371
-		"human370v1c",            # 370CNV
372
-		"human650v3a",            # 650Y
373
-		"human610quadv1b",        # 610 quad
374
-		"human660quadv1a",        # 660 quad
375
-		"human370quadv3c",        # 370CNV quad
376
-		"human550v3b",            # 550K
377
-		"human1mduov3b")          # 1M Duo
378
-	}
379
-	if(platform=="affymetrix"){
380
-		chipList=c("genomewidesnp6")
381
-	}
382
-	return(chipList)
383
-}
384
-
385
-isValidCdfName <- function(cdfName, platform){
386
-	chipList <- validCdfNames(platform)
387
-	if(!(cdfName %in% chipList)){
388
-		warning("cdfName must be one of the following: ",
389
-			chipList)
390
-	}
391
-	result <- cdfName %in% chipList
392
-	return(result)
393
-}
394
-	
395
-	
396
-	
397 218
 crlmmWrapper <- function(filenames,
398 219
 			 cdfName="genomewidesnp6",
399 220
 			 load.it=FALSE,
... ...
@@ -404,12 +225,19 @@ crlmmWrapper <- function(filenames,
404 225
 			 rgFile,
405 226
 			 platform=c("affymetrix", "illumina")[1],
406 227
 			 ...){
407
-	if(!(platform %in% c("affymetrix", "illumina")))
228
+	if(!(platform %in% c("affymetrix", "illumina"))){
408 229
 		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
230
+	} else {
231
+		message("Checking whether annotation package for the ", platform, " platform is available")
232
+	}
409 233
 	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
410 234
 	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
411 235
 	if(platform == "illumina"){
412 236
 		if(missing(rgFile)) stop("must specify 'rgFile'.")
237
+		if(!load.it){
238
+			RG <- readIdatFiles(...)
239
+			if(save.it) save(RG, file=rgFile)
240
+		}
413 241
 		if(load.it & !file.exists(rgFile)){
414 242
 			message("load.it is TRUE, bug rgFile not present.  Attempting to read the idatFiles.")
415 243
 			RG <- readIdatFiles(...)
... ...
@@ -456,6 +284,7 @@ crlmmWrapper <- function(filenames,
456 284
 			}
457 285
 		} else {
458 286
 			message("Loading ", crlmmFile, "...")
287
+			load(intensityFile)				
459 288
 			load(crlmmFile)
460 289
 			crlmmResult <- get("crlmmResult")
461 290
 			cnrmaResult <- get("cnrmaResult")
... ...
@@ -463,23 +292,38 @@ crlmmWrapper <- function(filenames,
463 292
 	}
464 293
 	if(platform == "illumina"){
465 294
 		if(!file.exists(crlmmFile) | !load.it){		
466
-			crlmmOut <- crlmmIllumina(RG=RG,
467
-						  cdfName=cdfName,
468
-						  sns=sampleNames(RG),
469
-						  returnParams=TRUE,
470
-						  save.it=TRUE,
471
-						  intensityFile=intensityFile)
472
-			if(save.it) save(crlmmOut, file=crlmmFile)
295
+			crlmmResult <- crlmmIllumina(RG=RG,
296
+						     cdfName=cdfName,
297
+						     sns=sampleNames(RG),
298
+						     returnParams=TRUE,
299
+						     save.it=TRUE,
300
+						     intensityFile=intensityFile)
301
+			if(save.it) save(crlmmResult, file=crlmmFile)
473 302
 		} else {
474 303
 			message("Loading ", crlmmFile, "...")
475 304
 			load(crlmmFile)
476 305
 			crlmmResult <- get("crlmmResult")
477
-			if(exists("cnAB")){
478
-				cnrmaResult <- get("cnAB")
479
-			} else cnrmaResult <- NULL
480 306
 		}
481 307
 	}
482 308
 	load(intensityFile)
309
+	if(platform=="illumina"){
310
+		if(exists("cnAB")){
311
+			np.A <- as.integer(cnAB$A)
312
+			np.B <- as.integer(cnAB$B)
313
+			np <- ifelse(np.A > np.B, np.A, np.B)
314
+			np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
315
+			rownames(np) <- cnAB$gns
316
+			colnames(np) <- cnAB$sns
317
+			cnAB$NP <- np
318
+			sampleNames(crlmmResult) <- res$sns				
319
+			cnrmaResult <- get("cnAB")
320
+		} else cnrmaResult <- NULL
321
+	}
322
+	if(platform=="affymetrix"){
323
+		if(exists("cnrmaResult")){
324
+			cnrmaResult <- get("cnrmaResult")
325
+		} else cnrmaResult <- NULL
326
+	}
483 327
 	ABset <- combineIntensities(get("res"), cnrmaResult, cdfName)
484 328
 	if(platform=="affymetrix") protocolData(ABset)[["ScanDate"]] <- as.character(celDates(filenames))	
485 329
 	crlmmResult <- harmonizeSnpSet(crlmmResult, ABset, cdfName)
... ...
@@ -496,9 +340,52 @@ crlmmWrapper <- function(filenames,
496 340
 	return()
497 341
 }
498 342
 
343
+validCdfNames <- function(platform){
344
+	if(!missing(platform)){
345
+		if(!platform %in% c("illumina", "affymetrix"))
346
+			stop("only illumina and affymetrix platforms are supported.")
347
+		if(platform=="illumina"){
348
+			chipList = c("human1mv1c",             # 1M
349
+			"human370v1c",            # 370CNV
350
+			"human650v3a",            # 650Y
351
+			"human610quadv1b",        # 610 quad
352
+			"human660quadv1a",        # 660 quad
353
+			"human370quadv3c",        # 370CNV quad
354
+			"human550v3b",            # 550K
355
+			"human1mduov3b")          # 1M Duo
356
+		}
357
+		if(platform=="affymetrix"){
358
+			chipList=c("genomewidesnp6", "genomewidesnp5")
359
+		}
360
+	} else{
361
+		chipList <- list()
362
+		chipList$affymetrix <- c("genomewidesnp6","genomewidesnp5")
363
+		chipList$illumina <- c("human370v1c",
364
+				       "human370quadv3c",
365
+				       "human550v3b",
366
+				       "human650v3a",
367
+				       "human610quadv1b",
368
+				       "human660quadv1a",
369
+				       "human1mduov3b")
370
+	}
371
+	return(chipList)
372
+}
499 373
 
500
-
501
-cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALSE){
374
+isValidCdfName <- function(cdfName, platform){
375
+	chipList <- validCdfNames(platform)
376
+	if(!(cdfName %in% chipList)){
377
+		warning("cdfName must be one of the following: ",
378
+			chipList)
379
+	}
380
+	result <- cdfName %in% chipList
381
+	return(result)
382
+}
383
+	
384
+	
385
+	
386
+# steps: quantile normalize hapmap: create 1m_reference_cn.rda object
387
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE){
388
+	if(missing(cdfName)) stop("must specify cdfName")
502 389
 	pkgname <- getCrlmmAnnotationName(cdfName)
503 390
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
504 391
 	if (missing(sns)) sns <- basename(filenames)
... ...
@@ -513,8 +400,14 @@ cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALS
513 400
 		message("Processing ", length(filenames), " files.")
514 401
 		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
515 402
 	}
516
-        loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
403
+	if(cdfName=="genomewidesnp6"){
404
+		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
405
+	}
406
+	if(cdfName=="genomewidesnp5"){
407
+		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
408
+	}
517 409
 	reference <- getVarInEnv("reference")
410
+	##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.")
518 411
 	for(i in seq(along=filenames)){
519 412
 		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
520 413
 		x <- log2(y[idx2])
... ...
@@ -528,6 +421,7 @@ cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALS
528 421
 	dimnames(NP) <- list(names(fid), sns)
529 422
 	##dimnames(NP) <- list(map[, "man_fsetid"], sns)
530 423
 	res3 <- list(NP=NP, SKW=SKW)
424
+	cat("\n")
531 425
 	return(res3)
532 426
 }
533 427
 
... ...
@@ -664,11 +558,12 @@ computeCopynumber <- function(object,
664 558
 			      bias.adj=FALSE,
665 559
 			      batch,
666 560
 			      SNRmin=5,
667
-			      cdfName, ...){
561
+			      cdfName,
562
+			      platform=c("affymetrix", "illumina")[1], ...){
668 563
 	if(class(object) != "CrlmmSetList") stop("object must be of class ClrmmSetList")
669 564
 	if(missing(cdfName))
670 565
 		cdfName <- annotation(object)
671
-	if(!isValidCdfName(cdfName, platform="affymetrix")) stop(cdfName, " not supported.")	
566
+	if(!isValidCdfName(cdfName, platform=platform)) stop(cdfName, " not supported.")	
672 567
 	if(ncol(object) < 10)
673 568
 		stop("Must have at least 10 samples in each batch to estimate model parameters....preferably closer to 90 samples per batch")
674 569
 	##require(oligoClasses)
... ...
@@ -691,7 +586,6 @@ computeCopynumber <- function(object,
691 586
 	##previous version of compute copy number
692 587
 	envir <- new.env()
693 588
 	message("Fitting model for copy number estimation...")
694
-
695 589
 	.computeCopynumber(chrom=CHR,
696 590
 			   A=A(ABset),
697 591
 			   B=B(ABset),
... ...
@@ -705,7 +599,6 @@ computeCopynumber <- function(object,
705 599
 			   SNRmin=SNRmin,
706 600
 			   cdfName=cdfName,
707 601
 			   ...)
708
-
709 602
 	if(bias.adj){
710 603
 		message("Running bias adjustment...")
711 604
 		.computeCopynumber(chrom=CHR,
... ...
@@ -991,6 +884,8 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
991 884
 	return(cnset)
992 885
 }
993 886
 
887
+
888
+
994 889
 thresholdCopyNumberSet <- function(object){
995 890
 	ca <- CA(object)
996 891
 	cb <- CB(object)
... ...
@@ -1795,195 +1690,6 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
1795 1690
 }
1796 1691
 
1797 1692
 
1798
-posteriorNonpolymorphic <- function(plateIndex, envir, priorProb, cnStates=0:6){
1799
-	p <- plateIndex
1800
-	CHR <- envir[["chrom"]]
1801
-	if(missing(priorProb)) priorProb <- rep(1/length(cnStates), length(cnStates)) ##uniform	
1802
-	plate <- envir[["plate"]]
1803
-	uplate <- envir[["plate"]]
1804
-	NP <- envir[["NP"]][, plate==uplate[p]]
1805
-	nuT <- envir[["nuT"]][, p]
1806
-	phiT <- envir[["phiT"]][, p]
1807
-	sig2T <- envir[["sig2T"]][, p]
1808
-	##Assuming background variance for np probes is the same on the log-scale
1809
-	emit <- array(NA, dim=c(nrow(NP), ncol(NP), length(cnStates)))##SNPs x sample x 'truth'
1810
-	lT <- log2(NP)
1811
-	sds <- sqrt(sig2T)
1812
-	counter <- 1##state counter	
1813
-	for(CT in cnStates){
1814
-		cat(".")
1815
-		if(CHR == 23) browser()
1816
-		means <- suppressWarnings(log2(nuT + CT*phiT))
1817
-		emit[, , counter] <- dnorm(lT, mean=means, sd=sds)
1818
-		counter <- counter+1
1819
-	}
1820
-	for(j in seq(along=cnStates)){
1821
-		emit[, , j] <- priorProb[j]*emit[, , j]
1822
-	}
1823
-	homDel <- emit[, , 1]
1824
-	hemDel <- emit[, , 2]
1825
-	norm <- emit[, , 3]
1826
-	amp <- emit[, , 4]
1827
-	amp4 <- emit[, , 5]
1828
-	amp5 <- emit[, , 6]
1829
-	amp6 <- emit[, , 7]
1830
-	total <- homDel+hemDel+norm+amp+amp4+amp5+amp6
1831
-	weights <- array(NA, dim=c(nrow(NP), ncol(NP), length(cnStates)))
1832
-	weights[, , 1] <- homDel/total
1833
-	weights[, , 2] <- hemDel/total
1834
-	weights[, , 3] <- norm/total
1835
-	weights[, , 4] <- amp/total
1836
-	weights[, , 5] <- amp4/total
1837
-	weights[, , 6] <- amp5/total
1838
-	weights[, , 7] <- amp6/total
1839
-	##posterior mode
1840
-	posteriorMode <- apply(weights, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1841
-	posteriorMode <- posteriorMode-1
1842
-	##sns <- envir[["sns"]]
1843
-	##colnames(posteriorMode) <- sns
1844
-	##envir[["np.posteriorMode"]] <- posteriorMode
1845
-	##envir[["np.weights"]] <- weights
1846
-	posteriorMeans <- 0*homDel/total + 1*hemDel/total + 2*norm/total + 3*amp/total + 4*amp4/total + 5*amp5/total + 6*amp6/total
1847
-	##colnames(posteriorMeans) <- sns
1848
-	##envir[["np.posteriorMeans"]] <- posteriorMeans
1849
-	return(posteriorMode)
1850
-}
1851
-
1852
-posteriorWrapper <- function(envir){
1853
-	snp.PM <- matrix(NA, length(envir[["snps"]]), length(envir[["sns"]]))
1854
-	np.PM <- matrix(NA, length(envir[["cnvs"]]), length(envir[["sns"]]))
1855
-	plate <- envir[["plate"]]
1856
-	uplate <- envir[["uplate"]]
1857
-	for(p in seq(along=uplate)){
1858
-		tmp <- expectedC(plateIndex=p, envir=envir)
1859
-		snp.PM[, plate==uplate[p]] <- tmp
1860
-		##snp.pm <- env[["posteriorMode"]]
1861
-		##trace(posteriorNonpolymorphic, browser)
1862
-		tmp <- posteriorNonpolymorphic(plateIndex=p, envir=envir)
1863
-		np.PM[, plate==uplate[p]] <- tmp##env[["np.posteriorMode"]]
1864
-		##pMode <- rbind(snp.pm, np.pm)
1865
-		##rownames(pMode) <- c(env[["snps"]], env[["cnvs"]])
1866
-		##dn <- dimnames(pMode)
1867
-		##pMode <- matrix(as.integer(pMode), nrow(pMode), ncol(pMode))
1868
-	}
1869
-	PM <- rbind(snp.PM, np.PM)
1870
-	PM <- matrix(as.integer(PM), nrow(PM), ncol(PM))
1871
-	dns <- list(c(envir[["snps"]], envir[["cnvs"]]), envir[["sns"]])
1872
-	dimnames(PM) <- dns
1873
-	return(PM)
1874
-}
1875
-
1876
-
1877
-
1878
-
1879
-
1880
-##for polymorphic probes
1881
-expectedC <- function(plateIndex, envir, priorProb, cnStates=0:6){
1882
-	p <- plateIndex
1883
-	CHR <- envir[["chrom"]]
1884
-	if(missing(priorProb)) priorProb <- rep(1/length(cnStates), length(cnStates)) ##uniform	
1885
-	plate <- envir[["plate"]]
1886
-	uplate <- envir[["uplate"]]
1887
-	A <- envir[["A"]]
1888
-	B <- envir[["B"]]
1889
-	A <- A[, plate==uplate[p]]
1890
-	B <- B[, plate==uplate[p]]
1891
-	calls <- envir[["calls"]]	
1892
-	calls <- calls[, plate==unique(plate)[p]]
1893
-	probA <- sqrt(rowMeans(calls == 1, na.rm=TRUE))
1894
-	probB <- sqrt(rowMeans(calls == 3, na.rm=TRUE))
1895
-	sig2A <- envir[["sig2A"]]
1896
-	sig2B <- envir[["sig2B"]]
1897
-	tau2A <- envir[["tau2A"]]
1898
-	tau2B <- envir[["tau2B"]]
1899
-	corrA.BB <- envir[["corrA.BB"]]
1900
-	corrB.AA <- envir[["corrB.AA"]]
1901
-	corr <- envir[["corr"]]
1902
-	nuA <- envir[["nuA"]]
1903
-	nuB <- envir[["nuB"]]
1904
-	phiA <- envir[["phiA"]]
1905
-	phiB <- envir[["phiB"]]
1906
-	emit <- array(NA, dim=c(nrow(A), ncol(A), 28))##SNPs x sample x 'truth'
1907
-	##AAAA, AAAB, AABB, ABBB, BBBB
1908
-	##AAAAA, AAAAB, AAABB, AABBB, ABBBB, BBBBB
1909
-	##AAAAAA, AAAAAB, AAAABB, AAABBB, AABBBB, ABBBBB, BBBBBB
1910
-	lA <- log2(A)
1911
-	lB <- log2(B)	
1912
-	X <- cbind(lA, lB)	
1913
-	counter <- 1##state counter
1914
-	for(CT in cnStates){
1915
-		cat(".")
1916
-		for(CA in 0:CT){
1917
-			CB <- CT-CA
1918
-			A.scale <- sqrt(tau2A[, p]*(CA==0) + sig2A[, p]*(CA > 0))
1919
-			B.scale <- sqrt(tau2B[, p]*(CB==0) + sig2B[, p]*(CB > 0))
1920
-			scale <- c(A.scale, B.scale)
1921
-			if(CA == 0 & CB == 0) rho <- 0
1922
-			if(CA == 0 & CB > 0) rho <- corrA.BB[, p]
1923
-			if(CA > 0 & CB == 0) rho <- corrB.AA[, p]
1924
-			if(CA > 0 & CB > 0) rho <- corr[, p]
1925
-			if(CHR == 23) browser()
1926
-			means <- cbind(suppressWarnings(log2(nuA[, p]+CA*phiA[, p])), suppressWarnings(log2(nuB[, p]+CB*phiB[, p])))
1927
-			covs <- rho*A.scale*B.scale
1928
-			A.scale2 <- A.scale^2
1929
-			B.scale2 <- B.scale^2			
1930
-			##ensure positive definite			
1931
-			##Sigma <- as.matrix(nearPD(matrix(c(A.scale^2, covs,
1932
-			##covs, B.scale^2), 2, 2))[[1]])
1933
-			m <- 1##snp counter				
1934
-			for(i in 1:nrow(A)){
1935
-				Sigma <- matrix(c(A.scale2[i], covs[i], covs[i], B.scale2[i]), 2,2)
1936
-				xx <- matrix(X[i, ], ncol=2)
1937
-				tmp <- dmvnorm(xx, mean=means[i, ], sigma=Sigma) 				
1938
-				##Using HWE: P(CA=ca, CB=cb|CT=c)				
1939
-				ptmp <- (probA[i]^CA)*(probB[i]^CB)*tmp
1940
-				emit[m, , counter] <- ptmp
1941
-				m <- m+1				
1942
-			}
1943
-			counter <- counter+1			
1944
-		}
1945
-	}
1946
-	##priorProb=P(CT=c)
1947
-	homDel <- priorProb[1]*emit[, , 1]
1948
-	hemDel <- priorProb[2]*emit[, , c(2, 3)] # + priorProb[3]*emit[, c(4, 5, 6)] + priorProb[4]*emit[, c(7:10)]
1949
-	norm <- priorProb[3]*emit[, , 4:6]
1950
-	amp <- priorProb[4]*emit[, , 7:10]
1951
-	amp4 <- priorProb[5]*emit[, , 11:15]
1952
-	amp5 <- priorProb[6]*emit[, , 16:21]
1953
-	amp6 <- priorProb[7]*emit[, , 22:28]	
1954
-	##sum over the different combinations within each copy number state
1955
-	hemDel <- apply(hemDel, c(1,2), sum)
1956
-	norm <- apply(norm, c(1, 2), sum)
1957
-	amp <- apply(amp, c(1,2), sum)
1958
-	amp4 <- apply(amp4, c(1,2), sum)
1959
-	amp5 <- apply(amp5, c(1,2), sum)
1960
-	amp6 <- apply(amp6, c(1,2), sum)
1961
-	total <- homDel+hemDel+norm+amp+amp4+amp5+amp6
1962
-	weights <- array(NA, dim=c(nrow(homDel), ncol(A), 7))
1963
-	weights[, , 1] <- homDel/total
1964
-	weights[, , 2] <- hemDel/total
1965
-	weights[, , 3] <- norm/total
1966
-	weights[, , 4] <- amp/total
1967
-	weights[, , 5] <- amp4/total
1968
-	weights[, , 6] <- amp5/total
1969
-	weights[, , 7] <- amp6/total
1970
-	##posterior mode
1971
-	posteriorMode <- apply(weights, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1972
-	posteriorMode <- posteriorMode-1
1973
-	##This is for one plate.  Need to instantiate a much bigger
1974
-	##object in the environment
1975
-	
1976
-	##envir[["posteriorMode"]] <- posteriorMode
1977
-	##weights <- list(homDel/total, hemDel/total, norm/total, amp/total, amp4/total, amp5/total, amp6/total)
1978
-	##names(weights) <- c(cnStates)
1979
-	##envir[["weights"]] <- weights
1980
-	posteriorMeans <- 0*homDel/total + 1*hemDel/total + 2*norm/total + 3*amp/total + 4*amp4/total + 5*amp5/total + 6*amp6/total
1981
-	##sns <- envir[["sns"]]
1982
-	##colnames(posteriorMeans) <- sns
1983
-	##envir[["posteriorMeans"]] <- posteriorMeans
1984
-	return(posteriorMode)
1985
-}
1986
-
1987 1693
 biasAdjNP <- function(plateIndex, envir, priorProb){
1988 1694
 	p <- plateIndex
1989 1695
 	normalNP <- envir[["normalNP"]]
... ...
@@ -2079,7 +1785,6 @@ computeEmission <- function(object,
2079 1785
 	##threshold small nu's and phis
2080 1786
 	cnset <- thresholdModelParams(object[[3]], MIN=MIN)
2081 1787
 	index <- order(chromosome(cnset), position(cnset))
2082
-	
2083 1788
 	if(any(diff(index) > 1)) stop("must be ordered by chromosome and physical position")
2084 1789
 	emissionProbs <- array(NA, dim=c(nrow(cnset), ncol(cnset), length(copyNumberStates)))
2085 1790
 	dimnames(emissionProbs) <- list(featureNames(object),
... ...
@@ -2223,3 +1928,15 @@ thresholdModelParams <- function(object, MIN=2^3){
2223 1928
 	}
2224 1929
 	emissionProbs
2225 1930
 }
1931
+
1932
+setMethod("update", "character", function(object, ...){
1933
+	crlmmFile <- object
1934
+	for(i in seq(along=crlmmFile)){
1935
+		cat("Processing ", clrmmFile[i], "...\n")
1936
+		load(crlmmFile[i])
1937
+		crlmmSetList <- get("crlmmSetList")
1938
+		crlmmSetList <- update(crlmmSetList, ...)
1939
+		save(crlmmSetList, file=crlmmFile[i])
1940
+		rm(crlmmSetList); gc();
1941
+	}
1942
+})
... ...
@@ -12,6 +12,14 @@ readIdatFiles <- function(sampleSheet=NULL,
12 12
 			  sep="_",
13 13
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
14 14
 			  saveDate=FALSE) {
15
+       if(is.null(arrayNames)) {
16
+	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
17
+	       if(!is.null(sampleSheet)) {
18
+		       sampleSheet=NULL
19
+		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
20
+	       }
21
+	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
22
+       }	
15 23
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
16 24
 	       if(!is.null(arrayNames)){
17 25
 		       ##arrayNames=NULL
... ...
@@ -35,14 +43,6 @@ readIdatFiles <- function(sampleSheet=NULL,
35 43
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
36 44
 	       sampleNames(pd) <- arrayNames
37 45
        }
38
-       if(is.null(arrayNames)) {
39
-	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
40
-	       if(!is.null(sampleSheet)) {
41
-		       sampleSheet=NULL
42
-		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
43
-	       }
44
-	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
45
-       }
46 46
        narrays = length(arrayNames)
47 47
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
48 48
        redfiles = paste(arrayNames, fileExt$red, sep=sep)
... ...
@@ -50,9 +50,10 @@ readIdatFiles <- function(sampleSheet=NULL,
50 50
 	       stop("Cannot find .idat files")
51 51
        if(length(grnfiles)!=length(redfiles))
52 52
 	       stop("Cannot find matching .idat files")
53
-       if(!all(c(redfiles,grnfiles) %in% dir(path=path)))
53
+       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
54 54
 	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
55 55
 		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
56
+       }
56 57
        grnidats = file.path(path, grnfiles)
57 58
        redidats = file.path(path, redfiles)
58 59
        headerInfo = list(nProbes = rep(NA, narrays),
... ...
@@ -5,7 +5,14 @@ setValidity("ABset", function(object) {
5 5
 })
6 6
 setMethod("A", "ABset", function(object) assayData(object)[["A"]])
7 7
 setMethod("B", "ABset", function(object) assayData(object)[["B"]])
8
-
8
+setReplaceMethod("A", signature(object="ABset", value="matrix"),
9
+		 function(object, value){
10
+			 assayDataElementReplace(object, "A", value)			
11
+		 })
12
+setReplaceMethod("B", signature(object="ABset", value="matrix"),
13
+		 function(object, value){
14
+			 assayDataElementReplace(object, "B", value)			
15
+		 })
9 16
 
10 17
 
11 18
 
... ...
@@ -6,12 +6,14 @@ setValidity("CopyNumberSet", function(object) {
6 6
 ##may want to allow thresholding here (... arg)
7 7
 setMethod("CA", "CopyNumberSet", function(object, ...) assayData(object)[["CA"]]/100)
8 8
 setMethod("CB", "CopyNumberSet", function(object, ...) assayData(object)[["CB"]]/100)
9
-setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
10
-		 function(object, value) assayDataElementReplace(object, "CB", value))
9
+
11 10
 setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"),
12
-		 function(object, value) assayDataElementReplace(object, "CA", value))
11
+		 function(object, value) assayDataElementReplace(object, "CA", value*100))
12
+setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
13
+		 function(object, value) assayDataElementReplace(object, "CB", value*100))
14
+
15
+
13 16
 
14
-setMethod("chromosome", "CopyNumberSet", function(object) fData(object)$chromosome)
15 17
 
16 18
 setMethod("batch", "CopyNumberSet", function(object){
17 19
 	if("batch" %in% varLabels(object)){
... ...
@@ -23,19 +25,23 @@ setMethod("batch", "CopyNumberSet", function(object){
23 25
 })
24 26
 
25 27
 setMethod("copyNumber", "CopyNumberSet", function(object){
28
+	require(paste(annotation(object), "Crlmm", sep=""), character.only=TRUE) || stop(paste("Annotation package ", annotation(object), "Crlmm not available", sep=""))
26 29
 	##ensure that 2 + NA = 2 by replacing NA's with zero
30
+	##the above results in copy number 0, 1, or 2 depending on the genotype....safer just to drop
27 31
 	CA <- CA(object)
28 32
 	CB <- CB(object)
29
-	nas <- is.na(CA) & is.na(CB)
30
-	CA[is.na(CA)] <- 0
31
-	CB[is.na(CB)] <- 0
33
+	##nas <- is.na(CA) & is.na(CB)
34
+	##CA[is.na(CA)] <- 0
35
+	##CB[is.na(CB)] <- 0
32 36
 	CN <- CA + CB
37
+	##For nonpolymorphic probes, CA is the total copy number
38
+	CN[cnIndex(object, annotation(object)), ] <- CA(object)[cnIndex(object, annotation(object)), ]
33 39
 	##if both CA and CB are NA, report NA
34
-	CN[nas] <- NA
40
+	##CN[nas] <- NA
35 41
 	CN
36 42
 })
37 43
 
38
-setMethod("position", "CopyNumberSet", function(object) fData(object)$position)
44
+
39 45
 
40 46
 ##setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
41 47
 ellipse.CopyNumberSet <- function(x, copynumber, ...){
... ...
@@ -43,7 +49,15 @@ ellipse.CopyNumberSet <- function(x, copynumber, ...){
43 49
 	##fittedOrder <- unique(sapply(basename(celFiles), function(x) strsplit(x, "_")[[1]][2]))
44 50
 	##index <- match(plates, fittedOrder)
45 51
 	if(nrow(x) > 1) stop("only 1 snp at a time")
46
-	batch <- unique(x$batch)
52
+	##batch <- unique(x$batch)
53
+	args <- list(...)
54
+	if(!"batch" %in% names(args)){
55
+		jj <- match("batch", varLabels(x))
56
+		if(length(jj) < 1) stop("batch not in varLabels")
57
+		batch <- unique(pData(x)[, jj])
58
+	} else{
59
+		batch <- unique(args$batch)
60
+	}
47 61
 	if(length(batch) > 1) stop("batch variable not unique")
48 62
 	nuA <- as.numeric(fData(x)[, match(paste("nuA", batch, sep="_"), fvarLabels(x))])
49 63
 	nuB <- as.numeric(fData(x)[, match(paste("nuB", batch, sep="_"), fvarLabels(x))])	
... ...
@@ -65,6 +79,7 @@ ellipse.CopyNumberSet <- function(x, copynumber, ...){
65 79
 			if(CA == 0 & CB > 0) rho <- corrA.BB
66 80
 			if(CA > 0 & CB == 0) rho <- corrB.AA
67 81
 			if(CA > 0 & CB > 0) rho <- corr
82
+			if(CA == 0 & CB == 0) rho <- 0
68 83
 			lines(ellipse(x=rho, centre=c(log2(nuA+CA*phiA),
69 84
 					     log2(nuB+CB*phiB)),
70 85
 				      scale=scale), ...)
... ...
@@ -48,13 +48,78 @@ setMethod(".harmonizeDimnames", "CrlmmSetList", function(object){
48 48
 })
49 49
 
50 50
 setMethod("A", "CrlmmSetList", function(object) A(object[[1]]))
51
+
52
+setMethod("addFeatureAnnotation", "CrlmmSetList", function(object, CHR){
53
+	if(missing(CHR)) stop("Must specificy chromosome")
54
+	cdfName <- annotation(object)
55
+	pkgname <- paste(cdfName, "Crlmm", sep="")	
56
+	path <- system.file("extdata", package=pkgname)
57
+	loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
58
+	cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
59
+	loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
60
+	snpProbes <- get("snpProbes", envir=.crlmmPkgEnv)	
61
+
62
+	##Feature Data
63
+	snps <- featureNames(object)[snpIndex(object)]
64
+	nps <- featureNames(object)[cnIndex(object)]
65
+	position.snp <- snpProbes[match(snps, rownames(snpProbes)), "position"]
66
+	names(position.snp) <- snps
67
+	position.np <- cnProbes[match(nps, rownames(cnProbes)), "position"]
68
+	names(position.np) <- nps
69
+	
70
+	position <- c(position.snp, position.np)
71
+	position <- position[match(featureNames(object), names(position))]
72
+	stopifnot(identical(names(position), featureNames(object)))
73
+	if(sum(duplicated(names(position))) > 0){
74
+		warning("Removing rows with NA identifiers...")
75
+		##RS: fix this
76
+		I <- which(!is.na(names(position)))
77
+	}  else I <- seq(along=names(position))
78
+	fd <- data.frame(cbind(CHR,
79
+			       position[I]))
80
+	colnames(fd) <- c("chromosome", "position")
81
+	rownames(fd) <- featureNames(object)
82
+	fD <- new("AnnotatedDataFrame",
83
+		  data=fd,
84
+		  varMetadata=data.frame(labelDescription=colnames(fd)))
85
+	return(fD)
86
+})
87
+
51 88
 setMethod("annotation", "CrlmmSetList", function(object) annotation(object[[1]]))
52 89
 setMethod("B", "CrlmmSetList", function(object) B(object[[1]]))
53 90
 setMethod("batch", "CrlmmSetList", function(object) batch(object[[3]]))
54 91
 setMethod("CA", "CrlmmSetList", function(object, ...) CA(object[[3]], ...))
55 92
 setMethod("CB", "CrlmmSetList", function(object, ...) CB(object[[3]], ...))
56 93
 setMethod("calls", "CrlmmSetList", function(object) calls(object[[2]]))
57
-setMethod("chromosome", "CrlmmSetList", function(object) chromosome(object[[3]]))
94
+setMethod("chromosome", "CrlmmSetList", function(object){
95
+	chr <- NULL
96
+	for(i  in 1:length(object)){
97
+		if(length(fvarLabels(object[[i]])) > 0){
98
+			if("chromosome" %in% fvarLabels(object[[i]])){
99
+				chr <- chromosome(object[[i]])
100
+				break()
101
+			}
102
+		}
103
+	}
104
+	if(is.null(chr)) warning("fvarLabel 'chromosome' not in any element of the CrlmmSetList object")	
105
+	return(chr)
106
+	##chromosome(object[[3]])
107
+	})
108
+
109
+setMethod("position", "CrlmmSetList", function(object){
110
+	pos <- NULL
111
+	for(i  in 1:length(object)){
112
+		if(length(fvarLabels(object[[i]])) > 0){
113
+			if("position" %in% fvarLabels(object[[i]])){
114
+				pos <- position(object[[i]])
115
+				break()
116
+			}
117
+		} else next()
118
+	}
119
+	if(is.null(pos)) warning("fvarLabel 'position' not in any element of the CrlmmSetList object")
120
+	return(pos)
121
+	})
122
+
58 123
 setMethod("cnIndex", "CrlmmSetList", function(object, ...) {
59 124
 	match(cnNames(object[[1]], annotation(object)), featureNames(object))
60 125
 })
... ...
@@ -94,7 +159,6 @@ setMethod("points", signature(x="CrlmmSetList"),
94 159
 		  B <- log2(B(x))
95 160
 		  points(A, B, ...)
96 161
 	  })
97
-setMethod("position", "CrlmmSetList", function(object) position(object[[3]]))
98 162
 setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]]))
99 163
 setMethod("show", "CrlmmSetList", function(object){
100 164
 	cat("\n Elements in CrlmmSetList object: \n")
... ...
@@ -130,6 +194,29 @@ setMethod("update", "CrlmmSetList", function(object, ...){
130 194
 	computeCopynumber(object, ...)
131 195
 })
132 196
 
197
+setReplaceMethod("CA", signature(object="CrlmmSetList", value="matrix"),
198
+		 function(object, value){
199
+			 CA(object[[3]]) <- value
200
+			 object
201
+		 })
202
+setReplaceMethod("CB", signature(object="CrlmmSetList", value="matrix"),
203
+		 function(object, value){
204
+			 CB(object[[3]]) <- value
205
+			 object
206
+			 })
207
+
208
+setReplaceMethod("A", signature(object="CrlmmSetList", value="matrix"),
209
+		 function(object, value){
210
+			 A(object[[1]]) <- value
211
+			 object
212
+		 })
213
+setReplaceMethod("B", signature(object="CrlmmSetList", value="matrix"),
214
+		 function(object, value){
215
+			 B(object[[1]]) <- value
216
+			 object
217
+		 })
218
+
219
+
133 220
 
134 221
 setMethod("boxplot", "CrlmmSetList", function(x, ...){
135 222
 ##boxplot.CrlmmSetList <- function(x, ...){
... ...
@@ -138,7 +225,7 @@ setMethod("boxplot", "CrlmmSetList", function(x, ...){
138 225
 	A1 <- A(x)
139 226
 	B1 <- B(x)
140 227
 	Alist <- split(A1, genotypes)
141
-	Alist <- rev(Alist)
228
+	Alist <- as(rev(Alist), "data.frame")
142 229
 	Blist <- split(B1, genotypes)
143 230
 	ylim <- range(unlist(Alist))
144 231
 	boxplot(Alist, xaxt="n", ylab=expression(I[A]), 
... ...
@@ -15,6 +15,8 @@ setMethod("snpNames", "eSet", function(object, cdfName){
15 15
 	snps <- snps[snps %in% featureNames(object)]
16 16
 	featureNames(object)[match(snps, featureNames(object))]
17 17
   })
18
+setMethod("chromosome", "eSet", function(object) fData(object)$chromosome)
19
+setMethod("position", "eSet", function(object) fData(object)$position)
18 20
 ##setMethod("combine", signature=signature(x="eSet", y="eSet"),
19 21
 ##	  function(x, y, ...){
20 22
 ##		  ##Check that both x and y are valid objects
... ...
@@ -1,90 +1,85 @@
1 1
 snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
2
-  if (missing(sns)) sns <- basename(filenames)
3
-  ##ADD CHECK TO SEE IF LOADED
4
-  if (missing(cdfName))
5
-    cdfName <- read.celfile.header(filenames[1])$cdfName
6
-##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7
-  pkgname <- getCrlmmAnnotationName(cdfName)
8
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
11
-    message(strwrap(msg))
12
-    stop("Package ", pkgname, " could not be found.")
13
-    rm(suggCall, msg)
14
-  }
15
-  
16
-  if(verbose) message("Loading annotations and mixture model parameters.")
17
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
18
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
19
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
20
-##  data(preprocStuff, genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
21
-  autosomeIndex <- getVarInEnv("autosomeIndex")
22
-  pnsa <- getVarInEnv("pnsa")
23
-  pnsb <- getVarInEnv("pnsb")
24
-  fid <- getVarInEnv("fid")
25
-  reference <- getVarInEnv("reference")
26
-  aIndex <- getVarInEnv("aIndex")
27
-  bIndex <- getVarInEnv("bIndex")
28
-  SMEDIAN <- getVarInEnv("SMEDIAN")
29
-  theKnots <- getVarInEnv("theKnots")
30
-  gns <- getVarInEnv("gns")
2
+	if (missing(sns)) sns <- basename(filenames)
3
+	##ADD CHECK TO SEE IF LOADED
4
+	if (missing(cdfName))
5
+		cdfName <- read.celfile.header(filenames[1])$cdfName
6
+	##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7
+	pkgname <- getCrlmmAnnotationName(cdfName)
8
+	if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
+		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
+		msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
11
+		message(strwrap(msg))
12
+		stop("Package ", pkgname, " could not be found.")
13
+		rm(suggCall, msg)
14
+	}
15
+	if(verbose) message("Loading annotations and mixture model parameters.")
16
+	loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
17
+	loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
18
+	loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
19
+	autosomeIndex <- getVarInEnv("autosomeIndex")
20
+	pnsa <- getVarInEnv("pnsa")
21
+	pnsb <- getVarInEnv("pnsb")
22
+	fid <- getVarInEnv("fid")
23
+	reference <- getVarInEnv("reference")
24
+	aIndex <- getVarInEnv("aIndex")
25
+	bIndex <- getVarInEnv("bIndex")
26
+	SMEDIAN <- getVarInEnv("SMEDIAN")
27
+	theKnots <- getVarInEnv("theKnots")
28
+	gns <- getVarInEnv("gns")
31 29
 
32
-  ##We will read each cel file, summarize, and run EM one by one
33
-  ##We will save parameters of EM to use later
34
-  mixtureParams <- matrix(0, 4, length(filenames))
35
-  SNR <- vector("numeric", length(filenames))
36
-  SKW <- vector("numeric", length(filenames))
37
-
38
-  ## This is the sample for the fitting of splines
39
-  ## BC: I like better the idea of the user passing the seed,
40
-  ##     because this might intefere with other analyses
41
-  ##     (like what happened to GCRMA)
42
-  set.seed(seed)
43
-  
44
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
30
+	##We will read each cel file, summarize, and run EM one by one
31
+	##We will save parameters of EM to use later
32
+	mixtureParams <- matrix(0, 4, length(filenames))
33
+	SNR <- vector("numeric", length(filenames))
34
+	SKW <- vector("numeric", length(filenames))
45 35
 
46
-  ##S will hold (A+B)/2 and M will hold A-B
47
-  ##NOTE: We actually dont need to save S. Only for pics etc...
48
-  ##f is the correction. we save to avoid recomputing
49
-  A <- matrix(as.integer(0), length(pnsa), length(filenames))
50
-  B <- matrix(as.integer(0), length(pnsb), length(filenames))
36
+	## This is the sample for the fitting of splines
37
+	## BC: I like better the idea of the user passing the seed,
38
+	##     because this might intefere with other analyses
39
+	##     (like what happened to GCRMA)
40
+	set.seed(seed)
41
+	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
42
+	##S will hold (A+B)/2 and M will hold A-B
43
+	##NOTE: We actually dont need to save S. Only for pics etc...
44
+	##f is the correction. we save to avoid recomputing
45
+	A <- matrix(as.integer(0), length(pnsa), length(filenames))
46
+	B <- matrix(as.integer(0), length(pnsb), length(filenames))
51 47
   
52
-  if(verbose){
53
-    message("Processing ", length(filenames), " files.")
54
-    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
55
-  }
56
-  ##We start looping throug cel files
57
-  idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
58
-  for(i in seq(along=filenames)){
59
-    y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
60
-    x <- log2(y[idx2])
61
-    SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
62
-    rm(x)
63
-    y <- normalize.quantiles.use.target(y, target=reference)
64
-    A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
65
-    B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
66
-    
67
-    ##Now to fit the EM
68
-    if(fitMixture){
69
-      S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
70
-      M <- log2(A[idx, i])-log2(B[idx, i])
48
+	if(verbose){
49
+		message("Processing ", length(filenames), " files.")
50
+		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
51
+	}
52
+	##We start looping throug cel files
53
+	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
54
+	for(i in seq(along=filenames)){
55
+		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
56
+		x <- log2(y[idx2])
57
+		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
58
+		rm(x)
59
+		y <- normalize.quantiles.use.target(y, target=reference)
60
+		A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
61
+		B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
62
+		##Now to fit the EM
63
+		if(fitMixture){
64
+			S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
65
+			M <- log2(A[idx, i])-log2(B[idx, i])
71 66
       
72
-      ##we need to test the choice of eps.. it is not the max diff between funcs
73
-      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
74
-      
75
-      mixtureParams[, i] <- tmp[["coef"]]
76
-      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
77
-    }
78
-    if (verbose)
79
-      if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
80
-      else cat(".")
81
-  }
82
-  if (verbose)
83
-    if (getRversion() > '2.7.0') close(pb)
84
-    else cat("\n")
85
-  if (!fitMixture) SNR <- mixtureParams <- NA
86
-  ## gns comes from preprocStuff.rda
87
-  list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
67
+			##we need to test the choice of eps.. it is not the max diff between funcs
68
+			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
69
+			
70
+			mixtureParams[, i] <- tmp[["coef"]]
71
+			SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
72
+		}
73
+		if (verbose)
74
+			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
75
+			else cat(".")
76
+	}
77
+	if (verbose)
78
+		if (getRversion() > '2.7.0') close(pb)
79
+		else cat("\n")
80
+	if (!fitMixture) SNR <- mixtureParams <- NA
81
+	## gns comes from preprocStuff.rda
82
+	list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
88 83
 }
89 84
 
90 85
 fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
... ...
@@ -51,7 +51,7 @@ B Carvalho
51 51
     copy-neutral ROH:  (1-epsilon)/2 AA,  epsilon AB,  (1-epsilon)/2
52 52
     BB
53 53
 
54
-  o add caConfidence and cbConfidence slots (but what about the correlation)
54
+  o Define a class that contains settings for genotyping/copynumber estimation
55 55
 
56 56
 
57 57
 
... ...
@@ -4,7 +4,7 @@
4 4
 %\VignettePackage{crlmm}
5 5
 \documentclass{article}
6 6
 \usepackage{graphicx}
7
-\usepackage[authoryear,round,numbers]{natbib}
7
+\usepackage{natbib}
8 8
 \newcommand{\Rfunction}[1]{{\texttt{#1}}}
9 9
 \newcommand{\Rmethod}[1]{{\texttt{#1}}}
10 10
 \newcommand{\Rcode}[1]{{\texttt{#1}}}
... ...
@@ -15,7 +15,7 @@
15 15
 \newcommand{\R}{\textsf{R}}
16 16
 
17 17
 \begin{document}
18
-\title{Copy number estimation}
18
+\title{Copy number estimation and genotype calling with \Rpackage{crlmm}}
19 19
 \date{May, 2009}
20 20
 \author{Rob Scharpf}
21 21
 \maketitle
... ...
@@ -40,12 +40,13 @@ options(prompt="R> ")
40 40
 
41 41
 \section{Simple usage}
42 42
 
43
-The following packages are required:
43
+CRLMM supports the following platforms:
44 44
 
45
-<<requiredPackages>>=
45
+<<cdfname>>=
46 46
 library(crlmm)
47
-library(genomewidesnp6Crlmm)
47
+crlmm:::validCdfNames()
48 48
 @ 
49
+
49 50
 \paragraph{Preprocess and genotype.}
50 51
 
51 52
 Specify the coordinates of Affymetrix cel files and where to put
... ...
@@ -55,17 +56,26 @@ number estimation.
55 56
 <<celfiles>>=
56 57
 myPath <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"
57 58
 celFiles <- list.celfiles(myPath, full.names=TRUE, pattern=".CEL")
58
-outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
59
+#outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
60
+outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/tmp"
61
+dir.create(outdir)
59 62
 @ 
60 63
 
61
-\noindent Preprocess and genotype (for more info see the crlmm vignette):
62 64
 
63
-<<preprocessAndGenotype>>=
64
-crlmmWrapper(celFiles[1:15], 
65
+
66
+\noindent The next code chunk quantile normalizes the samples to a
67
+target reference distribution and uses the crlmm algorithm to genotype.
68
+See \cite{Carvalho2007a} for details regarding the crlmm genotyping
69
+algorithm.
70
+
71
+<<preprocessAndGenotype, eval=FALSE>>=
72
+crlmmWrapper(celFiles,##[1:15], 
73
+	     cdfName="genomewidesnp6",
65 74
 	     save.it=TRUE, 
66
-	     load.it=TRUE,
75
+	     load.it=FALSE,
67 76
 	     intensityFile=file.path(outdir, "normalizedIntensities.rda"),
68
-	     crlmmFile=file.path(outdir, "snpsetObject.rda"))
77
+	     crlmmFile=file.path(outdir, "snpsetObject.rda"),
78
+	     platform="affymetrix")
69 79
 @ 
70 80
 
71 81
 As a result of the above wrapper, the following R objects are now created:
... ...
@@ -110,9 +120,10 @@ if(!exists("emission.cn")){
110 120
 }
111 121
 @ 
112 122
 
113
-*Warning: more can be done than currently implemented to protect against
114
-outliers.  In addition, improved estimates of uncertainty for the copy
115
-number prediction regions will also help.*
123
+*Before smoothing the estimates as a function of physical position by
124
+segmentation hidden Markov models, one should consider how to handle
125
+outliers.  In particular, samples with low signal to noise ratios are
126
+likely to have many copy number outliers. See suggested visualizations.
116 127
 
117 128
 Initial state probabilities and transition probabilities for the HMM:
118 129
 
... ...
@@ -265,8 +276,6 @@ as well as better containers for storing these parameters. See
265 276
 <<copynumberParameters>>=
266 277
 fvarLabels(crlmmSetList[[3]])
267 278
 @ 
268
-
269
-
270 279
 \section{Suggested visualizations}
271 280
 
272 281
 A histogram of the signal to noise ratio for the HapMap samples:
... ...
@@ -285,11 +294,11 @@ hist(crlmmSetList[[2]]$SNR, xlab="SNR", main="")
285 294
 SNRmin <- 5
286 295
 @ 
287 296
 
288
-We suggest excluding samples with a signal to noise ratio less than
289
-\Sexpr{SNRmin}.  As batch effects can be very large in the
290
-quantile-normalized intensities, we suggest adjusting for date or
291
-chemistry plate.  Ideally, one would have 70+ files in a given
292
-batch. Here we make a table of date versus ancestry:
297
+For Affymetrix 6.0, we suggest excluding samples with a signal to noise
298
+ratio less than \Sexpr{SNRmin}.  Adjusting by date or chemistry plate
299
+can be helpful for limiting the influence of batch effects.  Ideally,
300
+one would have 70+ files in a given batch. Here we make a table of date
301
+versus ancestry:
293 302
 
294 303
 <<specifyBatch, eval=FALSE, echo=FALSE>>=
295 304
 sns <- sampleNames(crlmmResults)
... ...
@@ -307,7 +316,7 @@ not the case, we illustrate how one may adjust for batch using the
307 316
 chemistry plate as an argument for \Robject{batch} in the
308 317
 \Robject{computeCopynumber} function.
309 318
 
310
-**Note: the number of samples in the \Robject{CrlmmSetList} object after
319
+*Note: the number of samples in the \Robject{CrlmmSetList} object after
311 320
 copy number estimation may be fewer than the number of samples in the
312 321
 \Robject{CrlmmSetList} object after preprocessing/genotyping.  This
313 322
 occurs when 1 or more samples have a signal-to-noise ratio less than
... ...
@@ -1,8 +1,32 @@
1
+%\VignetteIndexEntry{crlmm copy number Vignette for Illumina}
2
+%\VignetteDepends{crlmm}
3
+%\VignetteKeywords{crlmm, illumina}
4
+%\VignettePackage{crlmm}
5
+\documentclass{article}
6
+\usepackage{graphicx}
7
+\usepackage{natbib}
8
+\newcommand{\Rfunction}[1]{{\texttt{#1}}}
9
+\newcommand{\Rmethod}[1]{{\texttt{#1}}}
10
+\newcommand{\Rcode}[1]{{\texttt{#1}}}
11
+\newcommand{\Robject}[1]{{\texttt{#1}}}
12
+\newcommand{\Rpackage}[1]{{\textsf{#1}}}
13
+\newcommand{\Rclass}[1]{{\textit{#1}}}
14
+\newcommand{\oligo}{\Rpackage{oligo }}
15
+\newcommand{\R}{\textsf{R}}
16
+
17
+\begin{document}
18
+\title{Copy number estimation and genotype calling with \Rpackage{crlmm}}
19
+\date{Oct, 2009}
20
+\author{Rob Scharpf}
21
+\maketitle
22
+
23
+<<setup, echo=FALSE, results=hide>>=
24
+options(width=60)
25
+options(continue=" ")
26
+options(prompt="R> ")
27
+@ 
28
+
1 29
 <<>>=
2
-# start R from /home/bst/other/mritchie/R/R-300309/bin/R
3
-# as this has current crlmm and region package installed
4
-# if you want to ue your own R, the current region package is at
5
-# /thumper/ctsa/snpmicroarray/illumina/crlmm/370k/human370v1cCrlmm/
6 30
 library(Biobase)
7 31
 library(crlmm)
8 32
 setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k")
... ...
@@ -11,71 +35,37 @@ setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k")
11 35
 <<readIdat>>=
12 36
 samplesheet5 = read.csv("HumanHap370Duo_Sample_Map.csv", header=TRUE, as.is=TRUE)[-c(28:46,61:75,78:79),]
13 37
 if(!exists("RG")){
14
-	## remove samples which I don't have .idats for
15
-	## subset further to allow for quicker testing
16
-	##samplesheet5 = samplesheet5[1:5,]
17
-	## read in the .idats
18 38
 	RG <- readIdatFiles(samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE)
19 39
 }
20 40
 @ 
21 41
 
22
-Get dates for each array - scan date is likely to be most meaningful
42
+Alternatively, arguments to the readIdatFiles can be passed through the
43
+{\tt ...} argument of the \R{} function \Robject{crlmmWrapper}.
23 44
 
24
-<<dates>>=
25
-pd <- pData(RG)
26
-scandatetime = strptime(as.character(pd$ScanDate), "%m/%d/%Y %H:%M:%S %p")
27
-decodedatetime = strptime(as.character(pd$DecodeDate), "%m/%d/%Y %H:%M:%S %p")
28
-table(format(scandatetime, "%d %b %Y"))
29
-table(format(scandatetime, "%b"))
45
+<<wrapper>>=
46
+crlmmWrapper(sampleSheet=samplesheet5,
47
+	     arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), 
48
+	     saveDate=TRUE,
49
+	     cdfName=cdfName,
50
+	     load.it=FALSE,
51
+	     save.it=TRUE,
52
+	     intensityFile=file.path(platedir, "normalizedIntensities.rda"),
53
+	     crlmmFile=file.path(platedir, "snpsetObject.rda"),
54
+	     rgFile=file.path(platedir, "rgFile.rda"),
55
+	     platform="illumina")
30 56
 @ 
31 57
 
32 58
 Note: the code below is run for testing purposes only. None of these
33 59
 functions are exported, so would not routinely be run directly by the
34
-user.  A typical analysis would involve runnning readIdatFiles()
35
-followed by crlmmIllumina()
36
-
37
-<<crlmm>>=
38
-if(!exists("res.rda")){
39
-	outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/illumina/HumanCNV370-Duo"	
40
-	if(!file.exists(file.path(outdir, "res.rda"))){
41
-		crlmmOut <- crlmmIllumina(RG=RG, cdfName="human370v1c", sns=pData(RG)$ID, returnParams=TRUE, save.it=TRUE, intensityFile=file.path(outdir, "res.rda"))
42
-		save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))				
43
-	} else{
44
-		message("Loading...")		
45
-		load(file.path(outdir, "res.rda"))
46
-		load(file.path(outdir, "crlmmOut.rda"))		
47
-	}
48
-}
49
-@
60
+user.  A typical analysis would involve runnning crlmmWrapper() followed
61
+by update() to obtain copy number.
50 62
 
51
-TODO: The code below needs to be udpated for the Illumina platform.
52 63
 
53 64
 <<SNR>>=
54 65
 hist(crlmmOut$SNR) ##approx. 5-fold higher than what we see in Affy!
55 66
 @ 
56 67
 
57
-<<>>=
58
-##cnAB was assigned to the global environment
59
-NP = (cnAB$A+cnAB$B)/2 # average normalized A and B intensities
60
-colnames(NP) <- colnames(crlmm:::calls(crlmmOut))
61
-chr22 <- computeCopynumber(chrom=22, A=res[["A"]], B=res[["B"]], 
62
-			   calls=crlmm:::calls(crlmmOut),
63
-			   conf=confs(crlmmOut), 
64
-			   plate=rep(1, ncol(NP)),
65
-			   NP=NP, 
66
-			   cdfName="human370v1c", 
67
-			   SNR=res[["SNR"]], 
68
-			   envir=cnenv)
69
-@ 
70
-
71
-<<>>=
72
-copyA=cnenv[["CA"]]
73
-copyB=cnenv[["CB"]]
74
-copyT=(copyA + copyB)/100
75
-copyT[copyT > 6] <- 6
76
-copyT[copyT < 0] <- 0
77
-plot(copyT[, 1], pch=".")
78
-@ 
68
+\end{document}
79 69
 
80 70
 
81 71
 
... ...
@@ -4,5 +4,52 @@
4 4
   title = {A multilevel model to address batch effects in copy number estimation
5 5
 	using SNP arrays},
6 6
   month = {May},
7
-  year = {2009}
7
+  year = {2009},
8
+  url={http://www.bepress.com/cgi/viewcontent.cgi?article=1193&context=jhubiostat}
9
+}
10
+
11
+@ARTICLE{Carvalho2007a,
12
+  author = {Benilton Carvalho and Henrik Bengtsson and Terence P Speed and Rafael
13
+	A Irizarry},
14
+  title = {Exploration, normalization, and genotype calls of high-density oligonucleotide
15
+	SNP array data.},
16
+  journal = {Biostatistics},
17
+  year = {2007},
18
+  volume = {8},
19
+  pages = {485--499},
20
+  number = {2},
21
+  month = {Apr},
22
+  abstract = {In most microarray technologies, a number of critical steps are required
23
+	to convert raw intensity measurements into the data relied upon by
24
+	data analysts, biologists, and clinicians. These data manipulations,
25
+	referred to as preprocessing, can influence the quality of the ultimate
26
+	measurements. In the last few years, the high-throughput measurement
27
+	of gene expression is the most popular application of microarray
28
+	technology. For this application, various groups have demonstrated
29
+	that the use of modern statistical methodology can substantially
30
+	improve accuracy and precision of the gene expression measurements,
31
+	relative to ad hoc procedures introduced by designers and manufacturers
32
+	of the technology. Currently, other applications of microarrays are
33
+	becoming more and more popular. In this paper, we describe a preprocessing
34
+	methodology for a technology designed for the identification of DNA
35
+	sequence variants in specific genes or regions of the human genome
36
+	that are associated with phenotypes of interest such as disease.
37
+	In particular, we describe a methodology useful for preprocessing
38
+	Affymetrix single-nucleotide polymorphism chips and obtaining genotype
39
+	calls with the preprocessed data. We demonstrate how our procedure
40
+	improves existing approaches using data from 3 relatively large studies
41
+	including the one in which large numbers of independent calls are
42
+	available. The proposed methods are implemented in the package oligo
43
+	available from Bioconductor.},
44
+  doi = {10.1093/biostatistics/kxl042},
45
+  institution = {Department of Biostatistics, Johns Hopkins University, Baltimore,
46
+	MD 21205, USA.},
47
+  keywords = {Algorithms; Alleles; Data Interpretation, Statistical; Genotype; Humans;
48
+	Oligonucleotide Array Sequence Analysis; Oligonucleotides; Polymorphism,
49
+	Single Nucleotide},
50
+  owner = {rscharpf},
51
+  pii = {kxl042},
52
+  pmid = {17189563},
53
+  timestamp = {2008.08.07},
54
+  url = {http://dx.doi.org/10.1093/biostatistics/kxl042}
8 55
 }
9 56
\ No newline at end of file
... ...
@@ -3,9 +3,11 @@
3 3
 \docType{class}
4 4
 \alias{ABset-class}
5 5
 \alias{A}
6
-\alias{A,ABset-method}
7 6
 \alias{B}
7
+\alias{A,ABset-method}
8 8
 \alias{B,ABset-method}
9
+\alias{A<-,ABset,matrix-method}
10
+\alias{B<-,ABset,matrix-method}
9 11
 \title{Class "ABset"}
10 12
 \description{Containter for quantile-normalized intensities}
11 13
 \section{Objects from the Class}{
... ...
@@ -31,7 +33,15 @@ Class \code{\link[Biobase:class.Versioned]{Versioned}}, by class "eSet", distanc
31 33
   \describe{
32 34
     \item{A}{\code{signature(object="ABset")}: accessor for the
33 35
       quantile-normalized intensities of allele A for polymorphic probes
34
-      and the quantile normalized intensities for the copy number probes.}
36
+      and the quantile normalized intensities for the copy number
37
+      probes.}
38
+
39
+    \item{"A<-"}{\code{signature(object="ABset", value="matrix")}:
40
+      replacement method for the A allele intensities.}
41
+
42
+    \item{"B<-"}{\code{signature(object="ABset", value="matrix")}:
43
+    replacement method for the B allele intensities.}    
44
+    
35 45
     \item{B}{\code{signature(object="ABset")}: accessor for the
36 46
       quantile-normalized intensities of allele B for polymorphic probes.}    
37 47
     \item{plot}{\code{signature(x = "ABset", y = "CopyNumberSet")}:
... ...
@@ -4,11 +4,19 @@
4 4
 \alias{CrlmmSetList-class}
5 5
 \alias{[,CrlmmSetList-method}
6 6
 \alias{$,CrlmmSetList-method}
7
+\alias{addFeatureAnnotation}
8
+\alias{addFeatureAnnotation,CrlmmSetList-method}
9
+\alias{A<-}
10
+\alias{B<-}
7 11
 \alias{A,CrlmmSetList-method}
12
+\alias{A<-,CrlmmSetList,matrix-method}
8 13
 \alias{B,CrlmmSetList-method}
14
+\alias{B<-,CrlmmSetList,matrix-method}
9 15
 \alias{batch,CrlmmSetList-method}
10 16
 \alias{CA,CrlmmSetList-method}
11 17
 \alias{CB,CrlmmSetList-method}
18
+\alias{CA<-,CrlmmSetList,matrix-method}
19
+\alias{CB<-,CrlmmSetList,matrix-method}
12 20
 \alias{calls,CrlmmSetList-method}
13 21
 \alias{chromosome,CrlmmSetList-method}
14 22
 \alias{cnIndex,CrlmmSetList-method}
... ...
@@ -24,6 +32,7 @@
24 32
 \alias{show,CrlmmSetList-method}
25 33
 \alias{snpIndex,CrlmmSetList-method}
26 34
 \alias{update,CrlmmSetList-method}
35
+\alias{update,character-method}
27 36
 
28 37
 
29 38
 \title{Class "CrlmmSetList"}
... ...
@@ -81,14 +90,26 @@
81 90
     currently stored in the \code{featureData} of \code{CopyNumberSet}
82 91
     objects. }
83 92
 
93
+  \item{"addFeatureAnnotation"}{\code{signature(object =
94
+  "CrlmmSetList")}: Creates an object of class AnnotatedDataFrame from
95
+  the CrlmmSetList object.  The new annotation object contains
96
+  information on chromosome and physical position and has the same
97
+  ordering of rows as the CrlmmSetList object. }
98
+
84 99
     \item{"A"}{\code{signature(object = "CrlmmSetList")}: extracts the
85 100
       quantile normalized intensities for the A allele in the
86 101
       \code{ABset} element.}
102
+
103
+    \item{"A<-"}{\code{signature(object = "CrlmmSetList", value =
104
+    "matrix")}: Replacement method for the A intensities.}    
87 105
   
88 106
     \item{"B"}{\code{signature(object = "CrlmmSetList")}: extracts the
89 107
       quantile normalized intensities for the B allele in the
90 108
       \code{ABset} element.}
91 109
     
110
+    \item{"B<-"}{\code{signature(object = "CrlmmSetList", value =
111
+    "matrix")}: Replacement method for the B intensities.}        
112
+    
92 113
     \item{"batch"}{\code{signature(object = "CrlmmSetList")}: extracts the
93 114
       batch information used to estimate copy number.}    
94 115
 
... ...
@@ -98,7 +119,13 @@
98 119
   
99 120
     \item{"CB"}{\code{signature(object = "CrlmmSetList")}: extracts the
100 121
       copy number for allele B at polymorphic loci. For nonpolymorphic
101
-      probes, CB returns 'NA'.}    
122
+      probes, CB returns 'NA'.}
123
+
124
+    \item{"CA<-"}{\code{signature(object = "CrlmmSetList", value =
125
+	"matrix")}: Replacement method for the CA estimates.}
126
+
127
+    \item{"CB<-"}{\code{signature(object = "CrlmmSetList", value =
128
+    "matrix")}: Replacement method for the CB estimates.}                
102 129
 
103 130
     \item{"calls"}{\code{signature(object = "CrlmmSetList")}: extracts
104 131
       the genotype calls from the \code{SnpSet} element.}
... ...
@@ -139,6 +166,15 @@
139 166
     itself an instance of class \code{CrlmmSetList}.  The third element
140 167
     of the list is an instance of \code{CopyNumberSet} containing the
141 168
     locus-level, allele-specific estimates of copynumber.  }
169
+
170
+  \item{"update"}{\code{signature(object = "character")}: This method
171
+    loads the CrlmmSetList file given as the argument to object --
172
+    the object is a character string or a vector of character strings
173
+    that provides the complete path to the CrlmmSetList file.
174
+    The CrlmmSetList object is loaded and updated with copy number
175
+    estimates -- an object of class CopyNumberSet becomes the third
176
+    element in the CrlmmSetList object.  After updating the CrlmmSetList
177
+    object, the object is saved to the same path.}
142 178
 }
143 179
 }
144 180
 
... ...
@@ -6,7 +6,7 @@
6 6
   to a HapMap reference distribution.
7 7
 }
8 8
 \usage{
9
-cnrma(filenames, cdfName="genomewidesnp6", sns, seed = 1, verbose=FALSE)
9
+cnrma(filenames, cdfName, sns, seed = 1, verbose=FALSE)
10 10
 }
11 11
 \arguments{
12 12
   \item{filenames}{filenames with complete path}
... ...
@@ -8,7 +8,8 @@
8 8
   
9 9
 }
10 10
 \usage{
11
-computeCopynumber(object, CHR, bias.adj=FALSE, batch, SNRmin=5, cdfName, ...)
11
+computeCopynumber(object, CHR, bias.adj=FALSE, batch, SNRmin=5, cdfName,
12
+platform=c("affymetrix", "illumina")[1], ...)
12 13
 }
13 14
 \arguments{
14 15
   \item{object}{object of class \code{CrlmmSetList}.}
... ...
@@ -20,6 +21,7 @@ computeCopynumber(object, CHR, bias.adj=FALSE, batch, SNRmin=5, cdfName, ...)
20 21
   \item{SNRmin}{The minimum value for the SNR -- we suggest 5. Samples
21 22
     with SNR below SNRmin are dropped.}
22 23
   \item{cdfName}{Annotation package }
24
+  \item{platform}{Character string--must be eitheraffymetrix or illumina.}  
23 25
   \item{\dots}{arguments to \code{.computeCopynumber}.}
24 26
 }
25 27
 
... ...
@@ -33,7 +35,7 @@ computeCopynumber(object, CHR, bias.adj=FALSE, batch, SNRmin=5, cdfName, ...)
33 35
   parameters.  Preferably, 70+ samples would be processed together in a
34 36
   batch.
35 37
   
36
-  This function transforms the quantile-normalized fluorescence
38
+  This function translates the quantile-normalized fluorescence
37 39
   intensities to the scale of copy number.  We assume that for any given
38 40
   locus the median copy number is two for each batch. When
39 41
   \code{bias.adj=TRUE}, an extra iteration is performed whereby samples
... ...
@@ -42,6 +44,14 @@ computeCopynumber(object, CHR, bias.adj=FALSE, batch, SNRmin=5, cdfName, ...)
42 44
   and scale are more robust to a large number of samples having a copy
43 45
   number variant.
44 46
 
47
+  For details, see the technical report:
48
+
49
+Scharpf RB, Ruczinski I, Carvalho B, Doan B, Chakravarti A, and Irizarry
50
+  R. A multilevel model to address batch effects in copy number
51
+  estimation using SNP arrays
52
+  (http://www.bepress.com/cgi/viewcontent.cgi?article=1193&context=jhubiostat).
53
+
54
+
45 55
 }
46 56
 
47 57
 \value{
... ...
@@ -1,24 +1,32 @@
1 1
 \name{methods-eSet}
2
+\alias{chromosome}
3
+\alias{chromosome,eSet-method}
2 4
 \alias{cnIndex}
3 5
 \alias{cnIndex,eSet-method}
6
+\alias{position}
7
+\alias{position,eSet-method}
4 8
 \alias{snpIndex}
5 9
 \alias{snpIndex,eSet-method}
6 10
 
7
-
8 11
 \title{Methods for eSet derivatives}
9 12
 
10 13
 \description{
14
+  \code{chromosome} return the chromosome number for SNP and NP probes.
15
+    
16
+  \code{cnIndex, ...} return the row indices of copy number probes.
11 17
 
18
+  \code{position} return the physical position of the SNP or the
19
+  physical position for the first index of the NP probe.
20
+  
12 21
   \code{snpIndex, ...} return the row indices of polymorphic loci.
13 22
 
14
-  \code{cnIndex, ...} return the row indices of copy number probes.
15
-
16 23
 }
17 24
 
18 25
 \usage{
19 26
 
27
+  chromosome(object)
20 28
   cnIndex(object, ...)
21
-
29
+  position(object)
22 30
   snpIndex(object, ...)
23 31
 
24 32
 }