Browse code

beginning of copy number methods for illumina. some work on copy number estimation for chromosome X

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@40527 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 06/07/2009 19:24:27
Showing17 changed files

... ...
@@ -186,3 +186,12 @@ is decoded and scanned
186 186
 * added vignette for genoytping illumina data (crlmmIllumina.pdf in inst/doc and crlmmIllumina.Rnw in inst/scripts)
187 187
 * Changed stop() to warning() when idats are of different type in readIdatFiles()
188 188
 
189
+
190
+2009-07-06 R Scharpf - committed version 1.3.8
191
+
192
+* initial development of copy number methods for illumina platform
193
+
194
+* added Rd files for previously defined classes/methods
195
+
196
+* some improvements to chromosome X copy number estimation
197
+
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.7
4
+Version: 1.3.8
5 5
 Date: 2009-06-17
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -45,7 +45,7 @@ importFrom(mvtnorm, dmvnorm)
45 45
 
46 46
 importFrom(ellipse, ellipse)
47 47
 
48
-S3method(ellipse,CopyNumberSet)
48
+##S3method(ellipse,CopyNumberSet)
49 49
 
50 50
 exportClasses(ABset, CopyNumberSet, CrlmmSetList)
51 51
 ##S3method(ellipse, CopyNumberSet)
... ...
@@ -73,7 +73,7 @@ export(celDates,
73 73
        crlmmIllumina,
74 74
        crlmmWrapper,
75 75
        ellipse,
76
-       computeEmission,
76
+##       computeEmission,
77 77
        list.celfiles,
78 78
        position,
79 79
        readIdatFiles,
80 80
new file mode 100644
... ...
@@ -0,0 +1,51 @@
1
+        **************************************************
2
+        *                                                *
3
+        *              1.3 SERIES NEWS                   *
4
+        *                                                *
5
+        **************************************************
6
+
7
+USER VISIBLE CHANGES
8
+
9
+     o 3 new classes created:
10
+
11
+        i.  'ABset': container for quantile-normalized A and B
12
+       	    intensities for both SNP and copy number probes.  Required
13
+       	    assay data elements are 'A' and 'B'.  Extends eSet
14
+       	    directly.
15
+
16
+                - For nonpolymorphic probes, the quantile normalized
17
+                  intensity is stored in the 'A' assay data element.
18
+                  The corresponding row in the 'B' assay data element
19
+                  is NA.  This is a bit inefficient, but greatly
20
+                  simplifies downstream analyses.  In particular, '['
21
+                  works.
22
+
23
+        ii.  'CrlmmSetList': container for results from preprocessing
24
+	     and genotyping.  This object is a list.  The first
25
+	     element of the list is an ABset.  The second element is a
26
+	     SnpSet containing genotype calls.  The two elements are
27
+	     required to have identical featureNames and sampleNames.
28
+
29
+       		- added several methods for subsetting and accessing
30
+ 		  elements of this object, including featureNames,
31
+ 		  sampleNames, and "[".
32
+
33
+         iii. 'CopyNumberSet': contains locus-level estimates of copy
34
+  	      number for SNPs and polymorphic probes.
35
+
36
+       	        - Required assay data elements are 'CA' and 'CB',
37
+                  corresponding to the absolute copy number for allele
38
+                  A and B, respectively.
39
+
40
+	        - For nonpolymorphic probes, the total copy number is
41
+                  stored in the 'CA' slot and a NA is recorded for the
42
+                  corresponding row in the CB matrix. 
43
+
44
+		- Useful methods: 'copyNumber', 'ellipse', 'points'
45
+
46
+     o 'crlmmWrapper' function does preprocessing
47
+       (quantile-normalization) and genotyping, saving an object of
48
+       class CrlmmSetList for each chromosome
49
+
50
+     o 'computeCopynumber' now requires an object of class
51
+        'CrlmmSetList' and returns an object of class 'CopyNumberSet'.
... ...
@@ -1,7 +1,6 @@
1 1
 setGeneric("A", function(object) standardGeneric("A"))
2 2
 setGeneric("B", function(object) standardGeneric("B"))
3 3
 ##setGeneric("calls", function(x) standardGeneric("calls"))
4
-
5 4
 setGeneric("confs", function(object) standardGeneric("confs"))
6 5
 setGeneric("CA", function(object) standardGeneric("CA"))
7 6
 setGeneric("CB", function(object) standardGeneric("CB"))
... ...
@@ -154,11 +154,16 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
154 154
 combineIntensities <- function(res, cnrmaResult, cdfName){
155 155
 	rownames(res$B) <- rownames(res$A) <- res$gns
156 156
 	colnames(res$B) <- colnames(res$A) <- res$sns
157
-	NP <- cnrmaResult$NP
158
-	blank <- matrix(NA, nrow(NP), ncol(NP))
159
-	dimnames(blank) <- dimnames(NP)
160
-	A <- rbind(res$A, NP)
161
-	B <- rbind(res$B, blank)
157
+	if(!is.null(cnrmaResult)){
158
+		NP <- cnrmaResult$NP
159
+		blank <- matrix(NA, nrow(NP), ncol(NP))
160
+		dimnames(blank) <- dimnames(NP)
161
+		A <- rbind(res$A, NP)
162
+		B <- rbind(res$B, blank)
163
+	} else {
164
+		A <- res$A
165
+		B <- res$B
166
+	}
162 167
 	aD <- assayDataNew("lockedEnvironment",
163 168
 			   A=A,
164 169
 			   B=B)
... ...
@@ -166,7 +171,7 @@ combineIntensities <- function(res, cnrmaResult, cdfName){
166 171
 		     assayData=aD,
167 172
 		     featureData=annotatedDataFrameFrom(A, byrow=TRUE),
168 173
 		     phenoData=annotatedDataFrameFrom(A, byrow=FALSE),
169
-		     annotation="genomewidesnp6")
174
+		     annotation=cdfName)
170 175
 	ABset$SNR <- res$SNR
171 176
 	ABset$gender <- predictGender(res=res, cdfName=cdfName)
172 177
 	return(ABset)
... ...
@@ -207,8 +212,48 @@ harmonizeDimnamesTo <- function(object1, object2){
207 212
 	stopifnot(all.equal(sampleNames(object1), sampleNames(object2)))
208 213
 	return(object1)
209 214
 }
215
+
216
+crlmmIlluminaWrapper <- function(sampleSheet, outdir="./", cdfName,
217
+				 save.intermediate=FALSE,
218
+				 splitByChr=TRUE,...){
219
+	if(file.exists(file.path(outdir, "RG.rda"))) load(file.path(outdir, "RG.rda"))
220
+	else {
221
+		RG <- readIdatFiles(sampleSheet=samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE,
222
+				    path=path)
223
+		J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG))
224
+		RG <- RG[, -J]
225
+		if(save.intermediate) save(RG, file=file.path(outdir, "RG.rda"))  ##935M for 91 samples...better not to save this
226
+	}	
227
+	if(!file.exists(file.path(outdir, "res.rda"))){
228
+		crlmmOut <- crlmmIllumina(RG=RG, cdfName=cdfName, sns=pData(RG)$ID, returnParams=TRUE, save.it=TRUE, intensityFile=file.path(outdir, "res.rda"))
229
+		if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))				
230
+	} else{
231
+		message("Loading...")		
232
+		load(file.path(outdir, "res.rda"))
233
+		load(file.path(outdir, "crlmmOut.rda"))		
234
+	}
235
+	ABset <- combineIntensities(res, NULL, cdfName=cdfName)
236
+	scanDates(ABset) <- as.character(pData(RG)$ScanDate)
237
+	crlmmResult <- harmonizeSnpSet(crlmmOut, ABset)
238
+	stopifnot(all.equal(dimnames(crlmmOut), dimnames(ABset)))
239
+	crlmmList <- list(ABset,
240
+			  crlmmResult)
241
+	crlmmList <- as(crlmmList, "CrlmmSetList")
242
+	if(splitByChr){
243
+		message("Saving by chromosome")
244
+		splitByChromosome(crlmmList, cdfName=cdfName, outdir=outdir)
245
+	} else{
246
+		message("Saving crlmmList object to ", outdir)
247
+		save(crlmmList, file=file.path(outdir, "crlmmList.rda"))
248
+	}
249
+	message("CrlmmSetList objects saved to ", outdir)
250
+}
251
+	
210 252
 	
211
-crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.it, splitByChr=TRUE, ...){
253
+	
254
+crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.it,
255
+			 splitByChr=TRUE, ...){
256
+	##no visible binding for res
212 257
 	if(!file.exists(file.path(outdir, "crlmmResult.rda"))){
213 258
 		crlmmResult <- crlmm(filenames=filenames, cdfName=cdfName, save.it=TRUE, ...)
214 259
 		if(save.it) save(crlmmResult, file=file.path(outdir, "crlmmResult.rda"))
... ...
@@ -248,6 +293,8 @@ crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.
248 293
 	return()
249 294
 }
250 295
 
296
+
297
+
251 298
 cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALSE){
252 299
 	pkgname <- getCrlmmAnnotationName(cdfName)
253 300
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
... ...
@@ -307,29 +354,20 @@ goodSnps <- function(phi.thr, envir, fewAA=20, fewBB=20){
307 354
 	return(flags)
308 355
 }
309 356
 
357
+##Needs to allow for NULL NP
310 358
 instantiateObjects <- function(calls, conf, NP, plate, envir,
311 359
 			       chrom,
312 360
 			       A, B,
313 361
 			       gender, SNRmin=5, SNR,
314 362
                                pkgname,
315 363
 			       locusSet){
316
-##	pkgname <- paste(pkgname, "Crlmm", sep="")
317 364
 	envir[["chrom"]] <- chrom
318
-##	CHR_INDEX <- paste(chrom, "index", sep="")
319
-##        fname <- paste(CHR_INDEX, ".rda", sep="")
320
-##        loader(fname, .crlmmPkgEnv, pkgname)
321
-##        index <- get("index", envir=.crlmmPkgEnv)
322
-####	data(list=CHR_INDEX, package="genomewidesnp6Crlmm")
323
-##	A <- A[index[[1]], SNR > SNRmin]
324
-##	B <- B[index[[1]], SNR > SNRmin]
325
-##	calls <- calls[index[[1]], SNR > SNRmin]
326
-##	conf <- conf[index[[1]], SNR > SNRmin]
327
-##	NP <- NP[index[[2]], SNR > SNRmin]
328 365
 	A <- A[, SNR > SNRmin]
329 366
 	B <- B[, SNR > SNRmin]
330 367
 	calls <- calls[, SNR > SNRmin]
331 368
 	conf <- conf[, SNR > SNRmin]
332
-	NP <- NP[, SNR > SNRmin]
369
+	if(!is.null(NP))
370
+		NP <- NP[, SNR > SNRmin]
333 371
 	plate <- plate[SNR > SNRmin]
334 372
 	uplate <- unique(plate)
335 373
 	SNR <- SNR[SNR > SNRmin]
... ...
@@ -342,9 +380,12 @@ instantiateObjects <- function(calls, conf, NP, plate, envir,
342 380
 	envir[["calls"]] <- calls
343 381
 	envir[["conf"]] <- conf
344 382
 	snps <- rownames(calls)
345
-	cnvs <- rownames(NP)
383
+	if(!is.null(NP)){	
384
+		cnvs <- rownames(NP)
385
+	} else cnvs <- NULL
346 386
 	sns <- basename(colnames(calls))
347
-	stopifnot(identical(colnames(calls), colnames(NP)))
387
+	if(!is.null(NP))
388
+		stopifnot(identical(colnames(calls), colnames(NP)))
348 389
 	envir[["sns"]] <- sns
349 390
 	envir[["snps"]] <- snps
350 391
 	envir[["cnvs"]] <- cnvs
... ...
@@ -354,7 +395,6 @@ instantiateObjects <- function(calls, conf, NP, plate, envir,
354 395
 			message("Estimating gender")
355 396
 			XMedian <- apply(log2(A[, , drop=FALSE]) + log2(B[, , drop=FALSE]), 2, median)/2
356 397
 			gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRmin]), max(XMedian[SNR>SNRmin])))[["cluster"]]			
357
-			##gender <- getGender(res)
358 398
 			gender[gender==2] <- "female"
359 399
 			gender[gender=="1"] <- "male"
360 400
 			envir[["gender"]] <- gender
... ...
@@ -373,41 +413,34 @@ instantiateObjects <- function(calls, conf, NP, plate, envir,
373 413
 	envir[["Ns"]] <- envir[["muB"]] <- envir[["muA"]] <- Ns
374 414
 	envir[["vB"]] <- envir[["vA"]] <- Ns
375 415
 
376
-	CT.sds <- CT <- matrix(NA, nrow(NP), length(sns))
377
-	##NP.CT <- matrix(NA, nrow(NP), ncol(NP))
378
-	##NP.sds <- matrix(NA, nrow(NP), ncol(NP))
416
+	if(!is.null(NP)){
417
+		CT.sds <- CT <- matrix(NA, nrow(NP), length(sns))
418
+		nuT <- matrix(NA, nrow(NP), length(uplate))
419
+		phiT <- nuT
420
+		normalNP <- matrix(TRUE, nrow(NP), ncol(NP))
421
+		sig2T <- matrix(NA, nrow(NP), length(uplate))		
422
+	} else{
423
+		sig2T <- normalNP <- nuT <- phiT <- CT.sds <- CT <- NULL
424
+	}
379 425
 	envir[["CT"]] <- CT
380 426
 	envir[["CT.sds"]] <- CT.sds
381
-
382
-	##assign("NP.CT", NP.CT, envir)
383
-	##assign("NP.sds", NP.sds, envir)
384
-	nuT <- matrix(NA, nrow(NP), length(uplate))
385
-	phiT <- nuT
386 427
 	envir[["nuT"]] <- nuT
387 428
 	envir[["phiT"]] <- phiT
388
-	##assign("nus", nus, envir=envir)  
389
-	##assign("phis", nus, envir=envir)
390
-
391 429
 	plates.completed <- rep(FALSE, length(uplate))
392 430
 	envir[["plates.completed"]] <- plates.completed
393
-
394 431
 	steps <- rep(FALSE, 4)
395 432
 	names(steps) <- c("suffStats", "coef", "snp-cn", "np-cn")
396 433
 	envir[["steps"]] <- steps
397
-
398 434
 	snpflags <- matrix(FALSE, length(snps), length(uplate))
399 435
 	npflags <- matrix(FALSE, length(cnvs), length(uplate))
400
-	##assign("snpflags", snpflags, envir=envir)
401
-	##assign("npflags", npflags, envir=envir)
402 436
 	envir[["snpflags"]] <- snpflags
403 437
 	envir[["npflags"]] <- npflags
404
-
405 438
 	tau2A <- matrix(NA, nrow(calls), length(uplate))
406 439
 	envir[["tau2A"]] <- tau2A
407 440
 	envir[["tau2B"]] <- tau2A
408 441
 	envir[["sig2A"]] <- tau2A
409 442
 	envir[["sig2B"]] <- tau2A
410
-	sig2T <- matrix(NA, nrow(NP), length(uplate))
443
+
411 444
 	envir[["sig2T"]] <- sig2T
412 445
 	envir[["corr"]] <- tau2A
413 446
 	envir[["corrA.BB"]] <- tau2A
... ...
@@ -417,7 +450,6 @@ instantiateObjects <- function(calls, conf, NP, plate, envir,
417 450
 	envir[["nuB.se"]] <- envir[["nuA.se"]] <- tau2A
418 451
 	envir[["phiB.se"]] <- envir[["phiA.se"]] <- tau2A
419 452
 	normal <- matrix(TRUE, nrow(A), ncol(A))
420
-	normalNP <- matrix(TRUE, nrow(NP), ncol(NP))	
421 453
 	envir[["normal"]] <- normal
422 454
 	envir[["normalNP"]] <- normalNP
423 455
 }
... ...
@@ -430,6 +462,7 @@ computeCopynumber <- function(object,
430 462
 			      cdfName="genomewidesnp6", ...){
431 463
 	##require(oligoClasses)
432 464
 	if(missing(CHR)) stop("Must specify CHR")
465
+	if(CHR == 24) stop("Nothing available yet for chromosome Y")
433 466
 	if(missing(batch)) stop("Must specify batch")
434 467
 	if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples")
435 468
 	##the AB intensities
... ...
@@ -455,6 +488,7 @@ computeCopynumber <- function(object,
455 488
 			   SNR=ABset$SNR,
456 489
 			   bias.adj=FALSE,
457 490
 			   SNRmin=SNRmin,
491
+			   cdfName=cdfName,
458 492
 			   ...)
459 493
 
460 494
 	if(bias.adj){
... ...
@@ -470,6 +504,7 @@ computeCopynumber <- function(object,
470 504
 				   SNR=ABset$SNR,
471 505
 				   bias.adj=TRUE,
472 506
 				   SNRmin=SNRmin,
507
+				   cdfName=cdfName,
473 508
 				   ...)
474 509
 	}
475 510
 	message("Organizing results...")			
... ...
@@ -477,6 +512,55 @@ computeCopynumber <- function(object,
477 512
 	return(locusSet)
478 513
 }
479 514
 
515
+cnIllumina <- function(object,
516
+		       CHR,
517
+		       bias.adj=FALSE,
518
+		       batch,
519
+		       SNRmin=5,
520
+		       cdfName="genomewidesnp6", ...){
521
+	if(missing(CHR)) stop("Must specify CHR")
522
+	if(missing(batch)) stop("Must specify batch")
523
+	if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples")
524
+	ABset <- object[[1]]
525
+	snpset <- object[[2]]
526
+	envir <- new.env()
527
+	NP <- NULL
528
+	message("Fitting model for copy number estimation...")
529
+	.computeCopynumber(chrom=CHR,
530
+			   A=A(ABset),
531
+			   B=B(ABset),
532
+			   calls=calls(snpset),
533
+			   conf=confs(snpset),
534
+			   NP=NULL,
535
+			   plate=batch,
536
+			   envir=envir,
537
+			   SNR=ABset$SNR,
538
+			   bias.adj=FALSE,
539
+			   SNRmin=SNRmin,
540
+			   cdfName=cdfName,
541
+			   ...)
542
+
543
+	if(bias.adj){
544
+		message("Running bias adjustment...")
545
+		.computeCopynumber(chrom=CHR,
546
+				   A=A(ABset),
547
+				   B=B(ABset),
548
+				   calls=calls(snpset),
549
+				   conf=confs(snpset),
550
+				   NP=NULL,
551
+				   plate=batch,
552
+				   envir=envir,
553
+				   SNR=ABset$SNR,
554
+				   bias.adj=TRUE,
555
+				   SNRmin=SNRmin,
556
+				   cdfName=cdfName,
557
+				   ...)
558
+	}
559
+	message("Organizing results...")			
560
+	locusSet <- list2locusSet(envir, ABset=ABset, NPset=NULL, CHR=CHR, cdfName=cdfName)
561
+	return(locusSet)
562
+}
563
+
480 564
 ##getCopyNumberEnvironment <- function(crlmmSetList, cnSet){
481 565
 ##	envir <- new.env()
482 566
 ##	envir[["A"]] <- A(crlmmSetList)[snpIndex(crlmmSetList), ]
... ...
@@ -558,7 +642,6 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
558 642
 	##require(oligoClasses) || stop("oligoClasses package not available")
559 643
 	if(length(ls(envir)) == 0) stop("environment empty")
560 644
 	batch <- envir[["plate"]]
561
-
562 645
 	##SNP copy number	
563 646
 	CA <- envir[["CA"]]
564 647
 	dimnames(CA) <- list(envir[["snps"]], envir[["sns"]])	
... ...
@@ -566,20 +649,21 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
566 649
 	dimnames(CB) <- dimnames(CA)
567 650
 
568 651
 	##NP copy number
569
-	CT <- envir[["CT"]]
570
-	rownames(CT) <- envir[["cnvs"]]
571
-	colnames(CT) <- envir[["sns"]]
572
-	sig2T <- envir[["sig2T"]]
573
-	rownames(sig2T) <- rownames(CT)
574
-	nuT <- envir[["nuT"]]
575
-	colnames(nuT) <- paste("nuT", unique(batch), sep="_")
576
-	##nuA <- rbind(nuA, nuT)
577
-	##nuB <- rbind(nuA, blank)
578
-	phiT <- envir[["phiT"]]
579
-	colnames(phiT) <- paste("phiT", unique(batch), sep="_")
580
-	naMatrix <- matrix(NA, nrow(CT), ncol(CT))
581
-	dimnames(naMatrix) <- dimnames(CT)
582
-	
652
+	if(!is.null(NPset)){
653
+		CT <- envir[["CT"]]
654
+		rownames(CT) <- envir[["cnvs"]]
655
+		colnames(CT) <- envir[["sns"]]
656
+		sig2T <- envir[["sig2T"]]
657
+		rownames(sig2T) <- rownames(CT)
658
+		nuT <- envir[["nuT"]]
659
+		colnames(nuT) <- paste("nuT", unique(batch), sep="_")
660
+		phiT <- envir[["phiT"]]
661
+		colnames(phiT) <- paste("phiT", unique(batch), sep="_")
662
+		naMatrix <- matrix(NA, nrow(CT), ncol(CT))
663
+		dimnames(naMatrix) <- dimnames(CT)
664
+	} else{
665
+		sig2T <- nuT <- phiT <- naMatrix <- CT <- NULL
666
+	}
583 667
 	CA <- rbind(CA, CT)
584 668
 	CB <- rbind(CB, naMatrix)	
585 669
 
... ...
@@ -609,8 +693,12 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
609 693
 
610 694
 
611 695
 	##Combine SNP and NP parameters
612
-	naMatrixParams <- matrix(NA, nrow(CT), length(unique(batch)))
613
-	dimnames(naMatrixParams) <- list(rownames(CT), unique(batch))
696
+	if(!is.null(NPset)){
697
+		naMatrixParams <- matrix(NA, nrow(CT), length(unique(batch)))
698
+		dimnames(naMatrixParams) <- list(rownames(CT), unique(batch))
699
+	} else{
700
+		naMatrixParams <- NULL
701
+	}
614 702
 	tau2A <- rbind(tau2A, naMatrixParams)
615 703
 	tau2B <- rbind(tau2B, naMatrixParams)
616 704
 	sig2A <- rbind(sig2A, sig2T)
... ...
@@ -625,7 +713,6 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
625 713
 	rownames(tau2A) <- rownames(tau2B) <- rownames(sig2A) <- rownames(sig2B) <- rownames(CA)
626 714
 	rownames(corr) <- rownames(corrA.BB) <- rownames(corrB.AA) <- rownames(CA)
627 715
 	rownames(nuA) <- rownames(phiA) <- rownames(nuB) <- rownames(phiB) <- rownames(CA)	
628
-
629 716
 	##phenodata
630 717
 	phenodata <- phenoData(ABset)
631 718
 	phenodata <- phenodata[match(envir[["sns"]], sampleNames(phenodata)), ]
... ...
@@ -633,32 +720,41 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
633 720
 
634 721
 	##Feature Data
635 722
 	position.snp <- snpProbes[match(envir[["snps"]], rownames(snpProbes)), "position"]
636
-	position.np <- cnProbes[match(envir[["cnvs"]], rownames(cnProbes)), "position"]
723
+	names(position.snp) <- envir[["snps"]]
724
+	if(!is.null(NPset)){
725
+		position.np <- cnProbes[match(envir[["cnvs"]], rownames(cnProbes)), "position"]
726
+		names(position.np) <- envir[["cnvs"]]
727
+	} else position.np <- NULL
637 728
 	position <- c(position.snp, position.np)
638 729
 	if(!(identical(names(position), rownames(CA)))){
639 730
 		position <- position[match(rownames(CA), names(position))]
640 731
 	}
732
+	if(sum(duplicated(names(position))) > 0){
733
+		warning("Removing rows with NA identifiers...")
734
+		##RS: fix this
735
+		I <- which(!is.na(names(position)))
736
+	}  else I <- seq(along=names(position))
641 737
 	fd <- data.frame(cbind(CHR,
642
-			       position,
643
-			       tau2A,
644
-			       tau2B,
645
-			       sig2A,
646
-			       sig2B,
647
-			       nuA,
648
-			       nuB,
649
-			       phiA,
650
-			       phiB,
651
-			       corr,
652
-			       corrA.BB,
653
-			       corrB.AA))
738
+			       position[I],
739
+			       tau2A[I,, drop=FALSE],
740
+			       tau2B[I,, drop=FALSE],
741
+			       sig2A[I,, drop=FALSE],
742
+			       sig2B[I,, drop=FALSE],
743
+			       nuA[I,, drop=FALSE],
744
+			       nuB[I,, drop=FALSE],
745
+			       phiA[I,, drop=FALSE],
746
+			       phiB[I,, drop=FALSE],
747
+			       corr[I,, drop=FALSE],
748
+			       corrA.BB[I,, drop=FALSE],
749
+			       corrB.AA[I,, drop=FALSE]))
654 750
 	colnames(fd)[1:2] <- c("chromosome", "position")
655
-	rownames(fd) <- rownames(CA)
751
+	rownames(fd) <- rownames(CA)[I]
656 752
 	fD <- new("AnnotatedDataFrame",
657 753
 		  data=fd,
658 754
 		  varMetadata=data.frame(labelDescription=colnames(fd)))	
659 755
 	assayData <- assayDataNew("lockedEnvironment",
660
-				  CA=CA,
661
-				  CB=CB)
756
+				  CA=CA[I, ],
757
+				  CB=CB[I, ])
662 758
 	cnset <- new("CopyNumberSet",
663 759
 		      assayData=assayData,
664 760
 		      featureData=fD,
... ...
@@ -671,10 +767,14 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
671 767
 thresholdCopyNumberSet <- function(object){
672 768
 	ca <- CA(object)
673 769
 	cb <- CB(object)
674
-	ca[ca < 5] <- 5
675
-	ca[ca > 500] <- 500
676
-	cb[cb < 5] <- 5
677
-	cb[cb > 500] <- 500
770
+	ca[ca < 0.05] <- 0.05
771
+	ca[ca > 5] <- 5
772
+	cb[cb < 0.05] <- 0.05
773
+	cb[cb > 5] <- 5
774
+	ca <- matrix(as.integer(ca*100), nrow(ca), ncol(ca))
775
+	cb <- matrix(as.integer(cb*100), nrow(cb), ncol(cb))
776
+	rownames(ca) <- rownames(cb) <- featureNames(object)
777
+	colnames(ca) <- colnames(cb) <- sampleNames(object)
678 778
 	CA(object) <- ca
679 779
 	CB(object) <- cb
680 780
 	return(object)
... ...
@@ -682,31 +782,31 @@ thresholdCopyNumberSet <- function(object){
682 782
 
683 783
 
684 784
 .computeCopynumber <- function(chrom,
685
-			      A,
686
-			      B,
687
-			      calls,
688
-			      conf,
689
-			      NP,
690
-			      plate,
691
-			      MIN.OBS=5,
692
-			      envir,
693
-			      P,
694
-			      DF.PRIOR=50,
695
-			      CONF.THR=0.99,
696
-			      bias.adj=FALSE,
697
-			      priorProb,
698
-			      gender=NULL,
699
-			      SNR,
700
-			      SNRmin=5, seed=123,
701
-			      cdfName="genomewidesnp6",
702
-			      verbose=TRUE, ...){
785
+			       A,
786
+			       B,
787
+			       calls,
788
+			       conf,
789
+			       NP,
790
+			       plate,
791
+			       MIN.OBS=5,
792
+			       envir,
793
+			       P,
794
+			       DF.PRIOR=50,
795
+			       CONF.THR=0.99,
796
+			       bias.adj=FALSE,
797
+			       priorProb,
798
+			       gender=NULL,
799
+			       SNR,
800
+			       SNRmin=5, seed=123,
801
+			       cdfName,
802
+			       verbose=TRUE, ...){
803
+	if(missing(cdfName)) stop("cdfName must be provided")
703 804
 	require(paste(cdfName, "Crlmm", sep=""), character.only=TRUE) || stop(paste("cdf ", cdfName, "Crlmm", " not available.", sep=""))
704 805
 	if(!missing(plate)){
705 806
 		if(length(plate) != ncol(A))
706 807
 			stop("plate must the same length as the number of columns of A")
707 808
 	}
708 809
 	set.seed(seed)
709
-	##if(missing(chrom)) stop("must specify chromosome")
710 810
 	if(length(ls(envir)) == 0) {
711 811
 		instantiateObjects(calls=calls,
712 812
 				   conf=conf,
... ...
@@ -741,13 +841,16 @@ thresholdCopyNumberSet <- function(object){
741 841
 			cat(".")
742 842
 			if(sum(plate == uplate[p]) < 10) next()
743 843
 			J <- plate==uplate[p]
844
+			if(!is.null(NP)){
845
+				npMatrix <- NP[, J]
846
+			} else npMatrix <- NULL
744 847
 			oneBatch(plateIndex=p,
745 848
 				 A=A[, J],
746 849
 				 B=B[, J],
747 850
 				 calls=calls[, J],
748 851
 				 conf=conf[, J],
749 852
 				 gender=NULL,
750
-				 NP[, J],
853
+				 npMatrix,
751 854
 				 plate[J],
752 855
 				 MIN.OBS=1,
753 856
 				 envir=envir,
... ...
@@ -782,12 +885,14 @@ thresholdCopyNumberSet <- function(object){
782 885
 		envir[["steps"]] <- steps						
783 886
 	}
784 887
 	if(!steps[4]){
785
-		message("\nCopy number for nonpolymorphic probes...")	
786
-		for(p in P){
787
-			cat(".")
788
-			nonpolymorphic(plateIndex=p,
789
-				       NP=NP[, plate==uplate[p]],
790
-				       envir=envir)
888
+		if(!is.null(NP)){
889
+			message("\nCopy number for nonpolymorphic probes...")	
890
+			for(p in P){
891
+				cat(".")
892
+				nonpolymorphic(plateIndex=p,
893
+					       NP=NP[, plate==uplate[p]],
894
+					       envir=envir)
895
+			}
791 896
 		}
792 897
 		steps[4] <- TRUE
793 898
 		envir[["step"]] <- steps
... ...
@@ -841,44 +946,38 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk
841 946
 	} else {
842 947
 		ix <- 1:nrow(X)
843 948
 	}
844
-	##X <- X[ix, ]
845
-	##Y <- Y[ix]
846 949
 	betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix]))
847
-##	Yhat <- X%*%betahat
848
-##	phihat <- 2^Yhat
849
-##	nuhat <- 2^X[, 2] - 2*phihat
850
-##	nuAB <- c(nuA[!flagsA], nuB[!flagsB])
851
-##	plot(log2(nuhat), log2(nuAB), pch=".")
852
-##	plot(log2(nuhat)-log2(nuAB), pch=".")
853
-##	hist(log2(nuAB))
854
-	##plot(Y-Yhat, pch=".")
855
-	##plot(Y, Yhat, pch=".")
856
-	##calculate R2
857 950
 	if(CHR == 23){
951
+		normalNP <- envir[["normalNP"]]
952
+		normalNP <- normalNP[, plate==uplate[p]]
953
+		nuT <- envir[["nuT"]]
954
+		phiT <- envir[["phiT"]]
955
+		
858 956
 		cnvs <- envir[["cnvs"]]
859 957
                 loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
860 958
                 cnProbes <- get("cnProbes", envir=.crlmmPkgEnv)
861
-##		data(cnProbes, package="genomewidesnp6Crlmm")
862 959
 		cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ]
863
-		##par:pseudo-autosomal regions
864
-		par <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754)
865
-		gender <- envir[["gender"]]
866
-		mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE)
867
-		mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE)
868
-		mus <- log(cbind(mu1, mu2))
869
-		X <- cbind(1, mus[, 1])
960
+
870 961
 		##For build Hg18
871 962
 		##http://genome.ucsc.edu/cgi-bin/hgGateway
872 963
 		##pseudo-autosomal regions on X
873 964
 		##chrX:1-2,709,520 and chrX:154584237-154913754, respectively
874
-		Yhat1 <- as.numeric(X %*% betahat)
875
-		X <- cbind(1, mus[, 2])		
876
-		Yhat2 <- as.numeric(X %*% betahat)
877
-		phi1 <- exp(Yhat1)
878
-		phi2 <- exp(Yhat2)
879
-		nu1 <- exp(mus[, 1]) - phi1
880
-		nu1[par] <- exp(mus[par, 1]) - 2*phi1[par]
881
-		nu2 <- exp(mus[, 2]) - 2*phi2
965
+		##par:pseudo-autosomal regions
966
+		pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754)
967
+		gender <- envir[["gender"]]
968
+		mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE)
969
+		mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE)
970
+		mus <- log2(cbind(mu1, mu2))
971
+		X.men <- cbind(1, mus[, 1])
972
+		X.fem <- cbind(1, mus[, 2])
973
+		
974
+		Yhat1 <- as.numeric(X.men %*% betahat)
975
+		Yhat2 <- as.numeric(X.fem %*% betahat)
976
+		phi1 <- 2^(Yhat1)
977
+		phi2 <- 2^(Yhat2)
978
+		nu1 <- 2^(mus[, 1]) - phi1
979
+		nu2 <- 2^(mus[, 2]) - 2*phi2		
980
+		nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR]
882 981
 		CT1 <- 1/phi1*(NP[, gender=="male"]-nu1)
883 982
 		CT2 <- 1/phi2*(NP[, gender=="female"]-nu2)
884 983
 		CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1))
... ...
@@ -887,6 +986,16 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk
887 986
 		CT[, plate==uplate[p] & gender=="male"] <- CT1
888 987
 		CT[, plate==uplate[p] & gender=="female"] <- CT2
889 988
 		envir[["CT"]] <- CT
989
+
990
+		##only using females to compute the variance
991
+		normalNP[, gender=="male"] <- NA		
992
+		sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2
993
+		nuT[, p] <- nu2
994
+		phiT[, p] <- phi2
995
+		envir[["sig2T"]] <- sig2T
996
+		envir[["CT"]] <- CT
997
+		envir[["phiT"]] <- nuT
998
+		envir[["nuT"]] <- phiT
890 999
 	} else {
891 1000
 		normalNP <- envir[["normalNP"]]
892 1001
 		normalNP <- normalNP[, plate==uplate[p]]
... ...
@@ -934,7 +1043,6 @@ withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envi
934 1043
 		IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE)
935 1044
 		IX <- IX == "female"
936 1045
 	} else IX <- matrix(TRUE, nrow(G), ncol(G))
937
-	
938 1046
 	index <- GT.B <- GT.A <- vector("list", 3)
939 1047
 	names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
940 1048
 	##--------------------------------------------------
... ...
@@ -1024,7 +1132,8 @@ oneBatch <- function(plateIndex,
1024 1132
 		}
1025 1133
 		message("running bias adjustment")		
1026 1134
 		##adjustment for nonpolymorphic probes
1027
-		biasAdjNP(plateIndex=p, envir=envir, priorProb=priorProb)
1135
+		if(!is.null(NP))
1136
+			biasAdjNP(plateIndex=p, envir=envir, priorProb=priorProb)
1028 1137
 		##adjustment for SNPs
1029 1138
 		biasAdj(plateIndex=p, envir=envir, priorProb=priorProb)
1030 1139
 		message("Recomputing location and scale parameters")		
... ...
@@ -1356,6 +1465,7 @@ polymorphic <- function(plateIndex, A, B, envir){
1356 1465
 
1357 1466
 
1358 1467
 biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
1468
+	gender <- envir[["gender"]]
1359 1469
 	CHR <- envir[["chrom"]]
1360 1470
 	if(CHR == 23){
1361 1471
 		phiAx <- envir[["phiAx"]]
... ...
@@ -1429,18 +1539,13 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
1429 1539
 	posteriorProb[, , 3] <- norm
1430 1540
 	posteriorProb[, , 4] <- amp
1431 1541
 	mostLikelyState <- apply(posteriorProb, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1542
+	if(CHR == 23){
1543
+		##so state index 3 is the most likely state for men and women
1544
+		mostLikelyState[, gender=="male"] <- mostLikelyState[, gender=="male"] + 1
1545
+	}
1432 1546
 	##Adjust for SNPs that have less than 80% of the samples in an altered state
1433 1547
 	##flag the remainder?
1434
-	if(CHR != 23){
1435
-		proportionSamplesAltered <- rowMeans(mostLikelyState != 3)
1436
-	}else{
1437
-		##should also consider pseud
1438
-##		browser()
1439
-		gender <- envir[["gender"]]
1440
-##		tmp3 <- tmp2
1441
-##		tmp3[, gender=="male"] <- tmp2[, gender=="male"] != 2
1442
-##		tmp3[, gender=="female"] <- tmp2[, gender=="female"] != 3
1443
-	}
1548
+	proportionSamplesAltered <- rowMeans(mostLikelyState != 3)
1444 1549
 	##Those near 1 have NaNs for nu and phi.  this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome
1445 1550
 	##ii <- proportionSamplesAltered < PROP
1446 1551
 	##ii <- proportionSamplesAltered > 0.05
... ...
@@ -1448,46 +1553,45 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
1448 1553
 	ii <- proportionSamplesAltered < 0.8 & proportionSamplesAltered > 0.01
1449 1554
 	##only exclude observations from one tail, depending on
1450 1555
 	##whether more are up or down
1451
-	if(CHR != 23){
1452
-		moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) ##3 is normal
1453
-		NORM <- matrix(FALSE, nrow(A), ncol(A))
1454
-		NORM[proportionSamplesAltered > 0.8, ] <- FALSE
1455
-		##big and greater than 1 if copy number 3 is more likely
1456
-		ratioUp <- posteriorProb[, , 4]/posteriorProb[, , 3]
1457
-		##large values will have small ranks
1458
-		##rankUP <- t(apply(ratio, 1, rank))
1459
-		## drop small ranks up to the maximum number, unless the ratio is greater than 1 
1460
-		##NORM[ii & moreup, ] <- !(rankUP <= maxNumberToDrop) | (ratio < 1)
1461
-		NORM[ii & moreup, ] <- ratioUp[moreup & ii] < 1  ##normal more likely
1462
-		##big and greater than 1 if copy number 1 is more likely than 2
1463
-		ratioDown <- posteriorProb[, , 2]/posteriorProb[, , 3]
1464
-		##big values have small ranks
1465
-		##rankDown <- t(apply(ratio, 1, rank))		
1466
-		##NORM[ii & !moreup, ] <- !(rankDown <= maxNumberToDrop) | (ratio < 1)
1467
-		NORM[ii & !moreup, ] <- ratioDown[!moreup & ii] < 1  ##normal more likely
1468
-##		NORM[ii & !moreup, ] <- up
1469
-		##Define NORM so that we can iterate this step
1470
-		##NA's in the previous iteration (normal) will be propogated
1471
-		normal <- NORM*normal
1472
-	} else{
1473
-		fem <- mostLikelyState[, gender=="female"]
1474
-		mal <- mostLikelyState[, gender=="male"]
1475
-		moreupF <- rowSums(fem > 3) > rowSums(fem < 3)
1476
-		moreupM <- rowSums(mal > 2) > rowSums(mal < 2)
1477
-		notUpF <-  fem[ii & moreupF, ] <= 3
1478
-		notUpM <-  fem[ii & moreupM, ] <= 2	       
1479
-		notDownF <- fem[ii & !moreupF, ] >= 3
1480
-		notDownM <- mal[ii & !moreupM, ] >= 2
1481
-		normalF <- matrix(TRUE, nrow(fem), ncol(fem))
1482
-		normalF[ii & moreupF, ] <- notUpF
1483
-		normalF[ii & !moreupF, ] <- notDownF
1484
-		normalM <- matrix(TRUE, nrow(mal), ncol(mal))
1485
-		normalM[ii & moreupM, ] <- notUpM
1486
-		normalM[ii & !moreupM, ] <- notDownM
1487
-		normal <- matrix(TRUE, nrow(A), ncol(A))
1488
-		normal[, gender=="female"] <- normalF
1489
-		normal[, gender=="male"] <- normalM
1490
-	}
1556
+	moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) ##3 is normal
1557
+	NORM <- matrix(FALSE, nrow(A), ncol(A))
1558
+	NORM[proportionSamplesAltered > 0.8, ] <- FALSE
1559
+	##big and greater than 1 if copy number 3 is more likely
1560
+	ratioUp <- posteriorProb[, , 4]/posteriorProb[, , 3]
1561
+	##large values will have small ranks
1562
+	##rankUP <- t(apply(ratio, 1, rank))
1563
+	## drop small ranks up to the maximum number, unless the ratio is greater than 1 
1564
+	##NORM[ii & moreup, ] <- !(rankUP <= maxNumberToDrop) | (ratio < 1)
1565
+	NORM[ii & moreup, ] <- ratioUp[moreup & ii] < 1  ##normal more likely
1566
+	##big and greater than 1 if copy number 1 is more likely than 2
1567
+	ratioDown <- posteriorProb[, , 2]/posteriorProb[, , 3]
1568
+	##big values have small ranks
1569
+	##rankDown <- t(apply(ratio, 1, rank))		
1570
+	##NORM[ii & !moreup, ] <- !(rankDown <= maxNumberToDrop) | (ratio < 1)
1571
+	NORM[ii & !moreup, ] <- ratioDown[!moreup & ii] < 1  ##normal more likely
1572
+	##		NORM[ii & !moreup, ] <- up
1573
+	##Define NORM so that we can iterate this step
1574
+	##NA's in the previous iteration (normal) will be propogated
1575
+	normal <- NORM*normal
1576
+##	} else{
1577
+##		fem <- mostLikelyState[, gender=="female"]
1578
+##		mal <- mostLikelyState[, gender=="male"]
1579
+##		moreupF <- rowSums(fem > 3) > rowSums(fem < 3)
1580
+##		moreupM <- rowSums(mal > 2) > rowSums(mal < 2)
1581
+##		notUpF <-  fem[ii & moreupF, ] <= 3
1582
+##		notUpM <-  fem[ii & moreupM, ] <= 2	       
1583
+##		notDownF <- fem[ii & !moreupF, ] >= 3
1584
+##		notDownM <- mal[ii & !moreupM, ] >= 2
1585
+##		normalF <- matrix(TRUE, nrow(fem), ncol(fem))
1586
+##		normalF[ii & moreupF, ] <- notUpF
1587
+##		normalF[ii & !moreupF, ] <- notDownF
1588
+##		normalM <- matrix(TRUE, nrow(mal), ncol(mal))
1589
+##		normalM[ii & moreupM, ] <- notUpM
1590
+##		normalM[ii & !moreupM, ] <- notDownM
1591
+##		normal <- matrix(TRUE, nrow(A), ncol(A))
1592
+##		normal[, gender=="female"] <- normalF
1593
+##		normal[, gender=="male"] <- normalM
1594
+##	}
1491 1595
 	flagAltered <- which(proportionSamplesAltered > 0.5)
1492 1596
 	envir[["flagAltered"]] <- flagAltered
1493 1597
 	normal[normal == FALSE] <- NA
... ...
@@ -1692,6 +1796,7 @@ biasAdjNP <- function(plateIndex, envir, priorProb){
1692 1796
 	plate <- envir[["plate"]]
1693 1797
 	uplate <- envir[["uplate"]]
1694 1798
 	sig2T <- envir[["sig2T"]]
1799
+	gender <- envir[["gender"]]
1695 1800
 	normalNP <- normalNP[, plate==uplate[p]]	
1696 1801
 	NP <- NP[, plate==uplate[p]]
1697 1802
 	sig2T <- sig2T[, p]
... ...
@@ -1716,32 +1821,23 @@ biasAdjNP <- function(plateIndex, envir, priorProb){
1716 1821
 		counter <- counter+1
1717 1822
 	}
1718 1823
 	mostLikelyState <- apply(emit, c(1, 2), function(x) order(x, decreasing=TRUE)[1])
1719
-	##Adjust for SNPs that have less than 80% of the samples in an altered state
1720
-	##flag the remainder?
1721
-	if(CHR != 23){
1722
-		tmp3 <- mostLikelyState != 3
1723
-	}else{
1724
-		browser()
1725
-		##should also consider pseudoautosomal
1726
-		gender <- envir[["gender"]]
1727
-		tmp3 <- mostLikelyState
1728
-		tmp3[, gender=="male"] <- mostLikelyState[, gender=="male"] != 2
1729
-		tmp3[, gender=="female"] <- mostLikelyState[, gender=="female"] != 3
1824
+	if(CHR == 23){
1825
+		## the state index for male on chromosome 23  is 2
1826
+		## add 1 so that the state index is 3 for 'normal' state
1827
+		mostLikelyState[, gender=="male"] <- mostLikelyState[, gender=="male"] + 1
1730 1828
 	}
1829
+	tmp3 <- mostLikelyState != 3
1731 1830
 	##Those near 1 have NaNs for nu and phi.  this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome
1732 1831
 	proportionSamplesAltered <- rowMeans(tmp3)##prop normal
1733 1832
 	ii <- proportionSamplesAltered < 0.75
1734
-	##only exclude observations from one tail, depending on
1735
-	##whether more are up or down
1736
-	if(CHR != 23){
1737
-		moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3)
1738
-		notUp <-  mostLikelyState[ii & moreup, ] <= 3
1739
-		notDown <- mostLikelyState[ii & !moreup, ] >= 3
1740
-		NORM <- matrix(TRUE, nrow(NP), ncol(NP))
1741
-		NORM[ii & moreup, ] <- notUp
1742
-		NORM[ii & !moreup, ] <- notDown
1743
-		normalNP <- normalNP*NORM
1744
-	}
1833
+	moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3)
1834
+	notUp <-  mostLikelyState[ii & moreup, ] <= 3
1835
+	notDown <- mostLikelyState[ii & !moreup, ] >= 3
1836
+	NORM <- matrix(TRUE, nrow(NP), ncol(NP))
1837
+	NORM[ii & moreup, ] <- notUp
1838
+	NORM[ii & !moreup, ] <- notDown
1839
+	normalNP <- normalNP*NORM
1840
+
1745 1841
 	flagAltered <- which(proportionSamplesAltered > 0.5)
1746 1842
 	envir[["flagAlteredNP"]] <- flagAltered
1747 1843
 	normalNP[normalNP == FALSE] <- NA
... ...
@@ -3,47 +3,47 @@
3 3
 # or to use the optional 'Path' column in sampleSheet
4 4
 # - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet
5 5
 
6
-readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
7
-                                arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
8
-                                highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) {
6
+readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
7
+			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
8
+			  highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) {
9 9
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
10
-         arrayNames=NULL
11
-         if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
12
-           barcode = sampleSheet[,arrayInfoColNames$barcode]
13
-           arrayNames=barcode
14
-         }
15
-         if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
16
-           position = sampleSheet[,arrayInfoColNames$position]
17
-           if(is.null(arrayNames))
18
-             arrayNames=position
19
-           else
20
-             arrayNames = paste(arrayNames, position, sep=sep)
21
-           if(highDensity) {
22
-             hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
23
-             for(i in names(hdExt))
24
-               arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
25
-            }
26
-         }
27
-         pd = new("AnnotatedDataFrame", data = sampleSheet)
10
+	       arrayNames=NULL
11
+	       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
12
+		       barcode = sampleSheet[,arrayInfoColNames$barcode]
13
+		       arrayNames=barcode
14
+	       }
15
+	       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
16
+		       position = sampleSheet[,arrayInfoColNames$position]
17
+		       if(is.null(arrayNames))
18
+			       arrayNames=position
19
+		       else
20
+			       arrayNames = paste(arrayNames, position, sep=sep)
21
+		       if(highDensity) {
22
+			       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
23
+			       for(i in names(hdExt))
24
+				       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
25
+		       }
26
+	       }
27
+	       pd = new("AnnotatedDataFrame", data = sampleSheet)
28 28
        }
29 29
        if(is.null(arrayNames)) {
30
-         arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
31
-         if(!is.null(sampleSheet)) {
32
-           sampleSheet=NULL
33
-           cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
34
-         }
35
-         pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
30
+	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
31
+	       if(!is.null(sampleSheet)) {
32
+		       sampleSheet=NULL
33
+		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
34
+	       }
35
+	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
36 36
        }
37 37
        narrays = length(arrayNames)
38 38
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
39 39
        redfiles = paste(arrayNames, fileExt$red, sep=sep)
40 40
        if(length(grnfiles)==0 || length(redfiles)==0)
41
-         stop("Cannot find .idat files")
41
+	       stop("Cannot find .idat files")
42 42
        if(length(grnfiles)!=length(redfiles))
43
-         stop("Cannot find matching .idat files")
43
+	       stop("Cannot find matching .idat files")
44 44
        if(!all(c(redfiles,grnfiles) %in% dir(path=path)))
45
-         stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
46
-                 paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
45
+	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
46
+		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
47 47
        grnidats = file.path(path, grnfiles)
48 48
        redidats = file.path(path, redfiles)
49 49
        
... ...
@@ -58,97 +58,95 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
58 58
 
59 59
        # read in the data
60 60
        for(i in seq(along=arrayNames)) {
61
-         cat("reading", arrayNames[i], "\t")
62
-         idsG = idsR = G = R = NULL
63
-
64
-         cat(paste(sep, fileExt$green, sep=""), "\t")
65
-         G = readIDAT(grnidats[i])
66
-         idsG = rownames(G$Quants)
67
-         
68
-         headerInfo$nProbes[i] = G$nSNPsRead
69
-         headerInfo$Barcode[i] = G$Barcode
70
-         headerInfo$ChipType[i] = G$ChipType
71
-         headerInfo$Manifest[i] = G$Unknown$MostlyNull
72
-         headerInfo$Position[i] = G$Unknowns$MostlyA
61
+	       cat("reading", arrayNames[i], "\t")
62
+	       idsG = idsR = G = R = NULL
63
+	       cat(paste(sep, fileExt$green, sep=""), "\t")
64
+	       G = readIDAT(grnidats[i])
65
+	       idsG = rownames(G$Quants)
66
+	       headerInfo$nProbes[i] = G$nSNPsRead
67
+	       headerInfo$Barcode[i] = G$Barcode
68
+	       headerInfo$ChipType[i] = G$ChipType
69
+	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
70
+	       headerInfo$Position[i] = G$Unknowns$MostlyA
73 71
          
74
-         if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
75
-                  # || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
76
-                  # but differed by a few SNPs for some reason - most of the chip was the same though
77
-#           stop("Chips are not of all of the same type - please check your data")
78
-           warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
79
-           next()
80
-         }
81
-
82
-         dates$decode[i] = G$RunInfo[1, 1]
83
-         dates$scan[i] = G$RunInfo[2, 1]
84
-
85
-         if(i==1) {
86
-           if(is.null(ids) && !is.null(G))
87
-             ids = idsG
88
-           else
89
-             stop("Could not find probe IDs")
90
-           nprobes = length(ids)
91
-           narrays = length(arrayNames)
72
+	       if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
73
+		       ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
74
+		       ## but differed by a few SNPs for some reason - most of the chip was the same though
75
+		       ##           stop("Chips are not of all of the same type - please check your data")
76
+		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
77
+		       next()
78
+	       }
79
+	       
80
+	       dates$decode[i] = G$RunInfo[1, 1]
81
+	       dates$scan[i] = G$RunInfo[2, 1]
82
+
83
+	       if(i==1) {
84
+		       if(is.null(ids) && !is.null(G))
85
+			       ids = idsG
86
+		       else
87
+			       stop("Could not find probe IDs")
88
+		       nprobes = length(ids)
89
+		       narrays = length(arrayNames)
92 90
            
93
-           tmpmat = matrix(NA, nprobes, narrays)
94
-           rownames(tmpmat) = ids
95
-           if(!is.null(sampleSheet))
96
-             colnames(tmpmat) = sampleSheet$Sample_ID
97
-           else
98
-             colnames(tmpmat) = arrayNames
99
-
100
-           RG = new("NChannelSet",
101
-                     R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat,
102
-                     Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1],
103
-                     phenoData=pd, storage.mode="environment")
104
-           rm(tmpmat)
105
-           gc()
106
-         }
91
+		       tmpmat = matrix(NA, nprobes, narrays)
92
+		       rownames(tmpmat) = ids
93
+		       if(!is.null(sampleSheet))
94
+			       colnames(tmpmat) = sampleSheet$Sample_ID
95
+		       else
96
+			       colnames(tmpmat) = arrayNames
97
+
98
+		       RG = new("NChannelSet",
99
+		       R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat,
100
+		       Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1],
101
+		       phenoData=pd, storage.mode="environment")
102
+		       rm(tmpmat)
103
+		       gc()
104
+	       }
107 105
          
108
-         if(length(ids)==length(idsG)) {
109
-           if(sum(ids==idsG)==nprobes) {
110
-             RG@assayData$G[,i] = G$Quants[, "Mean"]
111
-             RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
112
-             RG@assayData$Gse[,i] = G$Quants[, "SD"]
113
-           }
114
-         }
115
-         else {
116
-           indG = match(ids, idsG)
117
-           RG@assayData$G[,i] = G$Quants[indG, "Mean"]
118
-           RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
119
-           RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
120
-         }
121
-         rm(G)
122
-         gc()
106
+	       if(length(ids)==length(idsG)) {
107
+		       if(sum(ids==idsG)==nprobes) {
108
+			       RG@assayData$G[,i] = G$Quants[, "Mean"]
109
+			       RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
110
+			       RG@assayData$Gse[,i] = G$Quants[, "SD"]
111
+		       }
112
+	       }
113
+	       else {
114
+		       indG = match(ids, idsG)
115
+		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
116
+		       RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
117
+		       RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
118
+	       }
119
+	       rm(G)
120
+	       gc()
123 121
          
124
-         cat(paste(sep, fileExt$red, sep=""), "\n")
125
-         R = readIDAT(redidats[i])
126
-         idsR = rownames(R$Quants)
122
+	       cat(paste(sep, fileExt$red, sep=""), "\n")
123
+	       R = readIDAT(redidats[i])
124
+	       idsR = rownames(R$Quants)
127 125
          
128
-         if(length(ids)==length(idsG)) {   
129
-           if(sum(ids==idsR)==nprobes) {
130
-             RG@assayData$R[,i] = R$Quants[ ,"Mean"]
131
-             RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
132
-             RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
133
-           }
134
-         }
135
-         else {
136
-           indR = match(ids, idsR)
137
-           RG@assayData$R[,i] = R$Quants[indR, "Mean"]
138
-           RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
139
-           RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
140
-         }
141
-         rm(R)
142
-         gc()
126
+	       if(length(ids)==length(idsG)) {   
127
+		       if(sum(ids==idsR)==nprobes) {
128
+			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
129
+			       RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
130
+			       RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
131
+		       }
132
+	       }
133
+	       else {
134
+		       indR = match(ids, idsR)
135
+		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
136
+		       RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
137
+		       RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
138
+	       }
139
+	       rm(R)
140
+	       gc()
143 141
        }
144 142
        if(saveDate)
145
-         RG@scanDates = dates$scan
143
+	       RG@scanDates = dates$scan
146 144
        storageMode(RG) = "lockedEnvironment"
147 145
        RG
148 146
 }
149 147
 
150 148
 
151
-# the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
149
+## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
152 150
 readIDAT <- function(idatFile){
153 151
 
154 152
   fileSize <- file.info(idatFile)$size
... ...
@@ -692,7 +690,6 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
692 690
                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
693 691
                   cdfName, sns, recallMin=10, recallRegMin=1000,
694 692
                   returnParams=FALSE, badSNP=.7) {
695
-
696 693
   if ((load.it | save.it) & missing(intensityFile))
697 694
     stop("'intensityFile' is missing, and you chose either load.it or save.it")
698 695
   if (!missing(intensityFile))
... ...
@@ -3,15 +3,15 @@ setValidity("CopyNumberSet", function(object) {
3 3
 	msg <- validMsg(NULL, assayDataValidMembers(assayData(object), c("CA", "CB")))
4 4
 	if (is.null(msg)) TRUE else msg
5 5
 })
6
-setMethod("CA", "CopyNumberSet", function(object) assayData(object)[["CA"]])
7
-setMethod("CB", "CopyNumberSet", function(object) assayData(object)[["CB"]])
6
+setMethod("CA", "CopyNumberSet", function(object) assayData(object)[["CA"]]/100)
7
+setMethod("CB", "CopyNumberSet", function(object) assayData(object)[["CB"]]/100)
8 8
 setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
9 9
 		 function(object, value) assayDataElementReplace(object, "CB", value))
10 10
 setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"),
11 11
 		 function(object, value) assayDataElementReplace(object, "CA", value))
12 12
 
13 13
 setMethod("chromosome", "CopyNumberSet", function(object) fData(object)$chromosome)
14
-setMethod("position", "CopyNumberSet", function(object) fData(object)$position)
14
+
15 15
 
16 16
 setMethod("copyNumber", "CopyNumberSet", function(object){
17 17
 	##ensure that 2 + NA = 2 by replacing NA's with zero
... ...
@@ -20,14 +20,17 @@ setMethod("copyNumber", "CopyNumberSet", function(object){
20 20
 	nas <- is.na(CA) & is.na(CB)
21 21
 	CA[is.na(CA)] <- 0
22 22
 	CB[is.na(CB)] <- 0
23
-	CN <- CA/100 + CB/100
23
+	CN <- CA + CB
24 24
 	##if both CA and CB are NA, report NA
25 25
 	CN[nas] <- NA
26 26
 	CN
27 27
 })
28 28
 
29
+setMethod("position", "CopyNumberSet", function(object) fData(object)$position)
30
+
29 31
 ##setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
30
-ellipse.CopyNumberSet <- function(x, copynumber, ...){
32
+##ellipse.CopyNumberSet <- function(x, copynumber, ...){
33
+setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
31 34
 	##fittedOrder <- unique(sapply(basename(celFiles), function(x) strsplit(x, "_")[[1]][2]))
32 35
 	##index <- match(plates, fittedOrder)
33 36
 	if(nrow(x) > 1) stop("only 1 snp at a time")
... ...
@@ -58,7 +61,7 @@ ellipse.CopyNumberSet <- function(x, copynumber, ...){
58 61
 				      scale=scale), ...)
59 62
 		}
60 63
 	}
61
-}
64
+})
62 65
 
63 66
 
64 67
 
... ...
@@ -39,7 +39,7 @@ setMethod("combine", signature=signature(x="CrlmmSetList", y="CrlmmSetList"),
39 39
 		  abset <- combine(x.abset, y.abset)
40 40
 
41 41
 		  ##we have hijacked the featureData slot to store parameters.  Biobase will not allow combining our 'feature' data.
42
-		  warning("removing featureData")
42
+		  warning("the featureData is not easily combined...  removing the featureData")
43 43
 		  ##fd1 <- featureData(x.snpset)
44 44
 		  ##fd2 <- featureData(y.snpset)
45 45
 		  featureData(x.snpset) <- annotatedDataFrameFrom(calls(x.snpset), byrow=TRUE)
... ...
@@ -53,18 +53,21 @@ setMethod("combine", signature=signature(x="CrlmmSetList", y="CrlmmSetList"),
53 53
 
54 54
 
55 55
 setMethod("featureNames", "CrlmmSetList", function(object) featureNames(object[[1]]))
56
+
56 57
 setMethod("plot", signature(x="CrlmmSetList"),
57 58
 	  function(x, y, ...){
58 59
 		  A <- log2(A(x))
59 60
 		  B <- log2(B(x))
60 61
 		  plot(A, B, ...)
61 62
 	  })
63
+
62 64
 setMethod("points", signature(x="CrlmmSetList"),
63 65
 	  function(x, y, ...){
64 66
 		  A <- log2(A(x))
65 67
 		  B <- log2(B(x))
66 68
 		  points(A, B, ...)
67 69
 	  })
70
+
68 71
 setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]]))
69 72
 setMethod("scanDates", "CrlmmSetList", function(object) scanDates(object[[1]]))
70 73
 setMethod("show", "CrlmmSetList", function(object){
... ...
@@ -78,8 +81,8 @@ setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir)
78 81
 	load(file.path(path, "cnProbes.rda"))				
79 82
 	for(CHR in 1:24){
80 83
 		cat("Chromosome ", CHR, "\n")
81
-		snps <- rownames(snpProbes)[snpProbes[, "chrom"] == CHR]
82
-		cnps <- rownames(cnProbes)[cnProbes[, "chrom"] == CHR]
84
+		snps <- rownames(snpProbes)[snpProbes$chr == CHR]
85
+		cnps <- rownames(cnProbes)[cnProbes$chr == CHR]
83 86
 		index <- c(match(snps, featureNames(object)),
84 87
 			   match(cnps, featureNames(object)))
85 88
 		crlmmResults <- object[index, ]
... ...
@@ -2,4 +2,4 @@
2 2
 
3 3
 
4 4
 setMethod("calls", "SnpSet", function(object) assayData(object)$call)
5
-setMethod("confs", "SnpSet", function(x) assayData(x)$callProbability)
5
+setMethod("confs", "SnpSet", function(object) assayData(object)$callProbability)
... ...
@@ -142,7 +142,7 @@ loader <- function(theFile, envir, pkgname){
142 142
 	theFile <- file.path(system.file(package=pkgname),
143 143
 			     "extdata", theFile)
144 144
 	if (!file.exists(theFile))
145
-		stop("File", theFile, "does not exist in", pkgname)
145
+		stop("File ", theFile, " does not exist in ", pkgname)
146 146
 	load(theFile, envir=envir)
147 147
 }
148 148
 
... ...
@@ -39,7 +39,6 @@ vignette.
39 39
 <<test, eval=FALSE, echo=FALSE>>=
40 40
 library(crlmm)
41 41
 load("~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda")
42
-
43 42
 chromosome(example.cnset)[1:5]
44 43
 position(example.cnset)[1:5]
45 44
 scanDates(example.cnset)[1:5]
... ...
@@ -53,6 +52,7 @@ library(genomewidesnp6Crlmm)
53 52
 Specify the complete path for the CEL files and a directory in which to
54 53
 store intermediate files:
55 54
 
55
+
56 56
 <<celfiles>>=
57 57
 celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full.names=TRUE, pattern=".CEL")
58 58
 outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
... ...
@@ -299,7 +299,7 @@ if(!exists("fit")){
299 299
 			       emission=emission.cn,
300 300
 			       tau=tau[, "transitionPr"],
301 301
 			       arm=tau[, "arm"], 
302
-			       normalIndex=2,
302
+			       normalIndex=3,
303 303
 			       normal2altered=0.005,
304 304
 			       altered2normal=0.5,
305 305
 			       altered2altered=0.0025)
306 306
new file mode 100644
... ...
@@ -0,0 +1,45 @@
1
+\name{ABset-class}
2
+\Rdversion{1.1}
3
+\docType{class}
4
+\alias{ABset-class}
5
+\alias{A}
6
+\alias{A,ABset-method}
7
+\alias{B}
8
+\alias{B,ABset-method}
9
+\title{Class "ABset"}
10
+\description{Containter for quantile-normalized intensities}
11
+\section{Objects from the Class}{
12
+Objects can be created by calls of the form \code{new("ABset", assayData, phenoData, featureData, experimentData, annotation, scanDates, ...)}.
13
+}
14
+\section{Slots}{
15
+	 \describe{
16
+    \item{\code{assayData}:}{Object of class \code{"AssayData"} }
17
+    \item{\code{phenoData}:}{Object of class \code{"AnnotatedDataFrame"}  }
18
+    \item{\code{featureData}:}{Object of class \code{"AnnotatedDataFrame"} }
19
+    \item{\code{experimentData}:}{Object of class \code{"MIAME"} }
20
+    \item{\code{annotation}:}{Object of class \code{"character"}  }
21
+    \item{\code{scanDates}:}{Object of class \code{"character"}  }
22
+    \item{\code{.__classVersion__}:}{Object of class \code{"Versions"}  }
23
+  }
24
+}
25
+\section{Extends}{
26
+Class \code{"\linkS4class{eSet}"}, directly.
27
+Class \code{"\linkS4class{VersionedBiobase}"}, by class "eSet", distance 2.
28
+Class \code{"\linkS4class{Versioned}"}, by class "eSet", distance 3.
29
+}
30
+\section{Methods}{
31
+  \describe{
32
+    \item{A}{\code{signature(object="ABset")}: accessor for the
33
+      quantile-normalized intensities of allele A for polymorphic probes
34
+      and the quantile normalized intensities for the copy number probes.}
35
+    \item{B}{\code{signature(object="ABset")}: accessor for the
36
+      quantile-normalized intensities of allele B for polymorphic probes.}    
37
+    \item{plot}{\code{signature(x = "ABset", y = "CopyNumberSet")}:
38
+... }}
39
+
40
+}
41
+\author{R. Scharpf}
42
+\examples{
43
+showClass("ABset")
44
+}
45
+\keyword{classes}
0 46
new file mode 100644
... ...
@@ -0,0 +1,85 @@
1
+\name{CopyNumberSet-class}
2
+\Rdversion{1.1}
3
+\docType{class}
4
+\alias{CopyNumberSet-class}
5
+\alias{CA}
6
+\alias{CA<-}
7
+\alias{CA,CopyNumberSet-method}
8
+\alias{CA<-,CopyNumberSet,matrix-method}
9
+\alias{CB}
10
+\alias{CB<-}
11
+\alias{CB,CopyNumberSet-method}
12
+\alias{CB<-,CopyNumberSet,matrix-method}
13
+\alias{chromosome,CopyNumberSet-method}
14
+\alias{copyNumber,CopyNumberSet-method}
15
+\alias{ellipse,CopyNumberSet-method}
16
+\alias{plot,ABset,CopyNumberSet-method}
17
+\alias{position,CopyNumberSet-method}
18
+\title{Class "CopyNumberSet"}
19
+\description{Container for allele-specific estimates of copy number and
20
+  the corresponding uncertainty}
21
+\section{Objects from the Class}{
22
+Objects can be created by calls of the form \code{new("CopyNumberSet", assayData, phenoData, featureData, experimentData, annotation, scanDates, ...)}.
23
+}
24
+\section{Slots}{
25
+	 \describe{
26
+    \item{\code{assayData}:}{Object of class \code{"AssayData"}}
27
+    \item{\code{phenoData}:}{Object of class \code{"AnnotatedDataFrame"}}
28
+    \item{\code{featureData}:}{Object of class \code{"AnnotatedDataFrame"} }
29
+    \item{\code{experimentData}:}{Object of class \code{"MIAME"}  }
30
+    \item{\code{annotation}:}{Object of class \code{"character"}  }
31
+    \item{\code{scanDates}:}{Object of class \code{"character"} }
32
+    \item{\code{.__classVersion__}:}{Object of class \code{"Versions"}  }
33
+  }
34
+}
35
+\section{Extends}{
36
+Class \code{"\linkS4class{eSet}"}, directly.
37
+Class \code{"\linkS4class{VersionedBiobase}"}, by class "eSet", distance 2.
38
+Class \code{"\linkS4class{Versioned}"}, by class "eSet", distance 3.
39
+}
40
+\section{Methods}{
41
+  \describe{
42
+
43
+    \item{CA}{\code{signature(object = "CopyNumberSet")}: Extracts the
44
+      copy number for allele 'A' at polymorphic loci. At nonpolymorphic
45
+      loci, the total copy number is returned.}
46
+
47
+    \item{CA<-}{\code{signature(object = "CopyNumberSet",
48
+    value="matrix")}: Replaces CA matrix with supplied value.}    
49
+
50
+    \item{CB}{\code{signature(object = "CopyNumberSet")}:
51
+      Extracts copy number for allele 'B' at polymorphic loci. NAs are
52
+      returned for nonpolymorphic loci.}    
53
+
54
+    \item{CB<-}{\code{signature(object = "CopyNumberSet",
55
+	value="matrix")}: Replaces CB matrix with supplied value .}
56
+
57
+    
58
+    \item{chromosome}{\code{signature(object = "CopyNumberSet")}:
59
+      Extract chromosome.}
60
+
61
+    \item{copyNumber}{\code{signature(object = "CopyNumberSet")}:
62
+      Returns CA + CB.}
63
+
64
+    \item{ellipse}{\code{signature(x = "CopyNumberSet", ...)}:
65
+      Extracts parameters from \code{featureData} slot and draws
66
+      prediction regions for the supplied copy number.}
67
+
68
+    \item{plot}{\code{signature(x = "ABset", y = "CopyNumberSet")}:
69
+      Physical position}
70
+    \item{position}{\code{signature(object = "CopyNumberSet")}: physical position}
71
+  }
72
+}
73
+
74
+\author{R. Scharpf}
75
+\examples{
76
+showClass("CopyNumberSet")
77
+
78
+\dontrun{
79
+##returns the copy number for allele A at polymorphic loci
80
+CA(object[snpIndex(object), ])
81
+##returns the total copy number at nonpolymorphic loci
82
+CA(object[cnIndex(object), ])
83
+}
84
+}
85
+\keyword{classes}
0 86
new file mode 100644
... ...
@@ -0,0 +1,118 @@
1
+\name{CrlmmSetList-class}
2
+\Rdversion{1.1}
3
+\docType{class}
4
+\alias{CrlmmSetList-class}
5
+\alias{[,CrlmmSetList-method}
6
+\alias{A,CrlmmSetList-method}
7
+\alias{B,CrlmmSetList-method}
8
+\alias{calls,CrlmmSetList-method}
9
+\alias{cnIndex,CrlmmSetList-method}
10
+\alias{combine,CrlmmSetList,CrlmmSetList-method}
11
+\alias{plot,CrlmmSetList,ANY-method}
12
+\alias{points,CrlmmSetList-method}
13
+\alias{sampleNames,CrlmmSetList-method}
14
+\alias{scanDates,CrlmmSetList-method}
15
+\alias{show,CrlmmSetList-method}
16
+\alias{snpIndex,CrlmmSetList-method}
17
+
18
+
19
+\title{Class "CrlmmSetList"}
20
+\description{Container for quantile normalized intensities and genotype calls.}
21
+\section{Objects from the Class}{
22
+  Objects from the class are created by calls to the \code{crlmmWrapper} function.
23
+}
24
+\section{Slots}{
25
+  \describe{
26
+    \item{\code{.Data}:}{Object of class \code{"list"} ~~ }
27
+  }    
28
+%	   \item{\code{ABset}:}{Object of class \code{"ABset"}.
29
+%	     Contains quantile normalized A and B intensities.  Quantile
30
+%	   normalized intensities for nonpolymorphic probes are stored
31
+%	   in the A slot.}
32
+%	 \item{\code{crlmmResult}:}{Object of class
33
+%	   \code{"SnpSet"}. Contains genotype calls and the
34
+%	   corresponding confidence estimates.}
35
+}
36
+
37
+\section{Details}{
38
+
39
+  Instances of \code{CrlmmSetList} are a list with two elements:
40
+
41
+  1.  an object of class \code{ABset}
42
+
43
+  2.  an object of class \code{SnpSet}
44
+
45
+  The \code{featureNames} and \code{sampleNames} of both objects are
46
+  required to be identical.
47
+
48
+  Quantile-normalized intensities for the copy number probes are stored
49
+  in the assay data element A of the ABset object.  Corresponding rows
50
+  for the B allele are recorded as NAs.  Genotype calls for copy number
51
+  probes in the SnpSet object are recorded as NAs.
52
+
53
+}
54
+  
55
+\section{Extends}{
56
+  Class \code{"\linkS4class{list}"}, from data part.
57
+  Class \code{"\linkS4class{vector}"}, by class "list", distance 2.
58
+  Class \code{"\linkS4class{AssayData}"}, by class "list", distance 2.
59
+}
60
+\section{Methods}{
61
+  \describe{
62
+    \item{"["}{\code{signature(x = "CrlmmSetList")}: subsets both the }
63
+  
64
+    \item{"A"}{\code{signature(object = "CrlmmSetList")}: extracts the
65
+      quantile normalized intensities for the A allele in the
66
+      \code{ABset} element.}
67
+    
68
+  
69
+    \item{"B"}{\code{signature(object = "CrlmmSetList")}: extracts the
70
+      quantile normalized intensities for the B allele in the
71
+      \code{ABset} element.}
72
+
73
+    \item{"calls"}{\code{signature(object = "CrlmmSetList")}: extracts
74
+      the genotype calls from the \code{SnpSet} element.}
75
+
76
+    \item{"cnIndex"}{\code{signature(object = "CrlmmSetList")}: returns
77
+      the row indices of the copy number probes.}
78
+
79
+    \item{"combine"}{\code{signature(x = "CrlmmSetList", y = "CrlmmSetList")}: combines
80
+      objects of the class.  }
81
+
82
+    \item{"plot"}{\code{signature(x = "CrlmmSetList")}: For A versus B
83
+      scatterplots.  }
84
+
85
+    \item{"points"}{\code{signature(x = "CrlmmSetList")}: For A versus B
86
+      scatterplots.  }
87
+
88
+    \item{"sampleNames"}{\code{signature(object = "CrlmmSetList")}:
89
+      Accessor for the column identifiers of the assay data elements.  }
90
+
91
+    \item{"show"}{\code{signature(object = "CrlmmSetList")}:
92
+      Shows the \code{ABset} and \code{SnpSet} elements of the list.  }
93
+
94
+
95
+    \item{"snpIndex"}{\code{signature(object = "CrlmmSetList")}: returns
96
+      the row indices of the polymorphic probes.}
97
+    
98
+    
99
+  }
100
+}
101
+
102
+
103
+\section{Warnings}{
104
+
105
+   \code{combine} will produce a warning as the \code{featureData} are
106
+   not easily combined and therefore removed.  This method needs
107
+   improvement.
108
+
109
+}
110
+\author{R. Scharpf}
111
+\seealso{
112
+  See also \code{"\link{crlmmWrapper}"},
113
+  \code{"\linkS4class{ABset}"}
114
+}
115
+\examples{
116
+showClass("CrlmmSetList")
117
+}
118
+\keyword{classes}
0 119
new file mode 100644
... ...
@@ -0,0 +1,58 @@
1
+\name{crlmmWrapper}
2
+\Rdversion{1.1}
3
+\alias{crlmmWrapper}
4
+\title{ Function to quantile normalize the cell files and genotype the
5
+  polymorphic loci.
6
+}
7
+\description{
8
+  Quantile normalizes the cel files to a target reference distribution
9
+  (both polymorphic and nonpolymorphic loci).  Genotype calls and
10
+  confidence scores are assigned using the crlmm algorithm.
11
+}
12
+\usage{
13
+crlmmWrapper(filenames, outdir = "./", cdfName = "genomewidesnp6", save.it, splitByChr=TRUE, ...)
14
+}
15
+\arguments{
16
+  \item{filenames}{
17
+    Complete path to cel files
18
+  }
19
+  \item{outdir}{
20
+    Where to store results.
21
+  }
22
+  \item{cdfName}{
23
+    Annotation package.
24
+  }
25
+  \item{save.it}{
26
+    Whether to save intermediate results. Sometimes this can be helpful.
27
+  }
28
+  \item{splitByChr}{
29
+    Logical. Whether to split results by chromosome (default is TRUE)
30
+  }
31
+  \item{\dots}{
32
+    additional arguments to function \code{crlmm}
33
+  }
34
+}
35
+\value{
36
+  An object of class \code{CrlmmSetList}
37
+}
38
+
39
+\author{
40
+  R. Scharpf
41
+}
42
+\seealso{
43
+  \code{\linkS4class{CrlmmSetList}}
44
+  \code{\link{crlmm}}
45
+}
46
+\examples{
47
+\dontrun{
48
+library(crlmm)
49
+library(genomewidesnp6Crlmm)
50
+celFiles <- list.files("PATH_TO_CELS", full.name=TRUE)
51
+crlmmList <- crlmmWrapper(celFiles, outdir="./", save.it=TRUE, intensityFile="intensities.rda")
52
+}
53
+}
54
+% Add one or more standard keywords, see file 'KEYWORDS' in the
55
+% R documentation directory.
56
+\keyword{robust}
57
+\keyword{methods}
58
+\keyword{list}
0 59
new file mode 100644
... ...
@@ -0,0 +1,48 @@
1
+\name{methods-eSet}
2
+\alias{cnIndex}
3
+\alias{cnIndex,eSet-method}
4
+\alias{snpIndex}
5
+\alias{snpIndex,eSet-method}
6
+
7
+
8
+\title{Methods for eSet derivatives}
9
+
10
+\description{
11
+
12
+  \code{snpIndex} return the row indices of polymorphic loci.
13
+
14
+  \code{cnIndex} return the row indices of copy number probes.
15
+
16
+}
17
+
18
+\usage{
19
+
20
+  cnIndex(object)
21
+
22
+  snpIndex(object)
23
+
24
+}
25
+
26
+\arguments{
27
+  
28
+  \item{object}{Any object extending eSet (ABset, CopyNumberSet,
29
+    CrlmmSetList, SnpSet}
30
+
31
+}
32
+
33
+\value{
34
+
35
+  Vector of indices.
36
+
37
+}
38
+
39
+\seealso{
40
+
41
+  \code{"\linkS4class{ABset}"}
42
+  \code{"\linkS4class{CopyNumberSet}"}
43
+  \code{"\linkS4class{CrlmmSetList}"}
44
+  
45
+}
46
+  
47
+
48
+\keyword{manip}
0 49
\ No newline at end of file