Browse code

Bug in copy number accessors fixed.

This fixes a bug that occurs when CA() or CB() for a random subset of
samples with mixed batches was accessed -- the order of samples
returned was incorrect. The dimnames of the resulting object are
returned as well.

copynumber.pdf vignette also updated

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50889 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 15/11/2010 14:21:58
Showing 3 changed files

... ...
@@ -311,18 +311,19 @@ setReplaceMethod("flags", signature=signature(object="CNSet", value="ff_matrix")
311 311
 ##   autosome NPs
312 312
 ##   chromosome X NPs for women
313 313
 C1 <- function(object, marker.index, batch.index, sample.index){
314
-	acn <- vector("list", length(batch.index))
314
+##	acn <- vector("list", length(batch.index))
315
+	acn <- matrix(NA, nrow=length(marker.index), ncol=length(sample.index))
315 316
 	for(k in seq_along(batch.index)){
316 317
 		l <- batch.index[k]
317 318
 		jj <- sample.index[as.character(batch(object))[sample.index] == batchNames(object)[l]]
318 319
 		bg <- nuA(object)[marker.index, l]
319 320
 		slope <- phiA(object)[marker.index, l]
320 321
 		I <- A(object)[marker.index, jj]
321
-		acn[[k]] <- 1/slope * (I - bg)
322
+		acn[, match(jj, sample.index)] <- 1/slope * (I - bg)
322 323
 	}
323
-	if(length(acn) > 1){
324
-		acn <- do.call("cbind", acn)
325
-	} else acn <- acn[[1]]
324
+##	if(length(acn) > 1){
325
+##		acn <- do.call("cbind", acn)
326
+##	} else acn <- acn[[1]]
326 327
 	return(as.matrix(acn))
327 328
 }
328 329
 
... ...
@@ -330,7 +331,7 @@ C1 <- function(object, marker.index, batch.index, sample.index){
330 331
 ##   autosome SNPs
331 332
 ##   chromosome X for male nonpolymorphic markers
332 333
 C2 <- function(object, marker.index, batch.index, sample.index, NP.X=FALSE){
333
-	acn <- vector("list", length(batch.index))
334
+	acn <- matrix(NA, nrow=length(marker.index), ncol=length(sample.index))
334 335
 	for(k in seq_along(batch.index)){
335 336
 		l <- batch.index[k]
336 337
 		jj <- sample.index[as.character(batch(object))[sample.index] == batchNames(object)[l]]
... ...
@@ -339,17 +340,18 @@ C2 <- function(object, marker.index, batch.index, sample.index, NP.X=FALSE){
339 340
 		if(!NP.X){
340 341
 			I <- B(object)[marker.index, jj]
341 342
 		} else I <- A(object)[marker.index, jj]
342
-		acn[[k]] <- 1/slope * (I - bg)
343
+		acn[, match(jj, sample.index)] <- 1/slope * (I - bg)
343 344
 	}
344
-	if(length(acn) > 1){
345
-		acn <- do.call("cbind", acn)
346
-	} else acn <- acn[[1]]
345
+##	if(length(acn) > 1){
346
+##		acn <- do.call("cbind", acn)
347
+##	} else acn <- acn[[1]]
347 348
 	return(as.matrix(acn))
348 349
 }
349 350
 
350 351
 ## Chromosome X SNPs
351 352
 C3 <- function(object, allele, marker.index, batch.index, sample.index){
352
-	acn <- vector("list", length(batch.index))
353
+##	acn <- vector("list", length(batch.index))
354
+	acn <- matrix(NA, nrow=length(marker.index), ncol=length(sample.index))
353 355
 	for(k in seq_along(batch.index)){
354 356
 		l <- batch.index[k]
355 357
 		##j <- which(as.character(batch(object))[sample.index] == batchNames(object)[l])
... ...
@@ -368,10 +370,16 @@ C3 <- function(object, allele, marker.index, batch.index, sample.index){
368 370
 		##CB <- 1/(1-phiA2*phiB2/(phiA*phiB)) * 1/phiB * (IA-nuB-phiB2/phiA*(IA-nuA))
369 371
 		CA <- (IA-nuA-phiA2*CB)/phiA
370 372
 		if(allele == "B"){
371
-			acn[[k]] <- CB
373
+			acn[, match(jj, sample.index)] <- CB
374
+			##acn[[k]] <- CB
372 375
 		}
373 376
 		if(allele == "A"){
374
-			acn[[k]] <- CA
377
+			acn[, match(jj, sample.index)] <- (IA-nuA-phiA2*CB)/phiA
378
+		}
379
+		if(allele == "AandB"){
380
+			CA <- tmp/(1-phistar*phiA2/phiB)
381
+			CB <- (IA-nuA-phiA2*CB)/phiA
382
+			acn[, match(jj, sample.index)] <- (IA-nuA-phiA2*CB)/phiA
375 383
 		}
376 384
 ##		if(allele=="AandB")
377 385
 ##			CA <- tmp/(1-phistar*phiA2/phiB)
... ...
@@ -379,9 +387,9 @@ C3 <- function(object, allele, marker.index, batch.index, sample.index){
379 387
 ##			acn[[k]] <- CA+CB
380 388
 ##		}
381 389
 	}
382
-	if(length(acn) > 1){
383
-		acn <- do.call("cbind", acn)
384
-	} else acn <- acn[[1]]
390
+##	if(length(acn) > 1){
391
+##		acn <- do.call("cbind", acn)
392
+##	} else acn <- acn[[1]]
385 393
 	return(as.matrix(acn))
386 394
 }
387 395
 
... ...
@@ -408,6 +416,7 @@ ACN <- function(object, allele, i , j){
408 416
 	## Define batch.index and sample.index
409 417
 	if(!missing.j) {
410 418
 		batches <- unique(as.character(batch(object))[j])
419
+		##batches <- as.character(batch(object)[j])
411 420
 		batch.index <- match(batches, batchNames(object))
412 421
 	} else {
413 422
 		batch.index <- seq_along(batchNames(object))
... ...
@@ -416,6 +425,8 @@ ACN <- function(object, allele, i , j){
416 425
 	nr <- length(i)
417 426
 	nc <- length(j)
418 427
 	acn <- matrix(NA, nr, nc)
428
+	dimnames(acn) <- list(featureNames(object)[i],
429
+			      sampleNames(object)[j])
419 430
 	if(allele == "A"){
420 431
 		if(is.ff){
421 432
 			open(nuA(object))
... ...
@@ -452,7 +463,7 @@ ACN <- function(object, allele, i , j){
452 463
 					close(B(object))
453 464
 				}
454 465
 			}
455
-			if(any(!is.snp)){  ## nonpolymorphic X needs to be fixed
466
+			if(any(!is.snp)){
456 467
 				marker.index <- i[is.X & !is.snp]
457 468
 				acn.index <- which(is.X & !is.snp)
458 469
 				acn[acn.index, ] <- NA
... ...
@@ -492,10 +503,14 @@ ACN <- function(object, allele, i , j){
492 503
 			open(phiB(object))
493 504
 			open(B(object))
494 505
 		}
506
+		if(any(!is.snp)){
507
+			acn.index <- which(!is.snp)
508
+			acn[acn.index, ] <- 0
509
+		}
495 510
 		if(any(is.auto)){
496 511
 			auto.index <- which(is.auto & is.snp)
497 512
 			if(length(auto.index) > 0){
498
-				marker.index <- i[is.auto]
513
+				marker.index <- i[auto.index]
499 514
 				acn[auto.index, ] <- C2(object, marker.index, batch.index, j)
500 515
 			}
501 516
 		}
... ...
@@ -554,16 +569,6 @@ setMethod("totalCopynumber", signature=signature(object="CNSet"),
554 569
 	  function(object, ...){
555 570
 		  ca <- CA(object, ...)
556 571
 		  cb <- CB(object, ...)
557
-##		  is.snp <- isSnp(object)
558
-##		  dotArgs <- list(...)
559
-##		  if("i" %in% names(dotArgs)){
560
-##			  i <- dotArgs[["i"]]
561
-##			  np.index <- which(!is.snp[i])
562
-##			  if(length(np.index) > 0) cb[np.index, ] <- 0
563
-##		  } else {
564
-##			  np.index <- which(!is.snp)
565
-##			  if(length(np.index) > 0) cb[np.index, ] <- 0
566
-##		  }
567 572
 		  return(ca+cb)
568 573
 	  })
569 574
 
... ...
@@ -164,12 +164,6 @@ if(!file.exists(file.path(outdir, "cnSet.rda"))){
164 164
 			     filenames=celFiles,
165 165
 			     cdfName=cdfName,
166 166
 			     batch=batch)
167
-	## try normalizing np probes on chr X
168
-##	callSet <- gtSet
169
-##	filenames <- celFiles
170
-##	snp.I <- isSnp(callSet)
171
-##	is.snp <- which(snp.I)
172
-##	np.index <- which(!is.snp)
173 167
 }
174 168
 @
175 169
 
... ...
@@ -194,9 +188,7 @@ The \Rfunction{crlmmCopynumber} performs the following steps:
194 188
   displayed during the processing.
195 189
 
196 190
 <<LDS_copynumber>>=
197
-GT.CONF.THR <- 0.90
198
-cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=gtSet, GT.CONF.THR=GT.CONF.THR)
199
-invisible(open(cnSet))
191
+cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=gtSet)
200 192
 @
201 193
 
202 194
 In an effort to reduce I/O, the \Rpackage{crlmmCopynumber} function no
... ...
@@ -251,31 +243,26 @@ stopifnot(all.equal(ct, ct2))
251 243
 @
252 244
 
253 245
 
254
-Nonpolymorphic markers on chromosome X:
255
-
256
-\begin{figure}
246
+\begin{figure}[t!]
247
+  Nonpolymorphic markers on chromosome X:
257 248
   \centering
258
-<<nonpolymorphicX, fig=TRUE>>=
249
+<<nonpolymorphicX, fig=TRUE, width=8, height=4>>=
259 250
 npx.index <- which(chromosome(cnSet)==23 & !isSnp(cnSet))
260 251
 M <- sample(which(cnSet$gender==1), 5)
261 252
 F <- sample(which(cnSet$gender==2), 5)
262
-##a <- A(cnSet)[npx.index, M]
263
-##nus <- nuA(cnSet)[npx.index, ]
264
-## nu and phi are not estimated
265 253
 cn.M <- CA(cnSet, i=npx.index, j=M)
266 254
 cn.F <- CA(cnSet, i=npx.index, j=F)
267
-boxplot(data.frame(cbind(cn.M, cn.F)), pch=".")
255
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", col="grey60", outline=FALSE)
268 256
 @
269
-\caption{Copy number estimates for nonpolymorphic markers on
270
-  chromosome X.  We assume that the median copy number (across
271
-  samples) at any given nonpolymorphic marker on chromosome X is 1 for
272
-  males and 2 for females.}
257
+\caption{Copy number estimates for nonpolymorphic loci on chromosome
258
+  X (5 men, 5 women).  crlmm assumes that the median copy number
259
+  across samples at a given marker on X is 1 for men and 2 for women.}
273 260
 \end{figure}
274 261
 
275
-Polymorphic markers, X chromosome:
276 262
 
277
-\begin{figure}
278
-  \centering
263
+
264
+\begin{figure}[t!]
265
+ \centering
279 266
 <<polymorphicX, fig=TRUE, width=8, height=4>>=
280 267
 ## copy number estimates on X for SNPs are biased towards small values.
281 268
 X.markers <- which(isSnp(cnSet) & chromosome(cnSet) == 23)
... ...
@@ -288,15 +275,12 @@ cn.F <- ca.F+cb.F
288 275
 boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col=cols, xaxt="n", ylim=c(0,5))
289 276
 legend("topleft", fill=unique(cols), legend=c("Male", "Female"))
290 277
 abline(h=c(1,2))
291
-## alternatively
292
-cn.MF1 <- cbind(cn.M, cn.F)
293
-cn.MF2 <- totalCopynumber(cnSet, i=X.markers, j=c(M,F))
294
-##stopifnot(all.equal(cn.MF1[!is.na(cn.MF1)], cn.MF2[!is.na(cn.MF2)]))
278
+cn2 <- totalCopynumber(cnSet, i=X.markers, j=c(M, F))
279
+stopifnot(all.equal(cbind(cn.M, cn.F), cn2))
295 280
 @
296 281
 \caption{Copy number estimates for polymorphic markers on chromosome
297
-  X.  We assume that the median copy number (across samples) at any
298
-  given polymorphic marker on chromosome X is 1 for males and 2 for
299
-  females.}
282
+  X. crlmm assumes that the median copy number across samples at a
283
+  given marker on X is 1 for men and 2 for women.}
300 284
 \end{figure}
301 285
 
302 286
 Accessors for physical position and chromosome are also provided. In
... ...
@@ -415,12 +399,11 @@ invisible(open(B(cnSet)))
415 399
 b.snps <- (B(cnSet))[snp.index, ]
416 400
 @
417 401
 
418
-Note that NAs are recorded in the 'B' assay data element for
419
-nonpolymorphic loci:
402
+Note that NA's are stored in the slot for normalized 'B' allele
403
+intensities:
420 404
 
421 405
 <<B.NAs>>=
422
-all(is.na((B(cnSet))[np.index, ]))
423
-invisible(close(B(cnSet)))
406
+all(is.na(B(cnSet)[np.index, ]))
424 407
 @
425 408
 
426 409
 <<clean, echo=FALSE>>=
... ...
@@ -481,12 +464,9 @@ plot(x, cn, pch=".",
481 464
      cex=2, xaxt="n", col="grey60", ylim=c(0,6),
482 465
      ylab="copy number", xlab="physical position (Mb)",
483 466
      main=paste(sampleNames(cnSet)[1], ", CHR: 1"))
484
-##np.index <- which(!isSnp(cnSet)[marker.index])
485
-##points(x[np.index], cn[np.index],
486
-##       pch=".", cex=2, col="lightblue")
487 467
 axis(1, at=pretty(x), labels=pretty(x)/1e6)
488 468
 require(SNPchip)
489
-invisible(plotCytoband(1, new=FALSE, cytoband.ycoords=c(5.3, 5.9), label.cytoband=FALSE))
469
+invisible(plotCytoband(1, new=FALSE, cytoband.ycoords=c(5.5, 6), label.cytoband=FALSE))
490 470
 @
491 471
 
492 472
 \begin{figure}
... ...
@@ -1 +1,2 @@
1
-rsync -avuzb --exclude '.git*' -e ssh ~/madman/Rpacks/git/crlmm/ enigma2.jhsph.edu:~/madman/Rpacks/release/crlmm
1
+rsync -avuzb --exclude '~*' --exclude '.git*' -e ssh ~/madman/Rpacks/git/crlmm/ enigma2.jhsph.edu:~/madman/Rpacks/release/crlmm
2
+rsync -avuzb --exclude '.git*' -e ssh enigma2.jhsph.edu:~/madman/Rpacks/release/crlmm/inst/scripts/copynumber.pdf ~/madman/Rpacks/git/crlmm/inst/scripts/copynumber.pdf