Browse code

Update generics for posteriorProbability and calculatePosteriorMean (exclude weight from generic of calculatePosteriorMean)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58637 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 01/10/2011 04:47:35
Showing4 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.11.15
4
+Version: 1.11.16
5 5
 Date: 2010-12-10
6 6
 Author: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -95,9 +95,9 @@ setGeneric("flags<-", function(object, value) standardGeneric("flags<-"))
95 95
 setGeneric("posteriorMean", function(object) standardGeneric("posteriorMean"))
96 96
 setGeneric("posteriorMean<-", function(object, value) standardGeneric("posteriorMean<-"))
97 97
 
98
-setGeneric("posteriorProbability", function(object, predictRegion, copyNumber=0:4)
98
+setGeneric("posteriorProbability", function(object, predictRegion, copyNumber=0:4, w)
99 99
 	   standardGeneric("posteriorProbability"))
100
-setGeneric("calculatePosteriorMean", function(object, posteriorProb, copyNumber=0:4, w, ...)
100
+setGeneric("calculatePosteriorMean", function(object, posteriorProb, copyNumber=0:4, ...)
101 101
 	   standardGeneric("calculatePosteriorMean"))
102 102
 
103 103
 setGeneric("predictionRegion", function(object, copyNumber=0:4)
... ...
@@ -2688,14 +2688,104 @@ genotypes <- function(copyNumber){
2688 2688
 
2689 2689
 dbvn <- function(x, mu, Sigma){
2690 2690
 	##stopifnot(ncol(x)==2)
2691
-	logA <- x[, , "A"]
2692
-	logB <- x[, , "B"]
2691
+	logA <- x[, , 1]
2692
+	logB <- x[, , 2]
2693 2693
 	sigma2A <- Sigma[,1]
2694 2694
 	sigma2B <- Sigma[,3]
2695 2695
 	sigmaAB <- sqrt(sigma2A*sigma2B)
2696 2696
 	rho <- Sigma[,2]
2697
-	muA <- mu[, "A"]
2698
-	muB <- mu[, "B"]
2697
+	muA <- mu[, 1]
2698
+	muB <- mu[, 2]
2699 2699
 	Q.x.y <- 1/(1-rho^2)*((logA - muA)^2/sigma2A + (logB - muB)^2/sigma2B - (2*rho*((logA - muA)*(logB - muB)))/sigmaAB)
2700 2700
 	f.x.y <- 1/(2*pi*sigmaAB*sqrt(1-rho^2))*exp(-0.5*Q.x.y)
2701 2701
 }
2702
+
2703
+ABpanel <- function(x, y, predictRegion,
2704
+		    copyNumber=0:4,
2705
+		    object,
2706
+##		    x.axis,
2707
+##		    line.col,
2708
+##		    line.lwd,
2709
+##		    shades,
2710
+		    ...,
2711
+		    subscripts){
2712
+##		    data.last=FALSE,
2713
+##		    highlight.index=NULL){
2714
+	##if(length(scale.sd)==1) scale.sd <- rep(scale.sd,3)
2715
+	panel.grid(h=5, v=5)
2716
+	panel.xyplot(x, y, ...)
2717
+##	if(!is.null(highlight.index)){
2718
+##		##ii <- subscripts[highlight.index]
2719
+##		ii <- highlight.index
2720
+##		lpoints(x[ii], y[ii], pch="X", cex=1.5, col="black")
2721
+##	}
2722
+	i <- panel.number()
2723
+	for(CN in copyNumber){
2724
+		gts <- genotypes(CN)
2725
+		index <- match(gts, names(predictRegion))
2726
+		pr <- predictRegion[index]
2727
+		for(i in seq_along(pr)){
2728
+			## scale?
2729
+			pr2 <- pr[[i]]
2730
+			mu <- pr2$mu
2731
+			Sigma <- pr2$cov
2732
+			for(j in seq_len(dim(mu)[3])){
2733
+				dat.ellipse <- ellipse(x=Sigma[, 2, j],
2734
+						       centre=mu[, , j],
2735
+						       scale=c(sqrt(Sigma[,1,j]),
2736
+						       sqrt(Sigma[, 3, j])))
2737
+				lpolygon(dat.ellipse[,1], dat.ellipse[,2], ...)
2738
+			}
2739
+##			} else {
2740
+##				dat.ellipse <- ellipse(x=rho, centre=c(log2(nuB+CB*phB), log2(nuA+CA*phA)), scale=rev(scale))
2741
+##			}
2742
+##			lpolygon(dat.ellipse[, 1], dat.ellipse[, 2], border=line.col[k], col=shades[k], ...)
2743
+		}
2744
+	}
2745
+##	nuB <- as.numeric(nu(object, "B"))[i]
2746
+##	phB <- as.numeric(phi(object, "B"))[i]
2747
+##	nuA <- as.numeric(nu(object, "A"))[i]
2748
+##	phA <- as.numeric(phi(object, "A"))[i]
2749
+##	taus <- tau2(object, i=i)[, , , 1]
2750
+##	cors <- corr(object, i=i)[, , 1]
2751
+##	t2A <- taus["A", "BB"]
2752
+##	t2B <- taus["B", "AA"]
2753
+##	s2A <- taus["A", "AA"]
2754
+##	s2B <- taus["B", "BB"]
2755
+##	corrAB <- cors["AB"]
2756
+##	corrAA <- cors["AA"]
2757
+##	corrBB <- cors["BB"]
2758
+##	k <- 1
2759
+##	for(CN in copynumber){
2760
+##		for(CA in 0:CN){
2761
+##			CB <- CN-CA
2762
+##			A.scale <- sqrt(t2A*(CA==0) + s2A*(CA > 0))
2763
+##			B.scale <- sqrt(t2B*(CB==0) + s2B*(CB > 0))
2764
+##			if(CA == 0 | CB == 0){
2765
+##				A.scale <- A.scale*scale.sd[1]
2766
+##				B.scale <- B.scale*scale.sd[1]
2767
+##			} else { ## both greater than zero
2768
+##				A.scale <- A.scale*scale.sd[2]
2769
+##				B.scale <- B.scale*scale.sd[2]
2770
+##			}
2771
+##			scale <- c(A.scale, B.scale)
2772
+##			if(CA == 0 & CB > 0) rho <- corrBB
2773
+##			if(CA > 0 & CB == 0) rho <- corrAA
2774
+##			if(CA > 0 & CB > 0) rho <- corrAB
2775
+##			if(CA == 0 & CB == 0) rho <- 0
2776
+##			if(x.axis=="A"){
2777
+##				dat.ellipse <- ellipse(x=rho, centre=c(log2(nuA+CA*phA), log2(nuB+CB*phB)), scale=scale)
2778
+##			} else {
2779
+##				dat.ellipse <- ellipse(x=rho, centre=c(log2(nuB+CB*phB), log2(nuA+CA*phA)), scale=rev(scale))
2780
+##			}
2781
+##			lpolygon(dat.ellipse[, 1], dat.ellipse[, 2], border=line.col[k], col=shades[k], ...)
2782
+##		}
2783
+##		k <- k+1
2784
+##	}
2785
+##	if(data.last) {
2786
+##		panel.xyplot(x, y, ...)
2787
+##		if(!is.null(highlight.index)){
2788
+##			lpoints(x[ii], y[ii], pch="X", cex=1.5, col="black")
2789
+##		}
2790
+##	}
2791
+}
... ...
@@ -284,7 +284,9 @@ setMethod("totalCopynumber", signature=signature(object="CNSet"),
284 284
 rawCopynumber <- totalCopynumber
285 285
 
286 286
 setMethod("posteriorProbability", signature(object="CNSet"),
287
-	  function(object, predictRegion, copyNumber=0:4){
287
+	  function(object, predictRegion, copyNumber=0:4, w){
288
+		  if(missing(w)) w <- rep(1/length(copyNumber),length(copyNumber))
289
+		  stopifnot(sum(w)==1)
288 290
 		  logI <- array(NA, dim=c(nrow(object), ncol(object), 2), dimnames=list(NULL, NULL, LETTERS[1:2]))
289 291
 		  getIntensity <- function(object){
290 292
 			  logI[, , 1] <- log2(A(object))
... ...
@@ -295,38 +297,56 @@ setMethod("posteriorProbability", signature(object="CNSet"),
295 297
 		  ##gts <- lapply(as.list(0:4), genotypes)
296 298
 		  prob <- array(NA, dim=c(nrow(object), ncol(object), length(copyNumber)),
297 299
 				dimnames=list(NULL,NULL, paste("copynumber",copyNumber, sep="")))
300
+		  bns <- batchNames(object)
298 301
 		  for(i in seq_along(copyNumber)){
299 302
 			  G <- genotypes(copyNumber[i])
300 303
 			  P <- array(NA, dim=c(nrow(object), ncol(object), length(G)),
301 304
 				     dimnames=list(NULL,NULL,G))
302 305
 			  for(g in seq_along(G)){
303 306
 				  gt <- G[g]
304
-				  P[, , gt] <- dbvn(x=logI, mu=predictRegion[[gt]]$mu, Sigma=predictRegion[[gt]]$cov)
307
+				  for(j in seq_along(bns)){
308
+					  this.batch <- bns[j]
309
+					  sample.index <- which(batch(object) == this.batch)
310
+					  mu <- predictRegion[[gt]]$mu[, , this.batch, drop=FALSE]
311
+					  dim(mu) <- dim(mu)[1:2]
312
+					  if(all(is.na(mu))){
313
+						  P[, sample.index, gt] <- NA
314
+					  } else {
315
+						  Sigma <- predictRegion[[gt]]$cov[, , this.batch, drop=FALSE]
316
+						  dim(Sigma) <- dim(Sigma)[1:2]
317
+						  P[, sample.index, gt] <- dbvn(x=logI[, sample.index, , drop=FALSE], mu=mu, Sigma=Sigma)
318
+					  }
319
+				  }
305 320
 			  }
306 321
 			  ## the marginal probability for the total copy number
307 322
 			  ##  -- integrate out the probability for the different genotypes
308 323
 			  for(j in 1:ncol(P)){
309
-				  prob[, j, i] <- rowSums(as.matrix(P[, j, ]), na.rm=TRUE)
324
+				  PP <- P[, j, , drop=FALSE]
325
+				  dim(PP) <- dim(PP)[c(1, 3)]
326
+				  prob[, j, i] <- rowSums(PP, na.rm=TRUE)
310 327
 			  }
311 328
 		  }
312
-		  ## divide by normalizing constant.  Probs across copy number states must sum to one.
313
-		  ##nc <- apply(prob, c(1,3), sum, na.rm=TRUE)
329
+
330
+		  ## scale by prior weights and divide by normalizing
331
+		  ##constant.  Probs across copy number states must
332
+		  ##sum to one.  nc <- apply(prob, c(1,3), sum,
333
+		  ##na.rm=TRUE)
334
+		  wm <- matrix(w, nrow(object), length(copyNumber), byrow=TRUE)
314 335
 		  nc <- matrix(NA, nrow(object), ncol(object))
315 336
 		  for(j in seq_len(ncol(object))){
316
-			  nc <- rowSums(as.matrix(prob[,j, ]), na.rm=TRUE)
317
-			  prob[, j, ] <- prob[, j, ]/nc
337
+			  pp <- prob[, j, ] * wm
338
+			  nc <- rowSums(pp, na.rm=TRUE)
339
+			  prob[, j, ] <- pp/nc
318 340
 		  }
319 341
 		  return(prob)
320 342
 	  })
321 343
 
322 344
 setMethod("calculatePosteriorMean", signature(object="CNSet"),
323
-	  function(object, posteriorProb, copyNumber=0:4, w, ...){
324
-		  if(missing(w)) w <- rep(1/length(copyNumber),length(copyNumber))
325
-		  stopifnot(sum(w)==1)
345
+	  function(object, posteriorProb, copyNumber=0:4, ...){
326 346
 		  stopifnot(dim(posteriorProb)[[3]] == length(copyNumber))
327 347
 		  pm <- matrix(0, nrow(object), ncol(object))
328 348
 		  for(i in seq_along(copyNumber)){
329
-			  pm <- pm + posteriorProb[, , i] * copyNumber[i] * w[i]
349
+			  pm <- pm + posteriorProb[, , i] * copyNumber[i]
330 350
 		  }
331 351
 		  return(pm)
332 352
 	  })
... ...
@@ -404,7 +424,23 @@ setMethod("predictionRegion", signature(object="CNSet", copyNumber="integer"),
404 424
 
405 425
 setMethod("xyplotcrlmm", signature(x="formula", data="CNSet", predictRegion="list"),
406 426
 	  function(x, data, predictRegion, ...){
407
-		  df <- data.frame(A=log2(A(data)), B=log2(B(data)), gt=calls(data), gt.conf=confs(data))#, snp=snpId)
427
+		  fns <- featureNames(data)
428
+		  fns <- matrix(fns, nrow(data), ncol(data), byrow=FALSE)
429
+		  fns <- as.character(fns)
430
+		  df <- list(A=as.numeric(log2(A(data))),
431
+			     B=as.numeric(log2(B(data))),
432
+			     gt=as.integer(calls(data)),
433
+			     gt.conf=as.numeric(confs(data)),
434
+			     snpid=fns)#, snp=snpId)
435
+		  df <- as.data.frame(df)
436
+		  bns <- batchNames(data)
437
+		  predictRegion <- lapply(predictRegion, function(x, bns){
438
+			  batch.index <- match(bns, dimnames(x$mu)[[3]])
439
+			  x$mu <- x$mu[, , batch.index, drop=FALSE]
440
+			  x$cov <- x$cov[, , batch.index, drop=FALSE]
441
+			  return(x)
442
+		  }, bns=bns)
443
+		  df$snpid <- as.character(df$snpid)
408 444
 		  ##df <- as.data.frame(data)
409 445
 		  xyplot(x, df, predictRegion=predictRegion, ...)
410 446
 	  })