Browse code

add scale.sd option to computePosteriorMeans

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@53576 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 10/03/2011 19:30:48
Showing 3 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.9.16
4
+Version: 1.9.17
5 5
 Date: 2010-12-10
6 6
 Author: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -71,5 +71,11 @@ export(constructIlluminaCNSet)
71 71
 export(totalCopynumber)
72 72
 exportMethods(A, B, nuA, nuB, phiA, phiB, corr, tau2, Ns, medians, mads)
73 73
 
74
+## for posterior means
75
+export(calculatePosteriorMean)
76
+exportMethods(posteriorMean, "posteriorMean<-")
77
+
78
+
79
+
74 80
 
75 81
 ##export(summarizeNps, genotypeSummary, fit.lm2)
... ...
@@ -2161,7 +2161,7 @@ imputeAcrossBatch <- function(N.AA, N.AB, N.BB,
2161 2161
 
2162 2162
 calculatePosteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"), verbose=TRUE,
2163 2163
 				   prior.prob=c(1/7, 1/7, 3/7, 1/7, 1/7),
2164
-				   CN=0:4){
2164
+				   CN=0:4, scale.sd=1){
2165 2165
 	stopifnot(type %in% c("SNP", "NP", "X.SNP", "X.NP"))
2166 2166
 	stopifnot(sum(prior.prob)==1)
2167 2167
 	stopifnot(length(CN) == length(prior.prob))
... ...
@@ -2173,14 +2173,16 @@ calculatePosteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"),
2173 2173
 	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
2174 2174
 	if(is.lds) stopifnot(isPackageLoaded("ff"))
2175 2175
 	samplesPerBatch <- table(as.character(batch(object)))
2176
-	message("adding <posteriorMean> slot to assayData")
2177
-	pM <- matrix(NA, nrow(object), ncol(object), dimnames=list(featureNames(object), sampleNames(object)))
2178
-	tmp <- assayDataNew(alleleA=A(object),
2179
-		    alleleB=B(object),
2180
-		    call=calls(object),
2181
-		    callProbability=snpCallProbability(object),
2182
-		    posteriorMean=pM)
2183
-	assayData(object) <- tmp
2176
+	if(!"posteriorMean" %in% assayDataElementNames(object)){
2177
+		message("adding <posteriorMean> slot to assayData")
2178
+		pM <- matrix(NA, nrow(object), ncol(object), dimnames=list(featureNames(object), sampleNames(object)))
2179
+		tmp <- assayDataNew(alleleA=A(object),
2180
+				    alleleB=B(object),
2181
+				    call=calls(object),
2182
+				    callProbability=snpCallProbability(object),
2183
+				    posteriorMean=pM)
2184
+		assayData(object) <- tmp
2185
+	}
2184 2186
 	## add new assay data element for posterior probabilities
2185 2187
 	mylabel <- function(marker.type){
2186 2188
 		switch(marker.type,
... ...
@@ -2203,7 +2205,8 @@ calculatePosteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"),
2203 2205
 				 index.list=marker.list,
2204 2206
 				 verbose=verbose,
2205 2207
 				 prior.prob=prior.prob,
2206
-				 CN=CN)
2208
+				 CN=CN,
2209
+				 scale.sd=scale.sd)
2207 2210
 		#for(i in seq_along(marker.list)){
2208 2211
 		#index <- marker.list[[i]]
2209 2212
 
... ...
@@ -2212,7 +2215,6 @@ calculatePosteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"),
2212 2215
 	if(length(emit)==1) emit <- emit[[1]] else stop("need to rbind elements of emit list?")
2213 2216
 	##tmp <- do.call("rbind", emit)
2214 2217
 	match.index <- match(rownames(emit), featureNames(object))
2215
-
2216 2218
 	S <- length(prior.prob)
2217 2219
 	pM <- matrix(0, length(match.index), ncol(object), dimnames=list(featureNames(object)[match.index], sampleNames(object)))
2218 2220
 	for(i in 1:S) pM <- pM + CN[i]*emit[, , i]
... ...
@@ -2221,7 +2223,9 @@ calculatePosteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"),
2221 2223
 }
2222 2224
 
2223 2225
 posteriorMean.snp <- function(stratum, object, index.list, CN,
2224
-			      prior.prob=c(1/7, 1/7, 3/7, 1/7, 1/7), is.lds=TRUE, verbose=TRUE){
2226
+			      prior.prob=c(1/7, 1/7, 3/7, 1/7, 1/7), is.lds=TRUE, verbose=TRUE,
2227
+			      scale.sd=1){
2228
+	if(length(scale.sd) == 1) rep(scale.sd,2)
2225 2229
 	if(verbose) message("Probe stratum ", stratum, " of ", length(index.list))
2226 2230
 	index <- index.list[[stratum]]
2227 2231
 	test <- tryCatch(open(A(object)), error=function(e) NULL)
... ...
@@ -2283,7 +2287,8 @@ posteriorMean.snp <- function(stratum, object, index.list, CN,
2283 2287
 		       cn1=2,
2284 2288
 		       cn2=3,
2285 2289
 		       cn3=4,
2286
-		       cn4=4, NULL)
2290
+		       cn4=4,
2291
+		       cn5=6, NULL)
2287 2292
 	}
2288 2293
 	##emit <- vector("list", length(sample.index))
2289 2294
 	for(j in seq_along(sample.index)){
... ...
@@ -2292,6 +2297,7 @@ posteriorMean.snp <- function(stratum, object, index.list, CN,
2292 2297
 		probs <- array(NA, dim=c(nrow(a), length(J), S))
2293 2298
 		for(k in seq_along(CN)){
2294 2299
 			CT <- CN[k]
2300
+			## 5: AAAAA, AAAAB, AAABB, AABBB, ABBBB, BBBBB
2295 2301
 			##CN=4
2296 2302
 			## AAAA, AAAB, AABB, ABBB, BBBB:  L = 4
2297 2303
 			##CN=3
... ...
@@ -2308,6 +2314,13 @@ posteriorMean.snp <- function(stratum, object, index.list, CN,
2308 2314
 				CB <- CT-CA
2309 2315
 				A.scale <- sqrt(tau2A[, j]*(CA==0) + sig2A[, j]*(CA > 0))
2310 2316
 				B.scale <- sqrt(tau2B[, j]*(CB==0) + sig2B[, j]*(CB > 0))
2317
+				if(CA == 0 | CB == 0){
2318
+					A.scale <- A.scale*scale.sd[1]
2319
+					B.scale <- B.scale*scale.sd[1]
2320
+				} else { ## both greater than zero
2321
+					A.scale <- A.scale*scale.sd[2]
2322
+					B.scale <- B.scale*scale.sd[2]
2323
+				}
2311 2324
 				if(CA == 0 & CB == 0) rho <- 0
2312 2325
 				if(CA == 0 & CB > 0) rho <- corrBB[, j]
2313 2326
 				if(CA > 0 & CB == 0) rho <- corrAA[, j]
... ...
@@ -2333,16 +2346,6 @@ posteriorMean.snp <- function(stratum, object, index.list, CN,
2333 2346
 	for(i in 1:S) emit[, , i] <- emit[, , i]*prior.prob[i]
2334 2347
 	total <- matrix(0, nrow(emit), ncol(emit))
2335 2348
 	for(i in 1:S) total <- total+emit[, , i]
2336
-	message(" need to check for outliers before rescaling")
2337
-	is.outlier <- total < 1e-5
2338
-##	plot(a[outlier.index[1], ], b[outlier.index[1], ], col="grey50",
2339
-##	     xaxt="n", yaxt="n", pch=21, cex=0.8,
2340
-##	     xlim=c(6.5, 12.5), ylim=c(6.5, 12.5), xlab="", ylab="")
2341
-##	for(j in seq_along(sample.index)){
2342
-##		J <- sample.index[[j]]
2343
-##		b <- unique(batch(object)[J])
2344
-##		for(CN in 2) 	lines(object, outlier.index[1], b, CN, col="black", lwd=2, x.axis="A")
2345
-##	}
2346 2349
 	## how to handle outliers...
2347 2350
 	##  - use priors (posterior mean will likely be near 2).  smoothing needs to take into account the uncertainty
2348 2351
 	##  - need uncertainty estimates for posterior means