Browse code

genotype function should be independent of annotation package version

no longer uses ffrowapply. Updates assayData elements of callSet column by column

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50807 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 11/11/2010 04:13:23
Showing 7 changed files

... ...
@@ -1,4 +1,5 @@
1 1
 auto*
2
+ModelForPolymorphicX.pdf
2 3
 .Rhistory
3 4
 *.o
4 5
 *.so
... ...
@@ -1,6 +1,6 @@
1 1
 useDynLib("crlmm", .registration=TRUE)
2 2
 ## this is temporary
3
-## exportPattern("^[^\\.]")
3
+ exportPattern("^[^\\.]")
4 4
 
5 5
 ##---------------------------------------------------------------------------
6 6
 ## Biobase
... ...
@@ -118,16 +118,13 @@ genotype <- function(filenames,
118 118
 			     sns=sns,
119 119
 			     verbose=verbose,
120 120
 			     batch=batch)
121
-	if(is.lds) open(callSet)
122
-	mixtureParams <- matrix(NA, 4, length(filenames))
123
-	is.snp <- isSnp(callSet)
124
-	snp.index <- which(is.snp)
125 121
 	FUN <- ifelse(is.lds, "snprma2", "snprma")
126 122
 	snprmaFxn <- function(FUN,...){
127 123
 		switch(FUN,
128 124
 		       snprma=snprma(...),
129 125
 		       snprma2=snprma2(...))
130 126
 	}
127
+
131 128
 	snprmaRes <- snprmaFxn(FUN,
132 129
 			       filenames=filenames,
133 130
 			       mixtureSampleSize=mixtureSampleSize,
... ...
@@ -137,6 +134,15 @@ genotype <- function(filenames,
137 134
 			       seed=seed,
138 135
 			       cdfName=cdfName,
139 136
 			       sns=sns)
137
+	gns <- snprmaRes[["gns"]]
138
+	snp.I <- isSnp(callSet)
139
+	is.snp <- which(snp.I)
140
+	snp.index <- match(featureNames(callSet)[is.snp], gns)
141
+	stopifnot(identical(featureNames(callSet)[is.snp], gns[snp.index]))
142
+##	is.snp <- isSnp(callSet)
143
+##	snp.index <- which(is.snp)
144
+	if(is.lds) open(callSet)
145
+	mixtureParams <- matrix(NA, 4, length(filenames))
140 146
 	##message("Saving snprmaRes file")
141 147
 	##save(snprmaRes, file=file.path(outdir, "snprmaRes.rda"))
142 148
 	if(verbose) message("Finished preprocessing.")
... ...
@@ -147,17 +153,21 @@ genotype <- function(filenames,
147 153
 		open(snprmaRes[["mixtureParams"]])
148 154
 		##bb <- getOption("ffbatchbytes")
149 155
 		message("Writing normalized intensities to callSet")
150
-		ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]])##, BATCHBYTES=bb)
151
-		ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]])##, BATCHBYTES=bb)
156
+		for(j in 1:ncol(callSet)){
157
+			A(callSet)[is.snp, j] <- snprmaRes[["A"]][snp.index, j]
158
+			B(callSet)[is.snp, j] <- snprmaRes[["B"]][snp.index, j]
159
+		}
160
+		##ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]])##, BATCHBYTES=bb)
161
+		##ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]])##, BATCHBYTES=bb)
152 162
 		pData(callSet)$SKW <- snprmaRes[["SKW"]]
153 163
 		pData(callSet)$SNR <- snprmaRes[["SNR"]]
154 164
 	} else{
155
-		A(callSet)[snp.index, ] <- snprmaRes[["A"]]
156
-		B(callSet)[snp.index, ] <- snprmaRes[["B"]]
165
+		A(callSet)[is.snp, ] <- snprmaRes[["A"]][snp.index, ]
166
+		B(callSet)[is.snp, ] <- snprmaRes[["B"]][snp.index, ]
157 167
 		pData(callSet)$SKW <- snprmaRes[["SKW"]]
158 168
 		pData(callSet)$SNR <- snprmaRes[["SNR"]]
159 169
 	}
160
-	np.index <- which(!is.snp)
170
+	np.index <- which(!snp.I)
161 171
 	FUN <- ifelse(is.lds, "cnrma2", "cnrma")
162 172
 	## main purpose is to update 'alleleA'
163 173
 	cnrmaFxn <- function(FUN,...){
... ...
@@ -203,11 +213,15 @@ genotype <- function(filenames,
203 213
 	if(is.lds){
204 214
 		open(tmp[["calls"]])
205 215
 		open(tmp[["confs"]])
206
-		ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]])#, BATCHBYTES=bb)
207
-		ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]])#, BATCHBYTES=bb)
216
+		for(j in 1:ncol(callSet)){
217
+			snpCall(callSet)[is.snp, j] <- tmp[["calls"]][snp.index, j]
218
+			snpCallProbability(callSet)[is.snp, j] <- tmp[["confs"]][snp.index, j]
219
+		}
220
+		##		ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]])#, BATCHBYTES=bb)
221
+		##		ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]])#, BATCHBYTES=bb)
208 222
 	} else {
209
-		calls(callSet)[snp.index, ] <- tmp[["calls"]]
210
-		snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]]
223
+		calls(callSet)[snp.index, ] <- tmp[["calls"]][snp.index, ]
224
+		snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]][snp.index, ]
211 225
 	}
212 226
 	message("Finished updating.  Cleaning up.")
213 227
 	callSet$gender <- tmp$gender
... ...
@@ -816,9 +830,9 @@ fit.lm3 <- function(strata,
816 830
 		nuA[nuA < MIN.NU] <- MIN.NU
817 831
 		nuB[nuB < MIN.NU] <- MIN.NU
818 832
 		phiA[phiA < MIN.PHI] <- MIN.PHI
819
-		phiA2[phiA2 < MIN.PHI] <- MIN.PHI
833
+		phiA2[phiA2 < 1] <- 1
820 834
 		phiB[phiB < MIN.PHI] <- MIN.PHI
821
-		phiB2[phiB2 < MIN.PHI] <- MIN.PHI
835
+		phiB2[phiB2 < 1] <- 1
822 836
 	}
823 837
 	nuA(object)[marker.index, ] <- nuA
824 838
 	nuB(object)[marker.index, ] <- nuB
... ...
@@ -958,7 +972,6 @@ cnrma <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){
958 972
 	if(verbose) message("Loading annotations for nonpolymorphic probes")
959 973
         loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
960 974
 	fid <- getVarInEnv("npProbesFid")
961
-
962 975
 	if(cdfName=="genomewidesnp6"){
963 976
 		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
964 977
 	}
... ...
@@ -1798,7 +1811,7 @@ crlmmCopynumber <- function(object,
1798 1811
 	samplesPerBatch <- table(as.character(batch(object)))
1799 1812
 	if(any(samplesPerBatch < MIN.SAMPLES)){
1800 1813
 		warning("The following batches have fewer than ", MIN.SAMPLES, ":")
1801
-		message(paste(samplesPerBatch[samplesPerBatch < MIN.SAMPLES], collapse=", "))
1814
+		message(paste(names(samplesPerBatch)[samplesPerBatch < MIN.SAMPLES], collapse=", "))
1802 1815
 		message("Not estimating copy number for the above batches")
1803 1816
 	}
1804 1817
 	mylabel <- function(marker.type){
... ...
@@ -364,14 +364,16 @@ C3 <- function(object, allele, marker.index, batch.index, sample.index){
364 364
 		phiA <- phiA(object)[marker.index, l]
365 365
 		IA <- A(object)[marker.index, jj]
366 366
 		IB <- B(object)[marker.index, jj]
367
-		phistar <- phiB2/phiA
368
-		tmp <- (IB - nuB - phistar*IA + phistar*nuA)/phiB
369
-		CB <- tmp/(1-phistar*phiA2/phiB)
367
+		##phistar <- phiB2/phiA
368
+		##tmp <- (IB - nuB - phistar*IA + phistar*nuA)/phiB
369
+		CB <- 1/(1-phiA2*phiB2/(phiA*phiB)) * 1/phiB * (IA-nuB-phiB2/phiA*(IA-nuA))
370
+		##CB <- tmp/(1-phistar*phiA2/phiB)
370 371
 		if(allele == "B"){
371 372
 			acn[[k]] <- CB
372 373
 		}
373 374
 		if(allele == "A"){
374
-			acn[[k]] <- (IA-nuA-phiA2*CB)/phiA
375
+			ca <- (IA-nuA-phiA2*CB)/phiA
376
+			acn[[k]] <- ca
375 377
 		}
376 378
 		if(allele == "AandB"){
377 379
 			CA <- tmp/(1-phistar*phiA2/phiB)
... ...
@@ -25,7 +25,7 @@ testingFF: testingFF.Rnw
25 25
 	qsub -m e -r y -cwd -l mem_free=12G,h_vmem=16G testingFF.R.sh
26 26
 
27 27
 affy:	copynumber.Rnw
28
-	echo "Stangle(\"copynumber.Rnw\")" | R --no-save --no-restore;	
28
+	echo "Stangle(\"copynumber.Rnw\")" | R-devel --no-save --no-restore;	
29 29
 	cat ~/bin/cluster.template | perl -pe "s/Rprog/copynumber.R/" > copynumber.R.sh
30 30
 	qsub -m e -r y -cwd -l mem_free=12G,h_vmem=16G copynumber.R.sh
31 31
 
... ...
@@ -52,7 +52,7 @@ separately) that are listed below.
52 52
 
53 53
 <<annotationPackages>>=
54 54
 pkgs <- annotationPackages()
55
-pkgs <- pgks[grep("Crlmm", pkgs)]
55
+pkgs <- pkgs[grep("Crlmm", pkgs)]
56 56
 pkgs
57 57
 @
58 58
 
... ...
@@ -157,6 +157,7 @@ be run interactively.
157 157
 
158 158
 <<LDS_genotype>>=
159 159
 if(!file.exists(file.path(outdir, "cnSet.rda"))){
160
+	##trace(genotype, browser)
160 161
 	gtSet <- checkExists("gtSet",
161 162
 			     .path=outdir,
162 163
 			     .FUN=genotype,
... ...
@@ -190,6 +191,9 @@ The \Rfunction{crlmmCopynumber} performs the following steps:
190 191
 <<LDS_copynumber>>=
191 192
 GT.CONF.THR <- 0.90
192 193
 cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=gtSet, GT.CONF.THR=GT.CONF.THR)
194
+invisible(open(cnSet))
195
+q("no")
196
+stop("here")
193 197
 @
194 198
 
195 199
 In an effort to reduce I/O, the \Rpackage{crlmmCopynumber} function no
... ...
@@ -243,15 +247,21 @@ ct2 <- totalCopynumber(cnSet, i=marker.index, j=1:5)
243 247
 stopifnot(all.equal(ct, ct2))
244 248
 @
245 249
 
246
-TODO: FIX estimation for nonpolymorphic markers on X
250
+Nonpolymorphic markers on X:
247 251
 
248
-<<nonpolymorphicX, eval=FALSE>>=
252
+<<nonpolymorphicX>>=
249 253
 ## nu and phi are not estimated appropriately for nonpolymorphic X markers.
250
-X.markers <- which(!isSnp(cnSet) & chromosome(cnSet) == 23)
251
-cn.M <- CA(cnSet, i=X.markers, j=which(cnSet$gender==1))
252
-cn.F <- CA(cnSet, i=X.markers, j=which(cnSet$gender==2))
253
-phi(cnSet, "A")[X.markers[1:10]]
254
-boxplot(data.frame(cbind(cn.M, cn.F)), pch=".")
254
+library(RColorBrewer)
255
+cols <- brewer.pal(8, "Paired")[3:4]
256
+cols <- rep(cols, each=5)
257
+##set.seed(123)
258
+X.markers <- sample(which(!isSnp(cnSet) & chromosome(cnSet) == 23), 20e3)
259
+cn.M <- CA(cnSet, i=X.markers, j=sample(which(cnSet$gender==1), 5))
260
+cn.F <- CA(cnSet, i=X.markers, j=sample(which(cnSet$gender==2), 5))
261
+##phi(cnSet, "A")[X.markers[1:10]]
262
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", xaxt="n", main="nonpolymorphic markers on X",
263
+	col=cols)
264
+legend("topleft", fill=unique(cols), legend=c("Male", "Female"))
255 265
 @
256 266
 
257 267
 
... ...
@@ -261,17 +271,94 @@ Polymorphic markers, X chromosome:
261 271
   \centering
262 272
 <<polymorphicX, fig=TRUE, width=8, height=4>>=
263 273
 ## copy number estimates on X for SNPs are biased towards small values.
264
-X.markers <- which(isSnp(cnSet) & chromosome(cnSet) == 23)
265
-ca.M <- CA(cnSet, i=X.markers, j=which(cnSet$gender==1))
266
-cb.M <- CB(cnSet, i=X.markers, j=which(cnSet$gender==1))
267
-ca.F <- CA(cnSet, i=X.markers, j=which(cnSet$gender==2))
268
-cb.F <- CB(cnSet, i=X.markers, j=which(cnSet$gender==2))
274
+X.markers <- sample(which(isSnp(cnSet) & chromosome(cnSet) == 23), 25e3)
275
+
276
+##genomewidesnp6Crlmm is correct.  FeatureData is incorrect.
277
+index <- match(featureNames(cnSet), rownames(snpProbes))
278
+sum(snpProbes[index, "chrom"] != chromosome(cnSet))
279
+
280
+M <- sample(which(cnSet$gender==1), 5)
281
+F <- sample(which(cnSet$gender==2), 5)
282
+
283
+fns <- list.files("~/hapmap_metadata", full.names=T)
284
+dat <- read.delim(fns[2])
285
+sns <- substr(sampleNames(cnSet), 1, 7)
286
+dat <- dat[match(sns, dat$IID), ]
287
+stopifnot(all.equal(as.character(dat$IID), sns))
288
+identical(cnSet$gender, dat$sex)
289
+
290
+cols <- brewer.pal(8, "Paired")[3:4]
291
+cols <- cols[cnSet$gender]
292
+ia <- A(cnSet)[X.markers, ]##c(M, F)]
293
+ib <- B(cnSet)[X.markers, ]##c(M, F)]
294
+i.total <- ia+ib
295
+boxplot(data.frame(log2(i.total)), pch=".", col=cols)
296
+legend("topleft", fill=unique(cols), legend=c("Male", "Female"))
297
+
298
+nus <- nuA(cnSet)[X.markers, ]
299
+phiss <- phiA(cnSet)[X.markers, ]
300
+
301
+
302
+xSet <- cnSet[X.markers, ]
303
+a <- as.matrix(A(xSet))
304
+b <- as.matrix(B(xSet))
305
+gt <- as.matrix(calls(xSet))
306
+nuA <- nu(xSet, "A")
307
+phA <- phi(xSet, "A")
308
+col <- brewer.pal(7, "Accent")[c(1, 4, 7)]
309
+fns.xset <- featureNames(xSet)
310
+
311
+path <- system.file("extdata", package="genomewidesnp6Crlmm")
312
+load(file.path(path, "snpProbes.rda"))
313
+index <- match(fns.xset, rownames(snpProbes))
314
+all(snpProbes[index, "chrom"]==23)
315
+
316
+pkg <- "pd.genomewidesnp.6"
317
+library(pd.genomewidesnp.6)
318
+conn <- db(get(pkg))
319
+sql <- "SELECT man_fsetid, chrom, physical_pos FROM featureSet"
320
+snps <- dbGetQuery(conn, sql)
321
+index <- match(fns.xset, snps$man_fsetid)
322
+all(snps$chrom[index] == "X")
323
+
324
+## are we writing intensities to the wrong location??
325
+
326
+
327
+
328
+###################################################
329
+### chunk number 25: ABscatterplots
330
+###################################################
331
+#line 599 "manuscript.Rnw"
332
+lA <- log2(a)
333
+lB <- log2(b)
334
+cols <- c("blue", "black", "red")
335
+par(las=1, mfrow=c(4,4), mar=rep(0.5,4), oma=c(4,4, 4,4))
336
+for(i in 1:16){
337
+	plot(lB[i, ], lA[i, ], col="grey50", bg=col[gt[i, ]], xaxt="n", yaxt="n", pch=21, cex=0.8,
338
+	     xlim=c(6.5, 12.5), ylim=c(6.5, 12.5), xlab="", ylab="")
339
+	for(CN in 1:3) 	lines(xSet, i, "GIGAS", CN, col=cols[CN], lwd=2, x.axis="B")
340
+}
341
+mtext(expression(log[2](I[B])), 1, outer=TRUE)
342
+par(las=3)
343
+mtext(expression(log[2](I[A])), 2, outer=TRUE)
344
+
345
+
346
+
347
+boxplot(data.frame(ia+ib), pch=".", col=cols)
348
+trace(ACN, browser)
349
+trace(C3, browser)
350
+ca.M <- CA(cnSet, i=X.markers, j=M)
351
+cb.M <- CB(cnSet, i=X.markers, j=M)
352
+ca.F <- CA(cnSet, i=X.markers, j=F)
353
+cb.F <- CB(cnSet, i=X.markers, j=F)
269 354
 cn.M <- ca.M+cb.M
270 355
 cn.F <- ca.F+cb.F
271
-boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col="grey60")
356
+tmp <- totalCopynumber(cnSet, i=X.markers, j=c(M, F))
357
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", outline=FALSE, col=cols, xaxt="n")
358
+boxplot(data.frame(tmp), pch=".", outline=FALSE, col=cols, xaxt="n")
272 359
 abline(h=c(1,2))
273 360
 ## alternatively
274
-cn.F2 <- totalCopynumber(cnSet, i=X.markers, j=which(cnSet$gender==2))
361
+cn.F2 <- totalCopynumber(cnSet, i=X.markers, j=F)
275 362
 stopifnot(all.equal(cn.F, cn.F2))
276 363
 @
277 364
 \caption{Copy number estimates for polymorphic markers on chromosome X}
... ...
@@ -588,6 +675,13 @@ are drawbacks to this approach, including variance estimates that are
588 675
 overly optimistic.  More direct approaches for outlier detection and
589 676
 removal may be explored in the future.
590 677
 
678
+Copy number estimates for other chromosomes, such as mitochondrial and
679
+chromosome Y, are not currently available in \crlmm{}.
680
+
681
+<<echo=FALSE>>=
682
+invisible(close(cnSet))
683
+@
684
+
591 685
 
592 686
 \section{Session information}
593 687
 <<sessionInfo, results=tex>>=
594 688
Binary files a/inst/scripts/copynumber.pdf and b/inst/scripts/copynumber.pdf differ