Browse code

fix for finding index of nonpolymorphic markers in genotype function

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50887 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 15/11/2010 14:21:45
Showing4 changed files

... ...
@@ -4,7 +4,6 @@ ModelForPolymorphicX.pdf
4 4
 *.o
5 5
 *.so
6 6
 *.tex
7
-sync
8 7
 illumina_copynumber-*
9 8
 Makefile2
10 9
 Rprof*
... ...
@@ -38,8 +38,8 @@ getFeatureData <- function(cdfName, copynumber=FALSE){
38 38
 	M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))]
39 39
 	M[index, "chromosome"] <- chromosome2integer(snpProbes[, grep("chr", colnames(snpProbes))])
40 40
 	M[index, "isSnp"] <- 1L
41
-	index <- which(is.na(M[, "isSnp"]))
42
-	M[index, "isSnp"] <- 1L
41
+	##index <- which(is.na(M[, "isSnp"]))
42
+	##M[index, "isSnp"] <- 1L
43 43
 	if(copynumber){
44 44
 		index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position
45 45
 		M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))]
... ...
@@ -47,7 +47,7 @@ getFeatureData <- function(cdfName, copynumber=FALSE){
47 47
 		M[index, "isSnp"] <- 0L
48 48
 	}
49 49
 	##A few of the snpProbes do not match -- I think it is chromosome Y.
50
-	M <- M[!is.na(M[, "chromosome"]), ]
50
+	M[is.na(M[, "isSnp"]), "isSnp"] <- 1L
51 51
 	M <- data.frame(M)
52 52
 	return(new("AnnotatedDataFrame", data=M))
53 53
 }
... ...
@@ -64,6 +64,7 @@ construct <- function(filenames,
64 64
 	if(verbose) message("Initializing container for copy number estimation")
65 65
 	featureData <- getFeatureData(cdfName, copynumber=copynumber)
66 66
 	if(!missing(fns)){
67
+		warning("subsetting the featureData can cause the snprma object and CNSet object to be misaligned. This problem is fixed in devel as we match the names prior to assigning values from snprma")
67 68
 		index <- match(fns, featureNames(featureData))
68 69
 		if(all(is.na(index))) stop("fns not in featureNames")
69 70
 		featureData <- featureData[index, ]
... ...
@@ -90,6 +91,8 @@ construct <- function(filenames,
90 91
 	pd <- data.frame(matrix(NA, nc, 3), row.names=sns)
91 92
 	colnames(pd)=c("SKW", "SNR", "gender")
92 93
 	phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
94
+	gns <- getVarInEnv("gns")
95
+	stopifnot(identical(gns, featureNames(cnSet)[1:length(gns)]))
93 96
 	return(cnSet)
94 97
 }
95 98
 
... ...
@@ -170,7 +173,7 @@ genotype <- function(filenames,
170 173
 	pData(callSet)$SKW <- snprmaRes[["SKW"]]
171 174
 	pData(callSet)$SNR <- snprmaRes[["SNR"]]
172 175
 	mixtureParams <- snprmaRes$mixtureParams
173
-	np.index <- which(!is.snp)
176
+	np.index <- which(!snp.I)
174 177
 	if(verbose) message("Normalizing nonpolymorphic markers")
175 178
 	FUN <- ifelse(is.lds, "cnrma2", "cnrma")
176 179
 	## main purpose is to update 'alleleA'
... ...
@@ -163,8 +163,16 @@ if(!file.exists(file.path(outdir, "cnSet.rda"))){
163 163
 			     .FUN=genotype,
164 164
 			     filenames=celFiles,
165 165
 			     cdfName=cdfName,
166
-			     batch=batch, .load.it=FALSE)
167
-	class(calls(gtSet))
166
+			     batch=batch)
167
+	## try normalizing np probes on chr X
168
+	callSet <- gtSet
169
+	filenames <- celFiles
170
+	snp.I <- isSnp(callSet)
171
+	is.snp <- which(snp.I)
172
+	np.index <- which(!is.snp)
173
+
174
+
175
+
168 176
 }
169 177
 @
170 178
 
... ...
@@ -247,21 +255,21 @@ stopifnot(all.equal(ct, ct2))
247 255
 
248 256
 Nonpolymorphic markers on X:
249 257
 
250
-\begin{figure}
251
-  \centering
258
+\begin{figure}[!t]
259
+\centering
252 260
 <<nonpolymorphicX>>=
253
-library(RColorBrewer)
254
-cols <- brewer.pal(8, "Paired")[3:4]
255
-cols <- rep(cols, each=5)
256
-set.seed(123)
257
-X.markers <- sample(which(chromosome(cnSet)==23 & !isSnp(cnSet)), 10e3)
261
+npx.index <- which(chromosome(cnSet)==23 & !isSnp(cnSet))
262
+a <- A(cnSet)[npx.index, M]
263
+nus <- nuA(cnSet)[npx.index, ]
264
+## nu and phi are not estimated
258 265
 M <- sample(which(cnSet$gender==1), 5)
259 266
 F <- sample(which(cnSet$gender==2), 5)
267
+X.markers <- which(!isSnp(cnSet) & chromosome(cnSet) == 23)
260 268
 cn.M <- CA(cnSet, i=X.markers, j=M)
261 269
 cn.F <- CA(cnSet, i=X.markers, j=F)
262
-boxplot(data.frame(cbind(cn.M, cn.F)), pch=".", xaxt="n", main="nonpolymorphic markers on X",
263
-	col=cols, ylim=c(0,5))
264
-legend("topleft", fill=unique(cols), legend=c("Male", "Female"))
270
+##phi(cnSet, "A")[X.markers[1:10]]
271
+index <- which(is.na(nuA(cnSet)[X.markers, 1]))
272
+boxplot(data.frame(cbind(cn.M, cn.F)), pch=".")
265 273
 @
266 274
 \caption{Copy number estimates for nonpolymorphic markers on
267 275
   chromosome X.  We assume that the median copy number (across
... ...
@@ -327,6 +335,7 @@ samples.
327 335
 <<check>>=
328 336
 dims(cnSet)
329 337
 print(object.size(cnSet), units="Mb")
338
+invisible(open(calls(cnSet)))
330 339
 gt <- calls(cnSet)[, 1:50]
331 340
 @
332 341
 
... ...
@@ -370,9 +379,9 @@ markers, we extract only the polymorphic markers using the
370 379
 <<assayData>>=
371 380
 rows <- which(isSnp(cnSet))
372 381
 cols <- which(batch == "C")
382
+invisible(open(snpCallProbability(cnSet)))
373 383
 posterior.prob <- tryCatch(i2p(snpCallProbability(cnSet)),
374 384
 			   error=function(e) print("This will not work for an ff object."))
375
-##posterior.prob2 <- p2i(snpCallProbability(cnSet)[rows, cols])
376 385
 @
377 386
 
378 387
 Accessors for the quantile normalized intensities for the A allele at
379 388
new file mode 100755
... ...
@@ -0,0 +1 @@
1
+rsync -avuzb --exclude '.git*' -e ssh ~/madman/Rpacks/git/crlmm/ enigma2.jhsph.edu:~/madman/Rpacks/release/crlmm