Browse code

fixed bug in cnrma

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38566 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 07/04/2009 19:57:07
Showing 4 changed files

... ...
@@ -120,4 +120,8 @@ is decoded and scanned
120 120
 
121 121
 * modifications to chromosome X copy number estimation
122 122
 
123
+2009-04-07 Rob Scharpf - committed versioni 1.0.76
124
+
125
+* fixed bug in cnrma
126
+
123 127
 
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays.
4
-Version: 1.0.75
4
+Version: 1.0.76
5 5
 Date: 2008-12-30
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -138,7 +138,7 @@ cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALS
138 138
 	## Loading data in .crlmmPkgEnv and extracting from there
139 139
         loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
140 140
 ##	data("npProbesFid", package=pkgname, envir=.crlmmPkgEnv)
141
-	fid <- getVarInEnv("npProbesFid")
141
+	fid <- getVarInEnv("fid")
142 142
 	set.seed(seed)
143 143
 	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
144 144
 	SKW <- vector("numeric", length(filenames))
... ...
@@ -531,7 +531,6 @@ withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envi
531 531
 	uplate <- envir[["uplate"]]
532 532
 	normal <- envir[["normal"]][, plate==uplate[p]]
533 533
 	G <- calls; rm(calls); gc()	
534
-	if(is.null(normal)) normal <- matrix(TRUE, nrow(G), ncol(G))
535 534
 
536 535
 	highConf <- 1-exp(-conf/1000)
537 536
 	highConf <- highConf > CONF.THR
... ...
@@ -548,7 +547,9 @@ withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envi
548 547
 	##--------------------------------------------------
549 548
 	GT.B <- GT.A <- list()
550 549
 	for(j in 1:3){
551
-		GT <- G==j & highConf & IX & normal
550
+		##GT <- G==j & highConf & IX & normal
551
+		GT <- G==j & highConf & IX
552
+		GT <- GT * normal
552 553
 		Ns[, p, j+2] <- rowSums(GT, na.rm=TRUE)		
553 554
 		GT[GT == FALSE] <- NA
554 555
 		GT.A[[j]] <- GT*A
... ...
@@ -633,8 +634,14 @@ oneBatch <- function(plateIndex,
633 634
 		biasAdj(plateIndex=p, envir=envir, priorProb=priorProb)
634 635
 		message("Recomputing location and scale parameters")		
635 636
 	}
636
-	withinGenotypeMoments(p=p, A=A, B=B, calls=calls, conf=conf, CONF.THR=CONF.THR,
637
-			      DF.PRIOR=DF.PRIOR, envir=envir)
637
+	withinGenotypeMoments(p=p,
638
+			      A=A,
639
+			      B=B,
640
+			      calls=calls,
641
+			      conf=conf,
642
+			      CONF.THR=CONF.THR,
643
+			      DF.PRIOR=DF.PRIOR,
644
+			      envir=envir)
638 645
 	GT.A <- envir[["GT.A"]]
639 646
 	GT.B <- envir[["GT.B"]]
640 647
 	index <- envir[["index"]]
... ...
@@ -953,7 +960,7 @@ polymorphic <- function(plateIndex, A, B, envir){
953 960
 
954 961
 
955 962
 
956
-biasAdj <- function(plateIndex, envir, priorProb){
963
+biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
957 964
 	CHR <- envir[["chrom"]]
958 965
 	if(CHR == 23){
959 966
 		phiAx <- envir[["phiAx"]]
... ...
@@ -1042,18 +1049,26 @@ biasAdj <- function(plateIndex, envir, priorProb){
1042 1049
 	}
1043 1050
 	##Those near 1 have NaNs for nu and phi.  this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome
1044 1051
 	propAlt <- rowMeans(tmp3)##prop normal
1045
-	ii <- propAlt < 0.75
1052
+	##ii <- propAlt < PROP
1053
+	ii <- propAlt > 0.05
1046 1054
 	##only exclude observations from one tail, depending on
1047 1055
 	##whether more are up or down
1048 1056
 	if(CHR != 23){
1049
-		moreup <- rowSums(tmp2 > 3) > rowSums(tmp2 < 3)
1050
-		notUp <-  tmp2[ii & moreup, ] <= 3
1051
-		notDown <- tmp2[ii & !moreup, ] >= 3
1052
-		NORM <- matrix(NA, nrow(A), ncol(A))
1053
-		NORM[ii & moreup, ] <- notUp
1054
-		NORM[ii & !moreup, ] <- notDown
1057
+		moreup <- rowSums(tmp2 > 3) > rowSums(tmp2 < 3) ##3 is normal
1058
+		##notUp <-  tmp2[ii & moreup, ] <= 3
1059
+		##notDown <- tmp2[ii & !moreup, ] >= 3
1060
+		
1061
+		##What if we just keep X% of the ones with the highest
1062
+		##posterior probability for normal
1063
+		prNorm <- tmp[, , 3]
1064
+		rankNorm <- t(apply(prNorm, 1, order, decreasing=TRUE))
1065
+		percentage <- round(0.75*ncol(prNorm),0)
1066
+		NORM <- rankNorm <= percentage
1067
+##		NORM <- matrix(NA, nrow(A), ncol(A))
1068
+##		NORM[ii & moreup, ] <- notUp
1069
+##		NORM[ii & !moreup, ] <- notDown
1055 1070
 		##Define NORM so that we can iterate this step
1056
-		##NA's in the previous iteration (normal) will be propogated		
1071
+		##NA's in the previous iteration (normal) will be propogated
1057 1072
 		normal <- NORM*normal
1058 1073
 	} else{
1059 1074
 		fem <- tmp2[, gender=="female"]
... ...
@@ -136,7 +136,6 @@ list2SnpSet <- function(x, returnParams=FALSE){
136 136
 }
137 137
 
138 138
 loader <- function(theFile, envir, pkgname){
139
-	##stopifnot(theFile %in% c("genotypeStuff.rda", "mixtureStuff.rda", "preprocStuff.rda"))
140 139
 	theFile <- file.path(system.file(package=pkgname),
141 140
 			     "extdata", theFile)
142 141
 	if (!file.exists(theFile))