git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38566 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays. |
4 |
-Version: 1.0.75 |
|
4 |
+Version: 1.0.76 |
|
5 | 5 |
Date: 2008-12-30 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -138,7 +138,7 @@ cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALS |
138 | 138 |
## Loading data in .crlmmPkgEnv and extracting from there |
139 | 139 |
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
140 | 140 |
## data("npProbesFid", package=pkgname, envir=.crlmmPkgEnv) |
141 |
- fid <- getVarInEnv("npProbesFid") |
|
141 |
+ fid <- getVarInEnv("fid") |
|
142 | 142 |
set.seed(seed) |
143 | 143 |
idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
144 | 144 |
SKW <- vector("numeric", length(filenames)) |
... | ... |
@@ -531,7 +531,6 @@ withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envi |
531 | 531 |
uplate <- envir[["uplate"]] |
532 | 532 |
normal <- envir[["normal"]][, plate==uplate[p]] |
533 | 533 |
G <- calls; rm(calls); gc() |
534 |
- if(is.null(normal)) normal <- matrix(TRUE, nrow(G), ncol(G)) |
|
535 | 534 |
|
536 | 535 |
highConf <- 1-exp(-conf/1000) |
537 | 536 |
highConf <- highConf > CONF.THR |
... | ... |
@@ -548,7 +547,9 @@ withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envi |
548 | 547 |
##-------------------------------------------------- |
549 | 548 |
GT.B <- GT.A <- list() |
550 | 549 |
for(j in 1:3){ |
551 |
- GT <- G==j & highConf & IX & normal |
|
550 |
+ ##GT <- G==j & highConf & IX & normal |
|
551 |
+ GT <- G==j & highConf & IX |
|
552 |
+ GT <- GT * normal |
|
552 | 553 |
Ns[, p, j+2] <- rowSums(GT, na.rm=TRUE) |
553 | 554 |
GT[GT == FALSE] <- NA |
554 | 555 |
GT.A[[j]] <- GT*A |
... | ... |
@@ -633,8 +634,14 @@ oneBatch <- function(plateIndex, |
633 | 634 |
biasAdj(plateIndex=p, envir=envir, priorProb=priorProb) |
634 | 635 |
message("Recomputing location and scale parameters") |
635 | 636 |
} |
636 |
- withinGenotypeMoments(p=p, A=A, B=B, calls=calls, conf=conf, CONF.THR=CONF.THR, |
|
637 |
- DF.PRIOR=DF.PRIOR, envir=envir) |
|
637 |
+ withinGenotypeMoments(p=p, |
|
638 |
+ A=A, |
|
639 |
+ B=B, |
|
640 |
+ calls=calls, |
|
641 |
+ conf=conf, |
|
642 |
+ CONF.THR=CONF.THR, |
|
643 |
+ DF.PRIOR=DF.PRIOR, |
|
644 |
+ envir=envir) |
|
638 | 645 |
GT.A <- envir[["GT.A"]] |
639 | 646 |
GT.B <- envir[["GT.B"]] |
640 | 647 |
index <- envir[["index"]] |
... | ... |
@@ -953,7 +960,7 @@ polymorphic <- function(plateIndex, A, B, envir){ |
953 | 960 |
|
954 | 961 |
|
955 | 962 |
|
956 |
-biasAdj <- function(plateIndex, envir, priorProb){ |
|
963 |
+biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){ |
|
957 | 964 |
CHR <- envir[["chrom"]] |
958 | 965 |
if(CHR == 23){ |
959 | 966 |
phiAx <- envir[["phiAx"]] |
... | ... |
@@ -1042,18 +1049,26 @@ biasAdj <- function(plateIndex, envir, priorProb){ |
1042 | 1049 |
} |
1043 | 1050 |
##Those near 1 have NaNs for nu and phi. this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome |
1044 | 1051 |
propAlt <- rowMeans(tmp3)##prop normal |
1045 |
- ii <- propAlt < 0.75 |
|
1052 |
+ ##ii <- propAlt < PROP |
|
1053 |
+ ii <- propAlt > 0.05 |
|
1046 | 1054 |
##only exclude observations from one tail, depending on |
1047 | 1055 |
##whether more are up or down |
1048 | 1056 |
if(CHR != 23){ |
1049 |
- moreup <- rowSums(tmp2 > 3) > rowSums(tmp2 < 3) |
|
1050 |
- notUp <- tmp2[ii & moreup, ] <= 3 |
|
1051 |
- notDown <- tmp2[ii & !moreup, ] >= 3 |
|
1052 |
- NORM <- matrix(NA, nrow(A), ncol(A)) |
|
1053 |
- NORM[ii & moreup, ] <- notUp |
|
1054 |
- NORM[ii & !moreup, ] <- notDown |
|
1057 |
+ moreup <- rowSums(tmp2 > 3) > rowSums(tmp2 < 3) ##3 is normal |
|
1058 |
+ ##notUp <- tmp2[ii & moreup, ] <= 3 |
|
1059 |
+ ##notDown <- tmp2[ii & !moreup, ] >= 3 |
|
1060 |
+ |
|
1061 |
+ ##What if we just keep X% of the ones with the highest |
|
1062 |
+ ##posterior probability for normal |
|
1063 |
+ prNorm <- tmp[, , 3] |
|
1064 |
+ rankNorm <- t(apply(prNorm, 1, order, decreasing=TRUE)) |
|
1065 |
+ percentage <- round(0.75*ncol(prNorm),0) |
|
1066 |
+ NORM <- rankNorm <= percentage |
|
1067 |
+## NORM <- matrix(NA, nrow(A), ncol(A)) |
|
1068 |
+## NORM[ii & moreup, ] <- notUp |
|
1069 |
+## NORM[ii & !moreup, ] <- notDown |
|
1055 | 1070 |
##Define NORM so that we can iterate this step |
1056 |
- ##NA's in the previous iteration (normal) will be propogated |
|
1071 |
+ ##NA's in the previous iteration (normal) will be propogated |
|
1057 | 1072 |
normal <- NORM*normal |
1058 | 1073 |
} else{ |
1059 | 1074 |
fem <- tmp2[, gender=="female"] |
... | ... |
@@ -136,7 +136,6 @@ list2SnpSet <- function(x, returnParams=FALSE){ |
136 | 136 |
} |
137 | 137 |
|
138 | 138 |
loader <- function(theFile, envir, pkgname){ |
139 |
- ##stopifnot(theFile %in% c("genotypeStuff.rda", "mixtureStuff.rda", "preprocStuff.rda")) |
|
140 | 139 |
theFile <- file.path(system.file(package=pkgname), |
141 | 140 |
"extdata", theFile) |
142 | 141 |
if (!file.exists(theFile)) |