git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@71295 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -208,7 +208,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
208 | 208 |
callsGt.present <- !missing(callsGt) |
209 | 209 |
callsPr.present <- !missing(callsPr) |
210 | 210 |
overwriteAB <- !callsGt.present & !callsPr.present |
211 |
- if(overwriteAB){ |
|
211 |
+ if(!overwriteAB){ |
|
212 | 212 |
open(callsGt) |
213 | 213 |
open(callsPr) |
214 | 214 |
} |
... | ... |
@@ -246,7 +246,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
246 | 246 |
close(A) |
247 | 247 |
close(B) |
248 | 248 |
close(mixtureParams) |
249 |
- if(overwriteAB){ |
|
249 |
+ if(!overwriteAB){ |
|
250 | 250 |
close(callsGt) |
251 | 251 |
close(callsPr) |
252 | 252 |
} |
* collab:
Update AffyGW.pdf and IlluminaPreprocessCN.pdf
crlmmIlluminaV2 calls crlmmGT. comment call to crlmmGT2
Update AffyGW vignette to step through construction of CNSet, normalization, and genotyping
v 1.15.27: Fix bug in crlmmGT2. clean up comments in crlmmIllumina.
update getFeatureData
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@69561 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -78,7 +78,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
78 | 78 |
} else YIndex2 <- YIndex |
79 | 79 |
message("Imputing gender") |
80 | 80 |
gender <- imputeGender(XIndex=XIndex2, YIndex=YIndex2) |
81 |
- cnSet$gender[,] <- gender |
|
81 |
+ ##cnSet$gender[,] <- gender |
|
82 | 82 |
} |
83 | 83 |
Indexes <- list(autosomeIndex, XIndex, YIndex) |
84 | 84 |
cIndexes <- list(keepIndex, |
* collab:
update Rds
revert imputeGender to original api
bug fix for calculateRBafCNSet
update calculateRBafCNSet
Fix documentation for crlmmCopynumber -- added argument fitLinearModel
update crlmmGT2
Put rm(DD, ...) further down in crlmmGT2 function
remove message about cloning A and B
Open and close callsPr and callsGt in crlmmGT2 (when args not missing)
revert removed indices loaded in crlmmGT2
snprmaAffy no longer writes normalized intensities to calls and callProbability slots. crlmmGT2 takes arguments callsGt and callsPr. When present, crlmmGT2 will not overwrite A and B.
explicit coercion to matrix in imputeGender
Assign imputed gender to cnSet$gender within crlmmGT2 function.
Change sum(SNR > SNRmin) to sum(SNR[] > SNRmin) in imputeGender
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@69450 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -2,7 +2,9 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
2 | 2 |
col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
3 | 3 |
SNRMin=5, recallMin=10, recallRegMin=1000, |
4 | 4 |
gender=NULL, desctrucitve=FALSE, verbose=TRUE, |
5 |
- returnParams=FALSE, badSNP=.7){ |
|
5 |
+ returnParams=FALSE, badSNP=.7, |
|
6 |
+ callsGt, |
|
7 |
+ callsPr){ |
|
6 | 8 |
pkgname <- getCrlmmAnnotationName(cdfName) |
7 | 9 |
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
8 | 10 |
open(SNR) |
... | ... |
@@ -20,7 +22,6 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
20 | 22 |
} else { |
21 | 23 |
index <- match(gns, rownames(A)) |
22 | 24 |
} |
23 |
- ##snpBatches <- splitIndicesByLength(index, ocProbesets(), balance=TRUE) |
|
24 | 25 |
snpBatches <- splitIndicesByLength(index, ocProbesets()) |
25 | 26 |
NR <- length(unlist(snpBatches)) |
26 | 27 |
if(verbose) message("Calling ", NR, " SNPs for recalibration... ") |
... | ... |
@@ -29,6 +30,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
29 | 30 |
if(verbose) message("Loading annotations.") |
30 | 31 |
obj1 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
31 | 32 |
obj2 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
33 |
+ ## |
|
32 | 34 |
## this is toget rid of the 'no visible binding' notes |
33 | 35 |
## variable definitions |
34 | 36 |
XIndex <- getVarInEnv("XIndex") |
... | ... |
@@ -43,20 +45,21 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
43 | 45 |
## use lexical scope |
44 | 46 |
imputeGender <- function(XIndex, YIndex){ |
45 | 47 |
if(length(YIndex) > 0){ |
46 |
- a <- log2(A[XIndex,,drop=FALSE]) |
|
47 |
- b <- log2(B[XIndex,,drop=FALSE]) |
|
48 |
+ a <- log2(as.matrix(A[XIndex,,drop=FALSE])) |
|
49 |
+ b <- log2(as.matrix(B[XIndex,,drop=FALSE])) |
|
48 | 50 |
meds.X <- (apply(a+b, 2, median))/2 |
49 |
- a <- log2(A[YIndex,,drop=FALSE]) |
|
50 |
- b <- log2(B[YIndex,,drop=FALSE]) |
|
51 |
+ ## Y |
|
52 |
+ a <- log2(as.matrix(A[YIndex,,drop=FALSE])) |
|
53 |
+ b <- log2(as.matrix(B[YIndex,,drop=FALSE])) |
|
51 | 54 |
meds.Y <- (apply(a+b, 2, median))/2 |
52 | 55 |
R <- meds.X - meds.Y |
53 |
- if(sum(SNR > SNRMin) == 1){ |
|
56 |
+ if(sum(SNR[] > SNRMin) == 1){ |
|
54 | 57 |
gender <- ifelse(R[SNR[] > SNRMin] > 0.5, 2L, 1L) |
55 | 58 |
} else{ |
56 | 59 |
gender <- kmeans(R, c(min(R[SNR[]>SNRMin]), max(R[SNR[]>SNRMin])))[["cluster"]] |
57 | 60 |
} |
58 | 61 |
} else { |
59 |
- XMedian <- apply(log2(A[XIndex,,drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
62 |
+ XMedian <- apply(log2(as.matrix(A[XIndex,,drop=FALSE]))+log2(as.matrix(B[XIndex,, drop=FALSE])), 2, median)/2 |
|
60 | 63 |
if(sum(SNR > SNRMin) == 1){ |
61 | 64 |
gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
62 | 65 |
} else{ |
... | ... |
@@ -75,6 +78,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
75 | 78 |
} else YIndex2 <- YIndex |
76 | 79 |
message("Imputing gender") |
77 | 80 |
gender <- imputeGender(XIndex=XIndex2, YIndex=YIndex2) |
81 |
+ cnSet$gender[,] <- gender |
|
78 | 82 |
} |
79 | 83 |
Indexes <- list(autosomeIndex, XIndex, YIndex) |
80 | 84 |
cIndexes <- list(keepIndex, |
... | ... |
@@ -186,8 +190,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
186 | 190 |
dev=apply(DD,1,function(x) x%*%SSI%*%x) |
187 | 191 |
dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev) |
188 | 192 |
} |
193 |
+ gc(verbose=FALSE) |
|
189 | 194 |
} |
190 |
- |
|
191 | 195 |
if (verbose) message("OK") |
192 | 196 |
## |
193 | 197 |
## BC: must keep SD |
... | ... |
@@ -201,6 +205,13 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
201 | 205 |
## ## MOVE TO C####### |
202 | 206 |
## |
203 | 207 |
## running in batches |
208 |
+ callsGt.present <- !missing(callsGt) |
|
209 |
+ callsPr.present <- !missing(callsPr) |
|
210 |
+ overwriteAB <- !callsGt.present & !callsPr.present |
|
211 |
+ if(overwriteAB){ |
|
212 |
+ open(callsGt) |
|
213 |
+ open(callsPr) |
|
214 |
+ } |
|
204 | 215 |
process2 <- function(idxBatch){ |
205 | 216 |
snps <- snpBatches[[idxBatch]] |
206 | 217 |
tmpA <- as.matrix(A[snps,]) |
... | ... |
@@ -222,14 +233,23 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
222 | 233 |
DF, probs, 0.025, |
223 | 234 |
which(regionInfo[snps, 2]), |
224 | 235 |
which(regionInfo[snps, 1])) |
225 |
- A[snps,] <- tmpA |
|
226 |
- B[snps,] <- tmpB |
|
236 |
+ if(overwriteAB){ |
|
237 |
+ A[snps,] <- tmpA |
|
238 |
+ B[snps,] <- tmpB |
|
239 |
+ } else { |
|
240 |
+ callsGt[snps, ] <- tmpA |
|
241 |
+ callsPr[snps, ] <- tmpB |
|
242 |
+ } |
|
227 | 243 |
} |
228 | 244 |
gc(verbose=FALSE) |
229 | 245 |
ocLapply(seq(along=snpBatches), process2, neededPkgs="crlmm") |
230 | 246 |
close(A) |
231 | 247 |
close(B) |
232 | 248 |
close(mixtureParams) |
249 |
+ if(overwriteAB){ |
|
250 |
+ close(callsGt) |
|
251 |
+ close(callsPr) |
|
252 |
+ } |
|
233 | 253 |
gc(verbose=FALSE) |
234 | 254 |
message("Done with process2") |
235 | 255 |
## END MOVE TO C####### |
* collab:
Update CNSet objects in data/ with datadir slot and protocolData(object)$filename
update cnrmaAffy. processCEL2 located inside cnrmaAffy function. Uses lexical scope.
Change API for genotypeAffy -- remove mixtureParams argument. Update call to genotypeAffy in genotype function
snprmaAffy no longer initializes mixtureParams object, but accesses this information from the cnSet
constructAffyCNSet initializes mixtureParams slot of the appropriate dimensions
updated cnrmaAffy. Removed cnrma2, cnrma. cnrmaAffy uses lexical scope
Fix bug in crlmmGT2 caused by unequal batch sizes
Moved rsprocessCel inside of snprmaAffy for lexical scope. Moved imputeGender inside crlmmGT2 for lexical scope
Revert imputeGender to original approach for crlmm. Splitting samples across nodes does not work well if there are not a lot of samples in the individual nodes. Probably better to use fewer markers on chr X when large number of samples are processed
contains old process1 call
change gender <- unlist(gender) to gender <- unlist(genderList)
v1.15.15 Fix memory leak in imputeGender step by running this function in sample batches of size ocSamples(). Use lexical scope in calling process1 function in crlmmGT2.
set default values in summarizeNps
depends on v 1.19.39 of oligoClasses
v1.15.14: Export constructAffyCNSet. Used datadir slot in CNSet object added to v 1.19.39 of oligoClasses
update getFeatureData for use with annotation package that contains a number of SNPs not necessarily included in the genotyping. These additional snps are removed when constructing the featureData in constructAffy
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@67826 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -20,6 +20,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
20 | 20 |
} else { |
21 | 21 |
index <- match(gns, rownames(A)) |
22 | 22 |
} |
23 |
+ ##snpBatches <- splitIndicesByLength(index, ocProbesets(), balance=TRUE) |
|
23 | 24 |
snpBatches <- splitIndicesByLength(index, ocProbesets()) |
24 | 25 |
NR <- length(unlist(snpBatches)) |
25 | 26 |
if(verbose) message("Calling ", NR, " SNPs for recalibration... ") |
... | ... |
@@ -39,14 +40,42 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
39 | 40 |
params <- getVarInEnv("params") |
40 | 41 |
rm(list=c(obj1, obj2), envir=.crlmmPkgEnv) |
41 | 42 |
rm(obj1, obj2) |
42 |
- ## |
|
43 |
- ## IF gender not provide, we predict |
|
44 |
- ## FIXME: XIndex may be greater than ocProbesets() |
|
43 |
+ ## use lexical scope |
|
44 |
+ imputeGender <- function(XIndex, YIndex){ |
|
45 |
+ if(length(YIndex) > 0){ |
|
46 |
+ a <- log2(A[XIndex,,drop=FALSE]) |
|
47 |
+ b <- log2(B[XIndex,,drop=FALSE]) |
|
48 |
+ meds.X <- (apply(a+b, 2, median))/2 |
|
49 |
+ a <- log2(A[YIndex,,drop=FALSE]) |
|
50 |
+ b <- log2(B[YIndex,,drop=FALSE]) |
|
51 |
+ meds.Y <- (apply(a+b, 2, median))/2 |
|
52 |
+ R <- meds.X - meds.Y |
|
53 |
+ if(sum(SNR > SNRMin) == 1){ |
|
54 |
+ gender <- ifelse(R[SNR[] > SNRMin] > 0.5, 2L, 1L) |
|
55 |
+ } else{ |
|
56 |
+ gender <- kmeans(R, c(min(R[SNR[]>SNRMin]), max(R[SNR[]>SNRMin])))[["cluster"]] |
|
57 |
+ } |
|
58 |
+ } else { |
|
59 |
+ XMedian <- apply(log2(A[XIndex,,drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
60 |
+ if(sum(SNR > SNRMin) == 1){ |
|
61 |
+ gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
|
62 |
+ } else{ |
|
63 |
+ gender <- kmeans(XMedian, c(min(XMedian[SNR[]>SNRMin]), max(XMedian[SNR[]>SNRMin])))[["cluster"]] |
|
64 |
+ } |
|
65 |
+ } |
|
66 |
+ return(gender) |
|
67 |
+ } |
|
45 | 68 |
if(is.null(gender)){ |
46 |
- if(verbose) message("Determining gender.") |
|
47 |
- gender <- imputeGender(A, B, XIndex, YIndex, SNR, SNRMin) |
|
69 |
+ if(ocProbesets() < length(XIndex)){ |
|
70 |
+ if(verbose) message("Using ", ocProbesets(), " SNPs on chrom X and Y to assign gender.") |
|
71 |
+ XIndex2 <- sample(XIndex, ocProbesets(), replace=FALSE) |
|
72 |
+ } else XIndex2 <- XIndex |
|
73 |
+ if(ocProbesets() < length(YIndex)){ |
|
74 |
+ YIndex2 <- sample(YIndex, ocProbesets(), replace=FALSE) |
|
75 |
+ } else YIndex2 <- YIndex |
|
76 |
+ message("Imputing gender") |
|
77 |
+ gender <- imputeGender(XIndex=XIndex2, YIndex=YIndex2) |
|
48 | 78 |
} |
49 |
- ## |
|
50 | 79 |
Indexes <- list(autosomeIndex, XIndex, YIndex) |
51 | 80 |
cIndexes <- list(keepIndex, |
52 | 81 |
keepIndex[which(gender[keepIndex]==2)], |
... | ... |
@@ -56,16 +85,13 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
56 | 85 |
mIndex <- which(gender==1) |
57 | 86 |
## different here |
58 | 87 |
## use gtypeCallerR in batches |
59 |
- ##snpBatches <- splitIndicesByLength(1:nrow(A), ocProbesets()) |
|
60 |
- newparamsBatch <- vector("list", length(snpBatches)) |
|
61 |
- process1 <- function(idxBatch, snpBatches, autosomeIndex, XIndex, |
|
62 |
- YIndex, A, B, mixtureParams, fIndex, mIndex, |
|
63 |
- params, cIndexes, SMEDIAN, theKnots, DF, probs, batchSize){ |
|
64 |
- open(A) |
|
65 |
- open(B) |
|
66 |
- open(mixtureParams) |
|
88 |
+ batchSize <- ocProbesets() |
|
89 |
+ open(A) |
|
90 |
+ open(B) |
|
91 |
+ open(mixtureParams) |
|
92 |
+ process1 <- function(idxBatch){ |
|
67 | 93 |
snps <- snpBatches[[idxBatch]] |
68 |
- rSnps <- range(snps) |
|
94 |
+ ##rSnps <- range(snps) |
|
69 | 95 |
last <- (idxBatch-1)*batchSize |
70 | 96 |
IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last, |
71 | 97 |
XIndex[XIndex %in% snps]-last, |
... | ... |
@@ -73,7 +99,6 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
73 | 99 |
IndexesBatch <- lapply(IndexesBatch, as.integer) |
74 | 100 |
tmpA <- as.matrix(A[snps,]) |
75 | 101 |
tmpB <- as.matrix(B[snps,]) |
76 |
- ## newparamsBatch[[idxBatch]] |
|
77 | 102 |
tmp <- gtypeCallerR(tmpA, tmpB, fIndex, mIndex, |
78 | 103 |
params[["centers"]][snps,], |
79 | 104 |
params[["scales"]][snps,], |
... | ... |
@@ -82,25 +107,13 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
82 | 107 |
sapply(IndexesBatch, length), |
83 | 108 |
sapply(cIndexes, length), SMEDIAN, |
84 | 109 |
theKnots, mixtureParams[], DF, probs, 0.025) |
85 |
- rm(snps, rSnps, IndexesBatch, tmpA, tmpB, last) |
|
86 |
- gc(verbose=FALSE) |
|
87 |
- close(A) |
|
88 |
- close(B) |
|
89 |
- close(mixtureParams) |
|
90 | 110 |
tmp |
91 | 111 |
} |
92 |
- ## |
|
93 |
- ##if(verbose) message("Calling process1") |
|
94 |
- newparamsBatch <- ocLapply(seq(along=snpBatches), process1, |
|
95 |
- snpBatches=snpBatches, |
|
96 |
- autosomeIndex=autosomeIndex, XIndex=XIndex, |
|
97 |
- YIndex=YIndex, A=A, B=B, |
|
98 |
- mixtureParams=mixtureParams, fIndex=fIndex, |
|
99 |
- mIndex=mIndex, params=params, |
|
100 |
- cIndexes=cIndexes, SMEDIAN=SMEDIAN, |
|
101 |
- theKnots=theKnots, DF=DF, probs=probs, |
|
102 |
- batchSize=ocProbesets(), |
|
103 |
- neededPkgs="crlmm") |
|
112 |
+ ## Lexical scope |
|
113 |
+ gc(verbose=FALSE) |
|
114 |
+ newparamsBatch <- ocLapply(seq(along=snpBatches), process1, neededPkgs="crlmm") |
|
115 |
+ gc(verbose=FALSE) |
|
116 |
+ if(verbose) message("finished process1") |
|
104 | 117 |
newparams <- vector("list", 3) |
105 | 118 |
names(newparams) <- c("centers", "scales", "N") |
106 | 119 |
newparams[["centers"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 1)) |
... | ... |
@@ -174,6 +187,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
174 | 187 |
dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev) |
175 | 188 |
} |
176 | 189 |
} |
190 |
+ |
|
177 | 191 |
if (verbose) message("OK") |
178 | 192 |
## |
179 | 193 |
## BC: must keep SD |
... | ... |
@@ -187,13 +201,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
187 | 201 |
## ## MOVE TO C####### |
188 | 202 |
## |
189 | 203 |
## running in batches |
190 |
- process2 <- function(idxBatch, snpBatches, autosomeIndex, XIndex, |
|
191 |
- YIndex, A, B, mixtureParams, fIndex, mIndex, |
|
192 |
- params, cIndexes, SMEDIAN, theKnots, DF, probs, |
|
193 |
- regionInfo, batchSize){ |
|
194 |
- open(A) |
|
195 |
- open(B) |
|
196 |
- open(mixtureParams) |
|
204 |
+ process2 <- function(idxBatch){ |
|
197 | 205 |
snps <- snpBatches[[idxBatch]] |
198 | 206 |
tmpA <- as.matrix(A[snps,]) |
199 | 207 |
tmpB <- as.matrix(B[snps,]) |
... | ... |
@@ -216,20 +224,14 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
216 | 224 |
which(regionInfo[snps, 1])) |
217 | 225 |
A[snps,] <- tmpA |
218 | 226 |
B[snps,] <- tmpB |
219 |
- rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull, last) |
|
220 |
- gc(verbose=FALSE) |
|
221 |
- close(A) |
|
222 |
- close(B) |
|
223 |
- close(mixtureParams) |
|
224 | 227 |
} |
225 |
- ## |
|
226 |
- ocLapply(seq(along=snpBatches), process2, snpBatches=snpBatches, |
|
227 |
- autosomeIndex=autosomeIndex, XIndex=XIndex, YIndex=YIndex, |
|
228 |
- A=A, B=B, mixtureParams=mixtureParams, fIndex=fIndex, |
|
229 |
- mIndex=mIndex, params=params, cIndexes=cIndexes, |
|
230 |
- SMEDIAN=SMEDIAN, theKnots=theKnots, DF=DF, probs=probs, |
|
231 |
- regionInfo=regionInfo, batchSize=ocProbesets(), |
|
232 |
- neededPkgs="crlmm") |
|
228 |
+ gc(verbose=FALSE) |
|
229 |
+ ocLapply(seq(along=snpBatches), process2, neededPkgs="crlmm") |
|
230 |
+ close(A) |
|
231 |
+ close(B) |
|
232 |
+ close(mixtureParams) |
|
233 |
+ gc(verbose=FALSE) |
|
234 |
+ message("Done with process2") |
|
233 | 235 |
## END MOVE TO C####### |
234 | 236 |
## ################## |
235 | 237 |
## |
* collab:
remove getCluster() calls and replace with parStatus()
update man pages for crlmm and genotype.Illumina with respect to the setup for parallelization
add neededPkgs argument to ocLapply calls in crlmmGT2
bump dependency on oligoClasses
Update R/crlmm-illumina.R
contructInf, preprocessInf and genotypeInf no longer exported
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64211 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -99,7 +99,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
99 | 99 |
mIndex=mIndex, params=params, |
100 | 100 |
cIndexes=cIndexes, SMEDIAN=SMEDIAN, |
101 | 101 |
theKnots=theKnots, DF=DF, probs=probs, |
102 |
- batchSize=ocProbesets()) |
|
102 |
+ batchSize=ocProbesets(), |
|
103 |
+ neededPkgs="crlmm") |
|
103 | 104 |
newparams <- vector("list", 3) |
104 | 105 |
names(newparams) <- c("centers", "scales", "N") |
105 | 106 |
newparams[["centers"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 1)) |
... | ... |
@@ -227,7 +228,8 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
227 | 228 |
A=A, B=B, mixtureParams=mixtureParams, fIndex=fIndex, |
228 | 229 |
mIndex=mIndex, params=params, cIndexes=cIndexes, |
229 | 230 |
SMEDIAN=SMEDIAN, theKnots=theKnots, DF=DF, probs=probs, |
230 |
- regionInfo=regionInfo, batchSize=ocProbesets()) |
|
231 |
+ regionInfo=regionInfo, batchSize=ocProbesets(), |
|
232 |
+ neededPkgs="crlmm") |
|
231 | 233 |
## END MOVE TO C####### |
232 | 234 |
## ################## |
233 | 235 |
## |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58665 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -12,20 +12,14 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
12 | 12 |
## expect objects to be ff |
13 | 13 |
keepIndex <- which( SNR[] > SNRMin) |
14 | 14 |
if(length(keepIndex)==0) stop("No arrays above quality threshold!") |
15 |
+ loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
16 |
+ gns <- getVarInEnv("gns", .crlmmPkgEnv) |
|
15 | 17 |
if(is.null(rownames(A))){ |
16 |
- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
17 |
- gns <- getVarInEnv("gns", .crlmmPkgEnv) |
|
18 | 18 |
stopifnot(nrow(A) == length(gns)) |
19 |
- index <- seq(length=nrow(A)) |
|
19 |
+ index <- seq_len(nrow(A)) |
|
20 |
+ } else { |
|
21 |
+ index <- match(gns, rownames(A)) |
|
20 | 22 |
} |
21 |
- snp.names <- snpNames(pkgname) |
|
22 |
- stopifnot(!is.null(rownames(A))) |
|
23 |
- index <- match(snp.names, rownames(A)) |
|
24 |
-## if(!missing(snp.names)){ |
|
25 |
-## |
|
26 |
-## ##verify that A has only snps. otherwise, calling function must pass rownames |
|
27 |
-## index <- match(snp.names, rownames(A)) |
|
28 |
-## } |
|
29 | 23 |
snpBatches <- splitIndicesByLength(index, ocProbesets()) |
30 | 24 |
NR <- length(unlist(snpBatches)) |
31 | 25 |
if(verbose) message("Calling ", NR, " SNPs for recalibration... ") |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58663 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -50,7 +50,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
50 | 50 |
## FIXME: XIndex may be greater than ocProbesets() |
51 | 51 |
if(is.null(gender)){ |
52 | 52 |
if(verbose) message("Determining gender.") |
53 |
- gender <- imputeGender(A, B, XIndex, YIndex) |
|
53 |
+ gender <- imputeGender(A, B, XIndex, YIndex, SNR, SNRMin) |
|
54 | 54 |
} |
55 | 55 |
## |
56 | 56 |
Indexes <- list(autosomeIndex, XIndex, YIndex) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58661 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -2,7 +2,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
2 | 2 |
col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
3 | 3 |
SNRMin=5, recallMin=10, recallRegMin=1000, |
4 | 4 |
gender=NULL, desctrucitve=FALSE, verbose=TRUE, |
5 |
- returnParams=FALSE, badSNP=.7, snp.names){ |
|
5 |
+ returnParams=FALSE, badSNP=.7){ |
|
6 | 6 |
pkgname <- getCrlmmAnnotationName(cdfName) |
7 | 7 |
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
8 | 8 |
open(SNR) |
... | ... |
@@ -18,11 +18,14 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
18 | 18 |
stopifnot(nrow(A) == length(gns)) |
19 | 19 |
index <- seq(length=nrow(A)) |
20 | 20 |
} |
21 |
- if(!missing(snp.names)){ |
|
22 |
- stopifnot(!is.null(rownames(A))) |
|
23 |
- ##verify that A has only snps. otherwise, calling function must pass rownames |
|
24 |
- index <- match(snp.names, rownames(A)) |
|
25 |
- } |
|
21 |
+ snp.names <- snpNames(pkgname) |
|
22 |
+ stopifnot(!is.null(rownames(A))) |
|
23 |
+ index <- match(snp.names, rownames(A)) |
|
24 |
+## if(!missing(snp.names)){ |
|
25 |
+## |
|
26 |
+## ##verify that A has only snps. otherwise, calling function must pass rownames |
|
27 |
+## index <- match(snp.names, rownames(A)) |
|
28 |
+## } |
|
26 | 29 |
snpBatches <- splitIndicesByLength(index, ocProbesets()) |
27 | 30 |
NR <- length(unlist(snpBatches)) |
28 | 31 |
if(verbose) message("Calling ", NR, " SNPs for recalibration... ") |
Clean commented code in crlmm-functions.R
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58659 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -47,14 +47,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
47 | 47 |
## FIXME: XIndex may be greater than ocProbesets() |
48 | 48 |
if(is.null(gender)){ |
49 | 49 |
if(verbose) message("Determining gender.") |
50 |
- ## XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
51 |
- XMedian <- ocLapply(splitIndicesByNode(1:NC), predictGender, theA=A, theB=B, XIndex=XIndex, neededPkgs="crlmm") |
|
52 |
- XMedian <- unlist(XMedian) |
|
53 |
- if(sum(SNR[] > SNRMin)==1){ |
|
54 |
- gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
|
55 |
- }else{ |
|
56 |
- gender <- kmeans(XMedian, c(min(XMedian[SNR[]>SNRMin]), max(XMedian[SNR[]>SNRMin])))[["cluster"]] |
|
57 |
- } |
|
50 |
+ gender <- imputeGender(A, B, XIndex, YIndex) |
|
58 | 51 |
} |
59 | 52 |
## |
60 | 53 |
Indexes <- list(autosomeIndex, XIndex, YIndex) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58617 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -47,7 +47,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
47 | 47 |
## FIXME: XIndex may be greater than ocProbesets() |
48 | 48 |
if(is.null(gender)){ |
49 | 49 |
if(verbose) message("Determining gender.") |
50 |
- ## XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
50 |
+ ## XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
51 | 51 |
XMedian <- ocLapply(splitIndicesByNode(1:NC), predictGender, theA=A, theB=B, XIndex=XIndex, neededPkgs="crlmm") |
52 | 52 |
XMedian <- unlist(XMedian) |
53 | 53 |
if(sum(SNR[] > SNRMin)==1){ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54377 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -100,7 +100,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
100 | 100 |
tmp |
101 | 101 |
} |
102 | 102 |
## |
103 |
- if(verbose) message("Calling process1") |
|
103 |
+ ##if(verbose) message("Calling process1") |
|
104 | 104 |
newparamsBatch <- ocLapply(seq(along=snpBatches), process1, |
105 | 105 |
snpBatches=snpBatches, |
106 | 106 |
autosomeIndex=autosomeIndex, XIndex=XIndex, |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54261 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -25,7 +25,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
25 | 25 |
} |
26 | 26 |
snpBatches <- splitIndicesByLength(index, ocProbesets()) |
27 | 27 |
NR <- length(unlist(snpBatches)) |
28 |
- if(verbose) cat("Calling", NR, "SNPs for recalibration... ") |
|
28 |
+ if(verbose) message("Calling ", NR, " SNPs for recalibration... ") |
|
29 | 29 |
NC <- ncol(A) |
30 | 30 |
## |
31 | 31 |
if(verbose) message("Loading annotations.") |
... | ... |
@@ -61,7 +61,6 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
61 | 61 |
cIndexes <- list(keepIndex, |
62 | 62 |
keepIndex[which(gender[keepIndex]==2)], |
63 | 63 |
keepIndex[which(gender[keepIndex]==1)]) |
64 |
- if(verbose) cat("Calling", NR, "SNPs for recalibration... ") |
|
65 | 64 |
## call C |
66 | 65 |
fIndex <- which(gender==2) |
67 | 66 |
mIndex <- which(gender==1) |
... | ... |
@@ -101,6 +100,7 @@ crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
101 | 100 |
tmp |
102 | 101 |
} |
103 | 102 |
## |
103 |
+ if(verbose) message("Calling process1") |
|
104 | 104 |
newparamsBatch <- ocLapply(seq(along=snpBatches), process1, |
105 | 105 |
snpBatches=snpBatches, |
106 | 106 |
autosomeIndex=autosomeIndex, XIndex=XIndex, |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@54171 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,264 @@ |
1 |
+crlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
|
2 |
+ col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
|
3 |
+ SNRMin=5, recallMin=10, recallRegMin=1000, |
|
4 |
+ gender=NULL, desctrucitve=FALSE, verbose=TRUE, |
|
5 |
+ returnParams=FALSE, badSNP=.7, snp.names){ |
|
6 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
7 |
+ stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
|
8 |
+ open(SNR) |
|
9 |
+ open(A) |
|
10 |
+ open(B) |
|
11 |
+ open(mixtureParams) |
|
12 |
+ ## expect objects to be ff |
|
13 |
+ keepIndex <- which( SNR[] > SNRMin) |
|
14 |
+ if(length(keepIndex)==0) stop("No arrays above quality threshold!") |
|
15 |
+ if(is.null(rownames(A))){ |
|
16 |
+ loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
17 |
+ gns <- getVarInEnv("gns", .crlmmPkgEnv) |
|
18 |
+ stopifnot(nrow(A) == length(gns)) |
|
19 |
+ index <- seq(length=nrow(A)) |
|
20 |
+ } |
|
21 |
+ if(!missing(snp.names)){ |
|
22 |
+ stopifnot(!is.null(rownames(A))) |
|
23 |
+ ##verify that A has only snps. otherwise, calling function must pass rownames |
|
24 |
+ index <- match(snp.names, rownames(A)) |
|
25 |
+ } |
|
26 |
+ snpBatches <- splitIndicesByLength(index, ocProbesets()) |
|
27 |
+ NR <- length(unlist(snpBatches)) |
|
28 |
+ if(verbose) cat("Calling", NR, "SNPs for recalibration... ") |
|
29 |
+ NC <- ncol(A) |
|
30 |
+ ## |
|
31 |
+ if(verbose) message("Loading annotations.") |
|
32 |
+ obj1 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
33 |
+ obj2 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
34 |
+ ## this is toget rid of the 'no visible binding' notes |
|
35 |
+ ## variable definitions |
|
36 |
+ XIndex <- getVarInEnv("XIndex") |
|
37 |
+ autosomeIndex <- getVarInEnv("autosomeIndex") |
|
38 |
+ YIndex <- getVarInEnv("YIndex") |
|
39 |
+ SMEDIAN <- getVarInEnv("SMEDIAN") |
|
40 |
+ theKnots <- getVarInEnv("theKnots") |
|
41 |
+ regionInfo <- getVarInEnv("regionInfo") |
|
42 |
+ params <- getVarInEnv("params") |
|
43 |
+ rm(list=c(obj1, obj2), envir=.crlmmPkgEnv) |
|
44 |
+ rm(obj1, obj2) |
|
45 |
+ ## |
|
46 |
+ ## IF gender not provide, we predict |
|
47 |
+ ## FIXME: XIndex may be greater than ocProbesets() |
|
48 |
+ if(is.null(gender)){ |
|
49 |
+ if(verbose) message("Determining gender.") |
|
50 |
+ ## XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
51 |
+ XMedian <- ocLapply(splitIndicesByNode(1:NC), predictGender, theA=A, theB=B, XIndex=XIndex, neededPkgs="crlmm") |
|
52 |
+ XMedian <- unlist(XMedian) |
|
53 |
+ if(sum(SNR[] > SNRMin)==1){ |
|
54 |
+ gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
|
55 |
+ }else{ |
|
56 |
+ gender <- kmeans(XMedian, c(min(XMedian[SNR[]>SNRMin]), max(XMedian[SNR[]>SNRMin])))[["cluster"]] |
|
57 |
+ } |
|
58 |
+ } |
|
59 |
+ ## |
|
60 |
+ Indexes <- list(autosomeIndex, XIndex, YIndex) |
|
61 |
+ cIndexes <- list(keepIndex, |
|
62 |
+ keepIndex[which(gender[keepIndex]==2)], |
|
63 |
+ keepIndex[which(gender[keepIndex]==1)]) |
|
64 |
+ if(verbose) cat("Calling", NR, "SNPs for recalibration... ") |
|
65 |
+ ## call C |
|
66 |
+ fIndex <- which(gender==2) |
|
67 |
+ mIndex <- which(gender==1) |
|
68 |
+ ## different here |
|
69 |
+ ## use gtypeCallerR in batches |
|
70 |
+ ##snpBatches <- splitIndicesByLength(1:nrow(A), ocProbesets()) |
|
71 |
+ newparamsBatch <- vector("list", length(snpBatches)) |
|
72 |
+ process1 <- function(idxBatch, snpBatches, autosomeIndex, XIndex, |
|
73 |
+ YIndex, A, B, mixtureParams, fIndex, mIndex, |
|
74 |
+ params, cIndexes, SMEDIAN, theKnots, DF, probs, batchSize){ |
|
75 |
+ open(A) |
|
76 |
+ open(B) |
|
77 |
+ open(mixtureParams) |
|
78 |
+ snps <- snpBatches[[idxBatch]] |
|
79 |
+ rSnps <- range(snps) |
|
80 |
+ last <- (idxBatch-1)*batchSize |
|
81 |
+ IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last, |
|
82 |
+ XIndex[XIndex %in% snps]-last, |
|
83 |
+ YIndex[YIndex %in% snps]-last) |
|
84 |
+ IndexesBatch <- lapply(IndexesBatch, as.integer) |
|
85 |
+ tmpA <- as.matrix(A[snps,]) |
|
86 |
+ tmpB <- as.matrix(B[snps,]) |
|
87 |
+ ## newparamsBatch[[idxBatch]] |
|
88 |
+ tmp <- gtypeCallerR(tmpA, tmpB, fIndex, mIndex, |
|
89 |
+ params[["centers"]][snps,], |
|
90 |
+ params[["scales"]][snps,], |
|
91 |
+ params[["N"]][snps,], |
|
92 |
+ IndexesBatch, cIndexes, |
|
93 |
+ sapply(IndexesBatch, length), |
|
94 |
+ sapply(cIndexes, length), SMEDIAN, |
|
95 |
+ theKnots, mixtureParams[], DF, probs, 0.025) |
|
96 |
+ rm(snps, rSnps, IndexesBatch, tmpA, tmpB, last) |
|
97 |
+ gc(verbose=FALSE) |
|
98 |
+ close(A) |
|
99 |
+ close(B) |
|
100 |
+ close(mixtureParams) |
|
101 |
+ tmp |
|
102 |
+ } |
|
103 |
+ ## |
|
104 |
+ newparamsBatch <- ocLapply(seq(along=snpBatches), process1, |
|
105 |
+ snpBatches=snpBatches, |
|
106 |
+ autosomeIndex=autosomeIndex, XIndex=XIndex, |
|
107 |
+ YIndex=YIndex, A=A, B=B, |
|
108 |
+ mixtureParams=mixtureParams, fIndex=fIndex, |
|
109 |
+ mIndex=mIndex, params=params, |
|
110 |
+ cIndexes=cIndexes, SMEDIAN=SMEDIAN, |
|
111 |
+ theKnots=theKnots, DF=DF, probs=probs, |
|
112 |
+ batchSize=ocProbesets()) |
|
113 |
+ newparams <- vector("list", 3) |
|
114 |
+ names(newparams) <- c("centers", "scales", "N") |
|
115 |
+ newparams[["centers"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 1)) |
|
116 |
+ newparams[["scales"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 2)) |
|
117 |
+ newparams[["N"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 3)) |
|
118 |
+ rm(newparamsBatch); gc(verbose=FALSE) |
|
119 |
+ if(verbose) message("Done.") |
|
120 |
+ if(verbose) message("Estimating recalibration parameters.") |
|
121 |
+ d <- newparams[["centers"]] - params$centers |
|
122 |
+ ## |
|
123 |
+ ##regression |
|
124 |
+ Index <- intersect(which(pmin(newparams[["N"]][, 1], |
|
125 |
+ newparams[["N"]][, 2], |
|
126 |
+ newparams[["N"]][, 3]) > recallMin & |
|
127 |
+ !apply(regionInfo, 1, any)), |
|
128 |
+ autosomeIndex) |
|
129 |
+ if(length(Index) < recallRegMin){ |
|
130 |
+ warning("Recalibration not possible. Possible cause: small sample size.") |
|
131 |
+ newparams <- params |
|
132 |
+ dev <- vector("numeric", nrow(newparams[["centers"]])) |
|
133 |
+ SS <- matrix(Inf, 3, 3) |
|
134 |
+ DD <- 0 |
|
135 |
+ }else{ |
|
136 |
+ data4reg <- as.data.frame(newparams[["centers"]][Index,]) |
|
137 |
+ names(data4reg) <- c("AA", "AB", "BB") |
|
138 |
+ regParams <- cbind( coef(lm(AA~AB*BB, data=data4reg)), |
|
139 |
+ c(coef(lm(AB~AA+BB, data=data4reg)), 0), |
|
140 |
+ coef(lm(BB~AA*AB, data=data4reg))) |
|
141 |
+ rownames(regParams) <- c("intercept", "X", "Y", "XY") |
|
142 |
+ rm(data4reg) |
|
143 |
+ ## |
|
144 |
+ minN <- 3 |
|
145 |
+ newparams[["centers"]][newparams[["N"]] < minN] <- NA |
|
146 |
+ Index <- setdiff(which(rowSums(is.na(newparams[["centers"]]))==1), YIndex) |
|
147 |
+ if(verbose) message("Filling out empty centers", appendLF=FALSE) |
|
148 |
+ for(i in Index){ |
|
149 |
+ if(verbose) if(i%%10000==0) message(".", appendLF=FALSE) |
|
150 |
+ mu <- newparams[["centers"]][i, ] |
|
151 |
+ j <- which(is.na(mu)) |
|
152 |
+ newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j] |
|
153 |
+ rm(mu, j) |
|
154 |
+ } |
|
155 |
+ ## |
|
156 |
+ ##remaing NAs are made like originals |
|
157 |
+ if(length(YIndex)>0){ |
|
158 |
+ noMoveIndex <- union(setdiff(which(rowSums(is.na(newparams[["centers"]]))>0), YIndex), |
|
159 |
+ YIndex[rowSums(is.na(newparams[["centers"]][YIndex, ])>1)]) |
|
160 |
+ } |
|
161 |
+ snps2ignore <- which(rowSums(is.na(newparams[["centers"]])) > 0) |
|
162 |
+ snps2keep <- setdiff(autosomeIndex, snps2ignore) |
|
163 |
+ rm(snps2ignore) |
|
164 |
+ newparams[["centers"]][is.na(newparams[["centers"]])] <- params[["centers"]][is.na(newparams[["centers"]])] |
|
165 |
+ if(verbose) cat("\n") |
|
166 |
+ ## |
|
167 |
+ if(verbose) message("Calculating and standardizing size of shift... ", appendLF=FALSE) |
|
168 |
+ GG <- DD <- newparams[["centers"]] - params[["centers"]] |
|
169 |
+ DD <- sweep(DD, 2, colMeans(DD[autosomeIndex, ])) |
|
170 |
+ SS <- cov(DD[autosomeIndex, ]) |
|
171 |
+ SSI <- solve(SS) |
|
172 |
+ dev <- vector("numeric", nrow(DD)) |
|
173 |
+ if(length(YIndex)){ |
|
174 |
+ dev[-YIndex] <- apply(DD[-YIndex, ], 1, function(x) x%*%SSI%*%x) |
|
175 |
+ dev[-YIndex] <- 1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev[-YIndex]) |
|
176 |
+ ##Now Y (only two params) |
|
177 |
+ SSY <- SS[c(1, 3), c(1, 3)] |
|
178 |
+ SSI <- solve(SSY) |
|
179 |
+ dev[YIndex] <- apply(DD[YIndex, c(1, 3)], 1, function(x) x%*%SSI%*%x) |
|
180 |
+ dev[YIndex] <- 1/sqrt( (2*pi)^2*det(SSY))*exp(-0.5*dev[YIndex]) |
|
181 |
+ } else { |
|
182 |
+ dev=apply(DD,1,function(x) x%*%SSI%*%x) |
|
183 |
+ dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev) |
|
184 |
+ } |
|
185 |
+ } |
|
186 |
+ if (verbose) message("OK") |
|
187 |
+ ## |
|
188 |
+ ## BC: must keep SD |
|
189 |
+ params[-2] <- newparams[-2] |
|
190 |
+ rm(newparams) |
|
191 |
+ gc(verbose=FALSE) |
|
192 |
+ ## |
|
193 |
+ if(verbose) message("Calling ", NR, " SNPs... ", appendLF=FALSE) |
|
194 |
+ ## |
|
195 |
+ ## ################### |
|
196 |
+ ## ## MOVE TO C####### |
|
197 |
+ ## |
|
198 |
+ ## running in batches |
|
199 |
+ process2 <- function(idxBatch, snpBatches, autosomeIndex, XIndex, |
|
200 |
+ YIndex, A, B, mixtureParams, fIndex, mIndex, |
|
201 |
+ params, cIndexes, SMEDIAN, theKnots, DF, probs, |
|
202 |
+ regionInfo, batchSize){ |
|
203 |
+ open(A) |
|
204 |
+ open(B) |
|
205 |
+ open(mixtureParams) |
|
206 |
+ snps <- snpBatches[[idxBatch]] |
|
207 |
+ tmpA <- as.matrix(A[snps,]) |
|
208 |
+ tmpB <- as.matrix(B[snps,]) |
|
209 |
+ rSnps <- range(snps) |
|
210 |
+ last <- (idxBatch-1)*batchSize |
|
211 |
+ IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last, |
|
212 |
+ XIndex[XIndex %in% snps]-last, |
|
213 |
+ YIndex[YIndex %in% snps]-last) |
|
214 |
+ IndexesBatch <- lapply(IndexesBatch, as.integer) |
|
215 |
+ ImNull <- gtypeCallerR2(tmpA, tmpB, fIndex, mIndex, |
|
216 |
+ params[["centers"]][snps,], |
|
217 |
+ params[["scales"]][snps,], |
|
218 |
+ params[["N"]][snps,], |
|
219 |
+ IndexesBatch, cIndexes, |
|
220 |
+ sapply(IndexesBatch, length), |
|
221 |
+ sapply(cIndexes, length), |
|
222 |
+ SMEDIAN, theKnots, mixtureParams[], |
|
223 |
+ DF, probs, 0.025, |
|
224 |
+ which(regionInfo[snps, 2]), |
|
225 |
+ which(regionInfo[snps, 1])) |
|
226 |
+ A[snps,] <- tmpA |
|
227 |
+ B[snps,] <- tmpB |
|
228 |
+ rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull, last) |
|
229 |
+ gc(verbose=FALSE) |
|
230 |
+ close(A) |
|
231 |
+ close(B) |
|
232 |
+ close(mixtureParams) |
|
233 |
+ } |
|
234 |
+ ## |
|
235 |
+ ocLapply(seq(along=snpBatches), process2, snpBatches=snpBatches, |
|
236 |
+ autosomeIndex=autosomeIndex, XIndex=XIndex, YIndex=YIndex, |
|
237 |
+ A=A, B=B, mixtureParams=mixtureParams, fIndex=fIndex, |
|
238 |
+ mIndex=mIndex, params=params, cIndexes=cIndexes, |
|
239 |
+ SMEDIAN=SMEDIAN, theKnots=theKnots, DF=DF, probs=probs, |
|
240 |
+ regionInfo=regionInfo, batchSize=ocProbesets()) |
|
241 |
+ ## END MOVE TO C####### |
|
242 |
+ ## ################## |
|
243 |
+ ## |
|
244 |
+ dev <- dev/(dev+1/383) |
|
245 |
+ if(!is.null(row.names)){ rownames(A) <- rownames(B) <- names(dev) <- row.names} |
|
246 |
+ if(!is.null(col.names)){ colnames(A) <- colnames(B) <- col.names} |
|
247 |
+ ## |
|
248 |
+ if(length(Index) >= recallRegMin){ |
|
249 |
+ tmp4batchQC <- DD[autosomeIndex,]/(params[["N"]][autosomeIndex,]+1) |
|
250 |
+ tmpSnpQc <- dev[autosomeIndex] |
|
251 |
+ SS <- cov(tmp4batchQC[tmpSnpQc < badSNP,]) |
|
252 |
+ batchQC <- mean(diag(SS)) |
|
253 |
+ }else{ |
|
254 |
+ SS <- matrix(0, 3, 3) |
|
255 |
+ batchQC <- Inf |
|
256 |
+ } |
|
257 |
+ ## |
|
258 |
+ if(verbose) message("Done.") |
|
259 |
+ if (returnParams){ |
|
260 |
+ return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, params=params, DD=DD, covDD=SS, gender=gender, pkgname=pkgname)) |
|
261 |
+ }else{ |
|
262 |
+ return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, DD=DD, covDD=SS, gender=gender, pkgname=pkgname)) |
|
263 |
+ } |
|
264 |
+} |