git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48944 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -101,93 +101,93 @@ construct <- function(filenames, cdfName, copynumber=FALSE, |
101 | 101 |
return(callSet) |
102 | 102 |
} |
103 | 103 |
|
104 |
-genotype <- function(filenames, |
|
105 |
- cdfName, |
|
106 |
- batch, |
|
107 |
- mixtureSampleSize=10^5, |
|
108 |
- eps=0.1, |
|
109 |
- verbose=TRUE, |
|
110 |
- seed=1, |
|
111 |
- sns, |
|
112 |
- copynumber=TRUE, |
|
113 |
- probs=rep(1/3, 3), |
|
114 |
- DF=6, |
|
115 |
- SNRMin=5, |
|
116 |
- recallMin=10, |
|
117 |
- recallRegMin=1000, |
|
118 |
- gender=NULL, |
|
119 |
- returnParams=TRUE, |
|
120 |
- badSNP=0.7){ |
|
121 |
- if(missing(cdfName)) stop("must specify cdfName") |
|
122 |
- if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
|
123 |
- if(missing(sns)) sns <- basename(filenames) |
|
124 |
- ## callSet contains potentially very big matrices |
|
125 |
- ## More big matrices are created within snprma, that will then be removed. |
|
126 |
- callSet <- construct(filenames=filenames, |
|
127 |
- cdfName=cdfName, |
|
128 |
- copynumber=copynumber, |
|
129 |
- sns=sns, |
|
130 |
- verbose=verbose, |
|
131 |
- batch=batch) |
|
132 |
- ##lM(callSet) <- initializeParamObject(list(featureNames(callSet), unique(protocolData(callSet)$batch))) |
|
133 |
- mixtureParams <- matrix(NA, 4, length(filenames)) |
|
134 |
- snp.index <- which(isSnp(callSet)==1) |
|
135 |
- sample.strata <- splitIndicesByLength(1:ncol(callSet), ocSamples()) |
|
136 |
- iter <- 1 |
|
137 |
-## for(j in batches){ |
|
138 |
- j <- unlist(sample.strata) |
|
139 |
- if(verbose) message("Batch ", iter, " of ", length(sample.strata)) |
|
140 |
- snprmaRes <- snprma(filenames=filenames[j], |
|
141 |
- mixtureSampleSize=mixtureSampleSize, |
|
142 |
- fitMixture=TRUE, |
|
143 |
- eps=eps, |
|
144 |
- verbose=verbose, |
|
145 |
- seed=seed, |
|
146 |
- cdfName=cdfName, |
|
147 |
- sns=sns[j]) |
|
148 |
- stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns)) |
|
149 |
- pData(callSet)$SKW[j] <- snprmaRes$SKW |
|
150 |
- pData(callSet)$SNR[j] <- snprmaRes$SNR |
|
151 |
- suppressWarnings(A(callSet)[snp.index, j] <- snprmaRes[["A"]]) |
|
152 |
- suppressWarnings(B(callSet)[snp.index, j] <- snprmaRes[["B"]]) |
|
153 |
- mixtureParams[, j] <- snprmaRes$mixtureParams |
|
154 |
- rm(snprmaRes); ##gc() |
|
155 |
- if(copynumber){ |
|
156 |
- np.index <- which(isSnp(callSet) == 0) |
|
157 |
- cnrmaRes <- cnrma(filenames=filenames[j], |
|
158 |
- cdfName=cdfName, |
|
159 |
- row.names=featureNames(callSet)[np.index], |
|
160 |
- sns=sns[j], |
|
161 |
- seed=seed, |
|
162 |
- verbose=verbose) |
|
163 |
- stopifnot(identical(featureNames(callSet)[np.index], rownames(cnrmaRes))) |
|
164 |
- A(callSet)[np.index, j] <- cnrmaRes |
|
165 |
- rm(cnrmaRes); ##gc() |
|
166 |
- } |
|
167 |
- ## as.matrix needed when ffdf is used |
|
168 |
- tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index, j]), |
|
169 |
- B=as.matrix(B(callSet)[snp.index, j]), |
|
170 |
- SNR=callSet$SNR[j], |
|
171 |
- mixtureParams=mixtureParams[, j], |
|
172 |
- cdfName=annotation(callSet), |
|
173 |
- row.names=featureNames(callSet)[snp.index], |
|
174 |
- col.names=sampleNames(callSet)[j], |
|
175 |
- probs=probs, |
|
176 |
- DF=DF, |
|
177 |
- SNRMin=SNRMin, |
|
178 |
- recallMin=recallMin, |
|
179 |
- recallRegMin=recallRegMin, |
|
180 |
- gender=gender, |
|
181 |
- verbose=verbose, |
|
182 |
- returnParams=returnParams, |
|
183 |
- badSNP=badSNP) |
|
184 |
- snpCall(callSet)[snp.index, j] <- tmp[["calls"]] |
|
185 |
- snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]] |
|
186 |
- callSet$gender[j] <- tmp$gender |
|
187 |
- iter <- iter+1 |
|
188 |
-## } |
|
189 |
- return(callSet) |
|
190 |
-} |
|
104 |
+##genotype <- function(filenames, |
|
105 |
+## cdfName, |
|
106 |
+## batch, |
|
107 |
+## mixtureSampleSize=10^5, |
|
108 |
+## eps=0.1, |
|
109 |
+## verbose=TRUE, |
|
110 |
+## seed=1, |
|
111 |
+## sns, |
|
112 |
+## copynumber=TRUE, |
|
113 |
+## probs=rep(1/3, 3), |
|
114 |
+## DF=6, |
|
115 |
+## SNRMin=5, |
|
116 |
+## recallMin=10, |
|
117 |
+## recallRegMin=1000, |
|
118 |
+## gender=NULL, |
|
119 |
+## returnParams=TRUE, |
|
120 |
+## badSNP=0.7){ |
|
121 |
+## if(missing(cdfName)) stop("must specify cdfName") |
|
122 |
+## if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
|
123 |
+## if(missing(sns)) sns <- basename(filenames) |
|
124 |
+## ## callSet contains potentially very big matrices |
|
125 |
+## ## More big matrices are created within snprma, that will then be removed. |
|
126 |
+## callSet <- construct(filenames=filenames, |
|
127 |
+## cdfName=cdfName, |
|
128 |
+## copynumber=copynumber, |
|
129 |
+## sns=sns, |
|
130 |
+## verbose=verbose, |
|
131 |
+## batch=batch) |
|
132 |
+## ##lM(callSet) <- initializeParamObject(list(featureNames(callSet), unique(protocolData(callSet)$batch))) |
|
133 |
+## mixtureParams <- matrix(NA, 4, length(filenames)) |
|
134 |
+## snp.index <- which(isSnp(callSet)==1) |
|
135 |
+## sample.strata <- splitIndicesByLength(1:ncol(callSet), ocSamples()) |
|
136 |
+## iter <- 1 |
|
137 |
+#### for(j in batches){ |
|
138 |
+## j <- unlist(sample.strata) |
|
139 |
+## if(verbose) message("Batch ", iter, " of ", length(sample.strata)) |
|
140 |
+## snprmaRes <- snprma(filenames=filenames[j], |
|
141 |
+## mixtureSampleSize=mixtureSampleSize, |
|
142 |
+## fitMixture=TRUE, |
|
143 |
+## eps=eps, |
|
144 |
+## verbose=verbose, |
|
145 |
+## seed=seed, |
|
146 |
+## cdfName=cdfName, |
|
147 |
+## sns=sns[j]) |
|
148 |
+## stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns)) |
|
149 |
+## pData(callSet)$SKW[j] <- snprmaRes$SKW |
|
150 |
+## pData(callSet)$SNR[j] <- snprmaRes$SNR |
|
151 |
+## suppressWarnings(A(callSet)[snp.index, j] <- snprmaRes[["A"]]) |
|
152 |
+## suppressWarnings(B(callSet)[snp.index, j] <- snprmaRes[["B"]]) |
|
153 |
+## mixtureParams[, j] <- snprmaRes$mixtureParams |
|
154 |
+## rm(snprmaRes); ##gc() |
|
155 |
+## if(copynumber){ |
|
156 |
+## np.index <- which(isSnp(callSet) == 0) |
|
157 |
+## cnrmaRes <- cnrma(filenames=filenames[j], |
|
158 |
+## cdfName=cdfName, |
|
159 |
+## row.names=featureNames(callSet)[np.index], |
|
160 |
+## sns=sns[j], |
|
161 |
+## seed=seed, |
|
162 |
+## verbose=verbose) |
|
163 |
+## stopifnot(identical(featureNames(callSet)[np.index], rownames(cnrmaRes))) |
|
164 |
+## A(callSet)[np.index, j] <- cnrmaRes |
|
165 |
+## rm(cnrmaRes); ##gc() |
|
166 |
+## } |
|
167 |
+## ## as.matrix needed when ffdf is used |
|
168 |
+## tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index, j]), |
|
169 |
+## B=as.matrix(B(callSet)[snp.index, j]), |
|
170 |
+## SNR=callSet$SNR[j], |
|
171 |
+## mixtureParams=mixtureParams[, j], |
|
172 |
+## cdfName=annotation(callSet), |
|
173 |
+## row.names=featureNames(callSet)[snp.index], |
|
174 |
+## col.names=sampleNames(callSet)[j], |
|
175 |
+## probs=probs, |
|
176 |
+## DF=DF, |
|
177 |
+## SNRMin=SNRMin, |
|
178 |
+## recallMin=recallMin, |
|
179 |
+## recallRegMin=recallRegMin, |
|
180 |
+## gender=gender, |
|
181 |
+## verbose=verbose, |
|
182 |
+## returnParams=returnParams, |
|
183 |
+## badSNP=badSNP) |
|
184 |
+## snpCall(callSet)[snp.index, j] <- tmp[["calls"]] |
|
185 |
+## snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]] |
|
186 |
+## callSet$gender[j] <- tmp$gender |
|
187 |
+## iter <- iter+1 |
|
188 |
+#### } |
|
189 |
+## return(callSet) |
|
190 |
+##} |
|
191 | 191 |
|
192 | 192 |
|
193 | 193 |
|
... | ... |
@@ -437,71 +437,71 @@ fit.wls <- function(allele, Ystar, W, Ns, autosome=TRUE){ |
437 | 437 |
##} |
438 | 438 |
|
439 | 439 |
|
440 |
-nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){ |
|
441 |
- I <- isSnp(object) |
|
442 |
- Ystar <- Ystar[I, ] |
|
443 |
- rownames(Ystar) <- featureNames(object)[isSnp(object)] |
|
444 |
- complete <- rowSums(is.na(W)) == 0 & I |
|
445 |
- W <- W[I, ] |
|
446 |
- if(any(!is.finite(W))){## | any(!is.finite(V))){ |
|
447 |
- i <- which(rowSums(!is.finite(W)) > 0) |
|
448 |
- stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ") |
|
449 |
- } |
|
450 |
- Ns <- tmp.objects[["Ns"]] |
|
451 |
- Ns <- Ns[I, ] |
|
452 |
- CHR <- unique(chromosome(object)) |
|
453 |
- batch <- unique(batch(object)) |
|
454 |
- nuA <- getParam(object, "nuA", batch) |
|
455 |
- nuB <- getParam(object, "nuB", batch) |
|
456 |
- phiA <- getParam(object, "phiA", batch) |
|
457 |
- phiB <- getParam(object, "phiB", batch) |
|
458 |
- if(CHR==23){ |
|
459 |
- phiAX <- getParam(object, "phiAX", batch) |
|
460 |
- phiBX <- getParam(object, "phiBX", batch) |
|
461 |
- } |
|
462 |
- NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05 |
|
463 |
- if(missing(allele)) stop("must specify allele") |
|
464 |
- if(CHR == 23){ |
|
465 |
- if(length(grep("AB", colnames(W))) > 0){ |
|
466 |
- if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2)) |
|
467 |
- if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0)) |
|
468 |
- } else{ |
|
469 |
- if(allele == "A") X <- cbind(1, c(1, 0, 2, 0), c(0, 1, 0, 2)) |
|
470 |
- if(allele == "B") X <- cbind(1, c(0, 1, 0, 2), c(1, 0, 2, 0)) |
|
471 |
- } |
|
472 |
- betahat <- matrix(NA, 3, nrow(Ystar)) |
|
473 |
- } else {##autosome |
|
474 |
- if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2) |
|
475 |
- if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous |
|
476 |
- betahat <- matrix(NA, 2, nrow(Ystar)) |
|
477 |
- } |
|
478 |
- ww <- rep(1, ncol(Ystar)) |
|
479 |
- II <- which(rowSums(is.nan(Ystar)) == 0) |
|
480 |
- for(i in II){ |
|
481 |
- betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww) |
|
482 |
- } |
|
483 |
- if(allele == "A"){ |
|
484 |
- nuA[complete] <- betahat[1, ] |
|
485 |
- phiA[complete] <- betahat[2, ] |
|
486 |
- object <- pr(object, "nuA", batch, nuA) |
|
487 |
- object <- pr(object, "phiA", batch, phiA) |
|
488 |
- if(CHR == 23){ |
|
489 |
- phiAX[complete] <- betahat[3, ] |
|
490 |
- object <- pr(object, "phiAX", batch, phiAX) |
|
491 |
- } |
|
492 |
- } |
|
493 |
- if(allele=="B"){ |
|
494 |
- nuB[complete] <- betahat[1, ] |
|
495 |
- phiB[complete] <- betahat[2, ] |
|
496 |
- object <- pr(object, "nuB", batch, nuB) |
|
497 |
- object <- pr(object, "phiB", batch, phiB) |
|
498 |
- if(CHR == 23){ |
|
499 |
- phiBX[complete] <- betahat[3, ] |
|
500 |
- object <- pr(object, "phiBX", batch, phiBX) |
|
501 |
- } |
|
502 |
- } |
|
503 |
- return(object) |
|
504 |
-} |
|
440 |
+##nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){ |
|
441 |
+## I <- isSnp(object) |
|
442 |
+## Ystar <- Ystar[I, ] |
|
443 |
+## rownames(Ystar) <- featureNames(object)[isSnp(object)] |
|
444 |
+## complete <- rowSums(is.na(W)) == 0 & I |
|
445 |
+## W <- W[I, ] |
|
446 |
+## if(any(!is.finite(W))){## | any(!is.finite(V))){ |
|
447 |
+## i <- which(rowSums(!is.finite(W)) > 0) |
|
448 |
+## stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ") |
|
449 |
+## } |
|
450 |
+## Ns <- tmp.objects[["Ns"]] |
|
451 |
+## Ns <- Ns[I, ] |
|
452 |
+## CHR <- unique(chromosome(object)) |
|
453 |
+## batch <- unique(batch(object)) |
|
454 |
+## nuA <- getParam(object, "nuA", batch) |
|
455 |
+## nuB <- getParam(object, "nuB", batch) |
|
456 |
+## phiA <- getParam(object, "phiA", batch) |
|
457 |
+## phiB <- getParam(object, "phiB", batch) |
|
458 |
+## if(CHR==23){ |
|
459 |
+## phiAX <- getParam(object, "phiAX", batch) |
|
460 |
+## phiBX <- getParam(object, "phiBX", batch) |
|
461 |
+## } |
|
462 |
+## NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05 |
|
463 |
+## if(missing(allele)) stop("must specify allele") |
|
464 |
+## if(CHR == 23){ |
|
465 |
+## if(length(grep("AB", colnames(W))) > 0){ |
|
466 |
+## if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2)) |
|
467 |
+## if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0)) |
|
468 |
+## } else{ |
|
469 |
+## if(allele == "A") X <- cbind(1, c(1, 0, 2, 0), c(0, 1, 0, 2)) |
|
470 |
+## if(allele == "B") X <- cbind(1, c(0, 1, 0, 2), c(1, 0, 2, 0)) |
|
471 |
+## } |
|
472 |
+## betahat <- matrix(NA, 3, nrow(Ystar)) |
|
473 |
+## } else {##autosome |
|
474 |
+## if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2) |
|
475 |
+## if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous |
|
476 |
+## betahat <- matrix(NA, 2, nrow(Ystar)) |
|
477 |
+## } |
|
478 |
+## ww <- rep(1, ncol(Ystar)) |
|
479 |
+## II <- which(rowSums(is.nan(Ystar)) == 0) |
|
480 |
+## for(i in II){ |
|
481 |
+## betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww) |
|
482 |
+## } |
|
483 |
+## if(allele == "A"){ |
|
484 |
+## nuA[complete] <- betahat[1, ] |
|
485 |
+## phiA[complete] <- betahat[2, ] |
|
486 |
+## object <- pr(object, "nuA", batch, nuA) |
|
487 |
+## object <- pr(object, "phiA", batch, phiA) |
|
488 |
+## if(CHR == 23){ |
|
489 |
+## phiAX[complete] <- betahat[3, ] |
|
490 |
+## object <- pr(object, "phiAX", batch, phiAX) |
|
491 |
+## } |
|
492 |
+## } |
|
493 |
+## if(allele=="B"){ |
|
494 |
+## nuB[complete] <- betahat[1, ] |
|
495 |
+## phiB[complete] <- betahat[2, ] |
|
496 |
+## object <- pr(object, "nuB", batch, nuB) |
|
497 |
+## object <- pr(object, "phiB", batch, phiB) |
|
498 |
+## if(CHR == 23){ |
|
499 |
+## phiBX[complete] <- betahat[3, ] |
|
500 |
+## object <- pr(object, "phiBX", batch, phiBX) |
|
501 |
+## } |
|
502 |
+## } |
|
503 |
+## return(object) |
|
504 |
+##} |
|
505 | 505 |
|
506 | 506 |
|
507 | 507 |
|
... | ... |
@@ -532,114 +532,114 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
532 | 532 |
|
533 | 533 |
|
534 | 534 |
##replace with release/crlmm/R/cnrma-functions |
535 |
-crlmmCopynumber <- function(object, |
|
536 |
- chromosome=1:23, |
|
537 |
- which.batches, |
|
538 |
- MIN.SAMPLES=10, |
|
539 |
- SNRMin=5, |
|
540 |
- MIN.OBS=3, |
|
541 |
- DF.PRIOR=50, |
|
542 |
- bias.adj=FALSE, |
|
543 |
- prior.prob=rep(1/4,4), |
|
544 |
- seed=1, |
|
545 |
- verbose=TRUE, |
|
546 |
- GT.CONF.THR=0.99, |
|
547 |
- PHI.THR=2^6, |
|
548 |
- nHOM.THR=5, |
|
549 |
- MIN.NU=2^3, |
|
550 |
- MIN.PHI=2^3, |
|
551 |
- THR.NU.PHI=TRUE, |
|
552 |
- thresholdCopynumber=TRUE, |
|
553 |
- weighted.lm=TRUE){ |
|
554 |
- ffIsLoaded <- class(calls(object))[[1]] == "ff" |
|
555 |
- if(ffIsLoaded){ |
|
556 |
- open(object) |
|
557 |
- open(object$SKW) |
|
558 |
- open(object$SNR) |
|
559 |
- } |
|
560 |
- stopifnot("batch" %in% varLabels(protocolData(object))) |
|
561 |
- stopifnot("chromosome" %in% fvarLabels(object)) |
|
562 |
- stopifnot("position" %in% fvarLabels(object)) |
|
563 |
- stopifnot("isSnp" %in% fvarLabels(object)) |
|
564 |
- ##batch <- object$batch |
|
565 |
- batch <- batch(object) |
|
566 |
- if(ffIsLoaded) { |
|
567 |
- open(object$SNR) |
|
568 |
- SNR <- object$SNR[, ] |
|
569 |
- } else SNR <- object$SNR |
|
570 |
- batches <- split((1:ncol(object))[SNR > SNRMin], batch[SNR > SNRMin]) |
|
571 |
- if(any(sapply(batches, length) < MIN.SAMPLES)) message("Excluding batches with fewer than ", MIN.SAMPLES, " samples") |
|
572 |
- batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
573 |
- if(missing(which.batches)) which.batches <- seq(along=batches) |
|
574 |
- for(i in chromosome){ |
|
575 |
- if(verbose) cat("Chromosome ", i, "\n") |
|
576 |
- if(i >= 24) next() |
|
577 |
- ii <- which.batches[1] |
|
578 |
- for(j in batches[which.batches]){ |
|
579 |
- if(verbose) message("Batch ", ii, " of ", length(which.batches)) |
|
580 |
- row.index <- which(chromosome(object) == i) |
|
581 |
- ##Note that ffdf assayDataElements are data.frames after subsetting(not matrices) |
|
582 |
-## ca <- as.matrix(CA(object)[row.index, j]) |
|
583 |
-## cb <- as.matrix(CB(object)[row.index, j]) |
|
584 |
-## dimnames(ca) <- dimnames(cb) <- list(featureNames(object)[row.index], sampleNames(object)[j]) |
|
585 |
- tmp <- new("CNSet", |
|
586 |
- call=as.matrix(calls(object)[row.index, j]), |
|
587 |
- callProbability=as.matrix(snpCallProbability(object)[row.index, j]), |
|
588 |
- alleleA=as.matrix(A(object)[row.index, j]), |
|
589 |
- alleleB=as.matrix(B(object)[row.index, j]), |
|
590 |
-## CA=ca, CB=cb, |
|
591 |
- phenoData=phenoData(object)[j, ], |
|
592 |
- annotation=annotation(object)) |
|
593 |
- featureData(tmp) <- addFeatureAnnotation(tmp) |
|
594 |
- featureData(tmp) <- lm.parameters(tmp, batch=unique(batch[j])) |
|
595 |
- tmp$batch <- batch(object)[j] |
|
596 |
- tmp <- computeCopynumber(tmp, |
|
597 |
- MIN.OBS=MIN.OBS, |
|
598 |
- DF.PRIOR=DF.PRIOR, |
|
599 |
- bias.adj=bias.adj, |
|
600 |
- prior.prob=prior.prob, |
|
601 |
- seed=seed, |
|
602 |
- verbose=verbose, |
|
603 |
- GT.CONF.THR=GT.CONF.THR, |
|
604 |
- PHI.THR=PHI.THR, |
|
605 |
- nHOM.THR=nHOM.THR, |
|
606 |
- MIN.NU=MIN.NU, |
|
607 |
- MIN.PHI=MIN.PHI, |
|
608 |
- THR.NU.PHI=THR.NU.PHI, |
|
609 |
- thresholdCopynumber=thresholdCopynumber) |
|
610 |
- fData(tmp) <- fData(tmp)[, -(1:3)] |
|
611 |
-## CA(tmp) <- matrix(as.integer(CA(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp), |
|
612 |
-## dimnames=list(featureNames(tmp), sampleNames(tmp))) |
|
613 |
-## CB(tmp) <- matrix(as.integer(CB(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp), |
|
614 |
-## dimnames=list(featureNames(tmp), sampleNames(tmp))) |
|
615 |
-## CA(object)[row.index, j] <- CA(tmp) |
|
616 |
-## CB(object)[row.index, j] <- CB(tmp) |
|
617 |
- ##ad-hocery |
|
618 |
- batchName <- unique(batch(object)[j]) |
|
619 |
- fvarLabels(tmp)[15:17] <- paste(c("corrAB", "corrBB", "corrAA"), batchName, sep=".") |
|
620 |
- fvarLabels(tmp)[13:14] <- paste(c("phiPrimeA", "phiPrimeB"), batchName, sep=".") |
|
621 |
- fvarLabels(tmp) <- gsub("_", ".", fvarLabels(tmp)) |
|
622 |
- ##fvarLabels(tmp) <- gsub("\\.[1-9]", "", fvarLabels(tmp)) |
|
623 |
- if(ffIsLoaded){ |
|
624 |
- physical <- get("physical") |
|
625 |
- fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% names(physical(lM(object)))] |
|
626 |
- jj <- match(fvarLabels(tmp), names(lM(object))) |
|
627 |
- lM(object)[row.index, jj] <- fData(tmp) |
|
628 |
- } else { |
|
629 |
- nms <- paste(names(lM(object)), batchName, sep=".") |
|
630 |
- fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% nms] |
|
631 |
- for(k in seq_along(fvarLabels(tmp))){ |
|
632 |
- kk <- match(fvarLabels(tmp)[k], paste(names(lM(object)), batchName, sep=".")) |
|
633 |
- column <- match(batchName, colnames(lM(object)[[k]])) |
|
634 |
- lM(object)[[k]][row.index, column] <- fData(tmp)[, k] |
|
635 |
- } |
|
636 |
- } |
|
637 |
- rm(tmp); ##gc() |
|
638 |
- ii <- ii+1 |
|
639 |
- } |
|
640 |
- } |
|
641 |
- return(object) |
|
642 |
-} |
|
535 |
+##crlmmCopynumber <- function(object, |
|
536 |
+## chromosome=1:23, |
|
537 |
+## which.batches, |
|
538 |
+## MIN.SAMPLES=10, |
|
539 |
+## SNRMin=5, |
|
540 |
+## MIN.OBS=3, |
|
541 |
+## DF.PRIOR=50, |
|
542 |
+## bias.adj=FALSE, |
|
543 |
+## prior.prob=rep(1/4,4), |
|
544 |
+## seed=1, |
|
545 |
+## verbose=TRUE, |
|
546 |
+## GT.CONF.THR=0.99, |
|
547 |
+## PHI.THR=2^6, |
|
548 |
+## nHOM.THR=5, |
|
549 |
+## MIN.NU=2^3, |
|
550 |
+## MIN.PHI=2^3, |
|
551 |
+## THR.NU.PHI=TRUE, |
|
552 |
+## thresholdCopynumber=TRUE, |
|
553 |
+## weighted.lm=TRUE){ |
|
554 |
+## ffIsLoaded <- class(calls(object))[[1]] == "ff" |
|
555 |
+## if(ffIsLoaded){ |
|
556 |
+## open(object) |
|
557 |
+## open(object$SKW) |
|
558 |
+## open(object$SNR) |
|
559 |
+## } |
|
560 |
+## stopifnot("batch" %in% varLabels(protocolData(object))) |
|
561 |
+## stopifnot("chromosome" %in% fvarLabels(object)) |
|
562 |
+## stopifnot("position" %in% fvarLabels(object)) |
|
563 |
+## stopifnot("isSnp" %in% fvarLabels(object)) |
|
564 |
+## ##batch <- object$batch |
|
565 |
+## batch <- batch(object) |
|
566 |
+## if(ffIsLoaded) { |
|
567 |
+## open(object$SNR) |
|
568 |
+## SNR <- object$SNR[, ] |
|
569 |
+## } else SNR <- object$SNR |
|
570 |
+## batches <- split((1:ncol(object))[SNR > SNRMin], batch[SNR > SNRMin]) |
|
571 |
+## if(any(sapply(batches, length) < MIN.SAMPLES)) message("Excluding batches with fewer than ", MIN.SAMPLES, " samples") |
|
572 |
+## batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
573 |
+## if(missing(which.batches)) which.batches <- seq(along=batches) |
|
574 |
+## for(i in chromosome){ |
|
575 |
+## if(verbose) cat("Chromosome ", i, "\n") |
|
576 |
+## if(i >= 24) next() |
|
577 |
+## ii <- which.batches[1] |
|
578 |
+## for(j in batches[which.batches]){ |
|
579 |
+## if(verbose) message("Batch ", ii, " of ", length(which.batches)) |
|
580 |
+## row.index <- which(chromosome(object) == i) |
|
581 |
+## ##Note that ffdf assayDataElements are data.frames after subsetting(not matrices) |
|
582 |
+#### ca <- as.matrix(CA(object)[row.index, j]) |
|
583 |
+#### cb <- as.matrix(CB(object)[row.index, j]) |
|
584 |
+#### dimnames(ca) <- dimnames(cb) <- list(featureNames(object)[row.index], sampleNames(object)[j]) |
|
585 |
+## tmp <- new("CNSet", |
|
586 |
+## call=as.matrix(calls(object)[row.index, j]), |
|
587 |
+## callProbability=as.matrix(snpCallProbability(object)[row.index, j]), |
|
588 |
+## alleleA=as.matrix(A(object)[row.index, j]), |
|
589 |
+## alleleB=as.matrix(B(object)[row.index, j]), |
|
590 |
+#### CA=ca, CB=cb, |
|
591 |
+## phenoData=phenoData(object)[j, ], |
|
592 |
+## annotation=annotation(object)) |
|
593 |
+## featureData(tmp) <- addFeatureAnnotation(tmp) |
|
594 |
+## featureData(tmp) <- lm.parameters(tmp, batch=unique(batch[j])) |
|
595 |
+## tmp$batch <- batch(object)[j] |
|
596 |
+## tmp <- computeCopynumber(tmp, |
|
597 |
+## MIN.OBS=MIN.OBS, |
|
598 |
+## DF.PRIOR=DF.PRIOR, |
|
599 |
+## bias.adj=bias.adj, |
|
600 |
+## prior.prob=prior.prob, |
|
601 |
+## seed=seed, |
|
602 |
+## verbose=verbose, |
|
603 |
+## GT.CONF.THR=GT.CONF.THR, |
|
604 |
+## PHI.THR=PHI.THR, |
|
605 |
+## nHOM.THR=nHOM.THR, |
|
606 |
+## MIN.NU=MIN.NU, |
|
607 |
+## MIN.PHI=MIN.PHI, |
|
608 |
+## THR.NU.PHI=THR.NU.PHI, |
|
609 |
+## thresholdCopynumber=thresholdCopynumber) |
|
610 |
+## fData(tmp) <- fData(tmp)[, -(1:3)] |
|
611 |
+#### CA(tmp) <- matrix(as.integer(CA(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp), |
|
612 |
+#### dimnames=list(featureNames(tmp), sampleNames(tmp))) |
|
613 |
+#### CB(tmp) <- matrix(as.integer(CB(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp), |
|
614 |
+#### dimnames=list(featureNames(tmp), sampleNames(tmp))) |
|
615 |
+#### CA(object)[row.index, j] <- CA(tmp) |
|
616 |
+#### CB(object)[row.index, j] <- CB(tmp) |
|
617 |
+## ##ad-hocery |
|
618 |
+## batchName <- unique(batch(object)[j]) |
|
619 |
+## fvarLabels(tmp)[15:17] <- paste(c("corrAB", "corrBB", "corrAA"), batchName, sep=".") |
|
620 |
+## fvarLabels(tmp)[13:14] <- paste(c("phiPrimeA", "phiPrimeB"), batchName, sep=".") |
|
621 |
+## fvarLabels(tmp) <- gsub("_", ".", fvarLabels(tmp)) |
|
622 |
+## ##fvarLabels(tmp) <- gsub("\\.[1-9]", "", fvarLabels(tmp)) |
|
623 |
+## if(ffIsLoaded){ |
|
624 |
+## physical <- get("physical") |
|
625 |
+## fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% names(physical(lM(object)))] |
|
626 |
+## jj <- match(fvarLabels(tmp), names(lM(object))) |
|
627 |
+## lM(object)[row.index, jj] <- fData(tmp) |
|
628 |
+## } else { |
|
629 |
+## nms <- paste(names(lM(object)), batchName, sep=".") |
|
630 |
+## fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% nms] |
|
631 |
+## for(k in seq_along(fvarLabels(tmp))){ |
|
632 |
+## kk <- match(fvarLabels(tmp)[k], paste(names(lM(object)), batchName, sep=".")) |
|
633 |
+## column <- match(batchName, colnames(lM(object)[[k]])) |
|
634 |
+## lM(object)[[k]][row.index, column] <- fData(tmp)[, k] |
|
635 |
+## } |
|
636 |
+## } |
|
637 |
+## rm(tmp); ##gc() |
|
638 |
+## ii <- ii+1 |
|
639 |
+## } |
|
640 |
+## } |
|
641 |
+## return(object) |
|
642 |
+##} |
|
643 | 643 |
|
644 | 644 |
crlmmCopynumberLD <- function(object, |
645 | 645 |
which.batches, |
... | ... |
@@ -1571,47 +1571,47 @@ processCEL2 <- function(i, filenames, row.names, A, SKW, seed, cdfName, pkgname) |
1571 | 1571 |
|
1572 | 1572 |
|
1573 | 1573 |
# steps: quantile normalize hapmap: create 1m_reference_cn.rda object |
1574 |
-cnrma <- function(filenames, cdfName, row.names, sns, seed=1, verbose=FALSE){ |
|
1575 |
- if(missing(cdfName)) stop("must specify cdfName") |
|
1576 |
- pkgname <- getCrlmmAnnotationName(cdfName) |
|
1577 |
- require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
|
1578 |
- if (missing(sns)) sns <- basename(filenames) |
|
1579 |
- loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
1580 |
- fid <- getVarInEnv("npProbesFid") |
|
1581 |
- fid <- fid[match(row.names, names(fid))] |
|
1582 |
- set.seed(seed) |
|
1583 |
- idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
1584 |
- SKW <- vector("numeric", length(filenames)) |
|
1585 |
- NP <- matrix(NA, length(fid), length(filenames)) |
|
1586 |
- verbose <- TRUE |
|
1587 |
- if(verbose){ |
|
1588 |
- message("Processing ", length(filenames), " files.") |
|
1589 |
- if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
1590 |
- } |
|
1591 |
- if(cdfName=="genomewidesnp6"){ |
|
1592 |
- loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
1593 |
- } |
|
1594 |
- if(cdfName=="genomewidesnp5"){ |
|
1595 |
- loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
1596 |
- } |
|
1597 |
- reference <- getVarInEnv("reference") |
|
1598 |
- ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
1599 |
- for(i in seq(along=filenames)){ |
|
1600 |
- y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
1601 |
- x <- log2(y[idx2]) |
|
1602 |
- SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
1603 |
- rm(x) |
|
1604 |
- NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
1605 |
- if (verbose) |
|
1606 |
- if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
1607 |
- else cat(".") |
|
1608 |
- } |
|
1609 |
- dimnames(NP) <- list(names(fid), sns) |
|
1610 |
- ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
1611 |
- ##res3 <- list(NP=NP, SKW=SKW) |
|
1612 |
- cat("\n") |
|
1613 |
- return(NP) |
|
1614 |
-} |
|
1574 |
+##cnrma <- function(filenames, cdfName, row.names, sns, seed=1, verbose=FALSE){ |
|
1575 |
+## if(missing(cdfName)) stop("must specify cdfName") |
|
1576 |
+## pkgname <- getCrlmmAnnotationName(cdfName) |
|
1577 |
+## require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
|
1578 |
+## if (missing(sns)) sns <- basename(filenames) |
|
1579 |
+## loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
1580 |
+## fid <- getVarInEnv("npProbesFid") |
|
1581 |
+## fid <- fid[match(row.names, names(fid))] |
|
1582 |
+## set.seed(seed) |
|
1583 |
+## idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
1584 |
+## SKW <- vector("numeric", length(filenames)) |
|
1585 |
+## NP <- matrix(NA, length(fid), length(filenames)) |
|
1586 |
+## verbose <- TRUE |
|
1587 |
+## if(verbose){ |
|
1588 |
+## message("Processing ", length(filenames), " files.") |
|
1589 |
+## if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
1590 |
+## } |
|
1591 |
+## if(cdfName=="genomewidesnp6"){ |
|
1592 |
+## loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
1593 |
+## } |
|
1594 |
+## if(cdfName=="genomewidesnp5"){ |
|
1595 |
+## loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
1596 |
+## } |
|
1597 |
+## reference <- getVarInEnv("reference") |
|
1598 |
+## ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
1599 |
+## for(i in seq(along=filenames)){ |
|
1600 |
+## y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
1601 |
+## x <- log2(y[idx2]) |
|
1602 |
+## SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
1603 |
+## rm(x) |
|
1604 |
+## NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
1605 |
+## if (verbose) |
|
1606 |
+## if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
1607 |
+## else cat(".") |
|
1608 |
+## } |
|
1609 |
+## dimnames(NP) <- list(names(fid), sns) |
|
1610 |
+## ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
1611 |
+## ##res3 <- list(NP=NP, SKW=SKW) |
|
1612 |
+## cat("\n") |
|
1613 |
+## return(NP) |
|
1614 |
+##} |
|
1615 | 1615 |
|
1616 | 1616 |
getFlags <- function(object, PHI.THR){ |
1617 | 1617 |
batch <- unique(object$batch) |
... | ... |
@@ -1628,777 +1628,777 @@ getFlags <- function(object, PHI.THR){ |
1628 | 1628 |
} |
1629 | 1629 |
|
1630 | 1630 |
|
1631 |
-instantiateObjects <- function(object, cnOptions){ |
|
1632 |
- Ns <- matrix(NA, nrow(object), 5) |
|
1633 |
- colnames(Ns) <- c("A", "B", "AA", "AB", "BB") |
|
1634 |
- vA <- vB <- muB <- muA <- Ns |
|
1635 |
- normal <- matrix(TRUE, nrow(object), ncol(object)) |
|
1636 |
- dimnames(normal) <- list(featureNames(object), sampleNames(object)) |
|
1637 |
- tmp.objects <- list(vA=vA, |
|
1638 |
- vB=vB, |
|
1639 |
- muB=muB, |
|
1640 |
- muA=muA, |
|
1641 |
- Ns=Ns, |
|
1642 |
- normal=normal) |
|
1643 |
- return(tmp.objects) |
|
1644 |
-} |
|
1645 |
- |
|
1646 |
-thresholdCopynumber <- function(object){ |
|
1647 |
- ca <- CA(object) |
|
1648 |
- cb <- CB(object) |
|
1649 |
- ca[ca < 0.05] <- 0.05 |
|
1650 |
- ca[ca > 5] <- 5 |
|
1651 |
- cb[cb < 0.05] <- 0.05 |
|
1652 |
- cb[cb > 5] <- 5 |
|
1653 |
- CA(object) <- ca |
|
1654 |
- CB(object) <- cb |
|
1655 |
- return(object) |
|
1656 |
-} |
|
1657 |
- |
|
1658 |
-##linear model parameters |
|
1659 |
-lm.parameters <- function(object, batch){##cnOptions){ |
|
1660 |
- fD <- fData(object) |
|
1661 |
- ##batch <- object$batch |
|
1662 |
- uplate <- unique(batch) |
|
1663 |
- parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
1664 |
- paste("tau2B", uplate, sep="_"), |
|
1665 |
- paste("sig2A", uplate, sep="_"), |
|
1666 |
- paste("sig2B", uplate, sep="_"), |
|
1667 |
- paste("nuA", uplate, sep="_"), |
|
1668 |
- paste("nuA.se", uplate, sep="_"), |
|
1669 |
- paste("nuB", uplate, sep="_"), |
|
1670 |
- paste("nuB.se", uplate, sep="_"), |
|
1671 |
- paste("phiA", uplate, sep="_"), |
|
1672 |
- paste("phiA.se", uplate, sep="_"), |
|
1673 |
- paste("phiB", uplate, sep="_"), |
|
1674 |
- paste("phiB.se", uplate, sep="_"), |
|
1675 |
- paste("phiAX", uplate, sep="_"), |
|
1676 |
- paste("phiBX", uplate, sep="_"), |
|
1677 |
- paste("corr", uplate, sep="_"), |
|
1678 |
- paste("corrA.BB", uplate, sep="_"), |
|
1679 |
- paste("corrB.AA", uplate, sep="_")) |
|
1680 |
- pMatrix <- data.frame(matrix(numeric(0), |
|
1681 |
- nrow(object), |
|
1682 |
- length(parameterNames)), |
|
1683 |
- row.names=featureNames(object)) |
|
1684 |
- colnames(pMatrix) <- parameterNames |
|
1685 |
- fD2 <- cbind(fD, pMatrix) |
|
1686 |
- new("AnnotatedDataFrame", data=fD2, |
|
1687 |
- varMetadata=data.frame(labelDescription=colnames(fD2), |
|
1688 |
- row.names=colnames(fD2))) |
|
1689 |
-} |
|
1690 |
- |
|
1691 |
-nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
|
1692 |
- batch <- unique(object$batch) |
|
1693 |
- CHR <- unique(chromosome(object)) |
|
1694 |
- goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){ |
|
1695 |
- Ns <- tmp.objects[["Ns"]] |
|
1696 |
- ##Ns <- get("Ns", envir) |
|
1697 |
- flags <- getFlags(object, PHI.THR) |
|
1698 |
- fewAA <- Ns[, "AA"] < nAA.THR |
|
1699 |
- fewBB <- Ns[, "BB"] < nBB.THR |
|
1700 |
- flagsA <- flags | fewAA |
|
1701 |
- flagsB <- flags | fewBB |
|
1702 |
- flags <- list(A=flagsA, B=flagsB) |
|
1703 |
- return(flags) |
|
1704 |
- } |
|
1705 |
- nAA.THR <- cnOptions$nHOM.THR |
|
1706 |
- nBB.THR <- cnOptions$nHOM.THR |
|
1707 |
- PHI.THR <- cnOptions$PHI.THR |
|
1708 |
- snpflags <- goodSnps(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR) |
|
1709 |
- flagsA <- snpflags$A |
|
1710 |
- flagsB <- snpflags$B |
|
1711 |
-## if(all(flagsA) | all(flagsB)) stop("all snps are flagged") |
|
1712 |
- nuA <- getParam(object, "nuA", batch) |
|
1713 |
- nuB <- getParam(object, "nuB", batch) |
|
1714 |
- phiA <- getParam(object, "phiA", batch) |
|
1715 |
- phiB <- getParam(object, "phiB", batch) |
|
1716 |
- sns <- sampleNames(object) |
|
1717 |
- muA <- tmp.objects[["muA"]] |
|
1718 |
- muB <- tmp.objects[["muB"]] |
|
1719 |
- A <- A(object) |
|
1720 |
- B <- B(object) |
|
1721 |
-## CA <- CA(object) |
|
1722 |
-## CB <- CB(object) |
|
1723 |
- if(CHR == 23){ |
|
1724 |
- phiAX <- getParam(object, "phiAX", batch) |
|
1725 |
- phiBX <- getParam(object, "phiBX", batch) |
|
1726 |
- } |
|
1727 |
- ##--------------------------------------------------------------------------- |
|
1728 |
- ## Train on unflagged SNPs |
|
1729 |
- ##--------------------------------------------------------------------------- |
|
1730 |
- ##Might be best to train using the X chromosome, since for the |
|
1731 |
- ##X phi and nu have been adjusted for cross-hybridization |
|
1732 |
- ##plateInd <- plate == uplate[p] |
|
1733 |
- ##muA <- muA[!flagsA, p, c("A", "AA")] |
|
1734 |
- ##muB <- muB[!flagsB, p, c("B", "BB")] |
|
1735 |
- muA <- muA[!flagsA, "AA"] |
|
1736 |
- muB <- muB[!flagsB, "BB"] |
|
1737 |
- X <- cbind(1, log2(c(muA, muB))) |
|
1738 |
- Y <- log2(c(phiA[!flagsA], phiB[!flagsB])) |
|
1739 |
- if(nrow(X) > 5000){ |
|
1740 |
- ix <- sample(1:nrow(X), 5000) |
|
1741 |
- } else { |
|
1742 |
- ix <- 1:nrow(X) |
|
1743 |
- } |
|
1744 |
- betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix])) |
|
1745 |
- normal <- tmp.objects[["normal"]][!isSnp(object), ] |
|
1746 |
- if(CHR == 23){ |
|
1747 |
- ##normalNP <- envir[["normalNP"]] |
|
1748 |
- ##normalNP <- normalNP[, plate==uplate[p]] |
|
1749 |
- ##nuT <- envir[["nuT"]] |
|
1750 |
- ##phiT <- envir[["phiT"]] |
|
1751 |
- |
|
1752 |
- ##cnvs <- envir[["cnvs"]] |
|
1753 |
- ##loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
1754 |
- ##cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
|
1755 |
- ##cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ] |
|
1756 |
- |
|
1757 |
- ##For build Hg18 |
|
1758 |
- ##http://genome.ucsc.edu/cgi-bin/hgGateway |
|
1759 |
- ##pseudo-autosomal regions on X |
|
1760 |
- ##chrX:1-2,709,520 and chrX:154584237-154913754, respectively |
|
1761 |
- ##par:pseudo-autosomal regions |
|
1762 |
- pseudoAR <- position(object) < 2709520 | (position(object) > 154584237 & position(object) < 154913754) |
|
1763 |
- ##pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754) |
|
1764 |
- ##in case some of the cnProbes are not annotated |
|
1765 |
- pseudoAR[is.na(pseudoAR)] <- FALSE |
|
1766 |
- pseudoAR <- pseudoAR[!isSnp(object)] |
|
1767 |
- ##gender <- envir[["gender"]] |
|
1768 |
- gender <- object$gender |
|
1769 |
- obj1 <- object[!isSnp(object), ] |
|
1770 |
- A.male <- A(obj1[, gender==1]) |
|
1771 |
- mu1 <- rowMedians(A.male, na.rm=TRUE) |
|
1772 |
- ##mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE) |
|
1773 |
- ##mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE) |
|
1774 |
- A.female <- A(obj1[, gender==2]) |
|
1775 |
- mu2 <- rowMedians(A.female, na.rm=TRUE) |
|
1776 |
- mus <- log2(cbind(mu1, mu2)) |
|
1777 |
- X.men <- cbind(1, mus[, 1]) |
|
1778 |
- X.fem <- cbind(1, mus[, 2]) |
|
1779 |
- |
|
1780 |
- Yhat1 <- as.numeric(X.men %*% betahat) |
|
1781 |
- Yhat2 <- as.numeric(X.fem %*% betahat) |
|
1782 |
- phi1 <- 2^(Yhat1) |
|
1783 |
- phi2 <- 2^(Yhat2) |
|
1784 |
- nu1 <- 2^(mus[, 1]) - phi1 |
|
1785 |
- nu2 <- 2^(mus[, 2]) - 2*phi2 |
|
1786 |
- |
|
1787 |
- if(any(pseudoAR)){ |
|
1788 |
- nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR] |
|
1789 |
- } |
|
1790 |
-## CT1 <- 1/phi1*(A.male-nu1) |
|
1791 |
-## CT2 <- 1/phi2*(A.female-nu2) |
|
1792 |
-## CA <- CA(obj1) |
|
1793 |
-## CA[, gender==1] <- CT1 |
|
1794 |
-## CA[, gender==2] <- CT2 |
|
1795 |
-## CA(object)[!isSnp(object), ] <- CA |
|
1796 |
- ##only using females to compute the variance |
|
1797 |
- ##normalNP[, gender=="male"] <- NA |
|
1798 |
- normal[, gender==1] <- NA |
|
1799 |
- sig2A <- getParam(object, "sig2A", batch) |
|
1800 |
- normal.f <- normal[, object$gender==2] |
|
1801 |
- sig2A[!isSnp(object)] <- rowMAD(log2(A.female*normal.f), na.rm=TRUE)^2 |
|
1802 |
- sig2A[!isSnp(object) & is.na(sig2A)] <- median(sig2A[!isSnp(object)], na.rm=TRUE) |
|
1803 |
- ##sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
1804 |
- object <- pr(object, "sig2A", batch, sig2A) |
|
1805 |
- |
|
1806 |
- nuA[!isSnp(object)] <- nu2 |
|
1807 |
- phiA[!isSnp(object)] <- phi2 |
|
1808 |
- |
|
1809 |
- THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
1810 |
- if(THR.NU.PHI){ |
|
1811 |
- verbose <- cnOptions$verbose |
|
1812 |
- ##Assign values to object |
|
1813 |
- object <- pr(object, "nuA", batch, nuA) |
|
1814 |
- object <- pr(object, "phiA", batch, phiA) |
|
1815 |
- ##if(verbose) message("Thresholding nu and phi") |
|
1816 |
- object <- thresholdModelParams(object, cnOptions) |
|
1817 |
- } else { |
|
1818 |
- object <- pr(object, "nuA", batch, nuA) |
|
1819 |
- object <- pr(object, "phiA", batch, phiA) |
|
1820 |
- } |
|
1821 |
- } else { |
|
1822 |
- A <- A(object)[!isSnp(object), ] |
|
1823 |
- mus <- rowMedians(A * normal, na.rm=TRUE) |
|
1824 |
- crosshyb <- max(median(muA) - median(mus), 0) |
|
1825 |
- X <- cbind(1, log2(mus+crosshyb)) |
|
1826 |
- logPhiT <- X %*% betahat |
|
1827 |
- phiA[!isSnp(object)] <- 2^(logPhiT) |
|
1828 |
- nuA[!isSnp(object)] <- mus-2*phiA[!isSnp(object)] |
|
1829 |
- |
|
1830 |
- THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
1831 |
- if(THR.NU.PHI){ |
|
1832 |
- verbose <- cnOptions$verbose |
|
1833 |
- ##Assign values to object |
|
1834 |
- object <- pr(object, "nuA", batch, nuA) |
|
1835 |
- object <- pr(object, "phiA", batch, phiA) |
|
1836 |
- ##if(verbose) message("Thresholding nu and phi") |
|
1837 |
- object <- thresholdModelParams(object, cnOptions) |
|
1838 |
- ##reassign values (now thresholded at MIN.NU and MIN.PHI |
|
1839 |
- nuA <- getParam(object, "nuA", batch) |
|
1840 |
- phiA <- getParam(object, "phiA", batch) |
|
1841 |
- } |
|
1842 |
- ##CA(object)[!isSnp(object), ] <- 1/phiA[!isSnp(object)]*(A - nuA[!isSnp(object)]) |
|
1843 |
- sig2A <- getParam(object, "sig2A", batch) |
|
1844 |
- sig2A[!isSnp(object)] <- rowMAD(log2(A*normal), na.rm=TRUE)^2 |
|
1845 |
- object <- pr(object, "sig2A", batch, sig2A) |
|
1846 |
- ##added |
|
1847 |
- object <- pr(object, "nuA", batch, nuA) |
|
1848 |
- object <- pr(object, "phiA", batch, phiA) |
|
1849 |
- } |
|
1850 |
- return(object) |
|
1851 |
-} |
|
1852 |
- |
|
1631 |
+##instantiateObjects <- function(object, cnOptions){ |
|
1632 |
+## Ns <- matrix(NA, nrow(object), 5) |
|
1633 |
+## colnames(Ns) <- c("A", "B", "AA", "AB", "BB") |
|
1634 |
+## vA <- vB <- muB <- muA <- Ns |
|
1635 |
+## normal <- matrix(TRUE, nrow(object), ncol(object)) |
|
1636 |
+## dimnames(normal) <- list(featureNames(object), sampleNames(object)) |
|
1637 |
+## tmp.objects <- list(vA=vA, |
|
1638 |
+## vB=vB, |
|
1639 |
+## muB=muB, |
|
1640 |
+## muA=muA, |
|
1641 |
+## Ns=Ns, |
|
1642 |
+## normal=normal) |
|
1643 |
+## return(tmp.objects) |
|
1644 |
+##} |
|
1645 |
+## |
|
1646 |
+##thresholdCopynumber <- function(object){ |
|
1647 |
+## ca <- CA(object) |
|
1648 |
+## cb <- CB(object) |
|
1649 |
+## ca[ca < 0.05] <- 0.05 |
|
1650 |
+## ca[ca > 5] <- 5 |
|
1651 |
+## cb[cb < 0.05] <- 0.05 |
|
1652 |
+## cb[cb > 5] <- 5 |
|
1653 |
+## CA(object) <- ca |
|
1654 |
+## CB(object) <- cb |
|
1655 |
+## return(object) |
|
1656 |
+##} |
|
1657 |
+## |
|
1658 |
+####linear model parameters |
|
1659 |
+##lm.parameters <- function(object, batch){##cnOptions){ |
|
1660 |
+## fD <- fData(object) |
|
1661 |
+## ##batch <- object$batch |
|
1662 |
+## uplate <- unique(batch) |
|
1663 |
+## parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
1664 |
+## paste("tau2B", uplate, sep="_"), |
|
1665 |
+## paste("sig2A", uplate, sep="_"), |
|
1666 |
+## paste("sig2B", uplate, sep="_"), |
|
1667 |
+## paste("nuA", uplate, sep="_"), |
|
1668 |
+## paste("nuA.se", uplate, sep="_"), |
|
1669 |
+## paste("nuB", uplate, sep="_"), |
|
1670 |
+## paste("nuB.se", uplate, sep="_"), |
|
1671 |
+## paste("phiA", uplate, sep="_"), |
|
1672 |
+## paste("phiA.se", uplate, sep="_"), |
|
1673 |
+## paste("phiB", uplate, sep="_"), |
|
1674 |
+## paste("phiB.se", uplate, sep="_"), |
|
1675 |
+## paste("phiAX", uplate, sep="_"), |
|
1676 |
+## paste("phiBX", uplate, sep="_"), |
|
1677 |
+## paste("corr", uplate, sep="_"), |
|
1678 |
+## paste("corrA.BB", uplate, sep="_"), |
|
1679 |
+## paste("corrB.AA", uplate, sep="_")) |
|
1680 |
+## pMatrix <- data.frame(matrix(numeric(0), |
|
1681 |
+## nrow(object), |
|
1682 |
+## length(parameterNames)), |
|
1683 |
+## row.names=featureNames(object)) |
|
1684 |
+## colnames(pMatrix) <- parameterNames |
|
1685 |
+## fD2 <- cbind(fD, pMatrix) |
|
1686 |
+## new("AnnotatedDataFrame", data=fD2, |
|
1687 |
+## varMetadata=data.frame(labelDescription=colnames(fD2), |
|
1688 |
+## row.names=colnames(fD2))) |
|
1689 |
+##} |
|
1690 |
+## |
|
1691 |
+##nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
|
1692 |
+## batch <- unique(object$batch) |
|
1693 |
+## CHR <- unique(chromosome(object)) |
|
1694 |
+## goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){ |
|
1695 |
+## Ns <- tmp.objects[["Ns"]] |
|
1696 |
+## ##Ns <- get("Ns", envir) |
|
1697 |
+## flags <- getFlags(object, PHI.THR) |
|
1698 |
+## fewAA <- Ns[, "AA"] < nAA.THR |
|
1699 |
+## fewBB <- Ns[, "BB"] < nBB.THR |
|
1700 |
+## flagsA <- flags | fewAA |
|
1701 |
+## flagsB <- flags | fewBB |
|
1702 |
+## flags <- list(A=flagsA, B=flagsB) |
|
1703 |
+## return(flags) |
|
1704 |
+## } |
|
1705 |
+## nAA.THR <- cnOptions$nHOM.THR |
|
1706 |
+## nBB.THR <- cnOptions$nHOM.THR |
|
1707 |
+## PHI.THR <- cnOptions$PHI.THR |
|
1708 |
+## snpflags <- goodSnps(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR) |
|
1709 |
+## flagsA <- snpflags$A |
|
1710 |
+## flagsB <- snpflags$B |
|
1711 |
+#### if(all(flagsA) | all(flagsB)) stop("all snps are flagged") |
|
1712 |
+## nuA <- getParam(object, "nuA", batch) |
|
1713 |
+## nuB <- getParam(object, "nuB", batch) |
|
1714 |
+## phiA <- getParam(object, "phiA", batch) |
|
1715 |
+## phiB <- getParam(object, "phiB", batch) |
|
1716 |
+## sns <- sampleNames(object) |
|
1717 |
+## muA <- tmp.objects[["muA"]] |
|
1718 |
+## muB <- tmp.objects[["muB"]] |
|
1719 |
+## A <- A(object) |
|
1720 |
+## B <- B(object) |
|
1721 |
+#### CA <- CA(object) |
|
1722 |
+#### CB <- CB(object) |
|
1723 |
+## if(CHR == 23){ |
|
1724 |
+## phiAX <- getParam(object, "phiAX", batch) |
|
1725 |
+## phiBX <- getParam(object, "phiBX", batch) |
|
1726 |
+## } |
|
1727 |
+## ##--------------------------------------------------------------------------- |
|
1728 |
+## ## Train on unflagged SNPs |
|
1729 |
+## ##--------------------------------------------------------------------------- |
|
1730 |
+## ##Might be best to train using the X chromosome, since for the |
|
1731 |
+## ##X phi and nu have been adjusted for cross-hybridization |
|
1732 |
+## ##plateInd <- plate == uplate[p] |
|
1733 |
+## ##muA <- muA[!flagsA, p, c("A", "AA")] |
|
1734 |
+## ##muB <- muB[!flagsB, p, c("B", "BB")] |
|
1735 |
+## muA <- muA[!flagsA, "AA"] |
|
1736 |
+## muB <- muB[!flagsB, "BB"] |
|
1737 |
+## X <- cbind(1, log2(c(muA, muB))) |
|
1738 |
+## Y <- log2(c(phiA[!flagsA], phiB[!flagsB])) |
|
1739 |
+## if(nrow(X) > 5000){ |
|
1740 |
+## ix <- sample(1:nrow(X), 5000) |
|
1741 |
+## } else { |
|
1742 |
+## ix <- 1:nrow(X) |
|
1743 |
+## } |
|
1744 |
+## betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix])) |
|
1745 |
+## normal <- tmp.objects[["normal"]][!isSnp(object), ] |
|
1746 |
+## if(CHR == 23){ |
|
1747 |
+## ##normalNP <- envir[["normalNP"]] |
|
1748 |
+## ##normalNP <- normalNP[, plate==uplate[p]] |
|
1749 |
+## ##nuT <- envir[["nuT"]] |
|
1750 |
+## ##phiT <- envir[["phiT"]] |
|
1751 |
+## |
|
1752 |
+## ##cnvs <- envir[["cnvs"]] |
|
1753 |
+## ##loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
1754 |
+## ##cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
|
1755 |
+## ##cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ] |
|
1756 |
+## |
|
1757 |
+## ##For build Hg18 |
|
1758 |
+## ##http://genome.ucsc.edu/cgi-bin/hgGateway |
|
1759 |
+## ##pseudo-autosomal regions on X |
|
1760 |
+## ##chrX:1-2,709,520 and chrX:154584237-154913754, respectively |
|
1761 |
+## ##par:pseudo-autosomal regions |
|
1762 |
+## pseudoAR <- position(object) < 2709520 | (position(object) > 154584237 & position(object) < 154913754) |
|
1763 |
+## ##pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754) |
|
1764 |
+## ##in case some of the cnProbes are not annotated |
|
1765 |
+## pseudoAR[is.na(pseudoAR)] <- FALSE |
|
1766 |
+## pseudoAR <- pseudoAR[!isSnp(object)] |
|
1767 |
+## ##gender <- envir[["gender"]] |
|
1768 |
+## gender <- object$gender |
|
1769 |
+## obj1 <- object[!isSnp(object), ] |
|
1770 |
+## A.male <- A(obj1[, gender==1]) |
|
1771 |
+## mu1 <- rowMedians(A.male, na.rm=TRUE) |
|
1772 |
+## ##mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE) |
|
1773 |
+## ##mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE) |
|
1774 |
+## A.female <- A(obj1[, gender==2]) |
|
1775 |
+## mu2 <- rowMedians(A.female, na.rm=TRUE) |
|
1776 |
+## mus <- log2(cbind(mu1, mu2)) |
|
1777 |
+## X.men <- cbind(1, mus[, 1]) |
|
1778 |
+## X.fem <- cbind(1, mus[, 2]) |
|
1779 |
+## |
|
1780 |
+## Yhat1 <- as.numeric(X.men %*% betahat) |
|
1781 |
+## Yhat2 <- as.numeric(X.fem %*% betahat) |
|
1782 |
+## phi1 <- 2^(Yhat1) |
|
1783 |
+## phi2 <- 2^(Yhat2) |
|
1784 |
+## nu1 <- 2^(mus[, 1]) - phi1 |
|
1785 |
+## nu2 <- 2^(mus[, 2]) - 2*phi2 |
|
1786 |
+## |
|
1787 |
+## if(any(pseudoAR)){ |
|
1788 |
+## nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR] |
|
1789 |
+## } |
|
1790 |
+#### CT1 <- 1/phi1*(A.male-nu1) |
|
1791 |
+#### CT2 <- 1/phi2*(A.female-nu2) |
|
1792 |
+#### CA <- CA(obj1) |
|
1793 |
+#### CA[, gender==1] <- CT1 |
|
1794 |
+#### CA[, gender==2] <- CT2 |
|
1795 |
+#### CA(object)[!isSnp(object), ] <- CA |
|
1796 |
+## ##only using females to compute the variance |
|
1797 |
+## ##normalNP[, gender=="male"] <- NA |
|
1798 |
+## normal[, gender==1] <- NA |
|
1799 |
+## sig2A <- getParam(object, "sig2A", batch) |
|
1800 |
+## normal.f <- normal[, object$gender==2] |
|
1801 |
+## sig2A[!isSnp(object)] <- rowMAD(log2(A.female*normal.f), na.rm=TRUE)^2 |
|
1802 |
+## sig2A[!isSnp(object) & is.na(sig2A)] <- median(sig2A[!isSnp(object)], na.rm=TRUE) |
|
1803 |
+## ##sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
1804 |
+## object <- pr(object, "sig2A", batch, sig2A) |
|
1805 |
+## |
|
1806 |
+## nuA[!isSnp(object)] <- nu2 |
|
1807 |
+## phiA[!isSnp(object)] <- phi2 |
|
1808 |
+## |
|
1809 |
+## THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
1810 |
+## if(THR.NU.PHI){ |
|
1811 |
+## verbose <- cnOptions$verbose |
|
1812 |
+## ##Assign values to object |
|
1813 |
+## object <- pr(object, "nuA", batch, nuA) |
|
1814 |
+## object <- pr(object, "phiA", batch, phiA) |
|
1815 |
+## ##if(verbose) message("Thresholding nu and phi") |
|
1816 |
+## object <- thresholdModelParams(object, cnOptions) |
|
1817 |
+## } else { |
|
1818 |
+## object <- pr(object, "nuA", batch, nuA) |
|
1819 |
+## object <- pr(object, "phiA", batch, phiA) |
|
1820 |
+## } |
|
1821 |
+## } else { |
|
1822 |
+## A <- A(object)[!isSnp(object), ] |
|
1823 |
+## mus <- rowMedians(A * normal, na.rm=TRUE) |
|
1824 |
+## crosshyb <- max(median(muA) - median(mus), 0) |
|
1825 |
+## X <- cbind(1, log2(mus+crosshyb)) |
|
1826 |
+## logPhiT <- X %*% betahat |
|
1827 |
+## phiA[!isSnp(object)] <- 2^(logPhiT) |
|
1828 |
+## nuA[!isSnp(object)] <- mus-2*phiA[!isSnp(object)] |
|
1829 |
+## |
|
1830 |
+## THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
1831 |
+## if(THR.NU.PHI){ |
|
1832 |
+## verbose <- cnOptions$verbose |
|
1833 |
+## ##Assign values to object |
|
1834 |
+## object <- pr(object, "nuA", batch, nuA) |
|
1835 |
+## object <- pr(object, "phiA", batch, phiA) |
|
1836 |
+## ##if(verbose) message("Thresholding nu and phi") |
|
1837 |
+## object <- thresholdModelParams(object, cnOptions) |
|
1838 |
+## ##reassign values (now thresholded at MIN.NU and MIN.PHI |
|
1839 |
+## nuA <- getParam(object, "nuA", batch) |
|
1840 |
+## phiA <- getParam(object, "phiA", batch) |
|
1841 |
+## } |
|
1842 |
+## ##CA(object)[!isSnp(object), ] <- 1/phiA[!isSnp(object)]*(A - nuA[!isSnp(object)]) |
|
1843 |
+## sig2A <- getParam(object, "sig2A", batch) |
|
1844 |
+## sig2A[!isSnp(object)] <- rowMAD(log2(A*normal), na.rm=TRUE)^2 |
|
1845 |
+## object <- pr(object, "sig2A", batch, sig2A) |
|
1846 |
+## ##added |
|
1847 |
+## object <- pr(object, "nuA", batch, nuA) |
|
1848 |
+## object <- pr(object, "phiA", batch, phiA) |
|
1849 |
+## } |
|
1850 |
+## return(object) |
|
1851 |
+##} |
|
1852 |
+## |
|
1853 | 1853 |
##sufficient statistics on the intensity scale |
1854 |
-withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
|
1855 |
- normal <- tmp.objects[["normal"]] |
|
1856 |
- ## muA, muB: robust estimates of the within-genotype center (intensity scale) |
|
1857 |
- muA <- tmp.objects[["muA"]] |
|
1858 |
- muB <- tmp.objects[["muB"]] |
|
1859 |
- ## vA, vB: robust estimates of the within-genotype variance (intensity scale) |
|
1860 |
- vA <- tmp.objects[["vA"]] |
|
1861 |
- vB <- tmp.objects[["vB"]] |
|
1862 |
- Ns <- tmp.objects[["Ns"]] |
|
1863 |
- G <- snpCall(object) |
|
1864 |
- GT.CONF.THR <- cnOptions$GT.CONF.THR |
|
1865 |
- CHR <- unique(chromosome(object)) |
|
1866 |
- A <- A(object) |
|
1867 |
- B <- B(object) |
|
1868 |
-## highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR |
|
1869 |
- xx <- snpCallProbability(object) |
|
1870 |
- highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1871 |
- ##highConf <- confs(object) > GT.CONF.THR |
|
1872 |
- ##highConf <- highConf > GT.CONF.THR |
|
1873 |
- if(CHR == 23){ |
|
1874 |
- gender <- object$gender |
|
1875 |
-## gender <- envir[["gender"]] |
|
1876 |
- IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE) |
|
1877 |
-## IX <- IX == "female" |
|
1878 |
- IX <- IX == 2 ##2=female, 1=male |
|
1879 |
- } else IX <- matrix(TRUE, nrow(G), ncol(G)) |
|
1880 |
- index <- GT.B <- GT.A <- vector("list", 3) |
|
1881 |
- names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
|
1882 |
- ##-------------------------------------------------- |
|
1883 |
- ##within-genotype sufficient statistics |
|
1884 |
- ##-------------------------------------------------- |
|
1885 |
- ##GT.B <- GT.A <- list() |
|
1886 |
- snpIndicator <- matrix(isSnp(object), nrow(object), ncol(object)) ##RS: added |
|
1887 |
- for(j in 1:3){ |
|
1888 |
- GT <- G==j & highConf & IX & snpIndicator |
|
1889 |
- GT <- GT * normal |
|
1890 |
- Ns[, j+2] <- rowSums(GT, na.rm=TRUE) |
|
1891 |
- GT[GT == FALSE] <- NA |
|
1892 |
- GT.A[[j]] <- GT*A |
|
1893 |
- GT.B[[j]] <- GT*B |
|
1894 |
- index[[j]] <- which(Ns[, j+2] > 0 & isSnp(object)) ##RS: added |
|
1895 |
- muA[, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE) |
|
1896 |
- muB[, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE) |
|
1897 |
- vA[, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE) |
|
1898 |
- vB[, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE) |
|
1899 |
- |
|
1900 |
- ##Shrink towards the typical variance |
|
1901 |
- DF <- Ns[, j+2]-1 |
|
1902 |
- DF[DF < 1] <- 1 |
|
1903 |
- v0A <- median(vA[, j+2], na.rm=TRUE) |
|
1904 |
- v0B <- median(vB[, j+2], na.rm=TRUE) |
|
1905 |
- if(v0A == 0) v0A <- NA |
|
1906 |
- if(v0B == 0) v0B <- NA |
|
1907 |
- DF.PRIOR <- cnOptions$DF.PRIOR |
|
1908 |
- vA[, j+2] <- (vA[, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1909 |
- vA[is.na(vA[, j+2]), j+2] <- v0A |
|
1910 |
- vB[, j+2] <- (vB[, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1911 |
- vB[is.na(vB[, j+2]), j+2] <- v0B |
|
1912 |
- } |
|
1913 |
- if(CHR == 23){ |
|
1914 |
- k <- 1 |
|
1915 |
- for(j in c(1,3)){ |
|
1916 |
- GT <- G==j & highConf & !IX |
|
1917 |
- Ns[, k] <- rowSums(GT) |
|
1918 |
- GT[GT == FALSE] <- NA |
|
1919 |
- muA[, k] <- rowMedians(GT*A, na.rm=TRUE) |
|
1920 |
- muB[, k] <- rowMedians(GT*B, na.rm=TRUE) |
|
1921 |
- vA[, k] <- rowMAD(GT*A, na.rm=TRUE) |
|
1922 |
- vB[, k] <- rowMAD(GT*B, na.rm=TRUE) |
|
1923 |
- |
|
1924 |
- DF <- Ns[, k]-1 |
|
1925 |
- DF[DF < 1] <- 1 |
|
1926 |
- v0A <- median(vA[, k], na.rm=TRUE) |
|
1927 |
- v0B <- median(vB[, k], na.rm=TRUE) |
|
1928 |
- vA[, k] <- (vA[, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1929 |
- vA[is.na(vA[, k]), k] <- v0A |
|
1930 |
- vB[, k] <- (vB[, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1931 |
- vB[is.na(vB[, k]), k] <- v0B |
|
1932 |
- k <- k+1 |
|
1933 |
- } |
|
1934 |
- } |
|
1935 |
- tmp.objects[["Ns"]] <- Ns |
|
1936 |
- tmp.objects[["vA"]] <- vA |
|
1937 |
- tmp.objects[["vB"]] <- vB |
|
1938 |
- tmp.objects[["muA"]] <- muA |
|
1939 |
- tmp.objects[["muB"]] <- muB |
|
1940 |
- tmp.objects$index <- index |
|
1941 |
- tmp.objects$GT.A <- GT.A |
|
1942 |
- tmp.objects$GT.B <- GT.B |
|
1943 |
- return(tmp.objects) |
|
1944 |
-} |
|
1945 |
- |
|
1946 |
-imputeCenter <- function(muA, muB, index.complete, index.missing){ |
|
1947 |
- index <- index.missing |
|
1948 |
- mnA <- muA |
|
1949 |
- mnB <- muB |
|
1950 |
- for(j in 1:3){ |
|
1951 |
- if(length(index[[j]]) == 0) next() |
|
1952 |
- X <- cbind(1, mnA[index.complete, -j, drop=FALSE], mnB[index.complete, -j, drop=FALSE]) |
|
1953 |
- Y <- cbind(mnA[index.complete, j], mnB[index.complete, j]) |
|
1954 |
- betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
1955 |
- X <- cbind(1, mnA[index[[j]], -j, drop=FALSE], mnB[index[[j]], -j, drop=FALSE]) |
|
1956 |
- mus <- X %*% betahat |
|
1957 |
- muA[index[[j]], j] <- mus[, 1] |
|
1958 |
- muB[index[[j]], j] <- mus[, 2] |
|
1959 |
- } |
|
1960 |
- list(muA, muB) |
|
1961 |
-} |
|
1962 |
- |
|
1963 |
-imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){ |
|
1964 |
- index1 <- which(Ns[, 1] == 0 & Ns[, 3] > MIN.OBS) |
|
1965 |
- if(length(index1) > 0){ |
|
1966 |
- X <- cbind(1, muA[index.complete[[1]], 3], muB[index.complete[[1]], 3]) |
|
1967 |
- Y <- cbind(1, muA[index.complete[[1]], 1], muB[index.complete[[1]], 1]) |
|
1968 |
- betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
1969 |
- ##now with the incomplete SNPs |
|
1970 |
- X <- cbind(1, muA[index1, 3], muB[index1, 3]) |
|
1971 |
- mus <- X %*% betahat |
|
1972 |
- muA[index1, 1] <- mus[, 2] |
|
1973 |
- muB[index1, 1] <- mus[, 3] |
|
1974 |
- } |
|
1975 |
- index1 <- which(Ns[, 3] == 0) |
|
1976 |
- if(length(index1) > 0){ |
|
1977 |
- X <- cbind(1, muA[index.complete[[2]], 1], muB[index.complete[[2]], 1]) |
|
1978 |
- Y <- cbind(1, muA[index.complete[[2]], 3], muB[index.complete[[2]], 3]) |
|
1979 |
- betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
1980 |
- ##now with the incomplete SNPs |
|
1981 |
- X <- cbind(1, muA[index1, 1], muB[index1, 1]) |
|
1982 |
- mus <- X %*% betahat |
|
1983 |
- muA[index1, 3] <- mus[, 2] |
|
1984 |
- muB[index1, 3] <- mus[, 3] |
|
1985 |
- } |
|
1986 |
- list(muA, muB) |
|
1987 |
-} |
|
1854 |
+##withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
|
1855 |
+## normal <- tmp.objects[["normal"]] |
|
1856 |
+## ## muA, muB: robust estimates of the within-genotype center (intensity scale) |
|
1857 |
+## muA <- tmp.objects[["muA"]] |
|
1858 |
+## muB <- tmp.objects[["muB"]] |
|
1859 |
+## ## vA, vB: robust estimates of the within-genotype variance (intensity scale) |
|
1860 |
+## vA <- tmp.objects[["vA"]] |
|
1861 |
+## vB <- tmp.objects[["vB"]] |
|
1862 |
+## Ns <- tmp.objects[["Ns"]] |
|
1863 |
+## G <- snpCall(object) |
|
1864 |
+## GT.CONF.THR <- cnOptions$GT.CONF.THR |
|
1865 |
+## CHR <- unique(chromosome(object)) |
|
1866 |
+## A <- A(object) |
|
1867 |
+## B <- B(object) |
|
1868 |
+#### highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR |
|
1869 |
+## xx <- snpCallProbability(object) |
|
1870 |
+## highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1871 |
+## ##highConf <- confs(object) > GT.CONF.THR |
|
1872 |
+## ##highConf <- highConf > GT.CONF.THR |
|
1873 |
+## if(CHR == 23){ |
|
1874 |
+## gender <- object$gender |
|
1875 |
+#### gender <- envir[["gender"]] |
|
1876 |
+## IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE) |
|
1877 |
+#### IX <- IX == "female" |
|
1878 |
+## IX <- IX == 2 ##2=female, 1=male |
|
1879 |
+## } else IX <- matrix(TRUE, nrow(G), ncol(G)) |
|
1880 |
+## index <- GT.B <- GT.A <- vector("list", 3) |
|
1881 |
+## names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
|
1882 |
+## ##-------------------------------------------------- |
|
1883 |
+## ##within-genotype sufficient statistics |
|
1884 |
+## ##-------------------------------------------------- |
|
1885 |
+## ##GT.B <- GT.A <- list() |
|
1886 |
+## snpIndicator <- matrix(isSnp(object), nrow(object), ncol(object)) ##RS: added |
|
1887 |
+## for(j in 1:3){ |
|
1888 |
+## GT <- G==j & highConf & IX & snpIndicator |
|
1889 |
+## GT <- GT * normal |
|
1890 |
+## Ns[, j+2] <- rowSums(GT, na.rm=TRUE) |
|
1891 |
+## GT[GT == FALSE] <- NA |
|
1892 |
+## GT.A[[j]] <- GT*A |
|
1893 |
+## GT.B[[j]] <- GT*B |
|
1894 |
+## index[[j]] <- which(Ns[, j+2] > 0 & isSnp(object)) ##RS: added |
|
1895 |
+## muA[, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE) |
|
1896 |
+## muB[, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE) |
|
1897 |
+## vA[, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE) |
|
1898 |
+## vB[, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE) |
|
1899 |
+## |
|
1900 |
+## ##Shrink towards the typical variance |
|
1901 |
+## DF <- Ns[, j+2]-1 |
|
1902 |
+## DF[DF < 1] <- 1 |
|
1903 |
+## v0A <- median(vA[, j+2], na.rm=TRUE) |
|
1904 |
+## v0B <- median(vB[, j+2], na.rm=TRUE) |
|
1905 |
+## if(v0A == 0) v0A <- NA |
|
1906 |
+## if(v0B == 0) v0B <- NA |
|
1907 |
+## DF.PRIOR <- cnOptions$DF.PRIOR |
|
1908 |
+## vA[, j+2] <- (vA[, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1909 |
+## vA[is.na(vA[, j+2]), j+2] <- v0A |
|
1910 |
+## vB[, j+2] <- (vB[, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1911 |
+## vB[is.na(vB[, j+2]), j+2] <- v0B |
|
1912 |
+## } |
|
1913 |
+## if(CHR == 23){ |
|
1914 |
+## k <- 1 |
|
1915 |
+## for(j in c(1,3)){ |
|
1916 |
+## GT <- G==j & highConf & !IX |
|
1917 |
+## Ns[, k] <- rowSums(GT) |
|
1918 |
+## GT[GT == FALSE] <- NA |
|
1919 |
+## muA[, k] <- rowMedians(GT*A, na.rm=TRUE) |
|
1920 |
+## muB[, k] <- rowMedians(GT*B, na.rm=TRUE) |
|
1921 |
+## vA[, k] <- rowMAD(GT*A, na.rm=TRUE) |
|
1922 |
+## vB[, k] <- rowMAD(GT*B, na.rm=TRUE) |
|
1923 |
+## |
|
1924 |
+## DF <- Ns[, k]-1 |
|
1925 |
+## DF[DF < 1] <- 1 |
|
1926 |
+## v0A <- median(vA[, k], na.rm=TRUE) |
|
1927 |
+## v0B <- median(vB[, k], na.rm=TRUE) |
|
1928 |
+## vA[, k] <- (vA[, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1929 |
+## vA[is.na(vA[, k]), k] <- v0A |
|
1930 |
+## vB[, k] <- (vB[, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1931 |
+## vB[is.na(vB[, k]), k] <- v0B |
|
1932 |
+## k <- k+1 |
|
1933 |
+## } |
|
1934 |
+## } |
|
1935 |
+## tmp.objects[["Ns"]] <- Ns |
|
1936 |
+## tmp.objects[["vA"]] <- vA |
|
1937 |
+## tmp.objects[["vB"]] <- vB |
|
1938 |
+## tmp.objects[["muA"]] <- muA |
|
1939 |
+## tmp.objects[["muB"]] <- muB |
|
1940 |
+## tmp.objects$index <- index |
|
1941 |
+## tmp.objects$GT.A <- GT.A |
|
1942 |
+## tmp.objects$GT.B <- GT.B |
|
1943 |
+## return(tmp.objects) |
|
1944 |
+##} |
|
1988 | 1945 |
|
1989 |
-oneBatch <- function(object, cnOptions, tmp.objects){ |
|
1990 |
- muA <- tmp.objects[["muA"]] |
|
1991 |
- muB <- tmp.objects[["muB"]] |
|
1992 |
- Ns <- tmp.objects[["Ns"]] |
|
1993 |
- CHR <- unique(chromosome(object)) |
|
1994 |
- ##--------------------------------------------------------------------------- |
|
1995 |
- ## Impute sufficient statistics for unobserved genotypes (plate-specific) |
|
1996 |
- ##--------------------------------------------------------------------------- |
|
1997 |
- MIN.OBS <- cnOptions$MIN.OBS |
|
1998 |
- index.AA <- which(Ns[, "AA"] >= MIN.OBS) |
|
1999 |
- index.AB <- which(Ns[, "AB"] >= MIN.OBS) |
|
2000 |
- index.BB <- which(Ns[, "BB"] >= MIN.OBS) |
|
2001 |
- correct.orderA <- muA[, "AA"] > muA[, "BB"] |
|
2002 |
- correct.orderB <- muB[, "BB"] > muB[, "AA"] |
|
2003 |
- ##For chr X, this will ignore the males |
|
2004 |
- nobs <- rowSums(Ns[, 3:5] >= MIN.OBS, na.rm=TRUE) == 3 |
|
2005 |
- index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here |
|
2006 |
- size <- min(5000, length(index.complete)) |
|
2007 |
- if(size == 5000) index.complete <- sample(index.complete, 5000) |
|
2008 |
- if(length(index.complete) < 200){ |
|
2009 |
- stop("fewer than 200 snps pass criteria for predicting the sufficient statistics") |
|
2010 |
- } |
|
2011 |
- index <- tmp.objects[["index"]] |
|
2012 |
- index[[1]] <- which(Ns[, "AA"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS)) |
|
2013 |
- index[[2]] <- which(Ns[, "AB"] == 0 & (Ns[, "AA"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS)) |
|
2014 |
- index[[3]] <- which(Ns[, "BB"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "AA"] >= MIN.OBS)) |
|
2015 |
- mnA <- muA[, 3:5] |
|
2016 |
- mnB <- muB[, 3:5] |
|
2017 |
- for(j in 1:3){ |
|
2018 |
- if(length(index[[j]]) == 0) next() |
|
2019 |
- X <- cbind(1, mnA[index.complete, -j, drop=FALSE], mnB[index.complete, -j, drop=FALSE]) |
|
2020 |
- Y <- cbind(mnA[index.complete, j], mnB[index.complete, j]) |
|
2021 |
- betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
2022 |
- X <- cbind(1, mnA[index[[j]], -j, drop=FALSE], mnB[index[[j]], -j, drop=FALSE]) |
|
2023 |
- mus <- X %*% betahat |
|
2024 |
- muA[index[[j]], j+2] <- mus[, 1] |
|
2025 |
- muB[index[[j]], j+2] <- mus[, 2] |
|
2026 |
- } |
|
2027 |
- nobsA <- Ns[, "A"] > MIN.OBS |
|
2028 |
- nobsB <- Ns[, "B"] > MIN.OBS |
|
2029 |
- notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"])) |
|
2030 |
- complete <- list() |
|
2031 |
- complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here |
|
2032 |
- complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here |
|
2033 |
- size <- min(5000, length(complete[[1]])) |
|
2034 |
- if(size > 5000) complete <- lapply(complete, function(x) sample(x, size)) |
|
2035 |
- if(CHR == 23){ |
|
2036 |
- index <- list() |
|
2037 |
- index[[1]] <- which(Ns[, "A"] == 0) |
|
2038 |
- index[[2]] <- which(Ns[, "B"] == 0) |
|
2039 |
- cols <- 2:1 |
|
2040 |
- for(j in 1:2){ |
|
2041 |
- if(length(index[[j]]) == 0) next() |
|
2042 |
- X <- cbind(1, muA[complete[[j]], cols[j]], muB[complete[[j]], cols[j]]) |
|
2043 |
- Y <- cbind(muA[complete[[j]], j], muB[complete[[j]], j]) |
|
2044 |
- betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
2045 |
- X <- cbind(1, muA[index[[j]], cols[j]], muB[index[[j]], cols[j]]) |
|
2046 |
- mus <- X %*% betahat |
|
2047 |
- muA[index[[j]], j] <- mus[, 1] |
|
2048 |
- muB[index[[j]], j] <- mus[, 2] |
|
2049 |
- } |
|
2050 |
- } |
|
2051 |
- ##missing two genotypes |
|
2052 |
- noAA <- Ns[, "AA"] < MIN.OBS |
|
2053 |
- noAB <- Ns[, "AB"] < MIN.OBS |
|
2054 |
- noBB <- Ns[, "BB"] < MIN.OBS |
|
2055 |
- index[[1]] <- noAA & noAB |
|
2056 |
- index[[2]] <- noBB & noAB |
|
2057 |
- index[[3]] <- noAA & noBB |
|
2058 |
-## snpflags <- envir[["snpflags"]] |
|
2059 |
- snpflags <- index[[1]] | index[[2]] | index[[3]] |
|
2060 |
-## snpflags[, p] <- index[[1]] | index[[2]] | index[[3]] |
|
2061 |
- |
|
2062 |
- ##--------------------------------------------------------------------------- |
|
2063 |
- ## Two genotype clusters not observed -- would sequence help? (didn't seem to) |
|
2064 |
- ## 1. extract index of complete data |
|
2065 |
- ## 2. Regress mu_missing ~ sequence + mu_observed |
|
2066 |
- ## 3. solve for nu assuming the median is 2 |
|
2067 |
- ##--------------------------------------------------------------------------- |
|
2068 |
- cols <- c(3, 1, 2) |
|
2069 |