git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@43029 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,2071 @@ |
1 |
+rowCovs <- function(x, y, ...){ |
|
2 |
+ notna <- !is.na(x) |
|
3 |
+ N <- rowSums(notna) |
|
4 |
+ x <- suppressWarnings(log2(x)) |
|
5 |
+ if(!missing(y)) y <- suppressWarnings(log2(y)) |
|
6 |
+ return(rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1)) |
|
7 |
+} |
|
8 |
+ |
|
9 |
+rowMAD <- function(x, y, ...){ |
|
10 |
+ notna <- !is.na(x) |
|
11 |
+ mad <- 1.4826*rowMedians(abs(x-rowMedians(x, ...)), ...) |
|
12 |
+ return(mad) |
|
13 |
+} |
|
14 |
+ |
|
15 |
+rowCors <- function(x, y, ...){ |
|
16 |
+ N <- rowSums(!is.na(x)) |
|
17 |
+ x <- suppressWarnings(log2(x)) |
|
18 |
+ y <- suppressWarnings(log2(y)) |
|
19 |
+ sd.x <- rowSds(x, ...) |
|
20 |
+ sd.y <- rowSds(y, ...) |
|
21 |
+ covar <- rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1) |
|
22 |
+ return(covar/(sd.x*sd.y)) |
|
23 |
+} |
|
24 |
+ |
|
25 |
+generateX <- function(w, X) as.numeric(diag(w) %*% X) |
|
26 |
+generateIXTX <- function(x, nrow=3) { |
|
27 |
+ X <- matrix(x, nrow=nrow) |
|
28 |
+ XTX <- crossprod(X) |
|
29 |
+ solve(XTX) |
|
30 |
+} |
|
31 |
+ |
|
32 |
+ |
|
33 |
+nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){ |
|
34 |
+ I <- isSnp(object) |
|
35 |
+ object.snp <- object[I, ] |
|
36 |
+ Ystar <- Ystar[I, ] |
|
37 |
+ complete <- rowSums(is.na(W)) == 0 & I |
|
38 |
+ W <- W[I, ] |
|
39 |
+ if(any(!is.finite(W))){## | any(!is.finite(V))){ |
|
40 |
+ i <- which(rowSums(!is.finite(W)) > 0) |
|
41 |
+ ##browser() |
|
42 |
+ stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ") |
|
43 |
+ } |
|
44 |
+ Ns <- tmp.objects[["Ns"]] |
|
45 |
+ Ns <- Ns[I, ] |
|
46 |
+ |
|
47 |
+ CHR <- unique(chromosome(object)) |
|
48 |
+ batch <- unique(object$batch) |
|
49 |
+ nuA <- getParam(object, "nuA", batch) |
|
50 |
+ nuB <- getParam(object, "nuB", batch) |
|
51 |
+ nuA.se <- getParam(object, "nuA.se", batch) |
|
52 |
+ nuB.se <- getParam(object, "nuB.se", batch) |
|
53 |
+ phiA <- getParam(object, "phiA", batch) |
|
54 |
+ phiB <- getParam(object, "phiB", batch) |
|
55 |
+ phiA.se <- getParam(object, "phiA.se", batch) |
|
56 |
+ phiB.se <- getParam(object, "phiB.se", batch) |
|
57 |
+ if(CHR==23){ |
|
58 |
+ phiAX <- getParam(object, "phiAX", batch) |
|
59 |
+ phiBX <- getParam(object, "phiBX", batch) |
|
60 |
+ } |
|
61 |
+ NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05 |
|
62 |
+ if(missing(allele)) stop("must specify allele") |
|
63 |
+ if(CHR == 23){ |
|
64 |
+ ##Design matrix for X chromosome depends on whether there was a sufficient number of AB genotypes |
|
65 |
+ if(length(grep("AB", colnames(W))) > 0){ |
|
66 |
+ if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2)) |
|
67 |
+ if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0)) |
|
68 |
+ } else{ |
|
69 |
+ if(allele == "A") X <- cbind(1, c(1, 0, 2, 0), c(0, 1, 0, 2)) |
|
70 |
+ if(allele == "B") X <- cbind(1, c(0, 1, 0, 2), c(1, 0, 2, 0)) |
|
71 |
+ } |
|
72 |
+ } else {##autosome |
|
73 |
+ if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2) |
|
74 |
+ if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous |
|
75 |
+ } |
|
76 |
+ |
|
77 |
+ ##How to quickly generate Xstar, Xstar = diag(W) %*% X |
|
78 |
+ Xstar <- apply(W, 1, generateX, X) |
|
79 |
+ IXTX <- apply(Xstar, 2, generateIXTX, nrow=nrow(X)) |
|
80 |
+ ##as.numeric(diag(W[1, ]) %*% X) |
|
81 |
+ if(CHR == 23){ |
|
82 |
+ betahat <- matrix(NA, 3, nrow(Ystar)) |
|
83 |
+ ses <- matrix(NA, 3, nrow(Ystar)) |
|
84 |
+ } else{ |
|
85 |
+ betahat <- matrix(NA, 2, nrow(Ystar)) |
|
86 |
+ ses <- matrix(NA, 2, nrow(Ystar)) |
|
87 |
+ } |
|
88 |
+ for(i in 1:nrow(Ystar)){ |
|
89 |
+ betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ])) |
|
90 |
+ ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2) |
|
91 |
+ ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr)) |
|
92 |
+ } |
|
93 |
+ if(allele == "A"){ |
|
94 |
+ nuA[complete] <- betahat[1, ] |
|
95 |
+ phiA[complete] <- betahat[2, ] |
|
96 |
+ nuA.se[complete] <- ses[1, ] |
|
97 |
+ phiA.se[complete] <- ses[2, ] |
|
98 |
+ object <- pr(object, "nuA", batch, nuA) |
|
99 |
+ object <- pr(object, "nuA.se", batch, nuA.se) |
|
100 |
+ object <- pr(object, "phiA", batch, phiA) |
|
101 |
+ object <- pr(object, "phiA.se", batch, phiA.se) |
|
102 |
+ if(CHR == 23){ |
|
103 |
+ phiAX[complete] <- betahat[3, ] |
|
104 |
+ object <- pr(object, "phiAX", batch, phiAX) |
|
105 |
+ } |
|
106 |
+ } |
|
107 |
+ if(allele=="B"){ |
|
108 |
+ nuB[complete] <- betahat[1, ] |
|
109 |
+ phiB[complete] <- betahat[2, ] |
|
110 |
+ nuB.se[complete] <- ses[1, ] |
|
111 |
+ phiB.se[complete] <- ses[2, ] |
|
112 |
+ object <- pr(object, "nuB", batch, nuB) |
|
113 |
+ object <- pr(object, "nuB.se", batch, nuB.se) |
|
114 |
+ object <- pr(object, "phiB", batch, phiB) |
|
115 |
+ object <- pr(object, "phiB.se", batch, phiB.se) |
|
116 |
+ if(CHR == 23){ |
|
117 |
+ phiBX[complete] <- betahat[3, ] |
|
118 |
+ object <- pr(object, "phiBX", batch, phiBX) |
|
119 |
+ } |
|
120 |
+ } |
|
121 |
+ THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
122 |
+ if(THR.NU.PHI){ |
|
123 |
+ verbose <- cnOptions$verbose |
|
124 |
+ if(verbose) message("Thresholding nu and phi") |
|
125 |
+ object <- thresholdModelParams(object, cnOptions) |
|
126 |
+ } |
|
127 |
+ return(object) |
|
128 |
+} |
|
129 |
+ |
|
130 |
+celDates <- function(celfiles){ |
|
131 |
+ if(!all(file.exists(celfiles))) stop("1 or more cel file does not exist") |
|
132 |
+ celdates <- vector("character", length(celfiles)) |
|
133 |
+ celtimes <- vector("character", length(celfiles)) |
|
134 |
+ for(i in seq(along=celfiles)){ |
|
135 |
+ if(i %% 100 == 0) cat(".") |
|
136 |
+ tmp <- read.celfile.header(celfiles[i], info="full")$DatHeader |
|
137 |
+ tmp <- strsplit(tmp, "\ +") |
|
138 |
+ celdates[i] <- tmp[[1]][6] |
|
139 |
+ celtimes[i] <- tmp[[1]][7] |
|
140 |
+ } |
|
141 |
+ tmp <- paste(celdates, celtimes) |
|
142 |
+ celdts <- strptime(tmp, "%m/%d/%y %H:%M:%S") |
|
143 |
+ return(celdts) |
|
144 |
+} |
|
145 |
+ |
|
146 |
+predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
|
147 |
+ pkgname <- paste(cdfName, "Crlmm", sep="") |
|
148 |
+ path <- system.file("extdata", package=pkgname) |
|
149 |
+ loader("23index.rda", .crlmmPkgEnv, pkgname) |
|
150 |
+ index <- getVarInEnv("index") |
|
151 |
+ ##load(file.path(path, "23index.rda")) |
|
152 |
+ XIndex <- index[[1]] |
|
153 |
+ if(class(res) != "ABset"){ |
|
154 |
+ A <- res$A |
|
155 |
+ B <- res$B |
|
156 |
+ } else{ |
|
157 |
+ A <- res@assayData[["A"]] |
|
158 |
+ B <- res@assayData[["B"]] |
|
159 |
+ } |
|
160 |
+ tmp <- which(rowSums(is.na(A)) > 0) |
|
161 |
+ XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median, na.rm=TRUE)/2 |
|
162 |
+ SNR <- res$SNR |
|
163 |
+ if(sum(SNR>SNRMin)==1){ |
|
164 |
+ gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
|
165 |
+ }else{ |
|
166 |
+ gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]] |
|
167 |
+ } |
|
168 |
+ return(gender) |
|
169 |
+} |
|
170 |
+ |
|
171 |
+ |
|
172 |
+combineIntensities <- function(res, cnrmaResult, cdfName){ |
|
173 |
+ rownames(res$B) <- rownames(res$A) <- res$gns |
|
174 |
+ colnames(res$B) <- colnames(res$A) <- res$sns |
|
175 |
+ if(!is.null(cnrmaResult)){ |
|
176 |
+ NP <- cnrmaResult$NP |
|
177 |
+ blank <- matrix(NA, nrow(NP), ncol(NP)) |
|
178 |
+ dimnames(blank) <- dimnames(NP) |
|
179 |
+ A <- rbind(res$A, NP) |
|
180 |
+ B <- rbind(res$B, blank) |
|
181 |
+ } else { |
|
182 |
+ A <- res$A |
|
183 |
+ B <- res$B |
|
184 |
+ } |
|
185 |
+ ABset <- new("SnpQSet", |
|
186 |
+ senseThetaA=A, |
|
187 |
+ senseThetaB=B, |
|
188 |
+ annotation=cdfName) |
|
189 |
+ return(ABset) |
|
190 |
+} |
|
191 |
+ |
|
192 |
+##harmonizeSnpSet <- function(callSet, ABset, cdfName){ |
|
193 |
+## blank <- matrix(NA, length(cnNames(ABset, cdfName)), ncol(ABset)) |
|
194 |
+## rownames(blank) <- cnNames(ABset, cdfName) |
|
195 |
+## colnames(blank) <- sampleNames(ABset) |
|
196 |
+## crlmmCalls <- rbind(calls(callSet), blank) |
|
197 |
+## crlmmConf <- rbind(confs(callSet), blank) |
|
198 |
+## fD <- as.matrix(fData(callSet)) |
|
199 |
+## fD2 <- matrix(NA, nrow(blank), ncol(fD)) |
|
200 |
+## rownames(fD2) <- rownames(blank) |
|
201 |
+## fD <- rbind(fD, fD2) |
|
202 |
+## aD <- assayDataNew("lockedEnvironment", |
|
203 |
+## calls=crlmmCalls, |
|
204 |
+## callProbability=crlmmConf) |
|
205 |
+## ##Make callSet the same dimension as ABset |
|
206 |
+## fD <- new("AnnotatedDataFrame", |
|
207 |
+## data=data.frame(fD), |
|
208 |
+## varMetadata=fvarMetadata(callSet)) |
|
209 |
+## callSet <- new("SnpSet", |
|
210 |
+## call=crlmmCalls, |
|
211 |
+## callProbability=crlmmConf, |
|
212 |
+## featureData=fD, |
|
213 |
+## phenoData=phenoData(callSet), |
|
214 |
+## protocolData=protocolData(ABset), |
|
215 |
+## annotation=annotation(ABset)) |
|
216 |
+## stopifnot(all.equal(dimnames(callSet), dimnames(ABset))) |
|
217 |
+## callSet |
|
218 |
+##} |
|
219 |
+ |
|
220 |
+harmonizeDimnamesTo <- function(object1, object2){ |
|
221 |
+ #object2 should be a subset of object 1 |
|
222 |
+ object2 <- object2[featureNames(object2) %in% featureNames(object1), ] |
|
223 |
+ object1 <- object1[match(featureNames(object2), featureNames(object1)), ] |
|
224 |
+ object1 <- object1[, match(sampleNames(object2), sampleNames(object1))] |
|
225 |
+ stopifnot(all.equal(featureNames(object1), featureNames(object2))) |
|
226 |
+ stopifnot(all.equal(sampleNames(object1), sampleNames(object2))) |
|
227 |
+ return(object1) |
|
228 |
+} |
|
229 |
+ |
|
230 |
+crlmmCopynumber <- function(filenames, cnOptions, ...){ |
|
231 |
+ crlmmWrapper(filenames, cnOptions, ...) |
|
232 |
+} |
|
233 |
+ |
|
234 |
+crlmmWrapper <- function(filenames, cnOptions, ...){ |
|
235 |
+ cdfName <- cnOptions[["cdfName"]] |
|
236 |
+ load.it <- cnOptions[["load.it"]] |
|
237 |
+ save.it <- cnOptions[["save.it"]] |
|
238 |
+ splitByChr <- cnOptions[["splitByChr"]] |
|
239 |
+ crlmmFile <- cnOptions[["crlmmFile"]] |
|
240 |
+ intensityFile=cnOptions[["intensityFile"]] |
|
241 |
+ rgFile=cnOptions[["rgFile"]] |
|
242 |
+ use.ff=cnOptions[["use.ff"]] |
|
243 |
+ outdir <- cnOptions[["outdir"]] |
|
244 |
+ tmpdir <- cnOptions[["tmpdir"]] |
|
245 |
+ |
|
246 |
+ if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required. See crlmm:::validCdfNames()") |
|
247 |
+ platform <- whichPlatform(cdfName) |
|
248 |
+ if(!(platform %in% c("affymetrix", "illumina"))){ |
|
249 |
+ stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.") |
|
250 |
+ } else { |
|
251 |
+ message("Checking whether annotation package for the ", platform, " platform is available") |
|
252 |
+ if(!isValidCdfName(cdfName)){ |
|
253 |
+ cat("FALSE\n") |
|
254 |
+ stop(cdfName, " is not a valid entry. See crlmm:::validCdfNames(platform)") |
|
255 |
+ } else cat("TRUE\n") |
|
256 |
+ } |
|
257 |
+ if(missing(intensityFile)) stop("must specify 'intensityFile'.") |
|
258 |
+ if(missing(crlmmFile)) stop("must specify 'crlmmFile'.") |
|
259 |
+ if(platform == "illumina"){ |
|
260 |
+ if(missing(rgFile)){ |
|
261 |
+ ##stop("must specify 'rgFile'.") |
|
262 |
+ rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
263 |
+ message("rgFile not specified. Using ", rgFile) |
|
264 |
+ } |
|
265 |
+ if(!load.it){ |
|
266 |
+ RG <- readIdatFiles(...) |
|
267 |
+ if(save.it) save(RG, file=rgFile) |
|
268 |
+ } |
|
269 |
+ if(load.it & !file.exists(rgFile)){ |
|
270 |
+ message("load.it is TRUE, bug rgFile not present. Attempting to read the idatFiles.") |
|
271 |
+ RG <- readIdatFiles(...) |
|
272 |
+ if(save.it) save(RG, file=rgFile) |
|
273 |
+ } |
|
274 |
+ if(load.it & file.exists(rgFile)){ |
|
275 |
+ message("Loading RG file") |
|
276 |
+ load(rgFile) |
|
277 |
+ RG <- get("RG") |
|
278 |
+ } |
|
279 |
+ } |
|
280 |
+ if(!(file.exists(dirname(crlmmFile)))) stop(dirname(crlmmFile), " does not exist.") |
|
281 |
+ if(!(file.exists(dirname(intensityFile)))) stop(dirname(intensityFile), " does not exist.") |
|
282 |
+ |
|
283 |
+ ##--------------------------------------------------------------------------- |
|
284 |
+ ## FIX |
|
285 |
+ outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep="")) |
|
286 |
+ if(load.it & all(file.exists(outfiles))){ |
|
287 |
+ load(outfiles[1]) |
|
288 |
+ crlmmSetList <- get("crlmmSetList") |
|
289 |
+ if(!all(sampleNames(crlmmSetList) == basename(filenames))){ |
|
290 |
+ stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))") |
|
291 |
+ } else{ |
|
292 |
+ return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...") |
|
293 |
+ } |
|
294 |
+ } |
|
295 |
+ if(load.it){ |
|
296 |
+ if(!file.exists(crlmmFile)){ |
|
297 |
+ message("load.it is TRUE, but ", crlmmFile, " does not exist. Rerunning the genotype calling algorithm") |
|
298 |
+ load.it <- FALSE |
|
299 |
+ } |
|
300 |
+ } |
|
301 |
+ |
|
302 |
+ if(platform == "affymetrix"){ |
|
303 |
+ if(!file.exists(crlmmFile) | !load.it){ |
|
304 |
+ callSet <- crlmm(filenames=filenames, |
|
305 |
+ cdfName=cdfName, |
|
306 |
+ save.it=TRUE, |
|
307 |
+ load.it=load.it, |
|
308 |
+ intensityFile=intensityFile) |
|
309 |
+ message("Quantile normalizing the copy number probes...") |
|
310 |
+ cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir) |
|
311 |
+ if(save.it){ |
|
312 |
+ message("Saving callSet and cnrmaResult to", crlmmFile) |
|
313 |
+ save(callSet, cnrmaResult, file=crlmmFile) |
|
314 |
+ } |
|
315 |
+ } else { |
|
316 |
+ message("Loading ", crlmmFile, "...") |
|
317 |
+ load(intensityFile) |
|
318 |
+ load(crlmmFile) |
|
319 |
+ callSet <- get("callSet") |
|
320 |
+ cnrmaResult <- get("cnrmaResult") |
|
321 |
+ } |
|
322 |
+ scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate)) |
|
323 |
+ protocolData(callSet) <- new("AnnotatedDataFrame", |
|
324 |
+ data=scanDates, |
|
325 |
+ varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
326 |
+ row.names=colnames(scanDates))) |
|
327 |
+ } |
|
328 |
+ if(platform == "illumina"){ |
|
329 |
+ if(!file.exists(crlmmFile) | !load.it){ |
|
330 |
+ callSet <- crlmmIllumina(RG=RG, |
|
331 |
+ cdfName=cdfName, |
|
332 |
+ sns=sampleNames(RG), |
|
333 |
+ returnParams=TRUE, |
|
334 |
+ save.it=TRUE, |
|
335 |
+ intensityFile=intensityFile) |
|
336 |
+ if(save.it) save(callSet, file=crlmmFile) |
|
337 |
+ } else { |
|
338 |
+ message("Loading ", crlmmFile, "...") |
|
339 |
+ load(crlmmFile) |
|
340 |
+ callSet <- get("callSet") |
|
341 |
+ } |
|
342 |
+ protocolData(callSet) <- protocolData(RG) |
|
343 |
+ } |
|
344 |
+ load(intensityFile) |
|
345 |
+ if(platform=="illumina"){ |
|
346 |
+ if(exists("cnAB")){ |
|
347 |
+ np.A <- as.integer(cnAB$A) |
|
348 |
+ np.B <- as.integer(cnAB$B) |
|
349 |
+ np <- ifelse(np.A > np.B, np.A, np.B) |
|
350 |
+ np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A)) |
|
351 |
+ rownames(np) <- cnAB$gns |
|
352 |
+ colnames(np) <- cnAB$sns |
|
353 |
+ cnAB$NP <- np |
|
354 |
+ ##sampleNames(callSet) <- res$sns |
|
355 |
+ sampleNames(callSet) <- cnAB$sns |
|
356 |
+ cnrmaResult <- get("cnAB") |
|
357 |
+ } else cnrmaResult <- NULL |
|
358 |
+ } |
|
359 |
+ if(platform=="affymetrix"){ |
|
360 |
+ if(exists("cnrmaResult")){ |
|
361 |
+ cnrmaResult <- get("cnrmaResult") |
|
362 |
+ } else cnrmaResult <- NULL |
|
363 |
+ } |
|
364 |
+ ABset <- combineIntensities(get("res"), cnrmaResult, cdfName) |
|
365 |
+ if(platform=="affymetrix") { |
|
366 |
+ protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames)) |
|
367 |
+ sampleNames(protocolData(callSet)) <- sampleNames(callSet) |
|
368 |
+ } |
|
369 |
+ ##callSet <- harmonizeSnpSet(callSet, ABset, cdfName) |
|
370 |
+ ##Make callSet the same dimension as ABset |
|
371 |
+ if(nrow(callSet) != nrow(ABset)){ |
|
372 |
+ callsConfs <- calls <- matrix(NA, nrow(ABset), ncol(ABset)) |
|
373 |
+ dimnames(callsConfs) <- dimnames(calls) <- list(featureNames(ABset), sampleNames(ABset)) |
|
374 |
+ fd <- data.frame(matrix(NA, nrow(calls), length(fvarLabels(callSet)))) |
|
375 |
+ rownames(fd) <- featureNames(ABset) |
|
376 |
+ colnames(fd) <- fvarLabels(callSet) |
|
377 |
+ fD <- new("AnnotatedDataFrame", |
|
378 |
+ data=data.frame(fd), |
|
379 |
+ varMetadata=data.frame(labelDescription=colnames(fd), row.names=colnames(fd))) |
|
380 |
+ callSet2 <- new("SnpSet", |
|
381 |
+ call=calls, |
|
382 |
+ callProbability=callsConfs, |
|
383 |
+ featureData=fD, |
|
384 |
+ phenoData=phenoData(callSet), |
|
385 |
+ experimentData=experimentData(callSet), |
|
386 |
+ protocolData=protocolData(callSet), |
|
387 |
+ annotation=annotation(callSet)) |
|
388 |
+ ## match the featureNames of the original callSet (snps only) to the ABset object |
|
389 |
+ index <- match(featureNames(callSet), featureNames(ABset)) |
|
390 |
+ if(any(is.na(index))) stop("missing values in match") |
|
391 |
+ ## Next, assign the calls to the appropriate subset of the full callSet (includes NPs) |
|
392 |
+ calls(callSet2)[index, ] <- calls(callSet) |
|
393 |
+ confs(callSet2)[index, ] <- confs(callSet) |
|
394 |
+ fData(callSet2)[index, ] <- fData(callSet) |
|
395 |
+ callSet <- callSet2 |
|
396 |
+ rm(callsConfs, calls, callSet2, fd, fD); gc() |
|
397 |
+ } else{ |
|
398 |
+ callSet <- callSet[match(featureNames(ABset), featureNames(callSet)), ] |
|
399 |
+ } |
|
400 |
+ stopifnot(all.equal(featureNames(callSet), featureNames(ABset))) |
|
401 |
+ stopifnot(all.equal(sampleNames(callSet), sampleNames(ABset))) |
|
402 |
+ |
|
403 |
+ ## create object with all of the assay data elements |
|
404 |
+ ## add an indicator to featureData for whether it is a snp or a np probe |
|
405 |
+ ## add annotation |
|
406 |
+ ## how should we combine the phenoData? |
|
407 |
+ pd1 <- phenoData(ABset) |
|
408 |
+ pd2 <- phenoData(callSet) |
|
409 |
+ pd <- cbind(pData(pd1), pData(pd2)) |
|
410 |
+ pD <- new("AnnotatedDataFrame", data=pd, |
|
411 |
+ varMetadata=data.frame(labelDescription=colnames(pd), |
|
412 |
+ row.names=colnames(pd))) |
|
413 |
+ nr <- nrow(ABset); nc <- ncol(ABset) |
|
414 |
+ if(!use.ff){ |
|
415 |
+ callSetPlus <- new("SnpCallSetPlus", |
|
416 |
+ senseThetaA=A(ABset), |
|
417 |
+ senseThetaB=B(ABset), |
|
418 |
+ calls=calls(callSet), |
|
419 |
+ callsConfidence=confs(callSet), |
|
420 |
+ phenoData=pD, |
|
421 |
+ featureData=featureData(callSet), |
|
422 |
+ annotation=annotation(ABset), |
|
423 |
+ experimentData=experimentData(callSet), |
|
424 |
+ protocolData=protocolData(callSet)) |
|
425 |
+ |
|
426 |
+ } else { |
|
427 |
+ callSetPlus <- new("SnpCallSetPlusFF", |
|
428 |
+ senseThetaA=ff(as.integer(A(ABset)), dim=c(nr,nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))), |
|
429 |
+ senseThetaB=ff(as.integer(B(ABset)), dim=c(nr, nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))), |
|
430 |
+ calls=ff(as.integer(calls(callSet)), dim=c(nr, nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))), |
|
431 |
+ callsConfidence=ff(as.integer(confs(callSet)), dim=c(nr, nc), vmode="integer", dimnames=list(featureNames(ABset), sampleNames(ABset))), |
|
432 |
+ phenoData=pD, |
|
433 |
+ featureData=featureData(callSet), |
|
434 |
+ annotation=annotation(ABset), |
|
435 |
+ experimentData=experimentData(callSet), |
|
436 |
+ protocolData=protocolData(callSet)) |
|
437 |
+ } |
|
438 |
+ featureData(callSetPlus) <- addFeatureAnnotation(callSetPlus) |
|
439 |
+ if(splitByChr){ |
|
440 |
+ saved.objects <- splitByChromosome(callSetPlus, cnOptions) |
|
441 |
+ ##callSetPlus <- list.files(outdir, pattern="", full.names=TRUE) |
|
442 |
+ if(!save.it) unlink(intensityFile) |
|
443 |
+ return(saved.objects) |
|
444 |
+ } |
|
445 |
+ if(!save.it){ |
|
446 |
+ message("Cleaning up") |
|
447 |
+ unlink(intensityFile) |
|
448 |
+ } |
|
449 |
+ return(callSetPlus) |
|
450 |
+} |
|
451 |
+ |
|
452 |
+validCdfNames <- function(){ |
|
453 |
+ c("genomewidesnp6", |
|
454 |
+ "genomewidesnp5", |
|
455 |
+ "human370v1c", |
|
456 |
+ "human370quadv3c", |
|
457 |
+ "human550v3b", |
|
458 |
+ "human650v3a", |
|
459 |
+ "human610quadv1b", |
|
460 |
+ "human660quadv1a", |
|
461 |
+ "human1mduov3b") |
|
462 |
+} |
|
463 |
+ |
|
464 |
+##validCdfNames <- function(platform){ |
|
465 |
+## if(!missing(platform)){ |
|
466 |
+## if(!platform %in% c("illumina", "affymetrix")) |
|
467 |
+## stop("only illumina and affymetrix platforms are supported.") |
|
468 |
+## if(platform=="illumina"){ |
|
469 |
+## chipList = c("human1mv1c", # 1M |
|
470 |
+## "human370v1c", # 370CNV |
|
471 |
+## "human650v3a", # 650Y |
|
472 |
+## "human610quadv1b", # 610 quad |
|
473 |
+## "human660quadv1a", # 660 quad |
|
474 |
+## "human370quadv3c", # 370CNV quad |
|
475 |
+## "human550v3b", # 550K |
|
476 |
+## "human1mduov3b") # 1M Duo |
|
477 |
+## } |
|
478 |
+## if(platform=="affymetrix"){ |
|
479 |
+## chipList=c("genomewidesnp6", "genomewidesnp5") |
|
480 |
+## } |
|
481 |
+## } else{ |
|
482 |
+## chipList <- list() |
|
483 |
+## chipList$affymetrix <- c("genomewidesnp6","genomewidesnp5") |
|
484 |
+## chipList$illumina <- c("human370v1c", |
|
485 |
+## "human370quadv3c", |
|
486 |
+## "human550v3b", |
|
487 |
+## "human650v3a", |
|
488 |
+## "human610quadv1b", |
|
489 |
+## "human660quadv1a", |
|
490 |
+## "human1mduov3b") |
|
491 |
+## } |
|
492 |
+## return(chipList) |
|
493 |
+##} |
|
494 |
+isValidCdfName <- function(cdfName){ |
|
495 |
+ chipList <- validCdfNames() |
|
496 |
+ result <- cdfName %in% chipList |
|
497 |
+ if(!(result)){ |
|
498 |
+ warning("cdfName must be one of the following: ", |
|
499 |
+ chipList) |
|
500 |
+ } |
|
501 |
+ return(result) |
|
502 |
+} |
|
503 |
+ |
|
504 |
+whichPlatform <- function(cdfName){ |
|
505 |
+ index <- grep("genomewidesnp", cdfName) |
|
506 |
+ if(length(index) > 0){ |
|
507 |
+ platform <- "affymetrix" |
|
508 |
+ } else{ |
|
509 |
+ index <- grep("human", cdfName) |
|
510 |
+ platform <- "illumina" |
|
511 |
+ } |
|
512 |
+ return(platform) |
|
513 |
+} |
|
514 |
+ |
|
515 |
+ |
|
516 |
+##isValidCdfName <- function(cdfName, platform){ |
|
517 |
+## chipList <- validCdfNames(platform) |
|
518 |
+## if(!(cdfName %in% chipList)){ |
|
519 |
+## warning("cdfName must be one of the following: ", |
|
520 |
+## chipList) |
|
521 |
+## } |
|
522 |
+## result <- cdfName %in% chipList |
|
523 |
+## return(result) |
|
524 |
+##} |
|
525 |
+ |
|
526 |
+ |
|
527 |
+ |
|
528 |
+# steps: quantile normalize hapmap: create 1m_reference_cn.rda object |
|
529 |
+cnrma <- function(filenames, cdfName, sns, seed=1, verbose=FALSE, outdir){ |
|
530 |
+ if(missing(cdfName)) stop("must specify cdfName") |
|
531 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
532 |
+ require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
|
533 |
+ if (missing(sns)) sns <- basename(filenames) |
|
534 |
+ loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
535 |
+ fid <- getVarInEnv("npProbesFid") |
|
536 |
+ set.seed(seed) |
|
537 |
+ idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
538 |
+ SKW <- vector("numeric", length(filenames)) |
|
539 |
+## if(bigmemory){ |
|
540 |
+## NP <- filebacked.big.matrix(length(pnsa), length(filenames), |
|
541 |
+## type="integer", |
|
542 |
+## init=as.integer(0), |
|
543 |
+## backingpath=outdir, |
|
544 |
+## backingfile="NP.bin", |
|
545 |
+## descriptorfile="NP.desc") |
|
546 |
+## } else{ |
|
547 |
+ NP <- matrix(NA, length(fid), length(filenames)) |
|
548 |
+## } |
|
549 |
+ verbose <- TRUE |
|
550 |
+ if(verbose){ |
|
551 |
+ message("Processing ", length(filenames), " files.") |
|
552 |
+ if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
553 |
+ } |
|
554 |
+ if(cdfName=="genomewidesnp6"){ |
|
555 |
+ loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
556 |
+ } |
|
557 |
+ if(cdfName=="genomewidesnp5"){ |
|
558 |
+ loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
559 |
+ } |
|
560 |
+ reference <- getVarInEnv("reference") |
|
561 |
+ ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
562 |
+ for(i in seq(along=filenames)){ |
|
563 |
+ y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
564 |
+ x <- log2(y[idx2]) |
|
565 |
+ SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
566 |
+ rm(x) |
|
567 |
+ NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
568 |
+ if (verbose) |
|
569 |
+ if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
570 |
+ else cat(".") |
|
571 |
+ } |
|
572 |
+ dimnames(NP) <- list(names(fid), sns) |
|
573 |
+ ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
574 |
+ res3 <- list(NP=NP, SKW=SKW) |
|
575 |
+ cat("\n") |
|
576 |
+ return(res3) |
|
577 |
+} |
|
578 |
+ |
|
579 |
+getFlags <- function(object, PHI.THR){ |
|
580 |
+ batch <- unique(object$batch) |
|
581 |
+ nuA <- getParam(object, "nuA", batch) |
|
582 |
+ nuB <- getParam(object, "nuB", batch) |
|
583 |
+ phiA <- getParam(object, "phiA", batch) |
|
584 |
+ phiB <- getParam(object, "phiB", batch) |
|
585 |
+ negativeNus <- nuA < 1 | nuB < 1 |
|
586 |
+ negativePhis <- phiA < PHI.THR | phiB < PHI.THR |
|
587 |
+ negativeCoef <- negativeNus | negativePhis |
|
588 |
+ notfinitePhi <- !is.finite(phiA) | !is.finite(phiB) |
|
589 |
+ flags <- negativeCoef | notfinitePhi |
|
590 |
+ return(flags) |
|
591 |
+} |
|
592 |
+ |
|
593 |
+ |
|
594 |
+instantiateObjects <- function(object, cnOptions){ |
|
595 |
+ Ns <- matrix(NA, nrow(object), 5) |
|
596 |
+ colnames(Ns) <- c("A", "B", "AA", "AB", "BB") |
|
597 |
+ vA <- vB <- muB <- muA <- Ns |
|
598 |
+ normal <- matrix(TRUE, nrow(object), ncol(object)) |
|
599 |
+ dimnames(normal) <- list(featureNames(object), sampleNames(object)) |
|
600 |
+ tmp.objects <- list(vA=vA, |
|
601 |
+ vB=vB, |
|
602 |
+ muB=muB, |
|
603 |
+ muA=muA, |
|
604 |
+ Ns=Ns, |
|
605 |
+ normal=normal) |
|
606 |
+ return(tmp.objects) |
|
607 |
+} |
|
608 |
+ |
|
609 |
+##updateNuPhi <- function(crlmmSetList, cnSet){ |
|
610 |
+## ##TODO: remove the use of environments. |
|
611 |
+## cdfName <- annotation(crlmmSetList[[1]]) |
|
612 |
+## ##repopulate the environment |
|
613 |
+## crlmmSetList <- crlmmSetList[, match(sampleNames(cnSet), sampleNames(crlmmSetList))] |
|
614 |
+## crlmmSetList <- crlmmSetList[match(featureNames(cnSet), featureNames(crlmmSetList)), ] |
|
615 |
+## ##envir <- getCopyNumberEnvironment(crlmmSetList, cnSet) |
|
616 |
+## Nset <- crlmmSetList[[1]] |
|
617 |
+## ##indices of polymorphic loci |
|
618 |
+## index <- snpIndex(Nset) |
|
619 |
+## ABset <- Nset[index, ] |
|
620 |
+## ##indices of nonpolymorphic loci |
|
621 |
+## NPset <- Nset[-index, ] |
|
622 |
+## ##genotypes/confidences |
|
623 |
+## snpset <- crlmmSetList[[2]][index,] |
|
624 |
+## ##previous version of compute copy number |
|
625 |
+## envir <- new.env() |
|
626 |
+## message("Running bias adjustment...") |
|
627 |
+#### .computeCopynumber(chrom=CHR, |
|
628 |
+#### A=A(ABset), |
|
629 |
+#### B=B(ABset), |
|
630 |
+#### calls=calls(snpset), |
|
631 |
+#### conf=confs(snpset), |
|
632 |
+#### NP=A(NPset), |
|
633 |
+#### plate=batch, |
|
634 |
+#### envir=envir, |
|
635 |
+#### SNR=ABset$SNR, |
|
636 |
+#### bias.adj=TRUE, |
|
637 |
+#### SNRmin=SNRmin, |
|
638 |
+#### ...) |
|
639 |
+##} |
|
640 |
+ |
|
641 |
+##ist2locusSet <- function(envir, ABset, NPset, CHR, cdfName){ |
|
642 |
+## if(missing(CHR)) stop("Must specify chromosome") |
|
643 |
+## pkgname <- paste(cdfName, "Crlmm", sep="") |
|
644 |
+## path <- system.file("extdata", package=pkgname) |
|
645 |
+## loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
646 |
+## cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
|
647 |
+## loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
648 |
+## snpProbes <- get("snpProbes", envir=.crlmmPkgEnv) |
|
649 |
+## ##require(oligoClasses) || stop("oligoClasses package not available") |
|
650 |
+## if(length(ls(envir)) == 0) stop("environment empty") |
|
651 |
+## batch <- envir[["plate"]] |
|
652 |
+## ##SNP copy number |
|
653 |
+## CA <- envir[["CA"]] |
|
654 |
+## dimnames(CA) <- list(envir[["snps"]], envir[["sns"]]) |
|
655 |
+## CB <- envir[["CB"]] |
|
656 |
+## dimnames(CB) <- dimnames(CA) |
|
657 |
+## ##NP copy number |
|
658 |
+## if(!is.null(NPset)){ |
|
659 |
+## CT <- envir[["CT"]] |
|
660 |
+## rownames(CT) <- envir[["cnvs"]] |
|
661 |
+## colnames(CT) <- envir[["sns"]] |
|
662 |
+## sig2T <- envir[["sig2T"]] |
|
663 |
+## rownames(sig2T) <- rownames(CT) |
|
664 |
+## nuT <- envir[["nuT"]] |
|
665 |
+## colnames(nuT) <- paste("nuT", unique(batch), sep="_") |
|
666 |
+## phiT <- envir[["phiT"]] |
|
667 |
+## colnames(phiT) <- paste("phiT", unique(batch), sep="_") |
|
668 |
+## naMatrix <- matrix(NA, nrow(CT), ncol(CT)) |
|
669 |
+## dimnames(naMatrix) <- dimnames(CT) |
|
670 |
+## } else{ |
|
671 |
+## sig2T <- nuT <- phiT <- naMatrix <- CT <- NULL |
|
672 |
+## } |
|
673 |
+## CA <- rbind(CA, CT) |
|
674 |
+## CB <- rbind(CB, naMatrix) |
|
675 |
+## |
|
676 |
+## ##SNP parameters |
|
677 |
+## tau2A <- envir[["tau2A"]] |
|
678 |
+## colnames(tau2A) <- paste("tau2A", unique(batch), sep="_") |
|
679 |
+## tau2B <- envir[["tau2B"]] |
|
680 |
+## colnames(tau2B) <- paste("tau2B", unique(batch), sep="_") |
|
681 |
+## sig2A <- envir[["sig2A"]] |
|
682 |
+## colnames(sig2A) <- paste("sig2A", unique(batch), sep="_") |
|
683 |
+## sig2B <- envir[["sig2B"]] |
|
684 |
+## colnames(sig2B) <- paste("sig2B", unique(batch), sep="_") |
|
685 |
+## nuA <- envir[["nuA"]] |
|
686 |
+## colnames(nuA) <- paste("nuA", unique(batch), sep="_") |
|
687 |
+## nuB <- envir[["nuB"]] |
|
688 |
+## colnames(nuB) <- paste("nuB", unique(batch), sep="_") |
|
689 |
+## phiA <- envir[["phiA"]] |
|
690 |
+## colnames(phiA) <- paste("phiA", unique(batch), sep="_") |
|
691 |
+## phiB <- envir[["phiB"]] |
|
692 |
+## colnames(phiB) <- paste("phiB", unique(batch), sep="_") |
|
693 |
+## corr <- envir[["corr"]] |
|
694 |
+## colnames(corr) <- paste("corr", unique(batch), sep="_") |
|
695 |
+## corrA.BB <- envir[["corrA.BB"]] |
|
696 |
+## colnames(corrA.BB) <- paste("corrA.BB", unique(batch), sep="_") |
|
697 |
+## corrB.AA <- envir[["corrB.AA"]] |
|
698 |
+## colnames(corrB.AA) <- paste("corrB.AA", unique(batch), sep="_") |
|
699 |
+## |
|
700 |
+## |
|
701 |
+## ##Combine SNP and NP parameters |
|
702 |
+## if(!is.null(NPset)){ |
|
703 |
+## naMatrixParams <- matrix(NA, nrow(CT), length(unique(batch))) |
|
704 |
+## dimnames(naMatrixParams) <- list(rownames(CT), unique(batch)) |
|
705 |
+## } else{ |
|
706 |
+## naMatrixParams <- NULL |
|
707 |
+## } |
|
708 |
+## tau2A <- rbind(tau2A, naMatrixParams) |
|
709 |
+## tau2B <- rbind(tau2B, naMatrixParams) |
|
710 |
+## sig2A <- rbind(sig2A, sig2T) |
|
711 |
+## sig2B <- rbind(sig2B, naMatrixParams) |
|
712 |
+## corr <- rbind(corr, naMatrixParams) |
|
713 |
+## corrA.BB <- rbind(corrA.BB, naMatrixParams) |
|
714 |
+## corrB.AA <- rbind(corrB.AA, naMatrixParams) |
|
715 |
+## nuA <- rbind(nuA, nuT) |
|
716 |
+## phiA <- rbind(phiA, phiT) |
|
717 |
+## nuB <- rbind(nuB, naMatrixParams) |
|
718 |
+## phiB <- rbind(phiB, naMatrixParams) |
|
719 |
+## rownames(tau2A) <- rownames(tau2B) <- rownames(sig2A) <- rownames(sig2B) <- rownames(CA) |
|
720 |
+## rownames(corr) <- rownames(corrA.BB) <- rownames(corrB.AA) <- rownames(CA) |
|
721 |
+## rownames(nuA) <- rownames(phiA) <- rownames(nuB) <- rownames(phiB) <- rownames(CA) |
|
722 |
+## ##phenodata |
|
723 |
+## phenodata <- phenoData(ABset) |
|
724 |
+## phenodata <- phenodata[match(envir[["sns"]], sampleNames(phenodata)), ] |
|
725 |
+## if(!("batch" %in% varLabels(phenodata))) phenodata$batch <- envir[["plate"]] |
|
726 |
+## |
|
727 |
+## ##Feature Data |
|
728 |
+## position.snp <- snpProbes[match(envir[["snps"]], rownames(snpProbes)), "position"] |
|
729 |
+## names(position.snp) <- envir[["snps"]] |
|
730 |
+## if(!is.null(NPset)){ |
|
731 |
+## position.np <- cnProbes[match(envir[["cnvs"]], rownames(cnProbes)), "position"] |
|
732 |
+## names(position.np) <- envir[["cnvs"]] |
|
733 |
+## } else position.np <- NULL |
|
734 |
+## position <- c(position.snp, position.np) |
|
735 |
+## if(!(identical(names(position), rownames(CA)))){ |
|
736 |
+## position <- position[match(rownames(CA), names(position))] |
|
737 |
+## } |
|
738 |
+## if(sum(duplicated(names(position))) > 0){ |
|
739 |
+## warning("Removing rows with NA identifiers...") |
|
740 |
+## ##RS: fix this |
|
741 |
+## I <- which(!is.na(names(position))) |
|
742 |
+## } else I <- seq(along=names(position)) |
|
743 |
+## fd <- data.frame(cbind(CHR, |
|
744 |
+## position[I], |
|
745 |
+## tau2A[I,, drop=FALSE], |
|
746 |
+## tau2B[I,, drop=FALSE], |
|
747 |
+## sig2A[I,, drop=FALSE], |
|
748 |
+## sig2B[I,, drop=FALSE], |
|
749 |
+## nuA[I,, drop=FALSE], |
|
750 |
+## nuB[I,, drop=FALSE], |
|
751 |
+## phiA[I,, drop=FALSE], |
|
752 |
+## phiB[I,, drop=FALSE], |
|
753 |
+## corr[I,, drop=FALSE], |
|
754 |
+## corrA.BB[I,, drop=FALSE], |
|
755 |
+## corrB.AA[I,, drop=FALSE])) |
|
756 |
+## colnames(fd)[1:2] <- c("chromosome", "position") |
|
757 |
+## rownames(fd) <- rownames(CA)[I] |
|
758 |
+## fD <- new("AnnotatedDataFrame", |
|
759 |
+## data=fd, |
|
760 |
+## varMetadata=data.frame(labelDescription=colnames(fd))) |
|
761 |
+## placeholder <- matrix(NA, nrow(CA), ncol(CA)) |
|
762 |
+## dimnames(placeholder) <- dimnames(CA) |
|
763 |
+## assayData <- assayDataNew("lockedEnvironment", |
|
764 |
+## CA=placeholder[I, ], |
|
765 |
+## CB=placeholder[I, ]) |
|
766 |
+## cnset <- new("CopyNumberSet", |
|
767 |
+## assayData=assayData, |
|
768 |
+## featureData=fD, |
|
769 |
+## phenoData=phenodata, |
|
770 |
+## annotation=cdfName) |
|
771 |
+## CA(cnset) <- CA ##replacement method converts to integer |
|
772 |
+## CB(cnset) <- CB ##replacement method converts to integer |
|
773 |
+## cnset <- thresholdCopyNumberSet(cnset) |
|
774 |
+## return(cnset) |
|
775 |
+## |
|
776 |
+ |
|
777 |
+ |
|
778 |
+ |
|
779 |
+thresholdCopynumber <- function(object){ |
|
780 |
+ ca <- CA(object) |
|
781 |
+ cb <- CB(object) |
|
782 |
+ ca[ca < 0.05] <- 0.05 |
|
783 |
+ ca[ca > 5] <- 5 |
|
784 |
+ cb[cb < 0.05] <- 0.05 |
|
785 |
+ cb[cb > 5] <- 5 |
|
786 |
+ CA(object) <- ca |
|
787 |
+ CB(object) <- cb |
|
788 |
+ return(object) |
|
789 |
+} |
|
790 |
+ |
|
791 |
+##preprocessOptions <- function(crlmmFile="snpsetObject.rda", |
|
792 |
+## intensityFile="normalizedIntensities.rda", |
|
793 |
+## rgFile="rgFile.rda"){ |
|
794 |
+## |
|
795 |
+##} |
|
796 |
+ |
|
797 |
+cnOptions <- function(tmpdir=tempdir(), |
|
798 |
+ outdir="./", |
|
799 |
+ cdfName, |
|
800 |
+ crlmmFile="snpsetObject.rda", |
|
801 |
+ intensityFile="normalizedIntensities.rda", |
|
802 |
+ rgFile="rgFile.rda", |
|
803 |
+ save.it=TRUE, |
|
804 |
+ load.it=TRUE, |
|
805 |
+ splitByChr=TRUE, |
|
806 |
+ use.ff=FALSE, |
|
807 |
+ MIN.OBS=3, |
|
808 |
+ MIN.SAMPLES=10, |
|
809 |
+ batch=NULL, |
|
810 |
+ DF.PRIOR=50, |
|
811 |
+ bias.adj=FALSE, |
|
812 |
+ prior.prob=rep(1/4, 4), |
|
813 |
+ SNRmin=5, |
|
814 |
+ seed=123, |
|
815 |
+ verbose=TRUE, |
|
816 |
+ GT.CONF.THR=0.99, |
|
817 |
+ PHI.THR=2^6,##used in nonpolymorphic fxn for training |
|
818 |
+ nAA.THR=5, ##used in nonpolymorphic fxn for training |
|
819 |
+ nBB.THR=5, ##used in nonpolymorphic fxn for training |
|
820 |
+ MIN.NU=2^3, |
|
821 |
+ MIN.PHI=2^3, |
|
822 |
+ THR.NU.PHI=TRUE, |
|
823 |
+ thresholdCopynumber=TRUE, |
|
824 |
+ unlink=TRUE, |
|
825 |
+ hiddenMarkovModel=FALSE, |
|
826 |
+ circularBinarySegmentation=FALSE, |
|
827 |
+ cbsOpts=NULL, |
|
828 |
+ hmmOpts=NULL, ...){ |
|
829 |
+ if(missing(cdfName)) stop("must specify cdfName") |
|
830 |
+ if(!file.exists(outdir)){ |
|
831 |
+ message(outdir, " does not exist. Trying to create it.") |
|
832 |
+ dir.create(outdir, recursive=TRUE) |
|
833 |
+ } |
|
834 |
+ stopifnot(isValidCdfName(cdfName)) |
|
835 |
+ if(hiddenMarkovModel){ |
|
836 |
+ hmmOpts <- hmmOptions(...) |
|
837 |
+ } |
|
838 |
+ if(is.null(batch)) |
|
839 |
+ stop("batch must have the same length as the number of samples") |
|
840 |
+ list(tmpdir=tmpdir, |
|
841 |
+ outdir=outdir, |
|
842 |
+ cdfName=cdfName, |
|
843 |
+ crlmmFile=file.path(outdir, crlmmFile), |
|
844 |
+ intensityFile=file.path(outdir, intensityFile), |
|
845 |
+ rgFile=file.path(outdir, rgFile), |
|
846 |
+ save.it=save.it, |
|
847 |
+ load.it=load.it, |
|
848 |
+ splitByChr=splitByChr, |
|
849 |
+ use.ff=use.ff, |
|
850 |
+ MIN.OBS=MIN.OBS, |
|
851 |
+ MIN.SAMPLES=MIN.SAMPLES, |
|
852 |
+ batch=batch, |
|
853 |
+ DF.PRIOR=DF.PRIOR, |
|
854 |
+ GT.CONF.THR=GT.CONF.THR, |
|
855 |
+ prior.prob=prior.prob, |
|
856 |
+ bias.adj=bias.adj, |
|
857 |
+ SNRmin=SNRmin, |
|
858 |
+ seed=seed, |
|
859 |
+ verbose=verbose, |
|
860 |
+ PHI.THR=PHI.THR, |
|
861 |
+ nAA.THR=nAA.THR, |
|
862 |
+ nBB.THR=nBB.THR, |
|
863 |
+ MIN.NU=MIN.NU, |
|
864 |
+ MIN.PHI=MIN.PHI, |
|
865 |
+ THR.NU.PHI=THR.NU.PHI, |
|
866 |
+ unlink=unlink, |
|
867 |
+ hiddenMarkovModel=hiddenMarkovModel, |
|
868 |
+ circularBinarySegmentation=circularBinarySegmentation, |
|
869 |
+ cbsOpts=cbsOpts, |
|
870 |
+ hmmOpts=hmmOpts) ##remove SnpCallSetPlus object |
|
871 |
+} |
|
872 |
+ |
|
873 |
+##linear model parameters |
|
874 |
+lm.parameters <- function(object, cnOptions){ |
|
875 |
+ fD <- fData(object) |
|
876 |
+ batch <- object$batch |
|
877 |
+ uplate <- unique(batch) |
|
878 |
+ parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
879 |
+ paste("tau2B", uplate, sep="_"), |
|
880 |
+ paste("sig2A", uplate, sep="_"), |
|
881 |
+ paste("sig2B", uplate, sep="_"), |
|
882 |
+ paste("nuA", uplate, sep="_"), |
|
883 |
+ paste("nuA.se", uplate, sep="_"), |
|
884 |
+ paste("nuB", uplate, sep="_"), |
|
885 |
+ paste("nuB.se", uplate, sep="_"), |
|
886 |
+ paste("phiA", uplate, sep="_"), |
|
887 |
+ paste("phiA.se", uplate, sep="_"), |
|
888 |
+ paste("phiB", uplate, sep="_"), |
|
889 |
+ paste("phiB.se", uplate, sep="_"), |
|
890 |
+ paste("phiAX", uplate, sep="_"), |
|
891 |
+ paste("phiBX", uplate, sep="_"), |
|
892 |
+ paste("corr", uplate, sep="_"), |
|
893 |
+ paste("corrA.BB", uplate, sep="_"), |
|
894 |
+ paste("corrB.AA", uplate, sep="_")) |
|
895 |
+ pMatrix <- data.frame(matrix(numeric(0), |
|
896 |
+ nrow(object), |
|
897 |
+ length(parameterNames)), |
|
898 |
+ row.names=featureNames(object)) |
|
899 |
+ colnames(pMatrix) <- parameterNames |
|
900 |
+ fD2 <- cbind(fD, pMatrix) |
|
901 |
+ new("AnnotatedDataFrame", data=fD2, |
|
902 |
+ varMetadata=data.frame(labelDescription=colnames(fD2), |
|
903 |
+ row.names=colnames(fD2))) |
|
904 |
+} |
|
905 |
+ |
|
906 |
+nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
|
907 |
+ batch <- unique(object$batch) |
|
908 |
+ CHR <- unique(chromosome(object)) |
|
909 |
+ goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){ |
|
910 |
+ Ns <- tmp.objects[["Ns"]] |
|
911 |
+ ##Ns <- get("Ns", envir) |
|
912 |
+ flags <- getFlags(object, PHI.THR) |
|
913 |
+ fewAA <- Ns[, "AA"] < nAA.THR |
|
914 |
+ fewBB <- Ns[, "BB"] < nBB.THR |
|
915 |
+ flagsA <- flags | fewAA |
|
916 |
+ flagsB <- flags | fewBB |
|
917 |
+ flags <- list(A=flagsA, B=flagsB) |
|
918 |
+ return(flags) |
|
919 |
+ } |
|
920 |
+ nAA.THR <- cnOptions$nAA.THR |
|
921 |
+ nBB.THR <- cnOptions$nBB.THR |
|
922 |
+ PHI.THR <- cnOptions$PHI.THR |
|
923 |
+ snpflags <- goodSnps(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR) |
|
924 |
+ flagsA <- snpflags$A |
|
925 |
+ flagsB <- snpflags$B |
|
926 |
+## if(all(flagsA) | all(flagsB)) stop("all snps are flagged") |
|
927 |
+ nuA <- getParam(object, "nuA", batch) |
|
928 |
+ nuB <- getParam(object, "nuB", batch) |
|
929 |
+ phiA <- getParam(object, "phiA", batch) |
|
930 |
+ phiB <- getParam(object, "phiB", batch) |
|
931 |
+ sns <- sampleNames(object) |
|
932 |
+ muA <- tmp.objects[["muA"]] |
|
933 |
+ muB <- tmp.objects[["muB"]] |
|
934 |
+ A <- A(object) |
|
935 |
+ B <- B(object) |
|
936 |
+ CA <- CA(object) |
|
937 |
+ CB <- CB(object) |
|
938 |
+ if(CHR == 23){ |
|
939 |
+ phiAX <- getParam(object, "phiAX", batch) |
|
940 |
+ phiBX <- getParam(object, "phiBX", batch) |
|
941 |
+ } |
|
942 |
+ ##--------------------------------------------------------------------------- |
|
943 |
+ ## Train on unflagged SNPs |
|
944 |
+ ##--------------------------------------------------------------------------- |
|
945 |
+ ##Might be best to train using the X chromosome, since for the |
|
946 |
+ ##X phi and nu have been adjusted for cross-hybridization |
|
947 |
+ ##plateInd <- plate == uplate[p] |
|
948 |
+ ##muA <- muA[!flagsA, p, c("A", "AA")] |
|
949 |
+ ##muB <- muB[!flagsB, p, c("B", "BB")] |
|
950 |
+ muA <- muA[!flagsA, "AA"] |
|
951 |
+ muB <- muB[!flagsB, "BB"] |
|
952 |
+ X <- cbind(1, log2(c(muA, muB))) |
|
953 |
+ Y <- log2(c(phiA[!flagsA], phiB[!flagsB])) |
|
954 |
+ if(nrow(X) > 5000){ |
|
955 |
+ ix <- sample(1:nrow(X), 5000) |
|
956 |
+ } else { |
|
957 |
+ ix <- 1:nrow(X) |
|
958 |
+ } |
|
959 |
+ betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix])) |
|
960 |
+ normal <- tmp.objects[["normal"]][!isSnp(object), ] |
|
961 |
+ if(CHR == 23){ |
|
962 |
+ ##normalNP <- envir[["normalNP"]] |
|
963 |
+ ##normalNP <- normalNP[, plate==uplate[p]] |
|
964 |
+ ##nuT <- envir[["nuT"]] |
|
965 |
+ ##phiT <- envir[["phiT"]] |
|
966 |
+ |
|
967 |
+ ##cnvs <- envir[["cnvs"]] |
|
968 |
+ ##loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
969 |
+ ##cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
|
970 |
+ ##cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ] |
|
971 |
+ |
|
972 |
+ ##For build Hg18 |
|
973 |
+ ##http://genome.ucsc.edu/cgi-bin/hgGateway |
|
974 |
+ ##pseudo-autosomal regions on X |
|
975 |
+ ##chrX:1-2,709,520 and chrX:154584237-154913754, respectively |
|
976 |
+ ##par:pseudo-autosomal regions |
|
977 |
+ pseudoAR <- position(object) < 2709520 | (position(object) > 154584237 & position(object) < 154913754) |
|
978 |
+ ##pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754) |
|
979 |
+ ##in case some of the cnProbes are not annotated |
|
980 |
+ pseudoAR[is.na(pseudoAR)] <- FALSE |
|
981 |
+ pseudoAR <- pseudoAR[!isSnp(object)] |
|
982 |
+ ##gender <- envir[["gender"]] |
|
983 |
+ gender <- object$gender |
|
984 |
+ obj1 <- object[!isSnp(object), ] |
|
985 |
+ A.male <- A(obj1[, gender==1]) |
|
986 |
+ mu1 <- rowMedians(A.male, na.rm=TRUE) |
|
987 |
+ ##mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE) |
|
988 |
+ ##mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE) |
|
989 |
+ A.female <- A(obj1[, gender==2]) |
|
990 |
+ mu2 <- rowMedians(A.female, na.rm=TRUE) |
|
991 |
+ mus <- log2(cbind(mu1, mu2)) |
|
992 |
+ X.men <- cbind(1, mus[, 1]) |
|
993 |
+ X.fem <- cbind(1, mus[, 2]) |
|
994 |
+ |
|
995 |
+ Yhat1 <- as.numeric(X.men %*% betahat) |
|
996 |
+ Yhat2 <- as.numeric(X.fem %*% betahat) |
|
997 |
+ phi1 <- 2^(Yhat1) |
|
998 |
+ phi2 <- 2^(Yhat2) |
|
999 |
+ nu1 <- 2^(mus[, 1]) - phi1 |
|
1000 |
+ nu2 <- 2^(mus[, 2]) - 2*phi2 |
|
1001 |
+ if(any(pseudoAR)){ |
|
1002 |
+ nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR] |
|
1003 |
+ } |
|
1004 |
+ CT1 <- 1/phi1*(A.male-nu1) |
|
1005 |
+ CT2 <- 1/phi2*(A.female-nu2) |
|
1006 |
+ ##CT2 <- 1/phi2*(NP[, gender=="female"]-nu2) |
|
1007 |
+ ##CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1)) |
|
1008 |
+ ##CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2)) |
|
1009 |
+ ##CT <- envir[["CT"]] |
|
1010 |
+ CA <- CA(obj1) |
|
1011 |
+ CA[, gender==1] <- CT1 |
|
1012 |
+ CA[, gender==2] <- CT2 |
|
1013 |
+ CA(object)[!isSnp(object), ] <- CA |
|
1014 |
+ ##CT[, plate==uplate[p] & gender=="male"] <- CT1 |
|
1015 |
+ ##CT[, plate==uplate[p] & gender=="female"] <- CT2 |
|
1016 |
+ ##envir[["CT"]] <- CT |
|
1017 |
+ |
|
1018 |
+ ##only using females to compute the variance |
|
1019 |
+ ##normalNP[, gender=="male"] <- NA |
|
1020 |
+ normal[, gender==1] <- NA |
|
1021 |
+ sig2A <- getParam(object, "sig2A", batch) |
|
1022 |
+ normal.f <- normal[, object$gender==2] |
|
1023 |
+ sig2A[!isSnp(object)] <- rowMAD(log2(A.female*normal.f), na.rm=TRUE)^2 |
|
1024 |
+ ##sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
1025 |
+ object <- pr(object, "sig2A", batch, sig2A) |
|
1026 |
+ |
|
1027 |
+ nuA[!isSnp(object)] <- nu2 |
|
1028 |
+ object <- pr(object, "nuA", batch, nuA) |
|
1029 |
+ ##nuT[, p] <- nu2 |
|
1030 |
+ phiA[!isSnp(object)] <- phi2 |
|
1031 |
+ object <- pr(object, "phiA", batch, phiA) |
|
1032 |
+ |
|
1033 |
+ } else { |
|
1034 |
+ A <- A(object)[!isSnp(object), ] |
|
1035 |
+ mus <- rowMedians(A * normal, na.rm=TRUE) |
|
1036 |
+ crosshyb <- max(median(muA) - median(mus), 0) |
|
1037 |
+ X <- cbind(1, log2(mus+crosshyb)) |
|
1038 |
+ logPhiT <- X %*% betahat |
|
1039 |
+ phiA[!isSnp(object)] <- 2^(logPhiT) |
|
1040 |
+ nuA[!isSnp(object)] <- mus-2*phiA[!isSnp(object)] |
|
1041 |
+ CA(object)[!isSnp(object), ] <- 1/phiA[!isSnp(object)]*(A - nuA[!isSnp(object)]) |
|
1042 |
+ sig2A <- getParam(object, "sig2A", batch) |
|
1043 |
+ sig2A[!isSnp(object)] <- rowMAD(log2(A*normal), na.rm=TRUE)^2 |
|
1044 |
+ object <- pr(object, "sig2A", batch, sig2A) |
|
1045 |
+ ##added |
|
1046 |
+ object <- pr(object, "nuA", batch, nuA) |
|
1047 |
+ object <- pr(object, "phiA", batch, phiA) |
|
1048 |
+ } |
|
1049 |
+ return(object) |
|
1050 |
+} |
|
1051 |
+ |
|
1052 |
+##sufficient statistics on the intensity scale |
|
1053 |
+withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
|
1054 |
+ normal <- tmp.objects[["normal"]] |
|
1055 |
+ |
|
1056 |
+ ## muA, muB: robust estimates of the within-genotype center (intensity scale) |
|
1057 |
+ muA <- tmp.objects[["muA"]] |
|
1058 |
+ muB <- tmp.objects[["muB"]] |
|
1059 |
+ ## vA, vB: robust estimates of the within-genotype variance (intensity scale) |
|
1060 |
+ vA <- tmp.objects[["vA"]] |
|
1061 |
+ vB <- tmp.objects[["vB"]] |
|
1062 |
+ Ns <- tmp.objects[["Ns"]] |
|
1063 |
+ G <- calls(object) |
|
1064 |
+ GT.CONF.THR <- cnOptions$GT.CONF.THR |
|
1065 |
+ CHR <- unique(chromosome(object)) |
|
1066 |
+ |
|
1067 |
+ A <- A(object) |
|
1068 |
+ B <- B(object) |
|
1069 |
+ highConf <- (1-exp(-callsConfidence(object)/1000)) > GT.CONF.THR |
|
1070 |
+ ##highConf <- highConf > GT.CONF.THR |
|
1071 |
+ if(CHR == 23){ |
|
1072 |
+ gender <- object$gender |
|
1073 |
+## gender <- envir[["gender"]] |
|
1074 |
+ IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE) |
|
1075 |
+## IX <- IX == "female" |
|
1076 |
+ IX <- IX == 2 ##2=female, 1=male |
|
1077 |
+ } else IX <- matrix(TRUE, nrow(G), ncol(G)) |
|
1078 |
+ index <- GT.B <- GT.A <- vector("list", 3) |
|
1079 |
+ names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
|
1080 |
+ ##-------------------------------------------------- |
|
1081 |
+ ##within-genotype sufficient statistics |
|
1082 |
+ ##-------------------------------------------------- |
|
1083 |
+ ##GT.B <- GT.A <- list() |
|
1084 |
+ snpIndicator <- matrix(isSnp(object), nrow(object), ncol(object)) ##RS: added |
|
1085 |
+ for(j in 1:3){ |
|
1086 |
+ GT <- G==j & highConf & IX & snpIndicator |
|
1087 |
+ GT <- GT * normal |
|
1088 |
+ Ns[, j+2] <- rowSums(GT, na.rm=TRUE) |
|
1089 |
+ GT[GT == FALSE] <- NA |
|
1090 |
+ GT.A[[j]] <- GT*A |
|
1091 |
+ GT.B[[j]] <- GT*B |
|
1092 |
+ index[[j]] <- which(Ns[, j+2] > 0 & isSnp(object)) ##RS: added |
|
1093 |
+ muA[, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE) |
|
1094 |
+ muB[, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE) |
|
1095 |
+ vA[, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE) |
|
1096 |
+ vB[, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE) |
|
1097 |
+ |
|
1098 |
+ ##Shrink towards the typical variance |
|
1099 |
+ DF <- Ns[, j+2]-1 |
|
1100 |
+ DF[DF < 1] <- 1 |
|
1101 |
+ v0A <- median(vA[, j+2], na.rm=TRUE) |
|
1102 |
+ v0B <- median(vB[, j+2], na.rm=TRUE) |
|
1103 |
+ if(v0A == 0) v0A <- NA |
|
1104 |
+ if(v0B == 0) v0B <- NA |
|
1105 |
+ DF.PRIOR <- cnOptions$DF.PRIOR |
|
1106 |
+ vA[, j+2] <- (vA[, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1107 |
+ vA[is.na(vA[, j+2]), j+2] <- v0A |
|
1108 |
+ vB[, j+2] <- (vB[, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1109 |
+ vB[is.na(vB[, j+2]), j+2] <- v0B |
|
1110 |
+ } |
|
1111 |
+ if(CHR == 23){ |
|
1112 |
+ k <- 1 |
|
1113 |
+ for(j in c(1,3)){ |
|
1114 |
+ GT <- G==j & highConf & !IX |
|
1115 |
+ Ns[, k] <- rowSums(GT) |
|
1116 |
+ GT[GT == FALSE] <- NA |
|
1117 |
+ muA[, k] <- rowMedians(GT*A, na.rm=TRUE) |
|
1118 |
+ muB[, k] <- rowMedians(GT*B, na.rm=TRUE) |
|
1119 |
+ vA[, k] <- rowMAD(GT*A, na.rm=TRUE) |
|
1120 |
+ vB[, k] <- rowMAD(GT*B, na.rm=TRUE) |
|
1121 |
+ |
|
1122 |
+ DF <- Ns[, k]-1 |
|
1123 |
+ DF[DF < 1] <- 1 |
|
1124 |
+ v0A <- median(vA[, k], na.rm=TRUE) |
|
1125 |
+ v0B <- median(vB[, k], na.rm=TRUE) |
|
1126 |
+ vA[, k] <- (vA[, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1127 |
+ vA[is.na(vA[, k]), k] <- v0A |
|
1128 |
+ vB[, k] <- (vB[, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1129 |
+ vB[is.na(vB[, k]), k] <- v0B |
|
1130 |
+ k <- k+1 |
|
1131 |
+ } |
|
1132 |
+ } |
|
1133 |
+ tmp.objects[["Ns"]] <- Ns |
|
1134 |
+ tmp.objects[["vA"]] <- vA |
|
1135 |
+ tmp.objects[["vB"]] <- vB |
|
1136 |
+ tmp.objects[["muA"]] <- muA |
|
1137 |
+ tmp.objects[["muB"]] <- muB |
|
1138 |
+ tmp.objects$index <- index |
|
1139 |
+ tmp.objects$GT.A <- GT.A |
|
1140 |
+ tmp.objects$GT.B <- GT.B |
|
1141 |
+ return(tmp.objects) |
|
1142 |
+} |
|
1143 |
+ |
|
1144 |
+ |
|
1145 |
+oneBatch <- function(object, cnOptions, tmp.objects){ |
|
1146 |
+ muA <- tmp.objects[["muA"]] |
|
1147 |
+ muB <- tmp.objects[["muB"]] |
|
1148 |
+ Ns <- tmp.objects[["Ns"]] |
|
1149 |
+ CHR <- unique(chromosome(object)) |
|
1150 |
+ ##--------------------------------------------------------------------------- |
|
1151 |
+ ## Impute sufficient statistics for unobserved genotypes (plate-specific) |
|
1152 |
+ ##--------------------------------------------------------------------------- |
|
1153 |
+ MIN.OBS <- cnOptions$MIN.OBS |
|
1154 |
+ index.AA <- which(Ns[, "AA"] >= MIN.OBS) |
|
1155 |
+ index.AB <- which(Ns[, "AB"] >= MIN.OBS) |
|
1156 |
+ index.BB <- which(Ns[, "BB"] >= MIN.OBS) |
|
1157 |
+ correct.orderA <- muA[, "AA"] > muA[, "BB"] |
|
1158 |
+ correct.orderB <- muB[, "BB"] > muB[, "AA"] |
|
1159 |
+ ##For chr X, this will ignore the males |
|
1160 |
+ nobs <- rowSums(Ns[, 3:5] >= MIN.OBS, na.rm=TRUE) == 3 |
|
1161 |
+ index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here |
|
1162 |
+ size <- min(5000, length(index.complete)) |
|
1163 |
+ if(size == 5000) index.complete <- sample(index.complete, 5000) |
|
1164 |
+ if(length(index.complete) < 200){ |
|
1165 |
+ stop("fewer than 200 snps pass criteria for predicting the sufficient statistics") |
|
1166 |
+ } |
|
1167 |
+ index <- tmp.objects[["index"]] |
|
1168 |
+ index[[1]] <- which(Ns[, "AA"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS)) |
|
1169 |
+ index[[2]] <- which(Ns[, "AB"] == 0 & (Ns[, "AA"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS)) |
|
1170 |
+ index[[3]] <- which(Ns[, "BB"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "AA"] >= MIN.OBS)) |
|
1171 |
+ mnA <- muA[, 3:5] |
|
1172 |
+ mnB <- muB[, 3:5] |
|
1173 |
+ for(j in 1:3){ |
|
1174 |
+ if(length(index[[j]]) == 0) next() |
|
1175 |
+ X <- cbind(1, mnA[index.complete, -j, drop=FALSE], mnB[index.complete, -j, drop=FALSE]) |
|
1176 |
+ Y <- cbind(mnA[index.complete, j], mnB[index.complete, j]) |
|
1177 |
+ betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
1178 |
+ X <- cbind(1, mnA[index[[j]], -j, drop=FALSE], mnB[index[[j]], -j, drop=FALSE]) |
|
1179 |
+ mus <- X %*% betahat |
|
1180 |
+ muA[index[[j]], j+2] <- mus[, 1] |
|
1181 |
+ muB[index[[j]], j+2] <- mus[, 2] |
|
1182 |
+ } |
|
1183 |
+ nobsA <- Ns[, "A"] > 10 |
|
1184 |
+ nobsB <- Ns[, "B"] > 10 |
|
1185 |
+ notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"])) |
|
1186 |
+ complete <- list() |
|
1187 |
+ complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here |
|
1188 |
+ complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here |
|
1189 |
+ size <- min(5000, length(complete[[1]])) |
|
1190 |
+ if(size == 5000) complete <- lapply(complete, function(x) sample(x, size)) |
|
1191 |
+ if(CHR == 23){ |
|
1192 |
+ index <- list() |
|
1193 |
+ index[[1]] <- which(Ns[, "A"] == 0) |
|
1194 |
+ index[[2]] <- which(Ns[, "B"] == 0) |
|
1195 |
+ cols <- 2:1 |
|
1196 |
+ for(j in 1:2){ |
|
1197 |
+ if(length(index[[j]]) == 0) next() |
|
1198 |
+ X <- cbind(1, muA[complete[[j]], cols[j]], muB[complete[[j]], cols[j]]) |
|
1199 |
+ Y <- cbind(muA[complete[[j]], j], muB[complete[[j]], j]) |
|
1200 |
+ betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
1201 |
+ X <- cbind(1, muA[index[[j]], cols[j]], muB[index[[j]], cols[j]]) |
|
1202 |
+ mus <- X %*% betahat |
|
1203 |
+ muA[index[[j]], j] <- mus[, 1] |
|
1204 |
+ muB[index[[j]], j] <- mus[, 2] |
|
1205 |
+ } |
|
1206 |
+ } |
|
1207 |
+ ##missing two genotypes |
|
1208 |
+ noAA <- Ns[, "AA"] < MIN.OBS |
|
1209 |
+ noAB <- Ns[, "AB"] < MIN.OBS |
|
1210 |
+ noBB <- Ns[, "BB"] < MIN.OBS |
|
1211 |
+ index[[1]] <- noAA & noAB |
|
1212 |
+ index[[2]] <- noBB & noAB |
|
1213 |
+ index[[3]] <- noAA & noBB |
|
1214 |
+## snpflags <- envir[["snpflags"]] |
|
1215 |
+ snpflags <- index[[1]] | index[[2]] | index[[3]] |
|
1216 |
+## snpflags[, p] <- index[[1]] | index[[2]] | index[[3]] |
|
1217 |
+ |
|
1218 |
+ ##--------------------------------------------------------------------------- |
|
1219 |
+ ## Two genotype clusters not observed -- would sequence help? (didn't seem to) |
|
1220 |
+ ## 1. extract index of complete data |
|
1221 |
+ ## 2. Regress mu_missing ~ sequence + mu_observed |
|
1222 |
+ ## 3. solve for nu assuming the median is 2 |
|
1223 |
+ ##--------------------------------------------------------------------------- |
|
1224 |
+ cols <- c(3, 1, 2) |
|
1225 |
+ for(j in 1:3){ |
|
1226 |
+ if(sum(index[[j]]) == 0) next() |
|
1227 |
+ k <- cols[j] |
|
1228 |
+ X <- cbind(1, mnA[index.complete, k], mnB[index.complete, k]) |
|
1229 |
+ Y <- cbind(mnA[index.complete, -k], |
|
1230 |
+ mnB[index.complete, -k]) |
|
1231 |
+ betahat <- solve(crossprod(X), crossprod(X,Y)) |
|
1232 |
+ X <- cbind(1, mnA[index[[j]], k], mnB[index[[j]], k]) |
|
1233 |
+ mus <- X %*% betahat |
|
1234 |
+ muA[index[[j]], -c(1, 2, k+2)] <- mus[, 1:2] |
|
1235 |
+ muB[index[[j]], -c(1, 2, k+2)] <- mus[, 3:4] |
|
1236 |
+ } |
|
1237 |
+ negA <- rowSums(muA < 0) > 0 |
|
1238 |
+ negB <- rowSums(muB < 0) > 0 |
|
1239 |
+ snpflags <- snpflags| negA | negB | rowSums(is.na(muA[, 3:5]), na.rm=TRUE) > 0 |
|
1240 |
+ tmp.objects$snpflags <- snpflags |
|
1241 |
+ tmp.objects[["muA"]] <- muA |
|
1242 |
+ tmp.objects[["muB"]] <- muB |
|
1243 |
+ tmp.objects[["snpflags"]] <- snpflags |
|
1244 |
+ return(tmp.objects) |
|
1245 |
+} |
|
1246 |
+ |
|
1247 |
+##Estimate tau2, sigma2, and correlation (updates the object) |
|
1248 |
+locationAndScale <- function(object, cnOptions, tmp.objects){ |
|
1249 |
+ Ns <- tmp.objects[["Ns"]] |
|
1250 |
+ index <- tmp.objects[["index"]] |
|
1251 |
+ |
|
1252 |
+ index.AA <- index[[1]] |
|
1253 |
+ index.AB <- index[[2]] |
|
1254 |
+ index.BB <- index[[3]] |
|
1255 |
+ rm(index); gc() |
|
1256 |
+ |
|
1257 |
+ GT.A <- tmp.objects[["GT.A"]] |
|
1258 |
+ GT.B <- tmp.objects[["GT.B"]] |
|
1259 |
+ AA.A <- GT.A[[1]] |
|
1260 |
+ AB.A <- GT.A[[2]] |
|
1261 |
+ BB.A <- GT.A[[3]] |
|
1262 |
+ |
|
1263 |
+ AA.B <- GT.B[[1]] |
|
1264 |
+ AB.B <- GT.B[[2]] |
|
1265 |
+ BB.B <- GT.B[[3]] |
|
1266 |
+ |
|
1267 |
+ x <- BB.A[index.BB, ] |
|
1268 |
+ batch <- unique(object$batch) |
|
1269 |
+ DF.PRIOR <- cnOptions$DF.PRIOR |
|
1270 |
+ |
|
1271 |
+ tau2A <- getParam(object, "tau2A", batch) |
|
1272 |
+ tau2A[index.BB] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
1273 |
+ DF <- Ns[, "BB"]-1 |
|
1274 |
+ DF[DF < 1] <- 1 |
|
1275 |
+ med <- median(tau2A, na.rm=TRUE) |
|
1276 |
+ tau2A <- (tau2A * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
1277 |
+ tau2A[is.na(tau2A) & isSnp(object)] <- med |
|
1278 |
+ object <- pr(object, "tau2A", batch, tau2A) |
|
1279 |
+ |
|
1280 |
+ sig2B <- getParam(object, "sig2B", batch) |
|
1281 |
+ x <- BB.B[index.BB, ] |
|
1282 |
+ sig2B[index.BB] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
1283 |
+ med <- median(sig2B, na.rm=TRUE) |
|
1284 |
+ sig2B <- (sig2B * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
1285 |
+ sig2B[is.na(sig2B) & isSnp(object)] <- med |
|
1286 |
+ object <- pr(object, "sig2B", batch, sig2B) |
|
1287 |
+ |
|
1288 |
+ tau2B <- getParam(object, "tau2B", batch) |
|
1289 |
+ x <- AA.B[index.AA, ] |
|
1290 |
+ tau2B[index.AA] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2 |
|
1291 |
+ DF <- Ns[, "AA"]-1 |
|
1292 |
+ DF[DF < 1] <- 1 |
|
1293 |
+ med <- median(tau2B, na.rm=TRUE) |
|
1294 |
+ tau2B <- (tau2B * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
1295 |
+ tau2B[is.na(tau2B) & isSnp(object)] <- med |
|
1296 |
+ object <- pr(object, "tau2B", batch, tau2B) |
|
1297 |
+ |
|
1298 |
+ sig2A <- getParam(object, "sig2A", batch) |
|
1299 |
+ x <- AA.A[index.AA, ] |
|
1300 |
+ sig2A[index.AA] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2##var(log(IA)|AA) |
|
1301 |
+ med <- median(sig2A, na.rm=TRUE) |
|
1302 |
+ sig2A <- (sig2A * DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
1303 |
+ sig2A[is.na(sig2A) & isSnp(object)] <- med |
|
1304 |
+ object <- pr(object, "sig2A", batch, sig2A) |
|
1305 |
+ |
|
1306 |
+ if(length(index.AB) > 0){ ##all homozygous is possible |
|
1307 |
+ corr <- getParam(object, "corr", batch) |
|
1308 |
+ x <- AB.A[index.AB, ] |
|
1309 |
+ y <- AB.B[index.AB, ] |
|
1310 |
+ corr[index.AB] <- rowCors(x, y, na.rm=TRUE) |
|
1311 |
+ corr[corr < 0] <- 0 |
|
1312 |
+ DF <- Ns[, "AB"]-1 |
|
1313 |
+ DF[DF<1] <- 1 |
|
1314 |
+ med <- median(corr, na.rm=TRUE) |
|
1315 |
+ corr <- (corr*DF + med * DF.PRIOR)/(DF.PRIOR + DF) |
|
1316 |
+ corr[is.na(corr) & isSnp(object)] <- med |
|
1317 |
+ object <- pr(object, "corr", batch, corr) |
|
1318 |
+ } |
|
1319 |
+ corrB.AA <- getParam(object, "corrB.AA", batch) |
|
1320 |
+ backgroundB <- AA.B[index.AA, ] |
|
1321 |
+ signalA <- AA.A[index.AA, ] |
|
1322 |
+ corrB.AA[index.AA] <- rowCors(backgroundB, signalA, na.rm=TRUE) |
|
1323 |
+ DF <- Ns[, "AA"]-1 |
|
1324 |
+ DF[DF < 1] <- 1 |
|
1325 |
+ med <- median(corrB.AA, na.rm=TRUE) |
|
1326 |
+ corrB.AA <- (corrB.AA*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
|
1327 |
+ corrB.AA[is.na(corrB.AA) & isSnp(object)] <- med |
|
1328 |
+ object <- pr(object, "corrB.AA", batch, corrB.AA) |
|
1329 |
+ |
|
1330 |
+ corrA.BB <- getParam(object, "corrA.BB", batch) |
|
1331 |
+ backgroundA <- BB.A[index.BB, ] |
|
1332 |
+ signalB <- BB.B[index.BB, ] |
|
1333 |
+ corrA.BB[index.BB] <- rowCors(backgroundA, signalB, na.rm=TRUE) |
|
1334 |
+ DF <- Ns[, "BB"]-1 |
|
1335 |
+ DF[DF < 1] <- 1 |
|
1336 |
+ med <- median(corrA.BB, na.rm=TRUE) |
|
1337 |
+ corrA.BB <- (corrA.BB*DF + med*DF.PRIOR)/(DF.PRIOR + DF) |
|
1338 |
+ corrA.BB[is.na(corrA.BB) & isSnp(object)] <- med |
|
1339 |
+ object <- pr(object, "corrA.BB", batch, corrA.BB) |
|
1340 |
+ return(object) |
|
1341 |
+} |
|
1342 |
+ |
|
1343 |
+##coefs <- function(plateIndex, conf, MIN.OBS=3, envir, CONF.THR=0.99){ |
|
1344 |
+coefs <- function(object, cnOptions, tmp.objects){ |
|
1345 |
+## p <- plateIndex |
|
1346 |
+ batch <- unique(object$batch) |
|
1347 |
+## plates.completed <- envir[["plates.completed"]] |
|
1348 |
+## if(!plates.completed[p]) return() |
|
1349 |
+## CHR <- envir[["chrom"]] |
|
1350 |
+ CHR <- unique(chromosome(object)) |
|
1351 |
+## plate <- envir[["plate"]] |
|
1352 |
+ muA <- tmp.objects[["muA"]] |
|
1353 |
+ muB <- tmp.objects[["muB"]] |
|
1354 |
+ vA <- tmp.objects[["vA"]] |
|
1355 |
+ vB <- tmp.objects[["vB"]] |
|
1356 |
+ Ns <- tmp.objects[["Ns"]] |
|
1357 |
+## uplate <- tmp.objects[["uplate"]] |
|
1358 |
+ if(CHR != 23){ |
|
1359 |
+ IA <- muA[, 3:5] |
|
1360 |
+ IB <- muB[, 3:5] |
|
1361 |
+ vA <- vA[, 3:5] |
|
1362 |
+ vB <- vB[, 3:5] |
|
1363 |
+ Np <- Ns[, 3:5] |
|
1364 |
+ } else { |
|
1365 |
+ NOHET <- is.na(median(vA[, "AB"], na.rm=TRUE)) |
|
1366 |
+ if(NOHET){ |
|
1367 |
+ IA <- muA[, -4] |
|
1368 |
+ IB <- muB[, -4] |
|
1369 |
+ vA <- vA[, -4] |
|
1370 |
+ vB <- vB[, -4] |
|
1371 |
+ Np <- Ns[, -4] |
|
1372 |
+ } else{ |
|
1373 |
+ IA <- muA |
|
1374 |
+ IB <- muB |
|
1375 |
+ vA <- vA |
|
1376 |
+ vB <- vB |
|
1377 |
+ Np <- Ns |
|
1378 |
+ } |
|
1379 |
+ |
|
1380 |
+ } |
|
1381 |
+ Np[Np < 1] <- 1 |
|
1382 |
+ vA2 <- vA^2/Np |
|
1383 |
+ vB2 <- vB^2/Np |
|
1384 |
+ wA <- sqrt(1/vA2) |
|
1385 |
+ wB <- sqrt(1/vB2) |
|
1386 |
+ YA <- IA*wA |
|
1387 |
+ YB <- IB*wB |
|
1388 |
+ ##update lm.coefficients stored in object |
|
1389 |
+ object <- nuphiAllele(object, allele="A", Ystar=YA, W=wA, tmp.objects=tmp.objects, cnOptions=cnOptions) |
|
1390 |
+ object <- nuphiAllele(object, allele="B", Ystar=YB, W=wB, tmp.objects=tmp.objects, cnOptions=cnOptions) |
|
1391 |
+ ##--------------------------------------------------------------------------- |
|
1392 |
+ ##Estimate crosshyb using X chromosome and sequence information |
|
1393 |
+ ##--------------------------------------------------------------------------- |
|
1394 |
+ ##browser() |
|
1395 |
+ ####data(sequences, package="genomewidesnp6Crlmm") |
|
1396 |
+ ##snpflags <- envir[["snpflags"]] |
|
1397 |
+ ##muA <- envir[["muA"]][, p, 3:5] |
|
1398 |
+ ##muB <- envir[["muB"]][, p, 3:5] |
|
1399 |
+ ##Y <- envir[["phiAx"]] |
|
1400 |
+ ##load("sequences.rda") |
|
1401 |
+ ##seqA <- sequences[, "A", ][, 1] |
|
1402 |
+ ##seqA <- seqA[match(snps, names(seqA))] |
|
1403 |
+ ##X <- cbind(1, sequenceDesignMatrix(seqA)) |
|
1404 |
+ ##X <- cbind(X, nuA[, p], phiA[, p], nuB[, p], phiB[, p]) |
|
1405 |
+ ##missing <- rowSums(is.na(X)) > 0 |
|
1406 |
+ ##betahat <- solve(crossprod(X[!missing, ]), crossprod(X[!missing, ], Y[!missing])) |
|
1407 |
+ return(object) |
|
1408 |
+} |
|
1409 |
+ |
|
1410 |
+polymorphic <- function(object, cnOptions, tmp.objects){ |
|
1411 |
+ batch <- unique(object$batch) |
|
1412 |
+ CHR <- unique(chromosome(object)) |
|
1413 |
+ vA <- tmp.objects[["vA"]] |
|
1414 |
+ vB <- tmp.objects[["vB"]] |
|
1415 |
+ Ns <- tmp.objects[["Ns"]] |
|
1416 |
+ |
|
1417 |
+ nuA <- getParam(object, "nuA", batch) |
|
1418 |
+ nuB <- getParam(object, "nuB", batch) |
|
1419 |
+ nuA.se <- getParam(object, "nuA.se", batch) |
|
1420 |
+ nuB.se <- getParam(object, "nuB.se", batch) |
|
1421 |
+ |
|
1422 |
+ phiA <- getParam(object, "phiA", batch) |
|
1423 |
+ phiB <- getParam(object, "phiB", batch) |
|
1424 |
+ phiA.se <- getParam(object, "phiA.se", batch) |
|
1425 |
+ phiB.se <- getParam(object, "phiB.se", batch) |
|
1426 |
+ A <- A(object) |
|
1427 |
+ B <- B(object) |
|
1428 |
+ |
|
1429 |
+ NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05 |
|
1430 |
+ ##--------------------------------------------------------------------------- |
|
1431 |
+ ## Estimate CA, CB |
|
1432 |
+ ##--------------------------------------------------------------------------- |
|
1433 |
+ if(CHR == 23){ |
|
1434 |
+ phiAX <- getParam(object, "phiAX", batch) ##nonspecific hybridization coef |
|
1435 |
+ phiBX <- getParam(object, "phiBX", batch) ##nonspecific hybridization coef |
|
1436 |
+ phistar <- phiBX/phiA |
|
1437 |
+ tmp <- (B-nuB - phistar*A + phistar*nuA)/phiB |
|
1438 |
+ copyB <- tmp/(1-phistar*phiAX/phiB) |
|
1439 |
+ copyA <- (A-nuA-phiAX*copyB)/phiA |
|
1440 |
+ CB(object) <- copyB ## multiplies by 100 and converts to integer |
|
1441 |
+ CA(object) <- copyA |
|
1442 |
+ } else{ |
|
1443 |
+ CA(object) <- matrix((1/phiA*(A-nuA)), nrow(A), ncol(A)) |
|
1444 |
+ CB(object) <- matrix((1/phiB*(B-nuB)), nrow(B), ncol(B)) |
|
1445 |
+ |
|
1446 |
+ } |
|
1447 |
+ return(object) |
|
1448 |
+} |
|
1449 |
+ |
|
1450 |
+biasAdj <- function(object, cnOptions, tmp.objects){ |
|
1451 |
+ I <- isSnp(object) |
|
1452 |
+ gender <- object$gender |
|
1453 |
+ CHR <- unique(chromosome(object)) |
|
1454 |
+ batch <- unique(object$batch) |
|
1455 |
+ if(CHR == 23){ |
|
1456 |
+ phiAX <- getParam(object, "phiAX", batch)[I] |
|
1457 |
+ phiBX <- getParam(object, "phiBX", batch)[I] |
|
1458 |
+ } |
|
1459 |
+ A <- A(object)[I, ] |
|
1460 |
+ B <- B(object)[I, ] |
|
1461 |
+ sig2A <- getParam(object, "sig2A", batch)[I] |
|
1462 |
+ sig2B <- getParam(object, "sig2B", batch)[I] |
|
1463 |
+ tau2A <- getParam(object, "tau2A", batch)[I] |
|
1464 |
+ tau2B <- getParam(object, "tau2B", batch)[I] |
|
1465 |
+ corrA.BB <- getParam(object, "corrA.BB", batch)[I] |
|
1466 |
+ corrB.AA <- getParam(object, "corrB.AA", batch)[I] |
|
1467 |
+ corr <- getParam(object, "corr", batch)[I] |
|
1468 |
+ nuA <- getParam(object, "nuA", batch)[I] |
|
1469 |
+ nuB <- getParam(object, "nuB", batch)[I] |
|
1470 |
+ phiA <- getParam(object, "phiA", batch)[I] |
|
1471 |
+ phiB <- getParam(object, "phiB", batch)[I] |
|
1472 |
+ normal <- tmp.objects[["normal"]][I, ] |
|
1473 |
+ prior.prob <- cnOptions$prior.prob |
|
1474 |
+ emit <- array(NA, dim=c(nrow(A), ncol(A), 10))##SNPs x sample x 'truth' |
|
1475 |
+ lA <- log2(A) |
|
1476 |
+ lB <- log2(B) |
|
1477 |
+ X <- cbind(lA, lB) |
|
1478 |
+ counter <- 1##state counter |
|
1479 |
+ for(CT in 0:3){ |
|
1480 |
+ for(CA in 0:CT){ |
|
1481 |
+ cat(".") |
|
1482 |
+ CB <- CT-CA |
|
1483 |
+ A.scale <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0)) |
|
1484 |
+ B.scale <- sqrt(tau2B*(CA==0) + sig2B*(CA > 0)) |
|
1485 |
+ if(CA == 0 & CB == 0) rho <- 0 |
|
1486 |
+ if(CA == 0 & CB > 0) rho <- corrA.BB |
|
1487 |
+ if(CA > 0 & CB == 0) rho <- corrB.AA |
|
1488 |
+ if(CA > 0 & CB > 0) rho <- corr |
|
1489 |
+ if(CHR == 23){ |
|
1490 |
+ ##means <- cbind(suppressWarnings(log2(nuA[, p]+CA*phiA[, p] + CB*phiAx[, p])), suppressWarnings(log2(nuB[, p]+CB*phiB[, p] + CA*phiBx[, p]))) |
|
1491 |
+ meanA <- suppressWarnings(log2(nuA+CA*phiA + CB*phiAX)) |
|
1492 |
+ meanB <- suppressWarnings(log2(nuB+CB*phiB + CA*phiBX)) |
|
1493 |
+ } else{ |
|
1494 |
+ ##means <- cbind(suppressWarnings(log2(nuA+CA*phiA)), suppressWarnings(log2(nuB+CB*phiB))) |
|
1495 |
+ meanA <- suppressWarnings(log2(nuA+CA*phiA)) |
|
1496 |
+ meanB <- suppressWarnings(log2(nuB+CB*phiB)) |
|
1497 |
+ covs <- rho*A.scale*B.scale |
|
1498 |
+ A.scale2 <- A.scale^2 |
|
1499 |
+ B.scale2 <- B.scale^2 |
|
1500 |
+ } |
|
1501 |
+ Q.x.y <- 1/(1-rho^2)*(((lA - meanA)/A.scale)^2 + ((lB - meanB)/B.scale)^2 - 2*rho*((lA - meanA)*(lB - meanB))/(A.scale*B.scale)) |
|
1502 |
+ f.x.y <- 1/(2*pi*A.scale*B.scale*sqrt(1-rho^2))*exp(-0.5*Q.x.y) |
|
1503 |
+ emit[, , counter] <- f.x.y |
|
1504 |
+ counter <- counter+1 |
|
1505 |
+ } |
|
1506 |
+ } |
|
1507 |
+ priorProb <- cnOptions$prior.prob |
|
1508 |
+ homDel <- priorProb[1]*emit[, , 1] |
|
1509 |
+ hemDel <- priorProb[2]*emit[, , c(2, 3)] # + priorProb[3]*emit[, c(4, 5, 6)] + priorProb[4]*emit[, c(7:10)] |
|
1510 |
+ norm <- priorProb[3]*emit[, , 4:6] |
|
1511 |
+ amp <- priorProb[4]*emit[, , 7:10] |
|
1512 |
+ ##sum over the different combinations within each copy number state |
|
1513 |
+ hemDel <- hemDel[, , 1] + hemDel[, , 2] |
|
1514 |
+ norm <- norm[, , 1] + norm[, , 2] + norm[, , 3] |
|
1515 |
+ amp <- amp[, , 1] + amp[, , 2] + amp[ , , 3] + amp[, , 4] |
|
1516 |
+ total <- homDel + hemDel + norm + amp |
|
1517 |
+ homDel <- homDel/total |
|
1518 |
+ hemDel <- hemDel/total |
|
1519 |
+ norm <- norm/total |
|
1520 |
+ amp <- amp/total |
|
1521 |
+ tmp.objects$posteriorProb <- list(hemDel=hemDel, norm=norm, amp=amp) |
|
1522 |
+ ##envir[["posteriorProb"]] <- list(hemDel=hemDel, norm=norm, amp=amp) |
|
1523 |
+ posteriorProb <- array(NA, dim=c(nrow(A), ncol(A), 4)) |
|
1524 |
+ posteriorProb[, , 1] <- homDel |
|
1525 |
+ posteriorProb[, , 2] <- hemDel |
|
1526 |
+ posteriorProb[, , 3] <- norm |
|
1527 |
+ posteriorProb[, , 4] <- amp |
|
1528 |
+ mostLikelyState <- apply(posteriorProb, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
|
1529 |
+ if(CHR == 23){ |
|
1530 |
+ ##so state index 3 is the most likely state for men and women |
|
1531 |
+ mostLikelyState[, gender==1] <- mostLikelyState[, gender==1] + 1 |
|
1532 |
+ } |
|
1533 |
+ proportionSamplesAltered <- rowMeans(mostLikelyState != 3) |
|
1534 |
+ ii <- proportionSamplesAltered < 0.8 & proportionSamplesAltered > 0.01 |
|
1535 |
+ ## only exclude observations from one tail, depending on |
|
1536 |
+ ## whether more are up or down |
|
1537 |
+ moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) ##3 is normal |
|
1538 |
+ NORM <- matrix(FALSE, nrow(A), ncol(A)) |
|
1539 |
+ NORM[proportionSamplesAltered > 0.8, ] <- FALSE |
|
1540 |
+ ratioUp <- posteriorProb[, , 4]/posteriorProb[, , 3] |
|
1541 |
+ NORM[ii & moreup, ] <- ratioUp[moreup & ii] < 1 ##normal more likely |
|
1542 |
+ ratioDown <- posteriorProb[, , 2]/posteriorProb[, , 3] |
|
1543 |
+ NORM[ii & !moreup, ] <- ratioDown[!moreup & ii] < 1 ##normal more likely |
|
1544 |
+ normal <- NORM*normal |
|
1545 |
+ tmp <- tmp.objects[["normal"]] |
|
1546 |
+ tmp[I, ] <- normal |
|
1547 |
+ tmp.objects[["normal"]] <- tmp |
|
1548 |
+ return(tmp.objects) |
|
1549 |
+} |
|
1550 |
+ |
|
1551 |
+ |
|
1552 |
+##biasAdjNP <- function(plateIndex, envir, priorProb){ |
|
1553 |
+biasAdjNP <- function(object, cnOptions, tmp.objects){ |
|
1554 |
+ batch <- unique(object$batch) |
|
1555 |
+ normalNP <- tmp.objects[["normal"]][!isSnp(object), ] |
|
1556 |
+ CHR <- unique(chromosome(object)) |
|
1557 |
+ A <- A(object)[!isSnp(object), ] |
|
1558 |
+ sig2A <- getParam(object, "sig2A", batch) |
|
1559 |
+ gender <- object$gender |
|
1560 |
+ ##Assume that on the log-scale, that the background variance is the same... |
|
1561 |
+ tau2A <- sig2A |
|
1562 |
+ nuA <- getParam(object, "nuA", batch) |
|
1563 |
+ phiA <- getParam(object, "phiA", batch) |
|
1564 |
+ prior.prob <- cnOptions$prior.prob |
|
1565 |
+ emit <- array(NA, dim=c(nrow(A), ncol(A), 4))##SNPs x sample x 'truth' |
|
1566 |
+ lT <- log2(A) |
|
1567 |
+ I <- isSnp(object) |
|
1568 |
+ counter <- 1 ##state counter |
|
1569 |
+ for(CT in 0:3){ |
|
1570 |
+ sds <- sqrt(tau2A[I]*(CT==0) + sig2A[I]*(CT > 0)) |
|
1571 |
+ means <- suppressWarnings(log2(nuA[I]+CT*phiA[I])) |
|
1572 |
+ tmp <- dnorm(lT, mean=means, sd=sds) |
|
1573 |
+ emit[, , counter] <- tmp |
|
1574 |
+ counter <- counter+1 |
|
1575 |
+ } |
|
1576 |
+ mostLikelyState <- apply(emit, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
|
1577 |
+ if(CHR == 23){ |
|
1578 |
+ ## the state index for male on chromosome 23 is 2 |
|
1579 |
+ ## add 1 so that the state index is 3 for 'normal' state |
|
1580 |
+ mostLikelyState[, gender=="male"] <- mostLikelyState[, gender==1] + 1 |
|
1581 |
+ } |
|
1582 |
+ tmp3 <- mostLikelyState != 3 |
|
1583 |
+ ##Those near 1 have NaNs for nu and phi. this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome |
|
1584 |
+ proportionSamplesAltered <- rowMeans(tmp3)##prop normal |
|
1585 |
+ ii <- proportionSamplesAltered < 0.75 |
|
1586 |
+ moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) |
|
1587 |
+ notUp <- mostLikelyState[ii & moreup, ] <= 3 |
|
1588 |
+ notDown <- mostLikelyState[ii & !moreup, ] >= 3 |
|
1589 |
+ NORM <- matrix(TRUE, nrow(A), ncol(A)) |
|
1590 |
+ NORM[ii & moreup, ] <- notUp |
|
1591 |
+ NORM[ii & !moreup, ] <- notDown |
|
1592 |
+ normalNP <- normalNP*NORM |
|
1593 |
+ |
|
1594 |
+ ##flagAltered <- which(proportionSamplesAltered > 0.5) |
|
1595 |
+ ##envir[["flagAlteredNP"]] <- flagAltered |
|
1596 |
+ normal <- tmp.objects[["normal"]] |
|
1597 |
+ normal[!isSnp(object), ] <- normalNP |
|
1598 |
+ tmp.objects[["normal"]] <- normal |
|
1599 |
+ return(tmp.objects) |
|
1600 |
+} |
|
1601 |
+ |
|
1602 |
+ |
|
1603 |
+getParams <- function(object, batch){ |
|
1604 |
+ batch <- unique(object$batch) |
|
1605 |
+ if(length(batch) > 1) stop("batch variable not unique") |
|
1606 |
+ nuA <- as.numeric(fData(object)[, match(paste("nuA", batch, sep="_"), fvarLabels(object))]) |
|
1607 |
+ nuB <- as.numeric(fData(object)[, match(paste("nuB", batch, sep="_"), fvarLabels(object))]) |
|
1608 |
+ phiA <- as.numeric(fData(object)[, match(paste("phiA", batch, sep="_"), fvarLabels(object))]) |
|
1609 |
+ phiB <- as.numeric(fData(object)[, match(paste("phiB", batch, sep="_"), fvarLabels(object))]) |
|
1610 |
+ tau2A <- as.numeric(fData(object)[, match(paste("tau2A", batch, sep="_"), fvarLabels(object))]) |
|
1611 |
+ tau2B <- as.numeric(fData(object)[, match(paste("tau2B", batch, sep="_"), fvarLabels(object))]) |
|
1612 |
+ sig2A <- as.numeric(fData(object)[, match(paste("sig2A", batch, sep="_"), fvarLabels(object))]) |
|
1613 |
+ sig2B <- as.numeric(fData(object)[, match(paste("sig2B", batch, sep="_"), fvarLabels(object))]) |
|
1614 |
+ corrA.BB <- as.numeric(fData(object)[, match(paste("corrA.BB", batch, sep="_"), fvarLabels(object))]) |
|
1615 |
+ corrB.AA <- as.numeric(fData(object)[, match(paste("corrB.AA", batch, sep="_"), fvarLabels(object))]) |
|
1616 |
+ corr <- as.numeric(fData(object)[, match(paste("corr", batch, sep="_"), fvarLabels(object))]) |
|
1617 |
+ params <- list(nuA=nuA, |
|
1618 |
+ nuB=nuB, |
|
1619 |
+ phiA=phiA, |
|
1620 |
+ phiB=phiB, |
|
1621 |
+ tau2A=tau2A, |
|
1622 |
+ tau2B=tau2B, |
|
1623 |
+ sig2A=sig2A, |
|
1624 |
+ sig2B=sig2B, |
|
1625 |
+ corrA.BB=corrA.BB, |
|
1626 |
+ corrB.AA=corrB.AA, |
|
1627 |
+ corr=corr) |
|
1628 |
+ return(params) |
|
1629 |
+} |
|
1630 |
+ |
|
1631 |
+hmmOptions <- function(copynumberStates=0:4, |
|
1632 |
+ EMIT.THR=-10, |
|
1633 |
+ scaleSds=TRUE, |
|
1634 |
+ verbose=TRUE, |
|
1635 |
+ log.initialP, |
|
1636 |
+ normalIndex=3, |
|
1637 |
+ normal2altered=0.01, |
|
1638 |
+ altered2normal=1, |
|
1639 |
+ altered2altered=0.001, |
|
1640 |
+ TAUP=1e8, |
|
1641 |
+ save.it=TRUE, |
|
1642 |
+ MIN.MARKERS=5){ ## whether the save the emission probabilities |
|
1643 |
+ if(missing(log.initialP)) log.initialP <- log(rep(1/length(copynumberStates), length(copynumberStates))) |
|
1644 |
+ list(copynumberStates=copynumberStates, |
|
1645 |
+ EMIT.THR=EMIT.THR, |
|
1646 |
+ scaleSds=scaleSds, |
|
1647 |
+ verbose=verbose, |
|
1648 |
+ log.initialP=log.initialP, |
|
1649 |
+ normalIndex=normalIndex, |
|
1650 |
+ normal2altered=normal2altered, |
|
1651 |
+ altered2normal=altered2normal, |
|
1652 |
+ altered2altered=altered2altered, |
|
1653 |
+ TAUP=TAUP, |
|
1654 |
+ save.it=save.it, |
|
1655 |
+ MIN.MARKERS=MIN.MARKERS) |
|
1656 |
+} |
|
1657 |
+ |
|
1658 |
+computeHmm.CrlmmSet <- function(object, cnOptions){ |
|
1659 |
+ hmmOptions <- cnOptions[["hmmOpts"]] |
|
1660 |
+ object <- object[order(chromosome(object), position(object)), ] |
|
1661 |
+ ##emission <- hmmOptions[["emission"]] |
|
1662 |
+ chrom <- unique(chromosome(object)) |
|
1663 |
+ tPr <- transitionProbability(chromosome=chromosome(object), |
|
1664 |
+ position=position(object), |
|
1665 |
+ TAUP=hmmOptions[["TAUP"]]) |
|
1666 |
+ |
|
1667 |
+ emission <- computeEmission(object, hmmOptions) |
|
1668 |
+## if(cnOptions[["save.it"]]) |
|
1669 |
+## save(emission, |
|
1670 |
+## file=file.path(cnOptions[["outdir"]], paste("emission_", chrom, ".rda", sep=""))) |
|
1671 |
+ hmmPredictions <- viterbi.CrlmmSet(object, |
|
1672 |
+ hmmOptions=hmmOptions, |
|
1673 |
+ emissionPr=emission, |
|
1674 |
+ transitionPr=tPr[, "transitionPr"], |
|
1675 |
+ chromosomeArm=tPr[, "arm"]) |
|
1676 |
+ segments <- breaks(x=hmmPredictions, |
|
1677 |
+ states=hmmOptions[["copynumberStates"]], |
|
1678 |
+ position=position(object), |
|
1679 |
+ chromosome=chromosome(object)) |
|
1680 |
+ ## |
|
1681 |
+ object <- new("SegmentSet", |
|
1682 |
+ CA=object@assayData[["CA"]], ## keep as an integer |
|
1683 |
+ CB=object@assayData[["CB"]], ## keep as an integer |
|
1684 |
+ senseThetaA=A(object), |
|
1685 |
+ senseThetaB=B(object), |
|
1686 |
+ calls=calls(object), |
|
1687 |
+ callsConfidence=callsConfidence(object), |
|
1688 |
+ featureData=featureData(object), |
|
1689 |
+ phenoData=phenoData(object), |
|
1690 |
+ protocolData=protocolData(object), |
|
1691 |
+ experimentData=experimentData(object), |
|
1692 |
+ annotation=annotation(object), |
|
1693 |
+ segmentData=segments, |
|
1694 |
+ emissionPr=emission) |
|
1695 |
+ |
|
1696 |
+} |
|
1697 |
+ |
|
1698 |
+viterbi.CrlmmSet <- function(object, hmmOptions, emissionPr, transitionPr, chromosomeArm){ |
|
1699 |
+ viterbi(emission=emissionPr, |
|
1700 |
+ tau=transitionPr, |
|
1701 |
+ initialStateProbs=hmmOptions[["log.initialP"]], |
|
1702 |
+ arm=chromosomeArm, |
|
1703 |
+ normalIndex=hmmOptions[["normalIndex"]], |
|
1704 |
+ normal2altered=hmmOptions[["normal2altered"]], |
|
1705 |
+ altered2normal=hmmOptions[["altered2normal"]], |
|
1706 |
+ altered2altered=hmmOptions[["altered2altered"]]) |
|
1707 |
+} |
|
1708 |
+ |
|
1709 |
+ |
|
1710 |
+ |
|
1711 |
+ |
|
1712 |
+ |
|
1713 |
+thresholdModelParams <- function(object, cnOptions){ |
|
1714 |
+ MIN.NU <- cnOptions$MIN.NU |
|
1715 |
+ MIN.PHI <- cnOptions$MIN.PHI |
|
1716 |
+ batch <- unique(object$batch) |
|
1717 |
+ nuA <- getParam(object, "nuA", batch) |
|
1718 |
+ nuA[nuA < MIN.NU] <- MIN.NU |
|
1719 |
+ nuB <- getParam(object, "nuB", batch) |
|
1720 |
+ nuB[nuB < MIN.NU] <- MIN.NU |
|
1721 |
+ phiA <- getParam(object, "phiA", batch) |
|
1722 |
+ phiA[phiA < MIN.PHI] <- MIN.PHI |
|
1723 |
+ phiB <- getParam(object, "phiB", batch) |
|
1724 |
+ phiB[phiB < MIN.PHI] <- MIN.PHI |
|
1725 |
+ phiAX <- as.numeric(getParam(object, "phiAX", batch)) |
|
1726 |
+ phiAX[phiAX < MIN.PHI] <- MIN.PHI |
|
1727 |
+ phiBX <- as.numeric(getParam(object, "phiBX", batch)) |
|
1728 |
+ phiBX[phiBX < MIN.PHI] <- MIN.PHI |
|
1729 |
+ |
|
1730 |
+ object <- pr(object, "nuA", batch, nuA) |
|
1731 |
+ object <- pr(object, "nuB", batch, nuB) |
|
1732 |
+ object <- pr(object, "phiA", batch, phiA) |
|
1733 |
+ object <- pr(object, "phiB", batch, phiB) |
|
1734 |
+ object <- pr(object, "phiAX", batch, phiAX) |
|
1735 |
+ object <- pr(object, "phiBX", batch, phiBX) |
|
1736 |
+ return(object) |
|
1737 |
+} |
|
1738 |
+ |
|
1739 |
+ |
|
1740 |
+computeEmission.CrlmmSet <- function(object, hmmOptions){ |
|
1741 |
+ EMIT.THR <- hmmOptions[["EMIT.THR"]] |
|
1742 |
+ cnStates <- hmmOptions[["copynumberStates"]] |
|
1743 |
+ object <- object[order(chromosome(object), position(object)), ] |
|
1744 |
+ if(any(diff(position(object)) < 0)) stop("must be ordered by chromosome and physical position") |
|
1745 |
+ emissionProbs <- array(NA, dim=c(nrow(object), ncol(object), length(hmmOptions[["copynumberStates"]]))) |
|
1746 |
+ dimnames(emissionProbs) <- list(featureNames(object), |
|
1747 |
+ sampleNames(object), |
|
1748 |
+ paste("copy.number_", hmmOptions[["copynumberStates"]], sep="")) |
|
1749 |
+ batch <- object$batch |
|
1750 |
+ for(i in seq(along=unique(batch))){ |
|
1751 |
+ emissionProbs[, batch == unique(batch)[i], ] <- getEmission(object[, batch==unique(batch)[i]], hmmOptions) |
|
1752 |
+ } |
|
1753 |
+ if(EMIT.THR > -Inf){ ## truncate emission probabilities for outliers |
|
1754 |
+ emissionProbs[emissionProbs < EMIT.THR] <- EMIT.THR |
|
1755 |
+ } |
|
1756 |
+ emissionProbs |
|
1757 |
+} |
|
1758 |
+ |
|
1759 |
+getEmission <- function(object, hmmOptions){ |
|
1760 |
+ emissionProbs <- array(NA, dim=c(nrow(object), |
|
1761 |
+ ncol(object), length(hmmOptions[["copynumberStates"]]))) |
|
1762 |
+ emit.snps <- getEmission.snps(object[isSnp(object), ], hmmOptions) |
|
1763 |
+ emit.nps <- getEmission.nps(object[!isSnp(object), ], hmmOptions) |
|
1764 |
+ emissionProbs[isSnp(object), , ] <- emit.snps |
|
1765 |
+ emissionProbs[!isSnp(object), , ] <- emit.nps |
|
1766 |
+ emissionProbs |
|
1767 |
+} |
|
1768 |
+ |
|
1769 |
+getEmission.nps <- function(object, hmmOptions){ |
|
1770 |
+ ##**************************************************** |
|
1771 |
+ ## * |
|
1772 |
+ ## Emission probabilities for nonpolymorphic probes * |
|
1773 |
+ ## * |
|
1774 |
+ ##**************************************************** |
|
1775 |
+ batch <- unique(object$batch) |
|
1776 |
+ scaleSds <- hmmOptions[["scaleSds"]] |
|
1777 |
+ cnStates <- hmmOptions[["copynumberStates"]] |
|
1778 |
+ verbose <- hmmOptions[["verbose"]] |
|
1779 |
+ if(verbose) message("Computing emission probabilities for nonpolymorphic probes.") |
|
1780 |
+ if(scaleSds){ |
|
1781 |
+ a <- CA(object) |
|
1782 |
+ sds.a <- apply(a, 2, sd, na.rm=TRUE) |
|
1783 |
+ sds.a <- log2(sds.a/median(sds.a)) |
|
1784 |
+ sds.a <- matrix(sds.a, nrow(object), ncol(object), byrow=TRUE) |
|
1785 |
+ } else sds.a <- matrix(0, nrow(object), ncol(object)) |
|
1786 |
+ emissionProbs <- array(NA, dim=c(nrow(object), |
|
1787 |
+ ncol(object), length(cnStates))) |
|
1788 |
+ nuA <- getParam(object, "nuA", batch) |
|
1789 |
+ phiA <- getParam(object, "phiA", batch) |
|
1790 |
+ sig2A <- getParam(object, "sig2A", batch) |
|
1791 |
+ ##tau2A <- getParam(object, "tau2A", batch) |
|
1792 |
+ ##Assume that on the log-scale, that the background variance is the same... |
|
1793 |
+ ##tau2A <- sig2A |
|
1794 |
+ a <- as.numeric(log2(A(object))) |
|
1795 |
+ for(k in seq(along=cnStates)){ |
|
1796 |
+ CT <- cnStates[k] |
|
1797 |
+ mus.matrix=matrix(log2(nuA + CT*phiA), nrow(object), ncol(object)) |
|
1798 |
+ mus <- as.numeric(matrix(log2(nuA + CT*phiA), nrow(object), ncol(object))) |
|
1799 |
+ sds.matrix <- matrix(sqrt(sig2A), nrow(object), ncol(object)) |
|
1800 |
+ sds.matrix <- sds.matrix + sds.a |
|
1801 |
+ sds <- as.numeric(sds.matrix) |
|
1802 |
+ suppressWarnings(tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(object), ncol(object))) |
|
1803 |
+ emissionProbs[, , k] <- log(tmp) |
|
1804 |
+ } |
|
1805 |
+ emissionProbs |
|
1806 |
+} |
|
1807 |
+ |
|
1808 |
+ |
|
1809 |
+getEmission.snps <- function(object, hmmOptions){ |
|
1810 |
+ batch <- unique(object$batch) |
|
1811 |
+ if(length(batch) > 1) stop("batch variable not unique") |
|
1812 |
+ scaleSds <- hmmOptions[["scaleSds"]] |
|
1813 |
+ cnStates <- hmmOptions[["copynumberStates"]] |
|
1814 |
+ verbose <- hmmOptions[["verbose"]] |
|
1815 |
+ if(scaleSds){ |
|
1816 |
+ a <- CA(object) |
|
1817 |
+ b <- CB(object) |
|
1818 |
+ sds.a <- apply(a, 2, sd, na.rm=TRUE) |
|
1819 |
+ sds.b <- apply(b, 2, sd, na.rm=TRUE) |
|
1820 |
+ sds.a <- log2(sds.a/median(sds.a)) |
|
1821 |
+ sds.b <- log2(sds.b/median(sds.b)) |
|
1822 |
+ sds.a <- matrix(sds.a, nrow(object), ncol(object), byrow=TRUE) |
|
1823 |
+ sds.b <- matrix(sds.b, nrow(object), ncol(object), byrow=TRUE) |
|
1824 |
+ } else sds.a <- sds.b <- matrix(0, nrow(object), ncol(object)) |
|
1825 |
+ emissionProbs <- array(NA, dim=c(nrow(object), |
|
1826 |
+ ncol(object), length(cnStates))) |
|
1827 |
+ corr <- getParam(object, "corr", batch) |
|
1828 |
+ corrA.BB <- getParam(object, "corrA.BB", batch) |
|
1829 |
+ corrB.AA <- getParam(object, "corrB.AA", batch) |
|
1830 |
+ nuA <- getParam(object, "nuA", batch) |
|
1831 |
+ nuB <- getParam(object, "nuB", batch) |
|
1832 |
+ phiA <- getParam(object, "phiA", batch) |
|
1833 |
+ phiB <- getParam(object, "phiB", batch) |
|
1834 |
+ sig2A <- getParam(object, "sig2A", batch) |
|
1835 |
+ sig2B <- getParam(object, "sig2B", batch) |
|
1836 |
+ tau2A <- getParam(object, "tau2A", batch) |
|
1837 |
+ tau2B <- getParam(object, "tau2B", batch) |
|
1838 |
+ a <- as.numeric(log2(A(object))) |
|
1839 |
+ b <- as.numeric(log2(B(object))) |
|
1840 |
+ |
|
1841 |
+ for(k in seq(along=cnStates)){ |
|
1842 |
+ T <- cnStates[k] |
|
1843 |
+ f.x.y <- matrix(0, sum(nrow(object)), ncol(object)) |
|
1844 |
+ for(copyA in 0:T){ |
|
1845 |
+ copyB <- T-copyA |
|
1846 |
+ sigmaA <- sqrt(tau2A*(copyA==0) + sig2A*(copyA > 0)) |
|
1847 |
+ sigmaB <- sqrt(tau2B*(copyB==0) + sig2B*(copyB > 0)) |
|
1848 |
+ if(copyA == 0 & copyB > 0) r <- corrA.BB |
|
1849 |
+ if(copyA > 0 & copyB == 0) r <- corrB.AA |
|
1850 |
+ if(copyA > 0 & copyB > 0) r <- corr |
|
1851 |
+ if(copyA == 0 & copyB == 0) r <- 0 |
|
1852 |
+ muA <- log2(nuA+copyA*phiA) |
|
1853 |
+ muB <- log2(nuB+copyB*phiB) |
|
1854 |
+ |
|
1855 |
+ sigmaA <- matrix(sigmaA, nrow=length(sigmaA), ncol=ncol(object), byrow=FALSE) |
|
1856 |
+ sigmaB <- matrix(sigmaB, nrow=length(sigmaB), ncol=ncol(object), byrow=FALSE) |
|
1857 |
+ ## scale the variances by a sample-specific estimate of the variances |
|
1858 |
+ ## var(I_A, ijp) = sigma_A_ip * sigma_A_jp |
|
1859 |
+ sigmaA <- sigmaA+sds.a |
|
1860 |
+ sigmaB <- sigmaB+sds.b |
|
1861 |
+ meanA <- as.numeric(matrix(muA, nrow(object), ncol(object))) |
|
1862 |
+ meanB <- as.numeric(matrix(muB, nrow(object), ncol(object))) |
|
1863 |
+ rho <- as.numeric(matrix(r, nrow(object), ncol(object))) |
|
1864 |
+ sd.A <- as.numeric(matrix(sigmaA, nrow(object), ncol(object))) |
|
1865 |
+ sd.B <- as.numeric(matrix(sigmaB, nrow(object), ncol(object))) |
|
1866 |
+ Q.x.y <- 1/(1-rho^2)*(((a - meanA)/sd.A)^2 + ((b - meanB)/sd.B)^2 - 2*rho*((a - meanA)*(b - meanB))/(sd.A*sd.B)) |
|
1867 |
+ ## For CN states > 1, assume that any of the possible genotypes are equally likely a priori...just take the sum |
|
1868 |
+ ## For instance, for state copy number 2 there are three combinations: AA, AB, BB |
|
1869 |
+ ## -- two of the three combinations should be near zero. |
|
1870 |
+ ## TODO: copy-neutral LOH would put near-zero mass on both copyA > 0, copyB > 0 |
|
1871 |
+ f.x.y <- f.x.y + matrix(1/(2*pi*sd.A*sd.B*sqrt(1-rho^2))*exp(-0.5*Q.x.y), nrow(object), ncol(object)) |
|
1872 |
+ } |
|
1873 |
+ emissionProbs[, , k] <- log(f.x.y) |
|
1874 |
+ } |
|
1875 |
+ emissionProbs |
|
1876 |
+} |
|
1877 |
+ |
|
1878 |
+setMethod("update", "character", function(object, ...){ |
|
1879 |
+ crlmmFile <- object |
|
1880 |
+ for(i in seq(along=crlmmFile)){ |
|
1881 |
+ cat("Processing ", crlmmFile[i], "...\n") |
|
1882 |
+ load(crlmmFile[i]) |
|
1883 |
+ crlmmSetList <- get("crlmmSetList") |
|
1884 |
+ if(length(crlmmSetList) == 3) next() ##copy number object already present. |
|
1885 |
+ if(!"chromosome" %in% fvarLabels(crlmmSetList[[1]])){ |
|
1886 |
+ featureData(crlmmSetList[[1]]) <- addFeatureAnnotation(crlmmSetList) |
|
1887 |
+ } |
|
1888 |
+ CHR <- unique(chromosome(crlmmSetList[[1]])) |
|
1889 |
+ if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.") |
|
1890 |
+ if(CHR==24){ |
|
1891 |
+ message("skipping chromosome 24") |
|
1892 |
+ next() |
|
1893 |
+ } |
|
1894 |
+ cat("----------------------------------------------------------------------------\n") |
|
1895 |
+ cat("- Estimating copy number for chromosome", CHR, "\n") |
|
1896 |
+ cat("----------------------------------------------------------------------------\n") |
|
1897 |
+ crlmmSetList <- update(crlmmSetList, CHR=CHR, ...) |
|
1898 |
+ save(crlmmSetList, file=crlmmFile[i]) |
|
1899 |
+ rm(crlmmSetList); gc(); |
|
1900 |
+ } |
|
1901 |
+}) |
|
1902 |
+ |
|
1903 |
+ |
|
1904 |
+addFeatureAnnotation.SnpCallSetPlus <- function(object, ...){ |
|
1905 |
+ ##if(missing(CHR)) stop("Must specificy chromosome") |
|
1906 |
+ cdfName <- annotation(object) |
|
1907 |
+ pkgname <- paste(cdfName, "Crlmm", sep="") |
|
1908 |
+ path <- system.file("extdata", package=pkgname) |
|
1909 |
+ loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
1910 |
+ cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
|
1911 |
+ loader("snpProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
1912 |
+ snpProbes <- get("snpProbes", envir=.crlmmPkgEnv) |
|
1913 |
+ |
|
1914 |
+ ##Feature Data |
|
1915 |
+ isSnp <- rep(as.integer(0), nrow(object)) |
|
1916 |
+ isSnp[snpIndex(object)] <- as.integer(1) |
|
1917 |
+ names(isSnp) <- featureNames(object) |
|
1918 |
+## snps <- featureNames(object)[snpIndex(object)] |
|
1919 |
+## nps <- featureNames(object)[cnIndex(object)] |
|
1920 |
+ snps <- featureNames(object)[isSnp == 1] |
|
1921 |
+ nps <- featureNames(object)[isSnp == 0] |
|
1922 |
+ position.snp <- snpProbes[match(snps, rownames(snpProbes)), "position"] |
|
1923 |
+ names(position.snp) <- snps |
|
1924 |
+ position.np <- cnProbes[match(nps, rownames(cnProbes)), "position"] |
|
1925 |
+ names(position.np) <- nps |
|
1926 |
+ |
|
1927 |
+ J <- grep("chr", colnames(snpProbes)) |
|
1928 |
+ chr.snp <- snpProbes[match(snps, rownames(snpProbes)), J] |
|
1929 |
+ chr.np <- cnProbes[match(nps, rownames(cnProbes)), J] |
|
1930 |
+ |
|
1931 |
+ position <- c(position.snp, position.np) |
|
1932 |
+ chrom <- c(chr.snp, chr.np) |
|
1933 |
+ |
|
1934 |
+ ##We may not have annotation for all of the snps |
|
1935 |
+ if(!all(featureNames(object) %in% names(position))){ |
|
1936 |
+ message("Dropping loci for which physical position is not available.") |
|
1937 |
+ object <- object[featureNames(object) %in% names(position), ] |
|
1938 |
+ } |
|
1939 |
+ ix <- match(featureNames(object), names(position)) |
|
1940 |
+ position <- position[ix] |
|
1941 |
+ chrom <- chrom[ix] |
|
1942 |
+ ##require(SNPchip) |
|