...
|
...
|
@@ -47,7 +47,7 @@ getFeatureData <- function(cdfName, copynumber=FALSE){
|
47
|
47
|
M[index, "isSnp"] <- 0L
|
48
|
48
|
}
|
49
|
49
|
##A few of the snpProbes do not match -- I think it is chromosome Y.
|
50
|
|
- M[is.na(M[, "isSnp"]), "isSnp"] <- 1L
|
|
50
|
+ M[is.na(M[, "chromosome"]), "isSnp"] <- 1L
|
51
|
51
|
M <- data.frame(M)
|
52
|
52
|
return(new("AnnotatedDataFrame", data=M))
|
53
|
53
|
}
|
...
|
...
|
@@ -60,6 +60,9 @@ construct <- function(filenames,
|
60
|
60
|
if(!missing(batch)){
|
61
|
61
|
stopifnot(length(batch) == length(sns))
|
62
|
62
|
}
|
|
63
|
+ if(any(is.na(batch))){
|
|
64
|
+ stop("NA's in batch variable")
|
|
65
|
+ }
|
63
|
66
|
if(missing(sns) & missing(filenames)) stop("one of filenames or samplenames (sns) must be provided")
|
64
|
67
|
if(verbose) message("Initializing container for copy number estimation")
|
65
|
68
|
featureData <- getFeatureData(cdfName, copynumber=copynumber)
|
...
|
...
|
@@ -109,7 +112,7 @@ genotype <- function(filenames,
|
109
|
112
|
SNRMin=5,
|
110
|
113
|
recallMin=10,
|
111
|
114
|
recallRegMin=1000,
|
112
|
|
- gender=NULL,
|
|
115
|
+ gender=NULL,
|
113
|
116
|
returnParams=TRUE,
|
114
|
117
|
badSNP=0.7){
|
115
|
118
|
is.lds <- ifelse(isPackageLoaded("ff"), TRUE, FALSE)
|
...
|
...
|
@@ -151,7 +154,7 @@ genotype <- function(filenames,
|
151
|
154
|
mixtureSampleSize=mixtureSampleSize, pkgname=pkgname,
|
152
|
155
|
neededPkgs=c("crlmm", pkgname))
|
153
|
156
|
## Now we initialize a CNSet object, cloning A and B
|
154
|
|
- message("Cloning A and B objects")
|
|
157
|
+ if(verbose) message("Cloning A and B matrices to store genotype calls and confidence scores.")
|
155
|
158
|
## The clones will be used to store calls and confidence scores
|
156
|
159
|
open(A)
|
157
|
160
|
open(B)
|
...
|
...
|
@@ -171,15 +174,13 @@ genotype <- function(filenames,
|
171
|
174
|
batch=batch)
|
172
|
175
|
if(!missing(sns)){
|
173
|
176
|
sampleNames(cnSet) <- sns
|
174
|
|
- open(A)
|
175
|
|
- protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
|
176
|
|
- close(A)
|
177
|
177
|
} else {
|
178
|
178
|
sampleNames(cnSet) <- basename(filenames)
|
179
|
|
- protocolData <- getProtocolData.Affy(filenames)
|
180
|
179
|
}
|
|
180
|
+ protocolData <- getProtocolData.Affy(filenames)
|
181
|
181
|
rownames(pData(protocolData)) <- sns
|
182
|
182
|
protocolData(cnSet) <- protocolData
|
|
183
|
+ ##protocolData(cnSet) <- protocolData
|
183
|
184
|
pd <- data.frame(matrix(NA, nc, 3), row.names=sns)
|
184
|
185
|
colnames(pd)=c("SKW", "SNR", "gender")
|
185
|
186
|
phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
|
...
|
...
|
@@ -213,7 +214,7 @@ genotype <- function(filenames,
|
213
|
214
|
returnParams=returnParams,
|
214
|
215
|
badSNP=badSNP)
|
215
|
216
|
if(verbose) message("Genotyping finished. Updating container with genotype calls and confidence scores.")
|
216
|
|
- cnSet$gender <- tmp$gender
|
|
217
|
+ cnSet$gender <- tmp[["gender"]]
|
217
|
218
|
return(cnSet)
|
218
|
219
|
}
|
219
|
220
|
|
...
|
...
|
@@ -359,37 +360,39 @@ fit.wls <- function(NN, sigma, allele, Y, autosome, X){
|
359
|
360
|
}
|
360
|
361
|
|
361
|
362
|
shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAMPLES, DF.PRIOR,
|
362
|
|
- verbose, is.lds){
|
363
|
|
- if(is.lds) {physical <- get("physical"); open(object)}
|
|
363
|
+ verbose, is.lds=TRUE){
|
|
364
|
+ ##if(is.lds) {physical <- get("physical"); open(object)}
|
|
365
|
+ open(object)
|
364
|
366
|
if(verbose) message(" Probe stratum ", strata, " of ", length(index.list))
|
365
|
367
|
marker.index <- index.list[[strata]]
|
366
|
368
|
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
367
|
369
|
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
|
368
|
|
- batchnames <- batchNames(object)
|
369
|
|
- N.AA <- as.matrix(N.AA(object)[marker.index, ])
|
370
|
|
- N.AB <- as.matrix(N.AB(object)[marker.index, ])
|
371
|
|
- N.BB <- as.matrix(N.BB(object)[marker.index, ])
|
372
|
|
- medianA.AA <- as.matrix(medianA.AA(object)[marker.index,])
|
373
|
|
- medianA.AB <- as.matrix(medianA.AB(object)[marker.index,])
|
374
|
|
- medianA.BB <- as.matrix(medianA.BB(object)[marker.index,])
|
375
|
|
- medianB.AA <- as.matrix(medianB.AA(object)[marker.index,])
|
376
|
|
- medianB.AB <- as.matrix(medianB.AB(object)[marker.index,])
|
377
|
|
- medianB.BB <- as.matrix(medianB.BB(object)[marker.index,])
|
378
|
|
- madA.AA <- as.matrix(madA.AA(object)[marker.index,])
|
379
|
|
- madA.AB <- as.matrix(madA.AB(object)[marker.index,])
|
380
|
|
- madA.BB <- as.matrix(madA.BB(object)[marker.index,])
|
381
|
|
- madB.AA <- as.matrix(madB.AA(object)[marker.index,])
|
382
|
|
- madB.AB <- as.matrix(madB.AB(object)[marker.index,])
|
383
|
|
- madB.BB <- as.matrix(madB.BB(object)[marker.index,])
|
384
|
|
- medianA <- medianB <- shrink.madB <- shrink.madA <- vector("list", length(batchnames))
|
385
|
|
- shrink.tau2A.AA <- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index,])
|
386
|
|
- shrink.tau2B.BB <- tau2B.BB <- as.matrix(tau2B.BB(object)[marker.index,])
|
387
|
|
- shrink.tau2A.BB <- tau2A.BB <- as.matrix(tau2A.BB(object)[marker.index,])
|
388
|
|
- shrink.tau2B.AA <- tau2B.AA <- as.matrix(tau2B.AA(object)[marker.index,])
|
389
|
|
- shrink.corrAA <- corrAA <- as.matrix(corrAA(object)[marker.index, ])
|
390
|
|
- shrink.corrAB <- corrAB <- as.matrix(corrAB(object)[marker.index, ])
|
391
|
|
- shrink.corrBB <- corrBB <- as.matrix(corrBB(object)[marker.index, ])
|
392
|
|
- flags <- as.matrix(flags(object)[marker.index, ])
|
|
370
|
+ batch.names <- batchNames(object)
|
|
371
|
+ batch.index <- which(batchNames(object) %in% batch.names)
|
|
372
|
+ N.AA <- as.matrix(N.AA(object)[marker.index, batch.index])
|
|
373
|
+ N.AB <- as.matrix(N.AB(object)[marker.index, batch.index])
|
|
374
|
+ N.BB <- as.matrix(N.BB(object)[marker.index, batch.index])
|
|
375
|
+ medianA.AA <- as.matrix(medianA.AA(object)[marker.index, batch.index])
|
|
376
|
+ medianA.AB <- as.matrix(medianA.AB(object)[marker.index, batch.index])
|
|
377
|
+ medianA.BB <- as.matrix(medianA.BB(object)[marker.index, batch.index])
|
|
378
|
+ medianB.AA <- as.matrix(medianB.AA(object)[marker.index, batch.index])
|
|
379
|
+ medianB.AB <- as.matrix(medianB.AB(object)[marker.index, batch.index])
|
|
380
|
+ medianB.BB <- as.matrix(medianB.BB(object)[marker.index, batch.index])
|
|
381
|
+ madA.AA <- as.matrix(madA.AA(object)[marker.index, batch.index])
|
|
382
|
+ madA.AB <- as.matrix(madA.AB(object)[marker.index, batch.index])
|
|
383
|
+ madA.BB <- as.matrix(madA.BB(object)[marker.index, batch.index])
|
|
384
|
+ madB.AA <- as.matrix(madB.AA(object)[marker.index, batch.index])
|
|
385
|
+ madB.AB <- as.matrix(madB.AB(object)[marker.index, batch.index])
|
|
386
|
+ madB.BB <- as.matrix(madB.BB(object)[marker.index, batch.index])
|
|
387
|
+ medianA <- medianB <- shrink.madB <- shrink.madA <- vector("list", length(batch.names))
|
|
388
|
+ shrink.tau2A.AA <- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, batch.index])
|
|
389
|
+ shrink.tau2B.BB <- tau2B.BB <- as.matrix(tau2B.BB(object)[marker.index, batch.index])
|
|
390
|
+ shrink.tau2A.BB <- tau2A.BB <- as.matrix(tau2A.BB(object)[marker.index, batch.index])
|
|
391
|
+ shrink.tau2B.AA <- tau2B.AA <- as.matrix(tau2B.AA(object)[marker.index, batch.index])
|
|
392
|
+ shrink.corrAA <- corrAA <- as.matrix(corrAA(object)[marker.index, batch.index])
|
|
393
|
+ shrink.corrAB <- corrAB <- as.matrix(corrAB(object)[marker.index, batch.index])
|
|
394
|
+ shrink.corrBB <- corrBB <- as.matrix(corrBB(object)[marker.index, batch.index])
|
|
395
|
+ flags <- as.matrix(flags(object)[marker.index, batch.index])
|
393
|
396
|
for(k in seq(along=batches)){
|
394
|
397
|
sample.index <- batches[[k]]
|
395
|
398
|
this.batch <- unique(as.character(batch(object)[sample.index]))
|
...
|
...
|
@@ -422,9 +425,6 @@ shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAM
|
422
|
425
|
## SNPs that we'll use for imputing location/scale of unobserved genotypes
|
423
|
426
|
##---------------------------------------------------------------------------
|
424
|
427
|
index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
|
425
|
|
- if(length(index.complete) == 1){
|
426
|
|
- if(index.complete == FALSE) return()
|
427
|
|
- }
|
428
|
428
|
##
|
429
|
429
|
##---------------------------------------------------------------------------
|
430
|
430
|
## Impute sufficient statistics for unobserved genotypes (plate-specific)
|
...
|
...
|
@@ -459,46 +459,32 @@ shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAM
|
459
|
459
|
negB <- rowSums(medianB[[k]] < 0) > 0
|
460
|
460
|
flags[, k] <- as.integer(rowSums(NN == 0) > 0 | negA | negB)
|
461
|
461
|
}
|
462
|
|
- if(length(batches) >= 3){
|
463
|
|
- if(verbose) message("Imputing centers for unobserved genotypes from other batches")
|
464
|
|
- res <- imputeAcrossBatch(N.AA, N.AB, N.BB,
|
465
|
|
- medianA.AA, medianA.AB, medianA.BB,
|
466
|
|
- medianB.AA, medianB.AB, medianB.BB)
|
467
|
|
- medianA.AA <- res[["medianA.AA"]]
|
468
|
|
- medianA.AB <- res[["medianA.AB"]]
|
469
|
|
- medianA.BB <- res[["medianA.BB"]]
|
470
|
|
- medianB.AA <- res[["medianB.AA"]]
|
471
|
|
- medianB.AB <- res[["medianB.AB"]]
|
472
|
|
- medianB.BB <- res[["medianB.BB"]]
|
473
|
|
- updated <- res[["updated"]]
|
474
|
|
- }
|
475
|
|
- flags(object)[marker.index, ] <- flags
|
476
|
|
- medianA.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 1]))
|
477
|
|
- medianA.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 2]))
|
478
|
|
- medianA.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 3]))
|
479
|
|
- medianB.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 1]))
|
480
|
|
- medianB.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 2]))
|
481
|
|
- medianB.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 3]))
|
|
462
|
+ flags(object)[marker.index, batch.index] <- flags
|
|
463
|
+ medianA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 1]))
|
|
464
|
+ medianA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 2]))
|
|
465
|
+ medianA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 3]))
|
|
466
|
+ medianB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 1]))
|
|
467
|
+ medianB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 2]))
|
|
468
|
+ medianB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 3]))
|
482
|
469
|
##
|
483
|
|
- madA.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1]))
|
484
|
|
- madA.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2]))
|
485
|
|
- madA.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3]))
|
486
|
|
- madB.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1]))
|
487
|
|
- madB.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2]))
|
488
|
|
- madB.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3]))
|
|
470
|
+ madA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1]))
|
|
471
|
+ madA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2]))
|
|
472
|
+ madA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3]))
|
|
473
|
+ madB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1]))
|
|
474
|
+ madB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2]))
|
|
475
|
+ madB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3]))
|
489
|
476
|
##
|
490
|
|
- corrAA(object)[marker.index, ] <- shrink.corrAA
|
491
|
|
- corrAB(object)[marker.index, ] <- shrink.corrAB
|
492
|
|
- corrBB(object)[marker.index, ] <- shrink.corrBB
|
493
|
|
- tau2A.AA(object)[marker.index,] <- shrink.tau2A.AA
|
494
|
|
- tau2A.BB(object)[marker.index,] <- shrink.tau2A.BB
|
495
|
|
- tau2B.AA(object)[marker.index,] <- shrink.tau2B.AA
|
496
|
|
- tau2B.BB(object)[marker.index,] <- shrink.tau2B.BB
|
|
477
|
+ corrAA(object)[marker.index, batch.index] <- shrink.corrAA
|
|
478
|
+ corrAB(object)[marker.index, batch.index] <- shrink.corrAB
|
|
479
|
+ corrBB(object)[marker.index, batch.index] <- shrink.corrBB
|
|
480
|
+ tau2A.AA(object)[marker.index, batch.index] <- shrink.tau2A.AA
|
|
481
|
+ tau2A.BB(object)[marker.index, batch.index] <- shrink.tau2A.BB
|
|
482
|
+ tau2B.AA(object)[marker.index, batch.index] <- shrink.tau2B.AA
|
|
483
|
+ tau2B.BB(object)[marker.index, batch.index] <- shrink.tau2B.BB
|
497
|
484
|
if(is.lds) return(TRUE) else return(object)
|
498
|
485
|
}
|
499
|
486
|
|
500
|
487
|
|
501
|
|
-
|
502
|
488
|
fit.lm1 <- function(strata,
|
503
|
489
|
index.list,
|
504
|
490
|
object,
|
...
|
...
|
@@ -513,34 +499,38 @@ fit.lm1 <- function(strata,
|
513
|
499
|
snps <- index.list[[strata]]
|
514
|
500
|
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
515
|
501
|
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
|
516
|
|
- batchnames <- batchNames(object)
|
517
|
|
- N.AA <- as.matrix(N.AA(object)[snps, ])
|
518
|
|
- N.AB <- as.matrix(N.AB(object)[snps, ])
|
519
|
|
- N.BB <- as.matrix(N.BB(object)[snps, ])
|
520
|
|
- medianA.AA <- as.matrix(medianA.AA(object)[snps,])
|
521
|
|
- medianA.AB <- as.matrix(medianA.AB(object)[snps,])
|
522
|
|
- medianA.BB <- as.matrix(medianA.BB(object)[snps,])
|
523
|
|
- medianB.AA <- as.matrix(medianB.AA(object)[snps,])
|
524
|
|
- medianB.AB <- as.matrix(medianB.AB(object)[snps,])
|
525
|
|
- medianB.BB <- as.matrix(medianB.BB(object)[snps,])
|
526
|
|
- madA.AA <- as.matrix(madA.AA(object)[snps,])
|
527
|
|
- madA.AB <- as.matrix(madA.AB(object)[snps,])
|
528
|
|
- madA.BB <- as.matrix(madA.BB(object)[snps,])
|
529
|
|
- madB.AA <- as.matrix(madB.AA(object)[snps,])
|
530
|
|
- madB.AB <- as.matrix(madB.AB(object)[snps,])
|
531
|
|
- madB.BB <- as.matrix(madB.BB(object)[snps,])
|
532
|
|
- tau2A.AA <- as.matrix(tau2A.AA(object)[snps,])
|
533
|
|
- tau2B.BB <- as.matrix(tau2B.BB(object)[snps,])
|
534
|
|
- tau2A.BB <- as.matrix(tau2A.BB(object)[snps,])
|
535
|
|
- tau2B.AA <- as.matrix(tau2B.AA(object)[snps,])
|
536
|
|
- corrAA <- as.matrix(corrAA(object)[snps, ])
|
537
|
|
- corrAB <- as.matrix(corrAB(object)[snps, ])
|
538
|
|
- corrBB <- as.matrix(corrBB(object)[snps, ])
|
539
|
|
- nuA <- as.matrix(nuA(object)[snps, ])
|
540
|
|
- phiA <- as.matrix(phiA(object)[snps, ])
|
541
|
|
- nuB <- as.matrix(nuB(object)[snps, ])
|
542
|
|
- phiB <- as.matrix(phiB(object)[snps, ])
|
543
|
|
- flags <- as.matrix(flags(object)[snps, ])
|
|
502
|
+ ##batchnames <- batchNames(object)
|
|
503
|
+ batch.names <- names(batches)
|
|
504
|
+ batch.index <- which(batchNames(object) %in% batch.names)
|
|
505
|
+ if(length(batches) > 1 && "grandMean" %in% batchNames(object))
|
|
506
|
+ batch.index <- c(batch.index, match("grandMean", batchNames(object)))
|
|
507
|
+ N.AA <- as.matrix(N.AA(object)[snps, batch.index])
|
|
508
|
+ N.AB <- as.matrix(N.AB(object)[snps, batch.index])
|
|
509
|
+ N.BB <- as.matrix(N.BB(object)[snps, batch.index])
|
|
510
|
+ medianA.AA <- as.matrix(medianA.AA(object)[snps, batch.index])
|
|
511
|
+ medianA.AB <- as.matrix(medianA.AB(object)[snps, batch.index])
|
|
512
|
+ medianA.BB <- as.matrix(medianA.BB(object)[snps, batch.index])
|
|
513
|
+ medianB.AA <- as.matrix(medianB.AA(object)[snps, batch.index])
|
|
514
|
+ medianB.AB <- as.matrix(medianB.AB(object)[snps, batch.index])
|
|
515
|
+ medianB.BB <- as.matrix(medianB.BB(object)[snps, batch.index])
|
|
516
|
+ madA.AA <- as.matrix(madA.AA(object)[snps, batch.index])
|
|
517
|
+ madA.AB <- as.matrix(madA.AB(object)[snps, batch.index])
|
|
518
|
+ madA.BB <- as.matrix(madA.BB(object)[snps, batch.index])
|
|
519
|
+ madB.AA <- as.matrix(madB.AA(object)[snps, batch.index])
|
|
520
|
+ madB.AB <- as.matrix(madB.AB(object)[snps, batch.index])
|
|
521
|
+ madB.BB <- as.matrix(madB.BB(object)[snps, batch.index])
|
|
522
|
+ tau2A.AA <- as.matrix(tau2A.AA(object)[snps, batch.index])
|
|
523
|
+ tau2B.BB <- as.matrix(tau2B.BB(object)[snps, batch.index])
|
|
524
|
+ tau2A.BB <- as.matrix(tau2A.BB(object)[snps, batch.index])
|
|
525
|
+ tau2B.AA <- as.matrix(tau2B.AA(object)[snps, batch.index])
|
|
526
|
+ corrAA <- as.matrix(corrAA(object)[snps, batch.index])
|
|
527
|
+ corrAB <- as.matrix(corrAB(object)[snps, batch.index])
|
|
528
|
+ corrBB <- as.matrix(corrBB(object)[snps, batch.index])
|
|
529
|
+ nuA <- as.matrix(nuA(object)[snps, batch.index])
|
|
530
|
+ phiA <- as.matrix(phiA(object)[snps, batch.index])
|
|
531
|
+ nuB <- as.matrix(nuB(object)[snps, batch.index])
|
|
532
|
+ phiB <- as.matrix(phiB(object)[snps, batch.index])
|
|
533
|
+ flags <- as.matrix(flags(object)[snps, batch.index])
|
544
|
534
|
for(k in seq(along=batches)){
|
545
|
535
|
B <- batches[[k]]
|
546
|
536
|
if(length(B) < MIN.SAMPLES) next()
|
...
|
...
|
@@ -550,7 +540,29 @@ fit.lm1 <- function(strata,
|
550
|
540
|
madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
|
551
|
541
|
madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
|
552
|
542
|
NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
|
553
|
|
- ## we're regressing on the medians using the standard errors (hence the division by N) as weights
|
|
543
|
+ ## regress on the medians using the standard errors (hence the division by N) as weights
|
|
544
|
+ res <- fit.wls(NN=NN, sigma=madA, allele="A", Y=medianA, autosome=!CHR.X)
|
|
545
|
+ nuA[, k] <- res[1, ]
|
|
546
|
+ phiA[, k] <- res[2, ]
|
|
547
|
+ rm(res)
|
|
548
|
+ res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
|
|
549
|
+ nuB[, k] <- res[1, ]
|
|
550
|
+ phiB[, k] <- res[2, ]
|
|
551
|
+ }
|
|
552
|
+ ##---------------------------------------------------------------------------
|
|
553
|
+ ##
|
|
554
|
+ ## Grand mean
|
|
555
|
+ ##
|
|
556
|
+ ##---------------------------------------------------------------------------
|
|
557
|
+ if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
|
|
558
|
+ ## then the last column is for the grandMean
|
|
559
|
+ k <- ncol(medianA.AA)
|
|
560
|
+ medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
|
|
561
|
+ medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
|
|
562
|
+ madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
|
|
563
|
+ madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
|
|
564
|
+ NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
|
|
565
|
+ ##index <- which(rowSums(is.na(medianA)) > 0)
|
554
|
566
|
res <- fit.wls(NN=NN, sigma=madA, allele="A", Y=medianA, autosome=!CHR.X)
|
555
|
567
|
nuA[, k] <- res[1, ]
|
556
|
568
|
phiA[, k] <- res[2, ]
|
...
|
...
|
@@ -558,8 +570,6 @@ fit.lm1 <- function(strata,
|
558
|
570
|
res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
|
559
|
571
|
nuB[, k] <- res[1, ]
|
560
|
572
|
phiB[, k] <- res[2, ]
|
561
|
|
-## cA[, k] <- matrix((1/phiA[, J]*(A-nuA[, J])), nrow(A), ncol(A))
|
562
|
|
-## cB[, k] <- matrix((1/phiB[, J]*(B-nuB[, J])), nrow(B), ncol(B))
|
563
|
573
|
}
|
564
|
574
|
if(THR.NU.PHI){
|
565
|
575
|
nuA[nuA < MIN.NU] <- MIN.NU
|
...
|
...
|
@@ -567,10 +577,10 @@ fit.lm1 <- function(strata,
|
567
|
577
|
phiA[phiA < MIN.PHI] <- MIN.PHI
|
568
|
578
|
phiB[phiB < MIN.PHI] <- MIN.PHI
|
569
|
579
|
}
|
570
|
|
- nuA(object)[snps, ] <- nuA
|
571
|
|
- nuB(object)[snps, ] <- nuB
|
572
|
|
- phiA(object)[snps, ] <- phiA
|
573
|
|
- phiB(object)[snps, ] <- phiB
|
|
580
|
+ nuA(object)[snps, batch.index] <- nuA
|
|
581
|
+ nuB(object)[snps, batch.index] <- nuB
|
|
582
|
+ phiA(object)[snps, batch.index] <- phiA
|
|
583
|
+ phiB(object)[snps, batch.index] <- phiB
|
574
|
584
|
if(is.lds){
|
575
|
585
|
close(object)
|
576
|
586
|
return(TRUE)
|
...
|
...
|
@@ -593,30 +603,34 @@ fit.lm2 <- function(strata,
|
593
|
603
|
marker.index <- index.list[[strata]]
|
594
|
604
|
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
595
|
605
|
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
|
596
|
|
-
|
|
606
|
+ batch.names <- names(batches)
|
|
607
|
+ batch.index <- which(batchNames(object) %in% batch.names)
|
|
608
|
+ ##
|
597
|
609
|
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
|
598
|
|
- flags <- as.matrix(flags(object)[ii, ])
|
|
610
|
+ flags <- as.matrix(flags(object)[ii, batch.index])
|
599
|
611
|
fns <- featureNames(object)[ii]
|
600
|
612
|
fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
|
601
|
613
|
snp.index <- sample(match(fns.noflags, featureNames(object)), 5000)
|
602
|
|
-
|
603
|
|
- nuA.np <- as.matrix(nuA(object)[marker.index, ])
|
604
|
|
- phiA.np <- as.matrix(phiA(object)[marker.index, ])
|
605
|
|
- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, ])
|
606
|
|
-
|
607
|
|
- nuA.snp <- as.matrix(nuA(object)[snp.index, ])
|
608
|
|
- nuB.snp <- as.matrix(nuB(object)[snp.index, ])
|
609
|
|
- phiA.snp <- as.matrix(phiA(object)[snp.index, ])
|
610
|
|
- phiB.snp <- as.matrix(phiB(object)[snp.index, ])
|
611
|
|
- medianA.AA <- as.matrix(medianA.AA(object)[snp.index,])
|
612
|
|
- medianB.BB <- as.matrix(medianB.BB(object)[snp.index,])
|
613
|
|
-
|
614
|
|
- medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index,])
|
|
614
|
+ ##
|
|
615
|
+ nuA.np <- as.matrix(nuA(object)[marker.index, batch.index])
|
|
616
|
+ phiA.np <- as.matrix(phiA(object)[marker.index, batch.index])
|
|
617
|
+ tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, batch.index])
|
|
618
|
+ ##
|
|
619
|
+ nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index])
|
|
620
|
+ nuB.snp <- as.matrix(nuB(object)[snp.index, batch.index])
|
|
621
|
+ phiA.snp <- as.matrix(phiA(object)[snp.index, batch.index])
|
|
622
|
+ phiB.snp <- as.matrix(phiB(object)[snp.index, batch.index])
|
|
623
|
+ medianA.AA <- as.matrix(medianA.AA(object)[snp.index, batch.index])
|
|
624
|
+ medianB.BB <- as.matrix(medianB.BB(object)[snp.index, batch.index])
|
|
625
|
+ medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index, batch.index])
|
615
|
626
|
for(k in seq_along(batches)){
|
616
|
627
|
B <- batches[[k]]
|
617
|
628
|
this.batch <- unique(as.character(batch(object)[B]))
|
618
|
629
|
X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k])))
|
619
|
630
|
Y <- log2(c(phiA.snp[, k], phiB.snp[, k]))
|
|
631
|
+ not.missing <- rowSums(is.na(X)) == 0 & !is.na(Y)
|
|
632
|
+ X <- X[not.missing, ]
|
|
633
|
+ Y <- Y[not.missing]
|
620
|
634
|
betahat <- solve(crossprod(X), crossprod(X, Y))
|
621
|
635
|
##crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0)
|
622
|
636
|
crosshyb <- 0
|
...
|
...
|
@@ -630,8 +644,8 @@ fit.lm2 <- function(strata,
|
630
|
644
|
nuA.np[nuA.np < MIN.NU] <- MIN.NU
|
631
|
645
|
phiA.np[phiA.np < MIN.PHI] <- MIN.PHI
|
632
|
646
|
}
|
633
|
|
- nuA(object)[marker.index, ] <- nuA.np
|
634
|
|
- phiA(object)[marker.index, ] <- phiA.np
|
|
647
|
+ nuA(object)[marker.index, batch.index] <- nuA.np
|
|
648
|
+ phiA(object)[marker.index, batch.index] <- phiA.np
|
635
|
649
|
if(is.lds) { close(object); return(TRUE)}
|
636
|
650
|
return(object)
|
637
|
651
|
}
|
...
|
...
|
@@ -645,6 +659,7 @@ summarizeMaleXNps <- function(marker.index,
|
645
|
659
|
gender <- object$gender
|
646
|
660
|
AA <- as.matrix(A(object)[marker.index, gender==1])
|
647
|
661
|
madA.AA <- medianA.AA <- matrix(NA, nr, nc)
|
|
662
|
+ colnames(madA.AA) <- colnames(medianA.AA) <- batchNames(object)
|
648
|
663
|
numberMenPerBatch <- rep(NA, nc)
|
649
|
664
|
for(k in seq_along(batches)){
|
650
|
665
|
B <- batches[[k]]
|
...
|
...
|
@@ -656,8 +671,8 @@ summarizeMaleXNps <- function(marker.index,
|
656
|
671
|
sns <- colnames(AA)
|
657
|
672
|
J <- sns%in%sns.batch
|
658
|
673
|
numberMenPerBatch[k] <- length(J)
|
659
|
|
- medianA.AA[, k] <- rowMedians(AA[, J], na.rm=TRUE)
|
660
|
|
- madA.AA[, k] <- rowMAD(AA[, J], na.rm=TRUE)
|
|
674
|
+ medianA.AA[, k] <- rowMedians(AA[, J, drop=FALSE], na.rm=TRUE)
|
|
675
|
+ madA.AA[, k] <- rowMAD(AA[, J, drop=FALSE], na.rm=TRUE)
|
661
|
676
|
}
|
662
|
677
|
return(list(medianA.AA=medianA.AA,
|
663
|
678
|
madA.AA=madA.AA))
|
...
|
...
|
@@ -665,29 +680,35 @@ summarizeMaleXNps <- function(marker.index,
|
665
|
680
|
|
666
|
681
|
|
667
|
682
|
summarizeXGenotypes <- function(marker.index,
|
668
|
|
- batches,
|
669
|
|
- object,
|
670
|
|
- GT.CONF.THR,
|
671
|
|
- MIN.OBS,
|
672
|
|
- MIN.SAMPLES,
|
673
|
|
- verbose,
|
674
|
|
- is.lds,
|
675
|
|
- DF.PRIOR,
|
676
|
|
- gender="male", ...){
|
|
683
|
+ batches,
|
|
684
|
+ object,
|
|
685
|
+ GT.CONF.THR,
|
|
686
|
+ MIN.OBS,
|
|
687
|
+ MIN.SAMPLES,
|
|
688
|
+ verbose,
|
|
689
|
+ is.lds,
|
|
690
|
+ DF.PRIOR,
|
|
691
|
+ gender="male", ...){
|
|
692
|
+ I <- unlist(batches)
|
677
|
693
|
if(gender == "male"){
|
678
|
|
- sample.index <- which(object$gender==1)
|
679
|
|
- } else sample.index <- which(object$gender==2)
|
|
694
|
+ gender.index <- which(object$gender == 1)
|
|
695
|
+ } else {
|
|
696
|
+ gender.index <- which(object$gender == 2)
|
|
697
|
+ }
|
|
698
|
+ batches <- lapply(batches, function(x, gender.index) intersect(x, gender.index), gender.index)
|
|
699
|
+ batch.names <- names(batches)
|
|
700
|
+ batch.index <- which(batchNames(object) %in% batch.names)
|
680
|
701
|
nr <- length(marker.index)
|
681
|
|
- nc <- length(batchNames(object))
|
682
|
|
-## NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
|
|
702
|
+ nc <- length(batch.index)
|
683
|
703
|
NN.Mlist <- medianA <- medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
|
684
|
|
- ##gender <- object$gender
|
685
|
|
- GG <- as.matrix(calls(object)[marker.index, sample.index])
|
686
|
|
- CP <- as.matrix(snpCallProbability(object)[marker.index, sample.index])
|
687
|
|
- AA <- as.matrix(A(object)[marker.index, sample.index])
|
688
|
|
- BB <- as.matrix(B(object)[marker.index, sample.index])
|
|
704
|
+ names(NN.Mlist) <- names(medianA) <- names(medianB) <- names(shrink.madA) <- names(shrink.madB) <- batch.names
|
|
705
|
+ GG <- as.matrix(calls(object)[marker.index, ])
|
|
706
|
+ CP <- as.matrix(snpCallProbability(object)[marker.index, ])
|
|
707
|
+ AA <- as.matrix(A(object)[marker.index, ])
|
|
708
|
+ BB <- as.matrix(B(object)[marker.index, ])
|
689
|
709
|
for(k in seq_along(batches)){
|
690
|
710
|
sample.index <- batches[[k]]
|
|
711
|
+ if(length(sample.index) == 0) next()
|
691
|
712
|
this.batch <- unique(as.character(batch(object)[sample.index]))
|
692
|
713
|
##gender <- object$gender[sample.index]
|
693
|
714
|
if(length(sample.index) < MIN.SAMPLES) next()
|
...
|
...
|
@@ -695,12 +716,13 @@ summarizeXGenotypes <- function(marker.index,
|
695
|
716
|
##subset GG apppriately
|
696
|
717
|
sns <- colnames(GG)
|
697
|
718
|
J <- sns%in%sns.batch
|
698
|
|
- G <- GG[, J]
|
699
|
|
- xx <- CP[, J]
|
|
719
|
+ G <- GG[, J, drop=FALSE]
|
|
720
|
+ xx <- CP[, J, drop=FALSE]
|
|
721
|
+ p <- i2p(xx)
|
700
|
722
|
highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
|
701
|
723
|
G <- G*highConf
|
702
|
|
- A <- AA[, J]
|
703
|
|
- B <- BB[, J]
|
|
724
|
+ A <- AA[, J, drop=FALSE]
|
|
725
|
+ B <- BB[, J, drop=FALSE]
|
704
|
726
|
G.AA <- G==1
|
705
|
727
|
G.AA[G.AA==FALSE] <- NA
|
706
|
728
|
G.AB <- G==2
|
...
|
...
|
@@ -774,6 +796,18 @@ summarizeXGenotypes <- function(marker.index,
|
774
|
796
|
## AB and AA observed
|
775
|
797
|
unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
|
776
|
798
|
res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
|
|
799
|
+ medianA[[k]] <- res[[1]]
|
|
800
|
+ medianB[[k]] <- res[[2]]
|
|
801
|
+
|
|
802
|
+ ## RS: For Monomorphic SNPs a mixture model may be better
|
|
803
|
+ ## RS: Further, we can improve estimation by borrowing strength across batch
|
|
804
|
+ unobserved.index[[1]] <- which(unobservedAA & unobservedAB)
|
|
805
|
+ unobserved.index[[2]] <- which(unobservedBB & unobservedAB)
|
|
806
|
+ unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange
|
|
807
|
+ res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]],
|
|
808
|
+ index.complete,
|
|
809
|
+ unobserved.index)
|
|
810
|
+ medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]]
|
777
|
811
|
}
|
778
|
812
|
medianA[[k]] <- res[[1]]
|
779
|
813
|
medianB[[k]] <- res[[2]]
|
...
|
...
|
@@ -798,7 +832,7 @@ fit.lm3 <- function(strata,
|
798
|
832
|
MIN.NU,
|
799
|
833
|
MIN.PHI,
|
800
|
834
|
verbose, is.lds, CHR.X, ...){
|
801
|
|
- if(is.lds) {physical <- get("physical"); open(object)}
|
|
835
|
+ ##if(is.lds) {physical <- get("physical"); open(object)}
|
802
|
836
|
if(verbose) message(" Probe stratum ", strata, " of ", length(index.list))
|
803
|
837
|
gender <- object$gender
|
804
|
838
|
enough.males <- sum(gender==1) >= MIN.SAMPLES
|
...
|
...
|
@@ -810,22 +844,25 @@ fit.lm3 <- function(strata,
|
810
|
844
|
marker.index <- index.list[[strata]]
|
811
|
845
|
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
812
|
846
|
batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
|
813
|
|
- nuA <- as.matrix(nuA(object)[marker.index, ])
|
814
|
|
- nuB <- as.matrix(nuB(object)[marker.index, ])
|
815
|
|
- phiA <- as.matrix(phiA(object)[marker.index, ])
|
816
|
|
- phiB <- as.matrix(phiB(object)[marker.index, ])
|
817
|
|
- phiA2 <- as.matrix(phiPrimeA(object)[marker.index, ])
|
818
|
|
- phiB2 <- as.matrix(phiPrimeB(object)[marker.index, ])
|
|
847
|
+ batch.names <- names(batches)
|
|
848
|
+ batch.index <- which(batchNames(object) %in% batch.names)
|
|
849
|
+
|
|
850
|
+ nuA <- as.matrix(nuA(object)[marker.index, batch.index])
|
|
851
|
+ nuB <- as.matrix(nuB(object)[marker.index, batch.index])
|
|
852
|
+ phiA <- as.matrix(phiA(object)[marker.index, batch.index])
|
|
853
|
+ phiB <- as.matrix(phiB(object)[marker.index, batch.index])
|
|
854
|
+ phiA2 <- as.matrix(phiPrimeA(object)[marker.index, batch.index])
|
|
855
|
+ phiB2 <- as.matrix(phiPrimeB(object)[marker.index, batch.index])
|
819
|
856
|
if(enough.males){
|
820
|
857
|
res <- summarizeXGenotypes(marker.index=marker.index,
|
821
|
|
- batches=batches,
|
822
|
|
- object=object,
|
823
|
|
- GT.CONF.THR=GT.CONF.THR,
|
824
|
|
- MIN.SAMPLES=MIN.SAMPLES,
|
825
|
|
- MIN.OBS=MIN.OBS,
|
826
|
|
- verbose=verbose,
|
827
|
|
- is.lds=is.lds,
|
828
|
|
- DF.PRIOR=DF.PRIOR/2,
|
|
858
|
+ batches=batches,
|
|
859
|
+ object=object,
|
|
860
|
+ GT.CONF.THR=GT.CONF.THR,
|
|
861
|
+ MIN.SAMPLES=MIN.SAMPLES,
|
|
862
|
+ MIN.OBS=MIN.OBS,
|
|
863
|
+ verbose=verbose,
|
|
864
|
+ is.lds=is.lds,
|
|
865
|
+ DF.PRIOR=DF.PRIOR/2,
|
829
|
866
|
gender="male")
|
830
|
867
|
madA.Mlist <- res[["madA"]]
|
831
|
868
|
madB.Mlist <- res[["madB"]]
|
...
|
...
|
@@ -855,6 +892,7 @@ fit.lm3 <- function(strata,
|
855
|
892
|
}
|
856
|
893
|
for(k in seq_along(batches)){
|
857
|
894
|
sample.index <- batches[[k]]
|
|
895
|
+ if(is.null(sample.index)) next()
|
858
|
896
|
this.batch <- unique(as.character(batch(object)[sample.index]))
|
859
|
897
|
gender <- object$gender[sample.index]
|
860
|
898
|
enough.men <- sum(gender==1) >= MIN.SAMPLES
|
...
|
...
|
@@ -878,6 +916,10 @@ fit.lm3 <- function(strata,
|
878
|
916
|
NN.M <- NN.Mlist[[k]]
|
879
|
917
|
}
|
880
|
918
|
if(enough.men & enough.women){
|
|
919
|
+ if(any(madA.F == 0)){
|
|
920
|
+ message("Zeros in median absolute deviation. Not estimating CN for chrX for batch ", names(batches)[k])
|
|
921
|
+ next()
|
|
922
|
+ }
|
881
|
923
|
betas <- fit.wls(cbind(NN.M, NN.F),
|
882
|
924
|
sigma=cbind(madA.M, madA.F),
|
883
|
925
|
allele="A",
|
...
|
...
|
@@ -914,6 +956,10 @@ fit.lm3 <- function(strata,
|
914
|
956
|
phiB[, k] <- betas[2, ]
|
915
|
957
|
}
|
916
|
958
|
if(!enough.men & enough.women){
|
|
959
|
+ if(any(madA.F == 0)){
|
|
960
|
+ message("Zeros in median absolute deviation. Not estimating CN for chrX for batch ", names(batches)[k])
|
|
961
|
+ next()
|
|
962
|
+ }
|
917
|
963
|
betas <- fit.wls(NN.F,
|
918
|
964
|
sigma=madA.F,
|
919
|
965
|
allele="A",
|
...
|
...
|
@@ -938,15 +984,16 @@ fit.lm3 <- function(strata,
|
938
|
984
|
phiB[phiB < MIN.PHI] <- MIN.PHI
|
939
|
985
|
phiB2[phiB2 < 1] <- 1
|
940
|
986
|
}
|
941
|
|
- nuA(object)[marker.index, ] <- nuA
|
942
|
|
- nuB(object)[marker.index, ] <- nuB
|
943
|
|
- phiA(object)[marker.index, ] <- phiA
|
944
|
|
- phiB(object)[marker.index, ] <- phiB
|
945
|
|
- phiPrimeA(object)[marker.index, ] <- phiA2
|
946
|
|
- phiPrimeB(object)[marker.index, ] <- phiB2
|
|
987
|
+ nuA(object)[marker.index, batch.index] <- nuA
|
|
988
|
+ nuB(object)[marker.index, batch.index] <- nuB
|
|
989
|
+ phiA(object)[marker.index, batch.index] <- phiA
|
|
990
|
+ phiB(object)[marker.index, batch.index] <- phiB
|
|
991
|
+ phiPrimeA(object)[marker.index, batch.index] <- phiA2
|
|
992
|
+ phiPrimeB(object)[marker.index, batch.index] <- phiB2
|
947
|
993
|
if(is.lds) {close(object); return(TRUE)} else return(object)
|
948
|
994
|
}
|
949
|
995
|
|
|
996
|
+##nonpolymorphic markers on X
|
950
|
997
|
fit.lm4 <- function(strata,
|
951
|
998
|
index.list,
|
952
|
999
|
object,
|
...
|
...
|
@@ -954,8 +1001,9 @@ fit.lm4 <- function(strata,
|
954
|
1001
|
THR.NU.PHI,
|
955
|
1002
|
MIN.NU,
|
956
|
1003
|
MIN.PHI,
|
957
|
|
- verbose, is.lds, ...){
|
958
|
|
- if(is.lds) {physical <- get("physical"); open(object)}
|
|
1004
|
+ verbose, is.lds=TRUE, ...){
|
|
1005
|
+ ##if(is.lds) {physical <- get("physical"); open(object)}
|
|
1006
|
+ ## exclude batches that have fewer than MIN.SAMPLES
|
959
|
1007
|
gender <- object$gender
|
960
|
1008
|
enough.males <- sum(gender==1) >= MIN.SAMPLES
|
961
|
1009
|
enough.females <- sum(gender==2) >= MIN.SAMPLES
|
...
|
...
|
@@ -963,68 +1011,71 @@ fit.lm4 <- function(strata,
|
963
|
1011
|
message(paste("fewer than", MIN.SAMPLES, "men and women. Copy number not estimated for CHR X"))
|
964
|
1012
|
return(object)
|
965
|
1013
|
}
|
|
1014
|
+ excludeBatch <- names(table(batch(object)))[table(batch(object)) < MIN.SAMPLES]
|
|
1015
|
+ if(length(excludeBatch) > 0){
|
|
1016
|
+ bns <- unique(batch(object))
|
|
1017
|
+ batch.index <- which(!bns %in% excludeBatch)
|
|
1018
|
+ sample.index <- which(!batch(object)%in% excludeBatch)
|
|
1019
|
+ } else {
|
|
1020
|
+ sample.index <- 1:ncol(object)
|
|
1021
|
+ batch.index <- as.integer(as.factor(unique(batch(object))))
|
|
1022
|
+ }
|
966
|
1023
|
if(verbose) message(" Probe stratum ", strata, " of ", length(index.list))
|
967
|
1024
|
marker.index <- index.list[[strata]]
|
968
|
1025
|
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
969
|
|
- batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
|
970
|
|
- nc <- length(batchNames(object))
|
|
1026
|
+ batches <- batches[batch.index]
|
|
1027
|
+ nc <- length(batch.index)
|
971
|
1028
|
if(enough.males){
|
972
|
1029
|
res <- summarizeMaleXNps(marker.index=marker.index,
|
973
|
1030
|
batches=batches,
|
974
|
1031
|
object=object, MIN.SAMPLES=MIN.SAMPLES)
|
975
|
|
- medianA.AA.M <- res[["medianA.AA"]]
|
976
|
|
- madA.AA.M <- res[["madA.AA"]]
|
977
|
|
-
|
|
1032
|
+ medianA.AA.M <- res[["medianA.AA"]][, batch.index, drop=FALSE]
|
|
1033
|
+ madA.AA.M <- res[["madA.AA"]][, batch.index, drop=FALSE]
|
978
|
1034
|
}
|
979
|
|
- medianA.AA.F <- as.matrix(medianA.AA(object)[marker.index, ]) ## median for women
|
980
|
|
- madA.AA.F <- as.matrix(madA.AA(object)[marker.index, ]) ## median for women
|
981
|
|
- split.gender <- split(gender, as.character(batch(object)))
|
|
1035
|
+ medianA.AA.F <- as.matrix(medianA.AA(object)[marker.index, batch.index, drop=FALSE]) ## median for women
|
|
1036
|
+ madA.AA.F <- as.matrix(madA.AA(object)[marker.index, batch.index, drop=FALSE]) ## median for women
|
|
1037
|
+ split.gender <- split(gender[sample.index], as.character(batch(object)[sample.index]))
|
982
|
1038
|
N.M <- sapply(split.gender, function(x) sum(x==1))
|
983
|
1039
|
N.F <- sapply(split.gender, function(x) sum(x==2))
|
984
|
|
- nuA <- as.matrix(nuA(object)[marker.index, ])
|
985
|
|
- nuB <- as.matrix(nuB(object)[marker.index, ])
|
986
|
|
- phiA <- as.matrix(phiA(object)[marker.index, ])
|
987
|
|
- phiB <- as.matrix(phiB(object)[marker.index, ])
|
|
1040
|
+ nuA <- as.matrix(nuA(object)[marker.index, batch.index, drop=FALSE])
|
|
1041
|
+ nuB <- as.matrix(nuB(object)[marker.index, batch.index, drop=FALSE])
|
|
1042
|
+ phiA <- as.matrix(phiA(object)[marker.index, batch.index, drop=FALSE])
|
|
1043
|
+ phiB <- as.matrix(phiB(object)[marker.index, batch.index, drop=FALSE])
|
988
|
1044
|
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
|
989
|
1045
|
fns <- featureNames(object)[ii]
|
990
|
|
- flags <- as.matrix(flags(object)[ii, ])
|
|
1046
|
+ flags <- as.matrix(flags(object)[ii, batch.index])
|
991
|
1047
|
fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
|
992
|
1048
|
snp.index <- sample(match(fns.noflags, featureNames(object)), 10000)
|
993
|
|
-
|
994
|
|
- ## exclude batches that have fewer than MIN.SAMPLES
|
995
|
|
- excludeBatch <- names(table(batch(object)))[table(batch(object)) < MIN.SAMPLES]
|
996
|
|
- batch.index <- which(!batchNames(object) %in% excludeBatch)
|
997
|
|
-
|
|
1049
|
+ ##
|
998
|
1050
|
N.AA <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
|
999
|
|
- N.AB <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
|
1000
|
|
- N.BB <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
|
|
1051
|
+ N.AB <- as.matrix(N.AB(object)[snp.index, batch.index, drop=FALSE])
|
|
1052
|
+ N.BB <- as.matrix(N.BB(object)[snp.index, batch.index, drop=FALSE])
|
1001
|
1053
|
enoughAA <- rowSums(N.AA < 5) == 0
|
1002
|
1054
|
enoughAB <- rowSums(N.AB < 5) == 0
|
1003
|
1055
|
enoughBB <- rowSums(N.BB < 5) == 0
|
1004
|
1056
|
snp.index <- snp.index[enoughAA & enoughAB & enoughBB]
|
1005
|
|
- if(length(snp.index) < 100) {
|
1006
|
|
- message("Too few passing SNPs for estimating model parameters at nonpolymorphic loci on chrom X")
|
1007
|
|
- return()
|
|
1057
|
+ if(length(snp.index) < 100){
|
|
1058
|
+ message("too few snps pass criteria for estimating parameters for NP markers on chr X")
|
|
1059
|
+ return(object)
|
1008
|
1060
|
}
|
1009
|
|
- ##stopifnot(length(snp.index) > 100)
|
1010
|
|
- nuA.snp.notmissing <- rowSums(is.na(as.matrix(nuA(object)[snp.index, ]))) == 0
|
1011
|
|
- nuA.snp.notnegative <- rowSums(as.matrix(nuA(object)[snp.index, ]) < 20) == 0
|
|
1061
|
+ nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index, drop=FALSE])
|
|
1062
|
+ nuA.snp.notmissing <- rowSums(is.na(nuA.snp)) == 0
|
|
1063
|
+ nuA.snp.notnegative <- rowSums(as.matrix(nuA(object)[snp.index, batch.index, drop=FALSE]) < 20) == 0
|
1012
|
1064
|
snp.index <- snp.index[nuA.snp.notmissing & nuA.snp.notnegative]
|
1013
|
|
- medianA.AA.snp <- as.matrix(medianA.AA(object)[snp.index,])
|
1014
|
|
- medianB.BB.snp <- as.matrix(medianB.BB(object)[snp.index,])
|
1015
|
|
-
|
1016
|
|
- nuA.snp <- as.matrix(nuA(object)[snp.index, ])
|
1017
|
|
- nuB.snp <- as.matrix(nuB(object)[snp.index, ])
|
1018
|
|
- phiA.snp <- as.matrix(phiA(object)[snp.index, ])
|
1019
|
|
- phiB.snp <- as.matrix(phiB(object)[snp.index, ])
|
|
1065
|
+ medianA.AA.snp <- as.matrix(medianA.AA(object)[snp.index, batch.index])
|
|
1066
|
+ medianB.BB.snp <- as.matrix(medianB.BB(object)[snp.index, batch.index])
|
|
1067
|
+ nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index])
|
|
1068
|
+ nuB.snp <- as.matrix(nuB(object)[snp.index, batch.index])
|
|
1069
|
+ phiA.snp <- as.matrix(phiA(object)[snp.index, batch.index])
|
|
1070
|
+ phiB.snp <- as.matrix(phiB(object)[snp.index, batch.index])
|
1020
|
1071
|
## pseudoAR <- position(object)[snp.index] < 2709520 | (position(object)[snp.index] > 154584237 & position(object)[snp.index] < 154913754)
|
1021
|
1072
|
## pseudoAR[is.na(pseudoAR)] <- FALSE
|
1022
|
1073
|
for(k in seq_along(batches)){
|
1023
|
1074
|
B <- batches[[k]]
|
1024
|
1075
|
this.batch <- unique(as.character(batch(object)[B]))
|
1025
|
1076
|
gender <- object$gender[B]
|
1026
|
|
- enough.men <- N.M[k] >= MIN.SAMPLES
|
1027
|
|
- enough.women <- N.F[k] >= MIN.SAMPLES
|
|
1077
|
+ enough.men <- N.M[[this.batch]] >= MIN.SAMPLES
|
|
1078
|
+ enough.women <- N.F[[this.batch]] >= MIN.SAMPLES
|
1028
|
1079
|
if(!enough.men & !enough.women) {
|
1029
|
1080
|
if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
|
1030
|
1081
|
next()
|
...
|
...
|
@@ -1036,6 +1087,9 @@ fit.lm4 <- function(strata,
|
1036
|
1087
|
}
|
1037
|
1088
|
X <- cbind(1, log2(c(tmp[, 1], tmp[, 2])))
|
1038
|
1089
|
Y <- log2(c(tmp[, 3], tmp[, 4]))
|
|
1090
|
+ notmissing.index <- which(rowSums(is.na(X)) == 0 & !is.na(Y))
|
|
1091
|
+ X <- X[notmissing.index, ]
|
|
1092
|
+ Y <- Y[notmissing.index]
|
1039
|
1093
|
betahat <- solve(crossprod(X), crossprod(X, Y))
|
1040
|
1094
|
if(enough.men){
|
1041
|
1095
|
X.men <- cbind(1, log2(medianA.AA.M[, k]))
|
...
|
...
|
@@ -1055,11 +1109,12 @@ fit.lm4 <- function(strata,
|
1055
|
1109
|
nuB[nuB < MIN.NU] <- MIN.NU
|
1056
|
1110
|
phiB[phiB < MIN.PHI] <- MIN.PHI
|
1057
|
1111
|
}
|
1058
|
|
- nuA(object)[marker.index, ] <- nuA
|
1059
|
|
- phiA(object)[marker.index, ] <- phiA
|
1060
|
|
- nuB(object)[marker.index, ] <- nuB
|
1061
|
|
- phiB(object)[marker.index, ] <- phiB
|
1062
|
|
- if(is.lds) {close(object); return(TRUE)} else return(object)
|
|
1112
|
+ nuA(object)[marker.index, batch.index] <- nuA
|
|
1113
|
+ phiA(object)[marker.index, batch.index] <- phiA
|
|
1114
|
+ nuB(object)[marker.index, batch.index] <- nuB
|
|
1115
|
+ phiB(object)[marker.index, batch.index] <- phiB
|
|
1116
|
+ ##if(is.lds) {close(object); return(TRUE)} else return(object)
|
|
1117
|
+ TRUE
|
1063
|
1118
|
}
|
1064
|
1119
|
|
1065
|
1120
|
whichPlatform <- function(cdfName){
|
...
|
...
|
@@ -1411,18 +1466,12 @@ shrinkSummary <- function(object,
|
1411
|
1466
|
}
|
1412
|
1467
|
|
1413
|
1468
|
genotypeSummary <- function(object,
|
1414
|
|
- GT.CONF.THR=0.95,
|
|
1469
|
+ GT.CONF.THR=0.80,
|
1415
|
1470
|
type=c("SNP", "NP", "X.SNP", "X.NP"), ##"X.snps", "X.nps"),
|
1416
|
1471
|
verbose=TRUE,
|
1417
|
1472
|
marker.index,
|
1418
|
1473
|
is.lds){
|
1419
|
1474
|
if(type == "X.SNP" | type=="X.NP"){
|
1420
|
|
-## gender <- object$gender
|
1421
|
|
-## ## the number of women in each batch could be less than 3...
|
1422
|
|
-## if(sum(gender == 2) < 3) {
|
1423
|
|
-## message("too few females to estimate within genotype summary statistics on CHR X")
|
1424
|
|
-## return(object)
|
1425
|
|
-## }
|
1426
|
1475
|
CHR.X <- TRUE
|
1427
|
1476
|
} else CHR.X <- FALSE
|
1428
|
1477
|
if(missing(marker.index)){
|
...
|
...
|
@@ -1514,7 +1563,7 @@ summarizeNps <- function(strata, index.list, object, batchSize,
|
1514
|
1563
|
}
|
1515
|
1564
|
this.batch <- unique(as.character(batch(object)[sample.index]))
|
1516
|
1565
|
j <- match(this.batch, batchnames)
|
1517
|
|
- I.A <- AA[, sample.index]
|
|
1566
|
+ I.A <- AA[, sample.index, drop=FALSE]
|
1518
|
1567
|
medianA.AA[, k] <- rowMedians(I.A, na.rm=TRUE)
|
1519
|
1568
|
madA.AA[, k] <- rowMAD(I.A, na.rm=TRUE)
|
1520
|
1569
|
## log2 Transform Intensities
|
...
|
...
|
@@ -1532,23 +1581,14 @@ summarizeSnps <- function(strata,
|
1532
|
1581
|
index.list,
|
1533
|
1582
|
object,
|
1534
|
1583
|
GT.CONF.THR,
|
1535
|
|
- verbose, is.lds, CHR.X, ...){
|
1536
|
|
- if(is.lds) {
|
1537
|
|
- physical <- get("physical")
|
1538
|
|
- open(object)
|
1539
|
|
- }
|
|
1584
|
+ verbose, is.lds=TRUE, CHR.X, ...){
|
|
1585
|
+## if(is.lds) {
|
|
1586
|
+## physical <- get("physical")
|
|
1587
|
+## open(object)
|
|
1588
|
+## }
|
|
1589
|
+ open(object)
|
1540
|
1590
|
if(verbose) message(" Probe stratum ", strata, " of ", length(index.list))
|
1541
|
1591
|
index <- index.list[[strata]]
|
1542
|
|
-## if(CHR.X) {
|
1543
|
|
-## sample.index <- which(object$gender==2)
|
1544
|
|
-## batches <- split(sample.index, as.character(batch(object))[sample.index])
|
1545
|
|
-## } else {
|
1546
|
|
-## batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
1547
|
|
-## }
|
1548
|
|
- ## this message can be confusing if no women are in the dataset
|
1549
|
|
-## if(CHR.X){
|
1550
|
|
-## if(verbose) message(" biallelic cluster medians are estimated using only the women for SNPs on chr. X")
|
1551
|
|
-## }
|
1552
|
1592
|
batches <- split(seq_along(batch(object)), as.character(batch(object)))
|
1553
|
1593
|
batchnames <- batchNames(object)
|
1554
|
1594
|
nr <- length(index)
|
...
|
...
|
@@ -1562,6 +1602,14 @@ summarizeSnps <- function(strata,
|
1562
|
1602
|
AA <- as.matrix(A(object)[index, ])
|
1563
|
1603
|
BB <- as.matrix(B(object)[index, ])
|
1564
|
1604
|
FL <- as.matrix(flags(object)[index, ])
|
|
1605
|
+ summaryStats <- function(X, INT, FUNS){
|
|
1606
|
+ tmp <- matrix(NA, nrow(X), length(FUNS))
|
|
1607
|
+ for(j in seq_along(FUNS)){
|
|
1608
|
+ FUN <- match.fun(FUNS[j])
|
|
1609
|
+ tmp[, j] <- FUN(X*INT, na.rm=TRUE)
|
|
1610
|
+ }
|
|
1611
|
+ tmp
|
|
1612
|
+ }
|
1565
|
1613
|
if(verbose) message(" Computing summaries...")
|
1566
|
1614
|
for(k in seq_along(batches)){
|
1567
|
1615
|
##note that the genotype frequency for AA would include 'A' on chromosome X for men
|
...
|
...
|
@@ -1569,10 +1617,17 @@ summarizeSnps <- function(strata,
|
1569
|
1617
|
this.batch <- unique(as.character(batch(object)[sample.index]))
|
1570
|
1618
|
j <- match(this.batch, batchnames)
|
1571
|
1619
|
G <- GG[, sample.index, drop=FALSE]
|
1572
|
|
- ##NORM <- normal.index[, k]
|
1573
|
1620
|
xx <- CP[, sample.index, drop=FALSE]
|
1574
|
1621
|
highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
|
|
1622
|
+ ## Some markers may have genotype confidences scores that are ALL below the threshold
|
|
1623
|
+ ## For these markers, provide statistical summaries based on all the samples and flag
|
|
1624
|
+ ## Provide summaries for these markers and flag to indicate the problem
|
|
1625
|
+ ## RS: check whether we need the next to lines and, if so, effect downstream
|
|
1626
|
+ ii <- which(rowSums(highConf) == 0)
|
|
1627
|
+ if(length(ii) > 0) highConf[ii, ] <- TRUE
|
1575
|
1628
|
G <- G*highConf
|
|
1629
|
+ ## table(rowSums(G==0))
|
|
1630
|
+ ##G <- G*highConf*NORM
|
1576
|
1631
|
A <- AA[, sample.index, drop=FALSE]
|
1577
|
1632
|
B <- BB[, sample.index, drop=FALSE]
|
1578
|
1633
|
## this can be time consuming...do only once
|
...
|
...
|
@@ -1590,48 +1645,101 @@ summarizeSnps <- function(strata,
|
1590
|
1645
|
sample.index <- sample.index[gender == 2]
|
1591
|
1646
|
if(length(sample.index) == 0) next()
|
1592
|
1647
|
}
|
1593
|
|
- summaryStats <- function(X, INT, FUNS){
|
1594
|
|
- tmp <- matrix(NA, nrow(X), length(FUNS))
|
1595
|
|
- for(j in seq_along(FUNS)){
|
1596
|
|
- FUN <- match.fun(FUNS[j])
|
1597
|
|
- tmp[, j] <- FUN(X*INT, na.rm=TRUE)
|
1598
|
|
- }
|
1599
|
|
- tmp
|
1600
|
|
- }
|
1601
|
1648
|
stats <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
|
1602
|
1649
|
medianA.AA(object)[index, k] <- stats[, 1]
|
|
1650
|
+ ##missing.index <- which(rowSums(is.na(medianA.AA(object)[index, ,drop=FALSE])) > 0)
|
1603
|
1651
|
madA.AA(object)[index, k] <- stats[, 2]
|
1604
|
|
-
|
1605
|
1652
|
stats <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
|
1606
|
1653
|
medianA.AB(object)[index, k] <- stats[, 1]
|
1607
|
1654
|
madA.AB(object)[index, k] <- stats[, 2]
|
1608
|
|
-
|
1609
|
1655
|
stats <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
|
1610
|
1656
|
medianA.BB(object)[index, k] <- stats[, 1]
|
1611
|
1657
|
madA.BB(object)[index, k] <- stats[, 2]
|
1612
|
|
-
|
1613
|
1658
|
stats <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
|
1614
|
1659
|
medianB.AA(object)[index, k] <- stats[, 1]
|
1615
|
1660
|
madB.AA(object)[index, k] <- stats[, 2]
|
1616
|
|
-
|
1617
|
1661
|
stats <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
|
1618
|
1662
|
medianB.AB(object)[index, k] <- stats[, 1]
|
1619
|
1663
|
madB.AB(object)[index, k] <- stats[, 2]
|
1620
|
|
-
|
1621
|
1664
|
stats <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
|
1622
|
1665
|
medianB.BB(object)[index, k] <- stats[, 1]
|
1623
|
1666
|
madB.BB(object)[index, k] <- stats[, 2]
|
1624
|
|
-
|
1625
|
1667
|
## log2 Transform Intensities
|
1626
|
1668
|
A <- log2(A); B <- log2(B)
|
1627
|
1669
|
tau2A.AA[, k] <- summaryStats(G.AA, A, FUNS="rowMAD")^2
|
1628
|
1670
|
tau2A.BB[, k] <- summaryStats(G.BB, A, FUNS="rowMAD")^2
|
1629
|
1671
|
tau2B.AA[, k] <- summaryStats(G.AA, B, FUNS="rowMAD")^2
|
1630
|
1672
|
tau2B.BB[, k] <- summaryStats(G.BB, B, FUNS="rowMAD")^2
|
1631
|
|
-
|
1632
|
1673
|
corrAA[, k] <- rowCors(A*G.AA, B*G.AA, na.rm=TRUE)
|
1633
|
1674
|
corrAB[, k] <- rowCors(A*G.AB, B*G.AB, na.rm=TRUE)
|
1634
|
1675
|
corrBB[, k] <- rowCors(A*G.BB, B*G.BB, na.rm=TRUE)
|
|
1676
|
+ rm(A, B, G.AA, G.AB, G.BB, xx, highConf, G)
|
|
1677
|
+ gc()
|
|
1678
|
+ }
|
|
1679
|
+ ##---------------------------------------------------------------------------
|
|
1680
|
+ ## grand mean
|
|
1681
|
+ ##---------------------------------------------------------------------------
|
|
1682
|
+ if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
|
|
1683
|
+ ##k <- k+1
|
|
1684
|
+ k <- match("grandMean", batchNames(object))
|
|
1685
|
+ if(verbose) message(" computing grand means...")
|
|
1686
|
+ ##G <- GG[, B]
|
|
1687
|
+ ##NORM <- normal.index[, k]
|
|
1688
|
+ ##xx <- CP[, B]
|
|
1689
|
+ ##highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
|
|
1690
|
+ highConf <- (1-exp(-CP/1000)) > GT.CONF.THR
|
|
1691
|
+ ##G <- G*highConf
|
|
1692
|
+ ## Some markers may have genotype confidences scores that are ALL below the threshold
|
|
1693
|
+ ## For these markers, provide statistical summaries based on all the samples and flag
|
|
1694
|
+ ## Provide summaries for these markers and flag to indicate the problem
|
|
1695
|
+ ii <- which(rowSums(highConf) == 0)
|
|
1696
|
+ if(length(ii) > 0) highConf[ii, ] <- TRUE
|
|
1697
|
+ GG <- GG*highConf
|
|
1698
|
+ ## table(rowSums(G==0))
|
|
1699
|
+ ##G <- G*highConf*NORM
|
|
1700
|
+## A <- AA[, B]
|
|
1701
|
+## B <- BB[, B]
|
|
1702
|
+ ## this can be time consuming...do only once
|
|
1703
|
+## G.AA <- G==1
|
|
1704
|
+ G.AA <- GG==1
|
|
1705
|
+ G.AA[G.AA==FALSE] <- NA
|
|
1706
|
+ G.AB <- GG==2
|
|
1707
|
+ G.AB[G.AB==FALSE] <- NA
|
|
1708
|
+ G.BB <- GG==3
|
|
1709
|
+ G.BB[G.BB==FALSE] <- NA
|
|
1710
|
+ Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
|
|
1711
|
+ Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
|
|
1712
|
+ Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
|
|
1713
|
+ ## Calculate row medians and MADs
|
|
1714
|
+ stats <- summaryStats(G.AA, AA, FUNS=c("rowMedians", "rowMAD"))
|
|
1715
|
+ medianA.AA(object)[index, k] <- stats[, 1]
|
|
1716
|
+ madA.AA(object)[index, k] <- stats[, 2]
|
|
1717
|
+ stats <- summaryStats(G.AB, AA, FUNS=c("rowMedians", "rowMAD"))
|
|
1718
|
+ medianA.AB(object)[index, k] <- stats[, 1]
|
|
1719
|
+ madA.AB(object)[index, k] <- stats[, 2]
|
|
1720
|
+ stats <- summaryStats(G.BB, AA, FUNS=c("rowMedians", "rowMAD"))
|
|
1721
|
+ medianA.BB(object)[index, k] <- stats[, 1]
|
|
1722
|
+ madA.BB(object)[index, k] <- stats[, 2]
|
|
1723
|
+ stats <- summaryStats(G.AA, BB, FUNS=c("rowMedians", "rowMAD"))
|
|
1724
|
+ medianB.AA(object)[index, k] <- stats[, 1]
|
|
1725
|
+ madB.AA(object)[index, k] <- stats[, 2]
|
|
1726
|
+ stats <- summaryStats(G.AB, BB, FUNS=c("rowMedians", "rowMAD"))
|
|
1727
|
+ medianB.AB(object)[index, k] <- stats[, 1]
|
|
1728
|
+ madB.AB(object)[index, k] <- stats[, 2]
|
|
1729
|
+ stats <- summaryStats(G.BB, BB, FUNS=c("rowMedians", "rowMAD"))
|
|
1730
|
+ medianB.BB(object)[index, k] <- stats[, 1]
|
|
1731
|
+ madB.BB(object)[index, k] <- stats[, 2]
|
|
1732
|
+ ## log2 Transform Intensities
|
|
1733
|
+ AA <- log2(AA); BB <- log2(BB)
|
|
1734
|
+ tau2A.AA[, k] <- summaryStats(G.AA, AA, FUNS="rowMAD")^2
|
|
1735
|
+ tau2A.BB[, k] <- summaryStats(G.BB, AA, FUNS="rowMAD")^2
|
|
1736
|
+ tau2B.AA[, k] <- summaryStats(G.AA, BB, FUNS="rowMAD")^2
|
|
1737
|
+ tau2B.BB[, k] <- summaryStats(G.BB, BB, FUNS="rowMAD")^2
|
|
1738
|
+ corrAA[, k] <- rowCors(AA*G.AA, BB*G.AA, na.rm=TRUE)
|
|
1739
|
+ corrAB[, k] <- rowCors(AA*G.AB, BB*G.AB, na.rm=TRUE)
|
|
1740
|
+ corrBB[, k] <- rowCors(AA*G.BB, BB*G.BB, na.rm=TRUE)
|
|
1741
|
+ rm(GG, CP, AA, BB, FL, stats)
|
|
1742
|
+ gc()
|
1635
|
1743
|
}
|
1636
|
1744
|
if(verbose) message(" Begin writing...")
|
1637
|
1745
|
N.AA(object)[index,] <- Ns.AA
|
...
|
...
|
@@ -1640,24 +1748,9 @@ summarizeSnps <- function(strata,
|
1640
|
1748
|
corrAA(object)[index,] <- corrAA
|
1641
|
1749
|
corrAB(object)[index,] <- corrAB
|
1642
|
1750
|
corrBB(object)[index,] <- corrBB
|
1643
|
|
-## medianA.AA(object)[index,] <- do.call(cbind, lapply(statsA.AA, function(x) x[, 1]))
|
1644
|
|
-## medianA.AB(object)[index,] <- do.call(cbind, lapply(statsA.AB, function(x) x[, 1]))
|
1645
|
|
-## medianA.BB(object)[index,] <- do.call(cbind, lapply(statsA.BB, function(x) x[, 1]))
|
1646
|
|
-## medianB.AA(object)[index,] <- do.call(cbind, lapply(statsB.AA, function(x) x[, 1]))
|
1647
|
|
-## medianB.AB(object)[index,] <- do.call(cbind, lapply(statsB.AB, function(x) x[, 1]))
|
1648
|
|
-## medianB.BB(object)[index,] <- do.call(cbind, lapply(statsB.BB, function(x) x[, 1]))
|
1649
|
|
-##
|
1650
|
|
-## madA.AA(object)[index,] <- do.call(cbind, lapply(statsA.AA, function(x) x[, 2]))
|
1651
|
|
-## madA.AB(object)[index,] <- do.call(cbind, lapply(statsA.AB, function(x) x[, 2]))
|
1652
|
|
-## madA.BB(object)[index,] <- do.call(cbind, lapply(statsA.BB, function(x) x[, 2]))
|
1653
|
|
-## madB.AA(object)[index,] <- do.call(cbind, lapply(statsB.AA, function(x) x[, 2]))
|
1654
|
|
-## madB.AB(object)[index,] <- do.call(cbind, lapply(statsB.AB, function(x) x[, 2]))
|
1655
|
|
-## madB.BB(object)[index,] <- do.call(cbind, lapply(statsB.BB, function(x) x[, 2]))
|
1656
|
1751
|
tau2A.AA(object)[index, ] <- tau2A.AA
|
1657
|
|
-## tau2A.AB(object)[index, ] <- tau2A.AB
|
1658
|
1752
|
tau2A.BB(object)[index, ] <- tau2A.BB
|
1659
|
1753
|
tau2B.AA(object)[index, ] <- tau2B.AA
|
1660
|
|
-## tau2B.AB(object)[index, ] <- tau2B.AB
|
1661
|
1754
|
tau2B.BB(object)[index, ] <- tau2B.BB
|
1662
|
1755
|
if(is.lds) return(TRUE) else return(object)
|
1663
|
1756
|
}
|
...
|
...
|
@@ -1677,6 +1770,7 @@ crlmmCopynumber <- function(object,
|
1677
|
1770
|
THR.NU.PHI=TRUE,
|
1678
|
1771
|
type=c("SNP", "NP", "X.SNP", "X.NP")){
|
1679
|
1772
|
stopifnot(type %in% c("SNP", "NP", "X.SNP", "X.NP"))
|
|
1773
|
+ if(GT.CONF.THR >= 1 | GT.CONF.THR < 0) stop("GT.CONF.THR must be in [0,1)")
|
1680
|
1774
|
batch <- batch(object)
|
1681
|
1775
|
is.snp <- isSnp(object)
|
1682
|
1776
|
is.autosome <- chromosome(object) < 23
|
...
|
...
|
@@ -1686,7 +1780,9 @@ crlmmCopynumber <- function(object,
|
1686
|
1780
|
if(is.lds) stopifnot(isPackageLoaded("ff"))
|
1687
|
1781
|
samplesPerBatch <- table(as.character(batch(object)))
|
1688
|
1782
|
if(any(samplesPerBatch < MIN.SAMPLES)){
|
1689
|
|
- warning("The following batches have fewer than ", MIN.SAMPLES, " samples:", names(samplesPerBatch)[samplesPerBatch < MIN.SAMPLES], ". Not estimating copy number for these batches.")
|
|
1783
|
+ tmp <- paste(names(samplesPerBatch)[samplesPerBatch < MIN.SAMPLES], collapse=", ")
|
|
1784
|
+ message("The following batches have fewer than ", MIN.SAMPLES, " samples: ", tmp)
|
|
1785
|
+ message("Not estimating copy number parameters for batch ", tmp)
|
1690
|
1786
|
}
|
1691
|
1787
|
mylabel <- function(marker.type){
|
1692
|
1788
|
switch(marker.type,
|
...
|
...
|
@@ -1696,15 +1792,14 @@ crlmmCopynumber <- function(object,
|
1696
|
1792
|
X.NP="chromosome X nonpolymorphic markers")
|
1697
|
1793
|
}
|
1698
|
1794
|
if(verbose) message("Computing summary statistics of the genotype clusters for each batch")
|
1699
|
|
- for(i in c(1, 2, 4)){ ## do not do X.SNP. Do this during fit.lm3
|
|
1795
|
+ I <- which(type %in% c("SNP", "NP", "X.NP"))
|
|
1796
|
+ for(j in seq_along(I)){ ## do not do X.SNP. Do this during fit.lm3
|
|
1797
|
+ i <- I[j]
|
1700
|
1798
|
marker.type <- type[i]
|
1701
|
1799
|
if(verbose) message(paste("...", mylabel(marker.type)))
|
1702
|
1800
|
##if(verbose) message(paste("Computing summary statistics for ", mylabel(marker.type), " genotype clusters for each batch")
|
1703
|
|
- marker.index <- whichMarkers(marker.type,
|
1704
|
|
- is.snp,
|
1705
|
|
- is.autosome,
|
1706
|
|
- is.annotated,
|
1707
|
|
- is.X)
|
|
1801
|
+ marker.index <- whichMarkers(marker.type, is.snp,
|
|
1802
|
+ is.autosome, is.annotated, is.X)
|
1708
|
1803
|
object <- genotypeSummary(object=object,
|
1709
|
1804
|
GT.CONF.THR=GT.CONF.THR,
|
1710
|
1805
|
type=marker.type,
|
...
|
...
|
@@ -1713,16 +1808,18 @@ crlmmCopynumber <- function(object,
|
1713
|
1808
|
is.lds=is.lds)
|
1714
|
1809
|
}
|
1715
|
1810
|
if(verbose) message("Imputing unobserved genotype medians and shrinking the variances (within-batch, across loci) ")##SNPs only
|
1716
|
|
- marker.index <- whichMarkers("SNP", is.snp,
|
1717
|
|
- is.autosome, is.annotated, is.X)
|
1718
|
|
- object <- shrinkSummary(object=object,
|
1719
|
|
- MIN.OBS=MIN.OBS,
|
1720
|
|
- MIN.SAMPLES=MIN.SAMPLES,
|
1721
|
|
- DF.PRIOR=DF.PRIOR,
|
1722
|
|
- type="SNP",
|
1723
|
|
- verbose=verbose,
|
1724
|
|
- marker.index=marker.index,
|
1725
|
|
- is.lds=is.lds)
|
|
1811
|
+ if("SNP" %in% type){
|
|
1812
|
+ marker.index <- whichMarkers("SNP", is.snp,
|
|
1813
|
+ is.autosome, is.annotated, is.X)
|
|
1814
|
+ object <- shrinkSummary(object=object,
|
|
1815
|
+ MIN.OBS=MIN.OBS,
|
|
1816
|
+ MIN.SAMPLES=MIN.SAMPLES,
|
|
1817
|
+ DF.PRIOR=DF.PRIOR,
|
|
1818
|
+ type="SNP",
|
|
1819
|
+ verbose=verbose,
|
|
1820
|
+ marker.index=marker.index,
|
|
1821
|
+ is.lds=is.lds)
|
|
1822
|
+ }
|
1726
|
1823
|
if(verbose) message("Estimating parameters for copy number")##SNPs only
|
1727
|
1824
|
for(i in seq_along(type)){
|
1728
|
1825
|
marker.type <- type[i]
|
...
|
...
|
@@ -1745,7 +1842,7 @@ crlmmCopynumber <- function(object,
|
1745
|
1842
|
is.lds=is.lds,
|
1746
|
1843
|
CHR.X=CHR.X)
|
1747
|
1844
|
}
|
1748
|
|
- return(object)
|
|
1845
|
+ TRUE
|
1749
|
1846
|
}
|
1750
|
1847
|
crlmmCopynumber2 <- function(){
|
1751
|
1848
|
.Defunct(msg="The crlmmCopynumber2 function has been deprecated. The function crlmmCopynumber should be used instead. crlmmCopynumber will support large data using ff provided that the ff package is loaded.")
|
...
|
...
|
@@ -1753,6 +1850,7 @@ crlmmCopynumber2 <- function(){
|
1753
|
1850
|
crlmmCopynumberLD <- crlmmCopynumber2
|
1754
|
1851
|
|
1755
|
1852
|
|
|
1853
|
+
|
1756
|
1854
|
estimateCnParameters <- function(object,
|
1757
|
1855
|
type=c("SNP", "NP", "X.SNP", "X.NP"),
|
1758
|
1856
|
verbose=TRUE,
|
...
|
...
|
@@ -1848,10 +1946,13 @@ imputeAA.AB <- function(index, N.AA, N.AB, N.BB,
|
1848
|
1946
|
X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
|
1849
|
1947
|
Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
|
1850
|
1948
|
tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
|
1851
|
|
- if(is.null(tmp)) {cat("."); next()}
|
1852
|
|
-
|
|
1949
|
+ if(is.null(tmp)) {
|
|
1950
|
+ ##cat(".");
|
|
1951
|
+ next()
|
|
1952
|
+ }
|
1853
|
1953
|
## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
|
1854
|
1954
|
L <- which(rowSums(Ns < 3) == 2)
|
|
1955
|
+ if(length(L) == 0) next()
|
1855
|
1956
|
Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
|
1856
|
1957
|
imputedVals <- Z %*% betahat
|
1857
|
1958
|
medianA.AA[i, L] <- imputedVals[, 1]
|
...
|
...
|
@@ -1883,10 +1984,14 @@ imputeAB.BB <- function(index, N.AA, N.AB, N.BB,
|
1883
|
1984
|
X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
|
1884
|
1985
|
Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
|
1885
|
1986
|
tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
|
1886
|
|
- if(is.null(tmp)) {cat("."); next()}
|
|
1987
|
+ if(is.null(tmp)) {
|
|
1988
|
+## cat(".");
|
|
1989
|
+ next()
|
|
1990
|
+ }
|
1887
|
1991
|
|
1888
|
1992
|
## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
|
1889
|
1993
|
L <- which(rowSums(Ns < 3) == 2)
|
|
1994
|
+ if(length(L) == 0) next()
|
1890
|
1995
|
Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
|
1891
|
1996
|
imputedVals <- Z %*% betahat
|
1892
|
1997
|
medianA.AB[i, L] <- imputedVals[, 1]
|
...
|
...
|
@@ -1918,10 +2023,13 @@ imputeAA <- function(index, N.AA, N.AB, N.BB,
|
1918
|
2023
|
X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
|
1919
|
2024
|
Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
|
1920
|
2025
|
tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
|
1921
|
|
- if(is.null(tmp)) {cat("."); next()}
|
1922
|
|
-
|
|
2026
|
+ if(is.null(tmp)) {
|
|
2027
|
+ ##cat(".");
|
|
2028
|
+ next()
|
|
2029
|
+ }
|
1923
|
2030
|
## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
|
1924
|
2031
|
L <- which(rowSums(Ns < 3) == 1)
|
|
2032
|
+ if(length(L) == 0) next()
|
1925
|
2033
|
Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
|
1926
|
2034
|
imputedVals <- Z %*% betahat
|
1927
|
2035
|
medianA.AA[i, L] <- imputedVals[, 1]
|
...
|
...
|
@@ -1950,11 +2058,19 @@ imputeBB <- function(index, N.AA, N.AB, N.BB,
|
1950
|
2058
|
if(length(K) < 1) next() ## nothing we can do. use information from other snps
|
1951
|
2059
|
X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
|
1952
|
2060
|
Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
|
|
2061
|
+ colnames(Y) <- c("A.BB", "B.BB")
|
1953
|
2062
|
tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
|
1954
|
|
- if(is.null(tmp)) {cat("."); next()}
|
1955
|
|
-
|
|
2063
|
+ if(is.null(tmp)) {
|
|
2064
|
+ ##cat(".");
|
|
2065
|
+ next()
|
|
2066
|
+ }
|
|
2067
|
+## else{
|
|
2068
|
+## R <- Y-crossprod(X, betahat)
|
|
2069
|
+## RSS <- t(R)%*%R
|
|
2070
|
+## }
|
1956
|
2071
|
## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
|
1957
|
2072
|
L <- which(rowSums(Ns < 3) == 1)
|
|
2073
|
+ if(length(L) == 0) next()
|
1958
|
2074
|
Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
|
1959
|
2075
|
imputedVals <- Z %*% betahat
|
1960
|
2076
|
medianA.BB[i, L] <- imputedVals[, 1]
|
...
|
...
|
@@ -1987,13 +2103,13 @@ imputeAcrossBatch <- function(N.AA, N.AB, N.BB,
|
1987
|
2103
|
names(updated) <- c("AA.AB", "AB.BB", "AA", "BB")
|
1988
|
2104
|
if(length(index[["AA.AB"]] > 0)){
|
1989
|
2105
|
res <- imputeAA.AB(index[["AA.AB"]],
|
1990
|
|
- N.AA,
|
1991
|
|
- N.AB,
|
1992
|
|
- N.BB,
|
1993
|
|
- medianA.AA,
|
1994
|
|
- medianA.AB,
|
1995
|
|
- medianA.BB,
|
1996
|
|
- medianB.AA, medianB.AB, medianB.BB)
|
|
2106
|
+ N.AA,
|
|
2107
|
+ N.AB,
|
|
2108
|
+ N.BB,
|
|
2109
|
+ medianA.AA,
|
|
2110
|
+ medianA.AB,
|
|
2111
|
+ medianA.BB,
|
|
2112
|
+ medianB.AA, medianB.AB, medianB.BB)
|
1997
|
2113
|
updated$AA.AB <- res$imputed
|
1998
|
2114
|
}
|
1999
|
2115
|
if(length(index[["AB.BB"]] > 0)){
|
...
|
...
|
@@ -2004,7 +2120,9 @@ imputeAcrossBatch <- function(N.AA, N.AB, N.BB,
|
2004
|
2120
|
res[["medianA.AA"]],
|
2005
|
2121
|
res[["medianA.AB"]],
|
2006
|
2122
|
res[["medianA.BB"]],
|
2007
|
|
- res[["medianB.AA"]], res[["medianB.AB"]], res[["medianB.BB"]])
|
|
2123
|
+ res[["medianB.AA"]],
|
|
2124
|
+ res[["medianB.AB"]],
|
|
2125
|
+ res[["medianB.BB"]])
|
2008
|
2126
|
updated$AB.BB <- res$imputed
|
2009
|
2127
|
}
|
2010
|
2128
|
if(length(index[["AA"]] > 0)){
|
...
|
...
|
@@ -2026,9 +2144,426 @@ imputeAcrossBatch <- function(N.AA, N.AB, N.BB,
|
2026
|
2144
|
res[["medianA.AA"]],
|
2027
|
2145
|
res[["medianA.AB"]],
|
2028
|
2146
|
res[["medianA.BB"]],
|
2029
|
|
- res[["medianB.AA"]], res[["medianB.AB"]], res[["medianB.BB"]])
|
|
2147
|
+ res[["medianB.AA"]],
|
|
2148
|
+ res[["medianB.AB"]],
|
|
2149
|
+ res[["medianB.BB"]])
|
2030
|
2150
|
updated$BB <- res$imputed
|
2031
|
2151
|
}
|
2032
|
2152
|
updated.indices <- unlist(updated)
|
2033
|
|
- return(res, updated)
|
|
2153
|
+ return(list(res, updated))
|
|
2154
|
+}
|
|
2155
|
+
|
|
2156
|
+posteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"), verbose=TRUE,
|
|
2157
|
+ prior.prob=c(1/6, 1/6, 3/6, 1/6)){
|
|
2158
|
+ stopifnot(type %in% c("SNP", "NP", "X.SNP", "X.NP"))
|
|
2159
|
+ stopifnot(sum(prior.prob)==1)
|
|
2160
|
+ batch <- batch(object)
|
|
2161
|
+ is.snp <- isSnp(object)
|
|
2162
|
+ is.autosome <- chromosome(object) < 23
|
|
2163
|
+ is.annotated <- !is.na(chromosome(object))
|
|
2164
|
+ is.X <- chromosome(object) == 23
|
|
2165
|
+ is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
|
|
2166
|
+ if(is.lds) stopifnot(isPackageLoaded("ff"))
|
|
2167
|
+ samplesPerBatch <- table(as.character(batch(object)))
|
|
2168
|
+ mylabel <- function(marker.type){
|
|
2169
|
+ switch(marker.type,
|
|
2170
|
+ SNP="autosomal SNPs",
|
|
2171
|
+ NP="autosomal nonpolymorphic markers",
|
|
2172
|
+ X.SNP="chromosome X SNPs",
|
|
2173
|
+ X.NP="chromosome X nonpolymorphic markers")
|
|
2174
|
+ }
|
|
2175
|
+ if(type=="SNP"){
|
|
2176
|
+ if(verbose) message(paste("...", mylabel(type)))
|
|
2177
|
+ marker.index <- whichMarkers("SNP",
|
|
2178
|
+ is.snp,
|
|
2179
|
+ is.autosome,
|
|
2180
|
+ is.annotated,
|
|
2181
|
+ is.X)
|
|
2182
|
+ marker.list <- splitIndicesByLength(marker.index, ocProbesets())
|
|
2183
|
+ emit <- ocLapply(seq_along(marker.list),
|
|
2184
|
+ posteriorMean.snp,
|
|
2185
|
+ object=object,
|
|
2186
|
+ index.list=marker.list,
|
|
2187
|
+ verbose=verbose,
|
|
2188
|
+ prior.prob=prior.prob)
|
|
2189
|
+ }
|
|
2190
|
+ return(emit)
|
|
2191
|
+}
|
|
2192
|
+
|
|
2193
|
+posteriorMean.snp <- function(stratum, object, index.list,
|
|
2194
|
+ prior.prob=c(1/6, 1/6, 3/6, 1/6), is.lds=TRUE, verbose=TRUE){
|
|
2195
|
+ if(verbose) message("Probe stratum ", stratum, " of ", length(index.list))
|
|
2196
|
+ index <- index.list[[stratum]]
|
|
2197
|
+ if(is.lds){
|
|
2198
|
+ open(A(object))
|
|
2199
|
+ open(B(object))
|
|
2200
|
+ open(tau2A.AA(object))
|
|
2201
|
+ open(tau2B.BB(object))
|
|
2202
|
+ open(tau2A.BB(object))
|
|
2203
|
+ open(tau2B.AA(object))
|
|
2204
|
+ open(corrAA(object))
|
|
2205
|
+ open(corrAB(object))
|
|
2206
|
+ open(corrBB(object))
|
|
2207
|
+ open(nuA(object))
|
|
2208
|
+ open(nuB(object))
|
|
2209
|
+ open(phiA(object))
|
|
2210
|
+ open(phiB(object))
|
|
2211
|
+ }
|
|
2212
|
+ a <- log2(as.matrix(A(object)[index, ]))
|
|
2213
|
+ b <- log2(as.matrix(B(object)[index, ]))
|
|
2214
|
+ sig2A <- as.matrix(tau2A.AA(object)[index,])
|
|
2215
|
+ sig2B <- as.matrix(tau2B.BB(object)[index,])
|
|
2216
|
+ tau2A <- as.matrix(tau2A.BB(object)[index,])
|
|
2217
|
+ tau2B <- as.matrix(tau2B.AA(object)[index,])
|
|
2218
|
+ corrAA <- as.matrix(corrAA(object)[index, ])
|
|
2219
|
+ corrAB <- as.matrix(corrAB(object)[index, ])
|
|
2220
|
+ corrBB <- as.matrix(corrBB(object)[index, ])
|
|
2221
|
+ nuA <- as.matrix(nuA(object)[index, ])
|
|
2222
|
+ phiA <- as.matrix(phiA(object)[index, ])
|
|
2223
|
+ nuB <- as.matrix(nuB(object)[index, ])
|
|
2224
|
+ phiB <- as.matrix(phiB(object)[index, ])
|
|
2225
|
+ if(is.lds){
|
|
2226
|
+ close(A(object))
|
|
2227
|
+ close(B(object))
|
|
2228
|
+ close(tau2A.AA(object))
|
|
2229
|
+ close(tau2B.BB(object))
|
|
2230
|
+ close(tau2A.BB(object))
|
|
2231
|
+ close(tau2B.AA(object))
|
|
2232
|
+ close(corrAA(object))
|
|
2233
|
+ close(corrAB(object))
|
|
2234
|
+ close(corrBB(object))
|
|
2235
|
+ close(nuA(object))
|
|
2236
|
+ close(nuB(object))
|
|
2237
|
+ close(phiA(object))
|
|
2238
|
+ close(phiB(object))
|
|
2239
|
+ }
|
|
2240
|
+ emit <- array(NA, dim=c(nrow(a), ncol(a), 4))##SNPs x sample x 'truth'
|
|
2241
|
+ sample.index <- split(1:ncol(object), batch(object))
|
|
2242
|
+ sum.mymatrix <- function(...){
|
|
2243
|
+ x <- list(...)
|
|
2244
|
+ return(x[[1]] + do.call(sum, x[-1]))
|
|
2245
|
+ }
|
|
2246
|
+ ##emit <- vector("list", length(sample.index))
|
|
2247
|
+ for(j in seq_along(sample.index)){
|
|
2248
|
+ cat("batch ", j, "\n")
|
|
2249
|
+ J <- sample.index[[j]]
|
|
2250
|
+ probs <- array(NA, dim=c(nrow(a), length(J), 4))
|
|
2251
|
+ for(k in seq_along(0:3)){
|
|
2252
|
+ CT <- (0:3)[k]
|
|
2253
|
+ f.x.y <- vector("list", choose(CT+1, CT))
|
|
2254
|
+ for(i in seq_along(0:CT)){
|
|
2255
|
+ CA <- (0:CT)[i]
|
|
2256
|
+ CB <- CT-CA
|
|
2257
|
+ A.scale <- sqrt(tau2A[, j]*(CA==0) + sig2A[, j]*(CA > 0))
|
|
2258
|
+ B.scale <- sqrt(tau2B[, j]*(CA==0) + sig2B[, j]*(CA > 0))
|
|
2259
|
+ if(CA == 0 & CB == 0) rho <- 0
|
|
2260
|
+ if(CA == 0 & CB > 0) rho <- corrBB[, j]
|
|
2261
|
+ if(CA > 0 & CB == 0) rho <- corrAA[, j]
|
|
2262
|
+ if(CA > 0 & CB > 0) rho <- corrAB[, j]
|
|
2263
|
+ meanA <- log2(nuA[, j]+CA*phiA[, j])
|
|
2264
|
+ meanB <- log2(nuB[, j]+CB*phiB[, j])
|
|
2265
|
+ covs <- rho*A.scale*B.scale
|
|
2266
|
+ A.scale2 <- A.scale^2
|
|
2267
|
+ B.scale2 <- B.scale^2
|
|
2268
|
+ Q.x.y <- 1/(1-rho^2)*(((a[, J] - meanA)/A.scale)^2 + ((b[, J] - meanB)/B.scale)^2 - 2*rho*((a[, J] - meanA)*(b[, J] - meanB))/(A.scale*B.scale))
|
|
2269
|
+ f.x.y[[i]] <- 1/(2*pi*A.scale*B.scale*sqrt(1-rho^2))*exp(-0.5*Q.x.y)
|
|
2270
|
+ class(f.x.y[[i]]) <- "mymatrix"
|
|
2271
|
+ }
|
|
2272
|
+ probs[, , k] <- do.call("sum", f.x.y)
|
|
2273
|
+ ##if none of the states are likely (outlier), assign NA
|
|
2274
|
+ ## emit[, , counter] <- f.x.y
|
|
2275
|
+ ## counter <- counter+1
|
|
2276
|
+ }
|
|
2277
|
+ emit[, J, ] <- probs
|
|
2278
|
+ }
|
|
2279
|
+ for(i in 1:4) emit[, , i] <- emit[, , i]*prior.prob[i]
|
|
2280
|
+ total <- matrix(0, nrow(emit), ncol(emit))
|
|
2281
|
+ for(i in 1:4) total <- total+emit[, , i]
|
|
2282
|
+ message(" need to check for outliers before rescaling")
|
|
2283
|
+ is.outlier <- total < 1e-5
|
|
2284
|
+## plot(a[outlier.index[1], ], b[outlier.index[1], ], col="grey50",
|
|
2285
|
+## xaxt="n", yaxt="n", pch=21, cex=0.8,
|
|
2286
|
+## xlim=c(6.5, 12.5), ylim=c(6.5, 12.5), xlab="", ylab="")
|
|
2287
|
+## for(j in seq_along(sample.index)){
|
|
2288
|
+## J <- sample.index[[j]]
|
|
2289
|
+## b <- unique(batch(object)[J])
|
|
2290
|
+## for(CN in 2) lines(object, outlier.index[1], b, CN, col="black", lwd=2, x.axis="A")
|
|
2291
|
+## }
|
|
2292
|
+ ## how to handle outliers...
|
|
2293
|
+ ## - use priors (posterior mean will likely be near 2). smoothing needs to take into account the uncertainty
|
|
2294
|
+ ## - need uncertainty estimates for posterior means
|
|
2295
|
+ for(i in 1:4) emit[, , i] <- emit[, , i]/total
|
|
2296
|
+ return(emit)
|
|
2297
|
+}
|
|
2298
|
+
|
|
2299
|
+
|
|
2300
|
+
|
|
2301
|
+
|
|
2302
|
+rscrlmmGT2 <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
|
|
2303
|
+ col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6,
|
|
2304
|
+ SNRMin=5, recallMin=10, recallRegMin=1000,
|
|
2305
|
+ gender=NULL, desctrucitve=FALSE, verbose=TRUE,
|
|
2306
|
+ returnParams=FALSE, badSNP=.7){
|
|
2307
|
+ pkgname <- getCrlmmAnnotationName(cdfName)
|
|
2308
|
+ stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
|
|
2309
|
+ open(SNR)
|
|
2310
|
+ open(A)
|
|
2311
|
+ open(B)
|
|
2312
|
+ open(mixtureParams)
|
|
2313
|
+ ## expect objects to be ff
|
|
2314
|
+
|
|
2315
|
+ keepIndex <- which( SNR[] > SNRMin)
|
|
2316
|
+ if(length(keepIndex)==0) stop("No arrays above quality threshold!")
|
|
2317
|
+ if(is.null(row.names) & is.null(rownames(A))){
|
|
2318
|
+ ##verify that A has only snps. otherwise, calling function must pass rownames
|
|
2319
|
+ message("rownames not available. Assuming only SNPs in A")
|
|
2320
|
+ snpBatches <- splitIndicesByLength(1:nrow(A), ocProbesets())
|
|