git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48923 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -53,7 +53,7 @@ importFrom(mvtnorm, dmvnorm) |
53 | 53 |
|
54 | 54 |
importFrom(ellipse, ellipse) |
55 | 55 |
|
56 |
-importFrom(ff, ffdf) |
|
56 |
+importFrom(ff, ffdf, physical.ff, physical.ffdf) |
|
57 | 57 |
|
58 | 58 |
exportClasses(CNSetLM, ffdf, list) |
59 | 59 |
exportMethods(open, "[", show, lM, lines, nu, phi, corr, sigma2, tau2) |
... | ... |
@@ -61,6 +61,7 @@ export(crlmm, |
61 | 61 |
crlmmCopynumber, |
62 | 62 |
crlmmIllumina, |
63 | 63 |
crlmmIllumina2, |
64 |
+ ellipseCenters, |
|
64 | 65 |
genotype, |
65 | 66 |
readIdatFiles, |
66 | 67 |
readIdatFiles2, |
... | ... |
@@ -71,7 +72,7 @@ export(crlmm, |
71 | 72 |
batch, |
72 | 73 |
crlmmCopynumber2, crlmmCopynumberLD) |
73 | 74 |
export(constructIlluminaCNSet) |
74 |
- |
|
75 |
+export(linesCNSetLM) |
|
75 | 76 |
|
76 | 77 |
|
77 | 78 |
|
... | ... |
@@ -24,6 +24,7 @@ getFeatureData.Affy <- function(cdfName, copynumber=FALSE){ |
24 | 24 |
gns <- getVarInEnv("gns") |
25 | 25 |
path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep="")) |
26 | 26 |
load(file.path(path, "snpProbes.rda")) |
27 |
+ snpProbes <- getVarInEnv("snpProbes") |
|
27 | 28 |
if(copynumber){ |
28 | 29 |
load(file.path(path, "cnProbes.rda")) |
29 | 30 |
cnProbes <- get("cnProbes") |
... | ... |
@@ -448,225 +449,225 @@ genotype3 <- function(filenames, |
448 | 449 |
## For Illumina |
449 | 450 |
##--------------------------------------------------------------------------- |
450 | 451 |
##--------------------------------------------------------------------------- |
451 |
-getPhenoData <- function(sampleSheet=NULL, arrayNames=NULL, |
|
452 |
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A")){ |
|
453 |
- if(!is.null(arrayNames)) { |
|
454 |
- pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
455 |
- } |
|
456 |
- if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
|
457 |
- if(is.null(arrayNames)){ |
|
458 |
- ##arrayNames=NULL |
|
459 |
- if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
460 |
- barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
461 |
- arrayNames=barcode |
|
462 |
- } |
|
463 |
- if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
464 |
- position = sampleSheet[,arrayInfoColNames$position] |
|
465 |
- if(is.null(arrayNames)) |
|
466 |
- arrayNames=position |
|
467 |
- else |
|
468 |
- arrayNames = paste(arrayNames, position, sep=sep) |
|
469 |
- if(highDensity) { |
|
470 |
- hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
471 |
- for(i in names(hdExt)) |
|
472 |
- arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
473 |
- } |
|
474 |
- } |
|
475 |
- } |
|
476 |
- pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
477 |
- sampleNames(pd) <- basename(arrayNames) |
|
478 |
- } |
|
479 |
- if(is.null(arrayNames)) { |
|
480 |
- arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
481 |
- if(!is.null(sampleSheet)) { |
|
482 |
- sampleSheet=NULL |
|
483 |
- cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
484 |
- } |
|
485 |
- pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
486 |
- } |
|
487 |
- return(pd) |
|
488 |
-} |
|
489 |
-constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep, sampleSheet, arrayInfoColNames){ |
|
490 |
- if(verbose) message("reading first idat file to extract feature data") |
|
491 |
- grnfile <- paste(filenames[1], fileExt$green, sep=sep) |
|
492 |
- if(!file.exists(grnfile)){ |
|
493 |
- stop(paste(grnfile, " does not exist. Check fileExt argument")) |
|
494 |
- } |
|
495 |
- G <- readIDAT(grnfile) |
|
496 |
- idsG = rownames(G$Quants) |
|
497 |
- nr <- length(idsG) |
|
498 |
- fD <- new("AnnotatedDataFrame", data=data.frame(row.names=idsG))##, varMetadata=data.frame(labelDescript |
|
499 |
- nr <- nrow(fD) |
|
500 |
- dns <- list(featureNames(fD), basename(filenames)) |
|
501 |
- RG <- new("NChannelSet", |
|
502 |
- R=initializeBigMatrix(name="R", nr=nr, nc=length(filenames)), |
|
503 |
- G=initializeBigMatrix(name="G", nr=nr, nc=length(filenames)), |
|
504 |
- zero=initializeBigMatrix(name="zero", nr=nr, nc=length(filenames)), |
|
505 |
- featureData=fD, |
|
506 |
- annotation=cdfName) |
|
507 |
- phenoData(RG) <- getPhenoData(sampleSheet=sampleSheet, arrayNames=filenames, |
|
508 |
- arrayInfoColNames=arrayInfoColNames) |
|
509 |
-## pD <- data.frame(matrix(NA, length(sampleNames(RG)), 12), row.names=sampleNames(RG)) |
|
510 |
-## colnames(pD) <- c("Index","HapMap.Name","Name","ID", |
|
511 |
-## "Gender", "Plate", "Well", "Group", "Parent1", |
|
512 |
-## "Parent2","Replicate","SentrixPosition") |
|
513 |
- ##phenoData(RG) <- new("AnnotatedDataFrame", data=pD) |
|
514 |
- pD <- data.frame(matrix(NA, length(sampleNames(RG)), 1), row.names=sampleNames(RG)) |
|
515 |
- colnames(pD) <- "ScanDate" |
|
516 |
- protocolData(RG) <- new("AnnotatedDataFrame", data=pD) |
|
517 |
- sampleNames(RG) <- basename(filenames) |
|
518 |
- storageMode(RG) <- "environment" |
|
519 |
- RG##featureData=ops$illuminaOpts[["featureData"]]) |
|
520 |
-} |
|
521 |
-crlmmIlluminaRS <- function(sampleSheet=NULL, |
|
522 |
- arrayNames=NULL, |
|
523 |
- batch, |
|
524 |
- ids=NULL, |
|
525 |
- path=".", |
|
526 |
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
527 |
- highDensity=FALSE, |
|
528 |
- sep="_", |
|
529 |
- fileExt=list(green="Grn.idat", red="Red.idat"), |
|
530 |
- stripNorm=TRUE, |
|
531 |
- useTarget=TRUE, |
|
532 |
- row.names=TRUE, |
|
533 |
- col.names=TRUE, |
|
534 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
535 |
- seed=1, save.ab=FALSE, snpFile, cnFile, |
|
536 |
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
537 |
- cdfName, sns, recallMin=10, recallRegMin=1000, |
|
538 |
- returnParams=FALSE, badSNP=.7, |
|
539 |
- copynumber=FALSE, |
|
540 |
- load.it=TRUE) { |
|
541 |
- if(missing(cdfName)) stop("must specify cdfName") |
|
542 |
- if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
|
543 |
- if(missing(sns)) sns <- basename(arrayNames) |
|
544 |
- if(missing(batch)){ |
|
545 |
- warning("The batch variable is not specified. The scan date of the array will be used as a surrogate for batch. The batch variable does not affect the preprocessing or genotyping, but is important for copy number estimation.") |
|
546 |
- } else { |
|
547 |
- if(length(batch) != length(sns)) |
|
548 |
- stop("batch variable must be the same length as the filenames") |
|
549 |
- } |
|
550 |
- batches <- splitIndicesByLength(seq(along=arrayNames), ocSamples()) |
|
551 |
- k <- 1 |
|
552 |
- for(j in batches){ |
|
553 |
- if(verbose) message("Batch ", k, " of ", length(batches)) |
|
554 |
- RG <- readIdatFiles(sampleSheet=sampleSheet[j, ], |
|
555 |
- arrayNames=arrayNames[j], |
|
556 |
- ids=ids, |
|
557 |
- path=path, |
|
558 |
- arrayInfoColNames=arrayInfoColNames, |
|
559 |
- highDensity=highDensity, |
|
560 |
- sep=sep, |
|
561 |
- fileExt=fileExt, |
|
562 |
- saveDate=TRUE) |
|
563 |
- RG <- RGtoXY(RG, chipType=cdfName) |
|
564 |
- protocolData <- protocolData(RG) |
|
565 |
- res <- preprocessInfinium2(RG, |
|
566 |
- mixtureSampleSize=mixtureSampleSize, |
|
567 |
- fitMixture=TRUE, |
|
568 |
- verbose=verbose, |
|
569 |
- seed=seed, |
|
570 |
- eps=eps, |
|
571 |
- cdfName=cdfName, |
|
572 |
- sns=sns[j], |
|
573 |
- stripNorm=stripNorm, |
|
574 |
- useTarget=useTarget) |
|
575 |
- rm(RG); gc() |
|
576 |
- ## MR: number of rows should be number of SNPs + number of nonpolymorphic markers. |
|
577 |
- ## Here, I'm just using the # of rows returned from the above function |
|
578 |
- if(k == 1){ |
|
579 |
- if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability") |
|
580 |
- callSet <- new("SnpSuperSet", |
|
581 |
- alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)), |
|
582 |
- alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)), |
|
583 |
- call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)), |
|
584 |
- callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)), |
|
585 |
- annotation=cdfName) |
|
586 |
- sampleNames(callSet) <- sns |
|
587 |
- phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet, |
|
588 |
- arrayNames=sns, |
|
589 |
- arrayInfoColNames=arrayInfoColNames) |
|
590 |
- pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns) |
|
591 |
- colnames(pD) <- "ScanDate" |
|
592 |
- protocolData(callSet) <- new("AnnotatedDataFrame", data=pD) |
|
593 |
- pData(protocolData(callSet))[j, ] <- pData(protocolData) |
|
594 |
- featureNames(callSet) <- res[["gns"]] |
|
595 |
- pData(callSet)$SNR <- initializeBigVector("crlmmSNR-", length(sns), "double") |
|
596 |
- pData(callSet)$SKW <- initializeBigVector("crlmmSKW-", length(sns), "double") |
|
597 |
- ##pData(callSet)$SKW <- rep(NA, length(sns)) |
|
598 |
- ##pData(callSet)$SNR <- rep(NA, length(sns)) |
|
599 |
- pData(callSet)$gender <- rep(NA, length(sns)) |
|
600 |
- mixtureParams <- initializeBigMatrix("crlmmMixt-", nr=4, nc=ncol(callSet), vmode="double") |
|
601 |
- save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda")) |
|
602 |
- if(missing(batch)){ |
|
603 |
- protocolData(callSet)$batch <- rep(NA, length(sns)) |
|
604 |
- } else{ |
|
605 |
- protocolData(callSet)$batch <- batch |
|
606 |
- } |
|
607 |
- featureData(callSet) <- addFeatureAnnotation(callSet) |
|
608 |
- open(mixtureParams) |
|
609 |
- open(callSet$SNR) |
|
610 |
- open(callSet$SKW) |
|
611 |
- } |
|
612 |
- if(k > 1 & nrow(res[[1]]) != nrow(callSet)){ |
|
613 |
- ##RS: I don't understand why the IDATS for the |
|
614 |
- ##same platform potentially have different lengths |
|
615 |
- res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ] |
|
616 |
- res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ] |
|
617 |
- } |
|
618 |
- if(missing(batch)){ |
|
619 |
- protocolData(callSet)$batch[j] <- as.numeric(as.factor(protocolData$ScanDate)) |
|
620 |
- } |
|
621 |
- ## MR: we need to define a snp.index vs np.index |
|
622 |
- snp.index <- match(res$gns, featureNames(callSet)) |
|
623 |
- A(callSet)[snp.index, j] <- res[["A"]] |
|
624 |
- B(callSet)[snp.index, j] <- res[["B"]] |
|
625 |
- pData(callSet)$SKW[j] <- res$SKW |
|
626 |
- pData(callSet)$SNR[j] <- res$SNR |
|
627 |
- mixtureParams[, j] <- res$mixtureParams |
|
628 |
- rm(res); gc() |
|
629 |
- k <- k+1 |
|
630 |
- } |
|
631 |
- save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
632 |
- ##otherwise, A and B get overwritten |
|
633 |
- ##AA <- initializeBigMatrix("crlmmA", nrow(callSet), ncol(callSet), "integer") |
|
634 |
- ##BB <- initializeBigMatrix("crlmmB", nrow(callSet), ncol(callSet), "integer") |
|
635 |
- ##bb = ocProbesets()*ncol(A)*8 |
|
636 |
- AA <- clone(A(callSet)) |
|
637 |
- BB <- clone(B(callSet)) |
|
638 |
- ##ffrowapply(AA[i1:i2, ] <- A(callSet)[i1:i2, ], X=A(callSet), BATCHBYTES=bb) |
|
639 |
- ##ffrowapply(BB[i1:i2, ] <- B(callSet)[i1:i2, ], X=B(callSet), BATCHBYTES=bb) |
|
640 |
- ##crlmmGT2 overwrites A and B. |
|
641 |
- tmp <- crlmmGT2(A=A(callSet), |
|
642 |
- B=B(callSet), |
|
643 |
- SNR=callSet$SNR, |
|
644 |
- mixtureParams=mixtureParams, |
|
645 |
- cdfName=annotation(callSet), |
|
646 |
- row.names=featureNames(callSet), |
|
647 |
- col.names=sampleNames(callSet), |
|
648 |
- probs=probs, |
|
649 |
- DF=DF, |
|
650 |
- SNRMin=SNRMin, |
|
651 |
- recallMin=recallMin, |
|
652 |
- recallRegMin=recallRegMin, |
|
653 |
- gender=gender, |
|
654 |
- verbose=verbose, |
|
655 |
- returnParams=returnParams, |
|
656 |
- badSNP=badSNP) |
|
657 |
- open(tmp[["calls"]]) |
|
658 |
- open(tmp[["confs"]]) |
|
659 |
- A(callSet) <- AA |
|
660 |
- B(callSet) <- BB |
|
661 |
- snpCall(callSet) <- tmp[["calls"]] |
|
662 |
- ## MR: many zeros in the conf. scores (?) |
|
663 |
- snpCallProbability(callSet) <- tmp[["confs"]] |
|
664 |
- callSet$gender <- tmp$gender |
|
665 |
- if(copynumber) cnSet <- as(callSet, "CNSetLM") |
|
666 |
- close(mixtureParams) |
|
667 |
- rm(tmp); gc() |
|
668 |
- return(cnSet) |
|
669 |
-} |
|
452 |
+##getPhenoData <- function(sampleSheet=NULL, arrayNames=NULL, |
|
453 |
+## arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A")){ |
|
454 |
+## if(!is.null(arrayNames)) { |
|
455 |
+## pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
456 |
+## } |
|
457 |
+## if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
|
458 |
+## if(is.null(arrayNames)){ |
|
459 |
+## ##arrayNames=NULL |
|
460 |
+## if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
461 |
+## barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
462 |
+## arrayNames=barcode |
|
463 |
+## } |
|
464 |
+## if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
465 |
+## position = sampleSheet[,arrayInfoColNames$position] |
|
466 |
+## if(is.null(arrayNames)) |
|
467 |
+## arrayNames=position |
|
468 |
+## else |
|
469 |
+## arrayNames = paste(arrayNames, position, sep=sep) |
|
470 |
+## if(highDensity) { |
|
471 |
+## hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
472 |
+## for(i in names(hdExt)) |
|
473 |
+## arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
474 |
+## } |
|
475 |
+## } |
|
476 |
+## } |
|
477 |
+## pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
478 |
+## sampleNames(pd) <- basename(arrayNames) |
|
479 |
+## } |
|
480 |
+## if(is.null(arrayNames)) { |
|
481 |
+## arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
482 |
+## if(!is.null(sampleSheet)) { |
|
483 |
+## sampleSheet=NULL |
|
484 |
+## cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
485 |
+## } |
|
486 |
+## pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
487 |
+## } |
|
488 |
+## return(pd) |
|
489 |
+##} |
|
490 |
+##constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep, sampleSheet, arrayInfoColNames){ |
|
491 |
+## if(verbose) message("reading first idat file to extract feature data") |
|
492 |
+## grnfile <- paste(filenames[1], fileExt$green, sep=sep) |
|
493 |
+## if(!file.exists(grnfile)){ |
|
494 |
+## stop(paste(grnfile, " does not exist. Check fileExt argument")) |
|
495 |
+## } |
|
496 |
+## G <- readIDAT(grnfile) |
|
497 |
+## idsG = rownames(G$Quants) |
|
498 |
+## nr <- length(idsG) |
|
499 |
+## fD <- new("AnnotatedDataFrame", data=data.frame(row.names=idsG))##, varMetadata=data.frame(labelDescript |
|
500 |
+## nr <- nrow(fD) |
|
501 |
+## dns <- list(featureNames(fD), basename(filenames)) |
|
502 |
+## RG <- new("NChannelSet", |
|
503 |
+## R=initializeBigMatrix(name="R", nr=nr, nc=length(filenames)), |
|
504 |
+## G=initializeBigMatrix(name="G", nr=nr, nc=length(filenames)), |
|
505 |
+## zero=initializeBigMatrix(name="zero", nr=nr, nc=length(filenames)), |
|
506 |
+## featureData=fD, |
|
507 |
+## annotation=cdfName) |
|
508 |
+## phenoData(RG) <- getPhenoData(sampleSheet=sampleSheet, arrayNames=filenames, |
|
509 |
+## arrayInfoColNames=arrayInfoColNames) |
|
510 |
+#### pD <- data.frame(matrix(NA, length(sampleNames(RG)), 12), row.names=sampleNames(RG)) |
|
511 |
+#### colnames(pD) <- c("Index","HapMap.Name","Name","ID", |
|
512 |
+#### "Gender", "Plate", "Well", "Group", "Parent1", |
|
513 |
+#### "Parent2","Replicate","SentrixPosition") |
|
514 |
+## ##phenoData(RG) <- new("AnnotatedDataFrame", data=pD) |
|
515 |
+## pD <- data.frame(matrix(NA, length(sampleNames(RG)), 1), row.names=sampleNames(RG)) |
|
516 |
+## colnames(pD) <- "ScanDate" |
|
517 |
+## protocolData(RG) <- new("AnnotatedDataFrame", data=pD) |
|
518 |
+## sampleNames(RG) <- basename(filenames) |
|
519 |
+## storageMode(RG) <- "environment" |
|
520 |
+## RG##featureData=ops$illuminaOpts[["featureData"]]) |
|
521 |
+##} |
|
522 |
+##crlmmIlluminaRS <- function(sampleSheet=NULL, |
|
523 |
+## arrayNames=NULL, |
|
524 |
+## batch, |
|
525 |
+## ids=NULL, |
|
526 |
+## path=".", |
|
527 |
+## arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
528 |
+## highDensity=FALSE, |
|
529 |
+## sep="_", |
|
530 |
+## fileExt=list(green="Grn.idat", red="Red.idat"), |
|
531 |
+## stripNorm=TRUE, |
|
532 |
+## useTarget=TRUE, |
|
533 |
+## row.names=TRUE, |
|
534 |
+## col.names=TRUE, |
|
535 |
+## probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
536 |
+## seed=1, save.ab=FALSE, snpFile, cnFile, |
|
537 |
+## mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
538 |
+## cdfName, sns, recallMin=10, recallRegMin=1000, |
|
539 |
+## returnParams=FALSE, badSNP=.7, |
|
540 |
+## copynumber=FALSE, |
|
541 |
+## load.it=TRUE) { |
|
542 |
+## if(missing(cdfName)) stop("must specify cdfName") |
|
543 |
+## if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
|
544 |
+## if(missing(sns)) sns <- basename(arrayNames) |
|
545 |
+## if(missing(batch)){ |
|
546 |
+## warning("The batch variable is not specified. The scan date of the array will be used as a surrogate for batch. The batch variable does not affect the preprocessing or genotyping, but is important for copy number estimation.") |
|
547 |
+## } else { |
|
548 |
+## if(length(batch) != length(sns)) |
|
549 |
+## stop("batch variable must be the same length as the filenames") |
|
550 |
+## } |
|
551 |
+## batches <- splitIndicesByLength(seq(along=arrayNames), ocSamples()) |
|
552 |
+## k <- 1 |
|
553 |
+## for(j in batches){ |
|
554 |
+## if(verbose) message("Batch ", k, " of ", length(batches)) |
|
555 |
+## RG <- readIdatFiles(sampleSheet=sampleSheet[j, ], |
|
556 |
+## arrayNames=arrayNames[j], |
|
557 |
+## ids=ids, |
|
558 |
+## path=path, |
|
559 |
+## arrayInfoColNames=arrayInfoColNames, |
|
560 |
+## highDensity=highDensity, |
|
561 |
+## sep=sep, |
|
562 |
+## fileExt=fileExt, |
|
563 |
+## saveDate=TRUE) |
|
564 |
+## RG <- RGtoXY(RG, chipType=cdfName) |
|
565 |
+## protocolData <- protocolData(RG) |
|
566 |
+## res <- preprocessInfinium2(RG, |
|
567 |
+## mixtureSampleSize=mixtureSampleSize, |
|
568 |
+## fitMixture=TRUE, |
|
569 |
+## verbose=verbose, |
|
570 |
+## seed=seed, |
|
571 |
+## eps=eps, |
|
572 |
+## cdfName=cdfName, |
|
573 |
+## sns=sns[j], |
|
574 |
+## stripNorm=stripNorm, |
|
575 |
+## useTarget=useTarget) |
|
576 |
+## rm(RG); gc() |
|
577 |
+## ## MR: number of rows should be number of SNPs + number of nonpolymorphic markers. |
|
578 |
+## ## Here, I'm just using the # of rows returned from the above function |
|
579 |
+## if(k == 1){ |
|
580 |
+## if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability") |
|
581 |
+## callSet <- new("SnpSuperSet", |
|
582 |
+## alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)), |
|
583 |
+## alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)), |
|
584 |
+## call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)), |
|
585 |
+## callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)), |
|
586 |
+## annotation=cdfName) |
|
587 |
+## sampleNames(callSet) <- sns |
|
588 |
+## phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet, |
|
589 |
+## arrayNames=sns, |
|
590 |
+## arrayInfoColNames=arrayInfoColNames) |
|
591 |
+## pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns) |
|
592 |
+## colnames(pD) <- "ScanDate" |
|
593 |
+## protocolData(callSet) <- new("AnnotatedDataFrame", data=pD) |
|
594 |
+## pData(protocolData(callSet))[j, ] <- pData(protocolData) |
|
595 |
+## featureNames(callSet) <- res[["gns"]] |
|
596 |
+## pData(callSet)$SNR <- initializeBigVector("crlmmSNR-", length(sns), "double") |
|
597 |
+## pData(callSet)$SKW <- initializeBigVector("crlmmSKW-", length(sns), "double") |
|
598 |
+## ##pData(callSet)$SKW <- rep(NA, length(sns)) |
|
599 |
+## ##pData(callSet)$SNR <- rep(NA, length(sns)) |
|
600 |
+## pData(callSet)$gender <- rep(NA, length(sns)) |
|
601 |
+## mixtureParams <- initializeBigMatrix("crlmmMixt-", nr=4, nc=ncol(callSet), vmode="double") |
|
602 |
+## save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda")) |
|
603 |
+## if(missing(batch)){ |
|
604 |
+## protocolData(callSet)$batch <- rep(NA, length(sns)) |
|
605 |
+## } else{ |
|
606 |
+## protocolData(callSet)$batch <- batch |
|
607 |
+## } |
|
608 |
+## featureData(callSet) <- addFeatureAnnotation(callSet) |
|
609 |
+## open(mixtureParams) |
|
610 |
+## open(callSet$SNR) |
|
611 |
+## open(callSet$SKW) |
|
612 |
+## } |
|
613 |
+## if(k > 1 & nrow(res[[1]]) != nrow(callSet)){ |
|
614 |
+## ##RS: I don't understand why the IDATS for the |
|
615 |
+## ##same platform potentially have different lengths |
|
616 |
+## res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ] |
|
617 |
+## res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ] |
|
618 |
+## } |
|
619 |
+## if(missing(batch)){ |
|
620 |
+## protocolData(callSet)$batch[j] <- as.numeric(as.factor(protocolData$ScanDate)) |
|
621 |
+## } |
|
622 |
+## ## MR: we need to define a snp.index vs np.index |
|
623 |
+## snp.index <- match(res$gns, featureNames(callSet)) |
|
624 |
+## A(callSet)[snp.index, j] <- res[["A"]] |
|
625 |
+## B(callSet)[snp.index, j] <- res[["B"]] |
|
626 |
+## pData(callSet)$SKW[j] <- res$SKW |
|
627 |
+## pData(callSet)$SNR[j] <- res$SNR |
|
628 |
+## mixtureParams[, j] <- res$mixtureParams |
|
629 |
+## rm(res); gc() |
|
630 |
+## k <- k+1 |
|
631 |
+## } |
|
632 |
+## save(callSet, file=file.path(ldPath(), "callSet.rda")) |
|
633 |
+## ##otherwise, A and B get overwritten |
|
634 |
+## ##AA <- initializeBigMatrix("crlmmA", nrow(callSet), ncol(callSet), "integer") |
|
635 |
+## ##BB <- initializeBigMatrix("crlmmB", nrow(callSet), ncol(callSet), "integer") |
|
636 |
+## ##bb = ocProbesets()*ncol(A)*8 |
|
637 |
+## AA <- clone(A(callSet)) |
|
638 |
+## BB <- clone(B(callSet)) |
|
639 |
+## ##ffrowapply(AA[i1:i2, ] <- A(callSet)[i1:i2, ], X=A(callSet), BATCHBYTES=bb) |
|
640 |
+## ##ffrowapply(BB[i1:i2, ] <- B(callSet)[i1:i2, ], X=B(callSet), BATCHBYTES=bb) |
|
641 |
+## ##crlmmGT2 overwrites A and B. |
|
642 |
+## tmp <- crlmmGT2(A=A(callSet), |
|
643 |
+## B=B(callSet), |
|
644 |
+## SNR=callSet$SNR, |
|
645 |
+## mixtureParams=mixtureParams, |
|
646 |
+## cdfName=annotation(callSet), |
|
647 |
+## row.names=featureNames(callSet), |
|
648 |
+## col.names=sampleNames(callSet), |
|
649 |
+## probs=probs, |
|
650 |
+## DF=DF, |
|
651 |
+## SNRMin=SNRMin, |
|
652 |
+## recallMin=recallMin, |
|
653 |
+## recallRegMin=recallRegMin, |
|
654 |
+## gender=gender, |
|
655 |
+## verbose=verbose, |
|
656 |
+## returnParams=returnParams, |
|
657 |
+## badSNP=badSNP) |
|
658 |
+## open(tmp[["calls"]]) |
|
659 |
+## open(tmp[["confs"]]) |
|
660 |
+## A(callSet) <- AA |
|
661 |
+## B(callSet) <- BB |
|
662 |
+## snpCall(callSet) <- tmp[["calls"]] |
|
663 |
+## ## MR: many zeros in the conf. scores (?) |
|
664 |
+## snpCallProbability(callSet) <- tmp[["confs"]] |
|
665 |
+## callSet$gender <- tmp$gender |
|
666 |
+## if(copynumber) cnSet <- as(callSet, "CNSetLM") |
|
667 |
+## close(mixtureParams) |
|
668 |
+## rm(tmp); gc() |
|
669 |
+## return(cnSet) |
|
670 |
+##} |
|
670 | 671 |
##--------------------------------------------------------------------------- |
671 | 672 |
##--------------------------------------------------------------------------- |
672 | 673 |
|
... | ... |
@@ -1003,6 +1004,7 @@ crlmmCopynumber <- function(object, |
1003 | 1004 |
fvarLabels(tmp) <- gsub("_", ".", fvarLabels(tmp)) |
1004 | 1005 |
##fvarLabels(tmp) <- gsub("\\.[1-9]", "", fvarLabels(tmp)) |
1005 | 1006 |
if(ffIsLoaded){ |
1007 |
+ physical <- get("physical") |
|
1006 | 1008 |
fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% names(physical(lM(object)))] |
1007 | 1009 |
jj <- match(fvarLabels(tmp), names(lM(object))) |
1008 | 1010 |
lM(object)[row.index, jj] <- fData(tmp) |
... | ... |
@@ -1174,6 +1176,7 @@ fit.lm1 <- function(idxBatch, |
1174 | 1176 |
MIN.NU, |
1175 | 1177 |
MIN.PHI, |
1176 | 1178 |
verbose, ...){ |
1179 |
+ physical <- get("physical") |
|
1177 | 1180 |
if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches)) |
1178 | 1181 |
snps <- snpBatches[[idxBatch]] |
1179 | 1182 |
batches <- split(seq(along=batch(object)), batch(object)) |
... | ... |
@@ -1352,7 +1355,7 @@ fit.lm1 <- function(idxBatch, |
1352 | 1355 |
lM(object)$corrAA <- tmp |
1353 | 1356 |
tmp <- physical(lM(object))$corrBB |
1354 | 1357 |
tmp[snps, ] <- corrBB |
1355 |
- lM(object)$corrAB <- tmp |
|
1358 |
+ lM(object)$corrBB <- tmp |
|
1356 | 1359 |
|
1357 | 1360 |
lapply(assayData(object), close) |
1358 | 1361 |
lapply(lM(object), close) |
... | ... |
@@ -1376,7 +1379,7 @@ fit.lm2 <- function(idxBatch, |
1376 | 1379 |
MIN.NU, |
1377 | 1380 |
MIN.PHI, |
1378 | 1381 |
verbose, ...){ |
1379 |
- ## which.batches, ...){ |
|
1382 |
+ physical <- get("physical") |
|
1380 | 1383 |
if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches)) |
1381 | 1384 |
snps <- snpBatches[[idxBatch]] |
1382 | 1385 |
batches <- split(seq(along=batch(object)), batch(object)) |
... | ... |
@@ -1493,7 +1496,7 @@ fit.lm3 <- function(idxBatch, |
1493 | 1496 |
MIN.NU, |
1494 | 1497 |
MIN.PHI, |
1495 | 1498 |
verbose, ...){ |
1496 |
- ## which.batches, ...){ |
|
1499 |
+ physical <- get("physical") |
|
1497 | 1500 |
if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches)) |
1498 | 1501 |
snps <- snpBatches[[idxBatch]] |
1499 | 1502 |
batches <- split(seq(along=batch(object)), batch(object)) |
... | ... |
@@ -1708,17 +1711,7 @@ fit.lm3 <- function(idxBatch, |
1708 | 1711 |
lM(object)$corrAA <- tmp |
1709 | 1712 |
tmp <- physical(lM(object))$corrBB |
1710 | 1713 |
tmp[snps, ] <- corrBB |
1711 |
- lM(object)$corrAB <- tmp |
|
1712 |
-## lM(object)$tau2A[snps, ] <- tau2A |
|
1713 |
-## lM(object)$tau2B[snps, ] <- tau2B |
|
1714 |
-## lM(object)$sig2A[snps, ] <- sig2A |
|
1715 |
-## lM(object)$sig2B[snps, ] <- sig2B |
|
1716 |
-## lM(object)$nuA[snps, ] <- nuA |
|
1717 |
-## lM(object)$nuB[snps, ] <- nuB |
|
1718 |
-## lM(object)$phiA[snps, ] <- phiA |
|
1719 |
-## lM(object)$phiB[snps, ] <- phiB |
|
1720 |
-## lM(object)$phiPrimeA[snps, ] <- phiA2 |
|
1721 |
-## lM(object)$phiPrimeB[snps, ] <- phiB2 |
|
1714 |
+ lM(object)$corrBB <- tmp |
|
1722 | 1715 |
lapply(assayData(object), close) |
1723 | 1716 |
lapply(lM(object), close) |
1724 | 1717 |
TRUE |
... | ... |
@@ -1741,6 +1734,7 @@ fit.lm4 <- function(idxBatch, |
1741 | 1734 |
MIN.NU, |
1742 | 1735 |
MIN.PHI, |
1743 | 1736 |
verbose, ...){ |
1737 |
+ physical <- get("physical") |
|
1744 | 1738 |
if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches)) |
1745 | 1739 |
open(object) |
1746 | 1740 |
open(normal) |
... | ... |
@@ -2791,132 +2785,141 @@ biasAdj <- function(object, cnOptions, tmp.objects){ |
2791 | 2785 |
} |
2792 | 2786 |
|
2793 | 2787 |
|
2794 |
-bias1 <- function(idxBatch, |
|
2795 |
- snpBatches, |
|
2796 |
- index, |
|
2797 |
- object, |
|
2798 |
- normal, |
|
2799 |
- emit, |
|
2800 |
- prior.prob, |
|
2801 |
- MIN.SAMPLES, |
|
2802 |
- verbose){ |
|
2803 |
- |
|
2804 |
-} |
|
2805 |
- |
|
2806 |
-bias2 <- function(idxBatch, |
|
2807 |
- snpBatches, |
|
2808 |
- index, |
|
2809 |
- object, |
|
2810 |
- normal, |
|
2811 |
- prior.prob, |
|
2812 |
- MIN.SAMPLES, |
|
2813 |
- verbose){ |
|
2814 |
- open(object) |
|
2815 |
- open(normal) |
|
2816 |
- |
|
2817 |
- nps <- snpBatches[[idxBatch]] |
|
2818 |
- nuA <- lM(object)$nuA[nps, , drop=FALSE] |
|
2819 |
- phiA <- lM(object)$phiA[nps, , drop=FALSE] |
|
2820 |
- sig2A <- lM(object)$sig2A[nps, , drop=FALSE] |
|
2821 |
- AA <- as.matrix(A(object)[nps, ]) |
|
2822 |
- batches <- split(seq(along=batch(object)), batch(object)) |
|
2823 |
- batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
2824 |
- |
|
2825 |
- cn.lik <- matrix(NA, length(nps)*ncol(object), 4) |
|
2826 |
- argmax.cn <- emit[nps, ] |
|
2827 |
- norm <- matrix(1L, length(nps), ncol(object)) |
|
2828 |
- |
|
2829 |
- for(k in batches){ |
|
2830 |
- J <- match(unique(batch(object)[k]), unique(batch(object))) |
|
2831 |
- lT <- log2(AA[, k]) |
|
2832 |
- counter <- 1 ##state counter |
|
2833 |
- for(CT in c(0, 1.5, 2, 2.5)){ |
|
2834 |
- ##sds <- sqrt(sig2A[, J]*(CT==0) + sig2A[ , J]*(CT > 0)) |
|
2835 |
- sds <- sqrt(sig2A[, J]) |
|
2836 |
- means <- suppressWarnings(log2(nuA[, J]+CT*phiA[, J])) |
|
2837 |
- lik <- log(dnorm(lT, mean=means, sd=sds)) |
|
2838 |
- ##emit[[counter]][nps, ] <- tmp |
|
2839 |
- cn.lik[, counter] <- as.numeric(lik) |
|
2840 |
- counter <- counter+1 |
|
2841 |
- } |
|
2842 |
- outlier <- matrix(rowSums(cn.lik < -10) == 4, length(nps), ncol(object)) |
|
2843 |
- argmax.cn.lik <- apply(cn.lik, 1, function(x) order(x, decreasing=TRUE)[1]) |
|
2844 |
- argmax.cn <- matrix(argmax.cn.lik, length(nps), length(k)) |
|
2845 |
- |
|
2846 |
- isUp <- argmax.cn > 3 |
|
2847 |
- prUp <- rowMeans(isUp) |
|
2848 |
- |
|
2849 |
- isDn <- argmax.cn < 3 |
|
2850 |
- prDn <- rowMeans(isDn) |
|
2851 |
- |
|
2852 |
- index <- which(prUp > 0.05 & prUp > prDn) |
|
2853 |
- ##if proportion up greater than 5%, trim the high cn est. |
|
2854 |
- norm[index, k] <- argmax.cn[index, ] > 3 |
|
2855 |
- |
|
2856 |
- index <- which(prDn > 0.05 & prDn > prUp) |
|
2857 |
- norm[index, k] <- argmax.cn[index, ] < 3 |
|
2858 |
- norm[index, k] <- norm[index, k]*!outlier |
|
2859 |
- } |
|
2860 |
- normal[nps, ] <- norm |
|
2861 |
- TRUE |
|
2862 |
-} |
|
2863 |
- |
|
2788 |
+##bias1 <- function(idxBatch, |
|
2789 |
+## snpBatches, |
|
2790 |
+## index, |
|
2791 |
+## object, |
|
2792 |
+## normal, |
|
2793 |
+## emit, |
|
2794 |
+## prior.prob, |
|
2795 |
+## MIN.SAMPLES, |
|
2796 |
+## verbose){ |
|
2797 |
+## |
|
2798 |
+##} |
|
2864 | 2799 |
|
2865 |
-biasAdjust <- function(object, prior.prob=rep(1/4, 4), MIN.SAMPLES=10, verbose=TRUE){ |
|
2866 |
- load(file.path(ldPath(), "normal.rda")) |
|
2867 |
- autosomeIndex.nps <- (1:nrow(object))[chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object))] |
|
2800 |
+##bias2 <- function(idxBatch, |
|
2801 |
+## snpBatches, |
|
2802 |
+## index, |
|
2803 |
+## object, |
|
2804 |
+## normal, |
|
2805 |
+## prior.prob, |
|
2806 |
+## MIN.SAMPLES, |
|
2807 |
+## verbose){ |
|
2808 |
+## open(object) |
|
2809 |
+## open(normal) |
|
2810 |
+## |
|
2811 |
+## nps <- snpBatches[[idxBatch]] |
|
2812 |
+## nuA <- lM(object)$nuA[nps, , drop=FALSE] |
|
2813 |
+## phiA <- lM(object)$phiA[nps, , drop=FALSE] |
|
2814 |
+## sig2A <- lM(object)$sig2A[nps, , drop=FALSE] |
|
2815 |
+## AA <- as.matrix(A(object)[nps, ]) |
|
2816 |
+## batches <- split(seq(along=batch(object)), batch(object)) |
|
2817 |
+## batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
|
2818 |
+## |
|
2819 |
+## cn.lik <- matrix(NA, length(nps)*ncol(object), 4) |
|
2820 |
+## argmax.cn <- emit[nps, ] |
|
2821 |
+## norm <- matrix(1L, length(nps), ncol(object)) |
|
2822 |
+## |
|
2823 |
+## for(k in batches){ |
|
2824 |
+## J <- match(unique(batch(object)[k]), unique(batch(object))) |
|
2825 |
+## lT <- log2(AA[, k]) |
|
2826 |
+## counter <- 1 ##state counter |
|
2827 |
+## for(CT in c(0, 1.5, 2, 2.5)){ |
|
2828 |
+## ##sds <- sqrt(sig2A[, J]*(CT==0) + sig2A[ , J]*(CT > 0)) |
|
2829 |
+## sds <- sqrt(sig2A[, J]) |
|
2830 |
+## means <- suppressWarnings(log2(nuA[, J]+CT*phiA[, J])) |
|
2831 |
+## lik <- log(dnorm(lT, mean=means, sd=sds)) |
|
2832 |
+## ##emit[[counter]][nps, ] <- tmp |
|
2833 |
+## cn.lik[, counter] <- as.numeric(lik) |
|
2834 |
+## counter <- counter+1 |
|
2835 |
+## } |
|
2836 |
+## outlier <- matrix(rowSums(cn.lik < -10) == 4, length(nps), ncol(object)) |
|
2837 |
+## argmax.cn.lik <- apply(cn.lik, 1, function(x) order(x, decreasing=TRUE)[1]) |
|
2838 |
+## argmax.cn <- matrix(argmax.cn.lik, length(nps), length(k)) |
|
2839 |
+## |
|
2840 |
+## isUp <- argmax.cn > 3 |
|
2841 |
+## prUp <- rowMeans(isUp) |
|
2842 |
+## |
|
2843 |
+## isDn <- argmax.cn < 3 |
|
2844 |
+## prDn <- rowMeans(isDn) |
|
2845 |
+## |
|
2846 |
+## index <- which(prUp > 0.05 & prUp > prDn) |
|
2847 |
+## ##if proportion up greater than 5%, trim the high cn est. |
|
2848 |
+## norm[index, k] <- argmax.cn[index, ] > 3 |
|
2849 |
+## |
|
2850 |
+## index <- which(prDn > 0.05 & prDn > prUp) |
|
2851 |
+## norm[index, k] <- argmax.cn[index, ] < 3 |
|
2852 |
+## norm[index, k] <- norm[index, k]*!outlier |
|
2853 |
+## } |
|
2854 |
+## normal[nps, ] <- norm |
|
2855 |
+## TRUE |
|
2856 |
+##} |
|
2868 | 2857 |
|
2869 |
-## emit <- initializeBigMatrix("emit", |
|
2870 |
-## nrow(object), |
|
2871 |
-## ncol(object), |
|
2872 |
-## vmode="double") |
|
2873 |
- if(verbose) message("Bias adjustment for nonpolymorphic loci on chromosomes 1-22.") |
|
2874 |
- snpBatches <- splitIndicesByLength(autosomeIndex.nps, ocProbesets()) |
|
2875 |
- ocLapply(seq(along=snpBatches), |
|
2876 |
- bias2, |
|
2877 |
- index=autosomeIndex.nps, |
|
2878 |
- snpBatches=snpBatches, |
|
2879 |
- object=object, |
|
2880 |
- normal=normal, |
|
2881 |
- prior.prob=prior.prob, |
|
2882 |
- MIN.SAMPLES=MIN.SAMPLES, |
|
2883 |
- verbose=verbose) |
|
2884 | 2858 |
|
2885 |
- if(verbose) message("Bias adjustment for polymorphic loci on chromosomes 1-22.") |
|
2886 |
- autosomeIndex.snps <- (1:nrow(object))[chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object))] |
|
2887 |
- snpBatches <- splitIndicesByLength(autosomeIndex.snps, ocProbesets()) |
|
2888 |
- ocLapply(seq(along=snpBatches), |
|
2889 |
- bias1, |
|
2890 |
- index=autosomeIndex.snps, |
|
2891 |
- snpBatches=snpBatches, |
|
2892 |
- object=object, |
|
2893 |
- normal=normal, |
|
2894 |
- prior.prob=prior.prob, |
|
2895 |
- emit=emit, |
|
2896 |
- MIN.SAMPLES=MIN.SAMPLES, |
|
2897 |
- verbose=verbose) |
|
2898 |
-} |
|
2859 |
+##biasAdjust <- function(object, prior.prob=rep(1/4, 4), MIN.SAMPLES=10, verbose=TRUE){ |
|
2860 |
+## load(file.path(ldPath(), "normal.rda")) |
|
2861 |
+## autosomeIndex.nps <- (1:nrow(object))[chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object))] |
|
2862 |
+## |
|
2863 |
+#### emit <- initializeBigMatrix("emit", |
|
2864 |
+#### nrow(object), |
|
2865 |
+#### ncol(object), |
|
2866 |
+#### vmode="double") |
|
2867 |
+## if(verbose) message("Bias adjustment for nonpolymorphic loci on chromosomes 1-22.") |
|
2868 |
+## snpBatches <- splitIndicesByLength(autosomeIndex.nps, ocProbesets()) |
|
2869 |
+## ocLapply(seq(along=snpBatches), |
|
2870 |
+## bias2, |
|
2871 |
+## index=autosomeIndex.nps, |
|
2872 |
+## snpBatches=snpBatches, |
|
2873 |
+## object=object, |
|
2874 |
+## normal=normal, |
|
2875 |
+## prior.prob=prior.prob, |
|
2876 |
+## MIN.SAMPLES=MIN.SAMPLES, |
|
2877 |
+## verbose=verbose) |
|
2878 |
+## |
|
2879 |
+## if(verbose) message("Bias adjustment for polymorphic loci on chromosomes 1-22.") |
|
2880 |
+## autosomeIndex.snps <- (1:nrow(object))[chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object))] |
|
2881 |
+## snpBatches <- splitIndicesByLength(autosomeIndex.snps, ocProbesets()) |
|
2882 |
+## ocLapply(seq(along=snpBatches), |
|
2883 |
+## bias1, |
|
2884 |
+## index=autosomeIndex.snps, |
|
2885 |
+## snpBatches=snpBatches, |
|
2886 |
+## object=object, |
|
2887 |
+## normal=normal, |
|
2888 |
+## prior.prob=prior.prob, |
|
2889 |
+## emit=emit, |
|
2890 |
+## MIN.SAMPLES=MIN.SAMPLES, |
|
2891 |
+## verbose=verbose) |
|
2892 |
+##} |
|
2899 | 2893 |
|
2900 | 2894 |
|
2901 | 2895 |
##biasAdjNP <- function(plateIndex, envir, priorProb){ |
2902 |
-biasAdjNP <- function(object, cnOptions, tmp.objects){ |
|
2903 |
- ##batch <- unique(object$batch) |
|
2904 |
- batch <- unique(batch(object)) |
|
2905 |
- normalNP <- tmp.objects[["normal"]][!isSnp(object), ] |
|
2906 |
- CHR <- unique(chromosome(object)) |
|
2907 |
- A <- A(object)[!isSnp(object), ] |
|
2908 |
- sig2A <- getParam(object, "sig2A", batch) |
|
2909 |
- gender <- object$gender |
|
2910 |
- ##Assume that on the log-scale, that the background variance is the same... |
|
2911 |
- tau2A <- sig2A |
|
2912 |
- nuA <- getParam(object, "nuA", batch) |
|
2913 |
- phiA <- getParam(object, "phiA", batch) |
|
2914 |
- prior.prob <- cnOptions$prior.prob |
|
2915 |
- emit <- array(NA, dim=c(nrow(A), ncol(A), 4))##SNPs x sample x 'truth' |
|
2916 |
- lT <- log2(A) |
|
2917 |
- I <- isSnp(object) |
|
2918 |
- counter <- 1 ##state counter |
|
2919 |
-## for(CT in 0:3){ |
|
2896 |
+##biasAdjNP <- function(object, cnOptions, tmp.objects){ |
|
2897 |
+## ##batch <- unique(object$batch) |
|
2898 |
+## batch <- unique(batch(object)) |
|
2899 |
+## normalNP <- tmp.objects[["normal"]][!isSnp(object), ] |
|
2900 |
+## CHR <- unique(chromosome(object)) |
|
2901 |
+## A <- A(object)[!isSnp(object), ] |
|
2902 |
+## sig2A <- getParam(object, "sig2A", batch) |
|
2903 |
+## gender <- object$gender |
|
2904 |
+## ##Assume that on the log-scale, that the background variance is the same... |
|
2905 |
+## tau2A <- sig2A |
|
2906 |
+## nuA <- getParam(object, "nuA", batch) |
|
2907 |
+## phiA <- getParam(object, "phiA", batch) |
|
2908 |
+## prior.prob <- cnOptions$prior.prob |
|
2909 |
+## emit <- array(NA, dim=c(nrow(A), ncol(A), 4))##SNPs x sample x 'truth' |
|
2910 |
+## lT <- log2(A) |
|
2911 |
+## I <- isSnp(object) |
|
2912 |
+## counter <- 1 ##state counter |
|
2913 |
+#### for(CT in 0:3){ |
|
2914 |
+#### sds <- sqrt(tau2A[I]*(CT==0) + sig2A[I]*(CT > 0)) |
|
2915 |
+#### means <- suppressWarnings(log2(nuA[I]+CT*phiA[I])) |
|
2916 |
+#### tmp <- dnorm(lT, mean=means, sd=sds) |
|
2917 |
+#### emit[, , counter] <- tmp |
|
2918 |
+#### counter <- counter+1 |
|
2919 |
+#### } |
|
2920 |
+#### mostLikelyState <- apply(emit, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
|
2921 |
+## counter <- 1 |
|
2922 |
+## for(CT in c(0,1,2,2.5)){ |
|
2920 | 2923 |
## sds <- sqrt(tau2A[I]*(CT==0) + sig2A[I]*(CT > 0)) |
2921 | 2924 |
## means <- suppressWarnings(log2(nuA[I]+CT*phiA[I])) |
2922 | 2925 |
## tmp <- dnorm(lT, mean=means, sd=sds) |
... | ... |
@@ -2924,40 +2927,31 @@ biasAdjNP <- function(object, cnOptions, tmp.objects){ |
2924 | 2927 |
## counter <- counter+1 |
2925 | 2928 |
## } |
2926 | 2929 |
## mostLikelyState <- apply(emit, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
2927 |
- counter <- 1 |
|
2928 |
- for(CT in c(0,1,2,2.5)){ |
|
2929 |
- sds <- sqrt(tau2A[I]*(CT==0) + sig2A[I]*(CT > 0)) |
|
2930 |
- means <- suppressWarnings(log2(nuA[I]+CT*phiA[I])) |
|
2931 |
- tmp <- dnorm(lT, mean=means, sd=sds) |
|
2932 |
- emit[, , counter] <- tmp |
|
2933 |
- counter <- counter+1 |
|
2934 |
- } |
|
2935 |
- mostLikelyState <- apply(emit, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
|
2936 |
- |
|
2937 |
- if(CHR == 23){ |
|
2938 |
- ## the state index for male on chromosome 23 is 2 |
|
2939 |
- ## add 1 so that the state index is 3 for 'normal' state |
|
2940 |
- mostLikelyState[, gender=="male"] <- mostLikelyState[, gender==1] + 1 |
|
2941 |
- } |
|
2942 |
- tmp3 <- mostLikelyState != 3 |
|
2943 |
- ##Those near 1 have NaNs for nu and phi. this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome |
|
2944 |
- proportionSamplesAltered <- rowMeans(tmp3)##prop normal |
|
2945 |
- ii <- proportionSamplesAltered < 0.75 |
|
2946 |
- moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) |
|
2947 |
- notUp <- mostLikelyState[ii & moreup, ] <= 3 |
|
2948 |
- notDown <- mostLikelyState[ii & !moreup, ] >= 3 |
|
2949 |
- NORM <- matrix(TRUE, nrow(A), ncol(A)) |
|
2950 |
- NORM[ii & moreup, ] <- notUp |
|
2951 |
- NORM[ii & !moreup, ] <- notDown |
|
2952 |
- normalNP <- normalNP*NORM |
|
2953 |
- |
|
2954 |
- ##flagAltered <- which(proportionSamplesAltered > 0.5) |
|
2955 |
- ##envir[["flagAlteredNP"]] <- flagAltered |
|
2956 |
- normal <- tmp.objects[["normal"]] |
|
2957 |
- normal[!isSnp(object), ] <- normalNP |
|
2958 |
- tmp.objects[["normal"]] <- normal |
|
2959 |
- return(tmp.objects) |
|
2960 |
-} |
|
2930 |
+## |
|
2931 |
+## if(CHR == 23){ |
|
2932 |
+## ## the state index for male on chromosome 23 is 2 |
|
2933 |
+## ## add 1 so that the state index is 3 for 'normal' state |
|
2934 |
+## mostLikelyState[, gender=="male"] <- mostLikelyState[, gender==1] + 1 |
|
2935 |
+## } |
|
2936 |
+## tmp3 <- mostLikelyState != 3 |
|
2937 |
+## ##Those near 1 have NaNs for nu and phi. this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome |
|
2938 |
+## proportionSamplesAltered <- rowMeans(tmp3)##prop normal |
|
2939 |
+## ii <- proportionSamplesAltered < 0.75 |
|
2940 |
+## moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) |
|
2941 |
+## notUp <- mostLikelyState[ii & moreup, ] <= 3 |
|
2942 |
+## notDown <- mostLikelyState[ii & !moreup, ] >= 3 |
|
2943 |
+## NORM <- matrix(TRUE, nrow(A), ncol(A)) |
|
2944 |
+## NORM[ii & moreup, ] <- notUp |
|
2945 |
+## NORM[ii & !moreup, ] <- notDown |
|
2946 |
+## normalNP <- normalNP*NORM |
|
2947 |
+## |
|
2948 |
+## ##flagAltered <- which(proportionSamplesAltered > 0.5) |
|
2949 |
+## ##envir[["flagAlteredNP"]] <- flagAltered |
|
2950 |
+## normal <- tmp.objects[["normal"]] |
|
2951 |
+## normal[!isSnp(object), ] <- normalNP |
|
2952 |
+## tmp.objects[["normal"]] <- normal |
|
2953 |
+## return(tmp.objects) |
|
2954 |
+##} |
|
2961 | 2955 |
|
2962 | 2956 |
|
2963 | 2957 |
getParams <- function(object, batch){ |
... | ... |
@@ -3044,74 +3038,74 @@ thresholdModelParams <- function(object, cnOptions){ |
3044 | 3038 |
##} |
3045 | 3039 |
|
3046 | 3040 |
|
3047 |
-computeCopynumber.CNSet <- function(object, cnOptions){ |
|
3048 |
- ##PLATE <- unique(object$batch) |
|
3049 |
- PLATE <- unique(batch(object)) |
|
3050 |
- verbose <- cnOptions$verbose |
|
3051 |
- tmp.objects <- instantiateObjects(object, cnOptions) |
|
3052 |
- bias.adj <- cnOptions$bias.adj |
|
3053 |
- if(bias.adj & ncol(object) <= 15){ |
|
3054 |
- warning(paste("bias.adj is TRUE, but too few samples to perform this step")) |
|
3055 |
- cnOptions$bias.adj <- bias.adj <- FALSE |
|
3056 |
- } |
|
3057 |
- if(bias.adj){ |
|
3058 |
- if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)") |
|
3059 |
- tmp.objects <- biasAdjNP(object, cnOptions, tmp.objects) |
|
3060 |
- tmp.objects <- biasAdj(object, cnOptions, tmp.objects) |
|
3061 |
- if(verbose) message("Recomputing location and scale parameters") |
|
3062 |
- } |
|
3063 |
- ##update tmp.objects |
|
3064 |
- tmp.objects <- withinGenotypeMoments(object, |
|
3065 |
- cnOptions=cnOptions, |
|
3066 |
- tmp.objects=tmp.objects) |
|
3067 |
- object <- locationAndScale(object, cnOptions, tmp.objects) |
|
3068 |
- tmp.objects <- oneBatch(object, |
|
3069 |
- cnOptions=cnOptions, |
|
3070 |
- tmp.objects=tmp.objects) |
|
3071 |
- ##coefs calls nuphiAllele. |
|
3072 |
- object <- coefs(object, cnOptions, tmp.objects) |
|
3073 |
- ##nuA=getParam(object, "nuA", PLATE) |
|
3074 |
- THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
3075 |
- if(THR.NU.PHI){ |
|
3076 |
- verbose <- cnOptions$verbose |
|
3077 |
- ##if(verbose) message("Thresholding nu and phi") |
|
3078 |
- object <- thresholdModelParams(object, cnOptions) |
|
3079 |
- } |
|
3080 |
- ##if(verbose) message("\nAllele specific copy number") |
|
3081 |
- object <- polymorphic(object, cnOptions, tmp.objects) |
|
3082 |
- if(any(!isSnp(object))){ ## there are nonpolymorphic probes |
|
3083 |
- ##if(verbose) message("\nCopy number for nonpolymorphic probes...") |
|
3084 |
- object <- nonpolymorphic(object, cnOptions, tmp.objects) |
|
3085 |
- } |
|
3086 |
- ##--------------------------------------------------------------------------- |
|
3087 |
- ##Note: the replacement method multiples by 100 |
|
3088 |
-## CA(object)[, batch==PLATE] <- CA(object) |
|
3089 |
-## CB(object)[, batch==PLATE] <- CB(object) |
|
3090 |
- ##--------------------------------------------------------------------------- |
|
3091 |
- ##update-the plate-specific parameters for copy number |
|
3092 |
- object <- pr(object, "nuA", PLATE, getParam(object, "nuA", PLATE)) |
|
3093 |
- object <- pr(object, "nuA.se", PLATE, getParam(object, "nuA.se", PLATE)) |
|
3094 |
- object <- pr(object, "nuB", PLATE, getParam(object, "nuB", PLATE)) |
|
3095 |
- object <- pr(object, "nuB.se", PLATE, getParam(object, "nuB.se", PLATE)) |
|
3096 |
- object <- pr(object, "phiA", PLATE, getParam(object, "phiA", PLATE)) |
|
3097 |
- object <- pr(object, "phiA.se", PLATE, getParam(object, "phiA.se", PLATE)) |
|
3098 |
- object <- pr(object, "phiB", PLATE, getParam(object, "phiB", PLATE)) |
|
3099 |
- object <- pr(object, "phiB.se", PLATE, getParam(object, "phiB.se", PLATE)) |
|
3100 |
- object <- pr(object, "tau2A", PLATE, getParam(object, "tau2A", PLATE)) |
|
3101 |
- object <- pr(object, "tau2B", PLATE, getParam(object, "tau2B", PLATE)) |
|
3102 |
- object <- pr(object, "sig2A", PLATE, getParam(object, "sig2A", PLATE)) |
|
3103 |
- object <- pr(object, "sig2B", PLATE, getParam(object, "sig2B", PLATE)) |
|
3104 |
- object <- pr(object, "phiAX", PLATE, as.numeric(getParam(object, "phiAX", PLATE))) |
|
3105 |
- object <- pr(object, "phiBX", PLATE, as.numeric(getParam(object, "phiBX", PLATE))) |
|
3106 |
- object <- pr(object, "corr", PLATE, getParam(object, "corr", PLATE)) |
|
3107 |
- object <- pr(object, "corrA.BB", PLATE, getParam(object, "corrA.BB", PLATE)) |
|
3108 |
- object <- pr(object, "corrB.AA", PLATE, getParam(object, "corrB.AA", PLATE)) |
|
3109 |
- ##object <- object[order(chromosome(object), position(object)), ] |
|
3110 |
- if(cnOptions[["thresholdCopynumber"]]){ |
|
3111 |
- object <- thresholdCopynumber(object) |
|
3112 |
- } |
|
3113 |
- return(object) |
|
3114 |
-} |
|
3041 |
+##computeCopynumber.CNSet <- function(object, cnOptions){ |
|
3042 |
+## ##PLATE <- unique(object$batch) |
|
3043 |
+## PLATE <- unique(batch(object)) |
|
3044 |
+## verbose <- cnOptions$verbose |
|
3045 |
+## tmp.objects <- instantiateObjects(object, cnOptions) |
|
3046 |
+## bias.adj <- cnOptions$bias.adj |
|
3047 |
+## if(bias.adj & ncol(object) <= 15){ |
|
3048 |
+## warning(paste("bias.adj is TRUE, but too few samples to perform this step")) |
|
3049 |
+## cnOptions$bias.adj <- bias.adj <- FALSE |
|
3050 |
+## } |
|
3051 |
+## if(bias.adj){ |
|
3052 |
+## if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)") |
|
3053 |
+## tmp.objects <- biasAdjNP(object, cnOptions, tmp.objects) |
|
3054 |
+## tmp.objects <- biasAdj(object, cnOptions, tmp.objects) |
|
3055 |
+## if(verbose) message("Recomputing location and scale parameters") |
|
3056 |
+## } |
|
3057 |
+## ##update tmp.objects |
|
3058 |
+## tmp.objects <- withinGenotypeMoments(object, |
|
3059 |
+## cnOptions=cnOptions, |
|
3060 |
+## tmp.objects=tmp.objects) |
|
3061 |
+## object <- locationAndScale(object, cnOptions, tmp.objects) |
|
3062 |
+## tmp.objects <- oneBatch(object, |
|
3063 |
+## cnOptions=cnOptions, |
|
3064 |
+## tmp.objects=tmp.objects) |
|
3065 |
+## ##coefs calls nuphiAllele. |
|
3066 |
+## object <- coefs(object, cnOptions, tmp.objects) |
|
3067 |
+## ##nuA=getParam(object, "nuA", PLATE) |
|
3068 |
+## THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
3069 |
+## if(THR.NU.PHI){ |
|
3070 |
+## verbose <- cnOptions$verbose |
|
3071 |
+## ##if(verbose) message("Thresholding nu and phi") |
|
3072 |
+## object <- thresholdModelParams(object, cnOptions) |
|
3073 |
+## } |
|
3074 |
+## ##if(verbose) message("\nAllele specific copy number") |
|
3075 |
+## object <- polymorphic(object, cnOptions, tmp.objects) |
|
3076 |
+## if(any(!isSnp(object))){ ## there are nonpolymorphic probes |
|
3077 |
+## ##if(verbose) message("\nCopy number for nonpolymorphic probes...") |
|
3078 |
+## object <- nonpolymorphic(object, cnOptions, tmp.objects) |
|
3079 |
+## } |
|
3080 |
+## ##--------------------------------------------------------------------------- |
|
3081 |
+## ##Note: the replacement method multiples by 100 |
|
3082 |
+#### CA(object)[, batch==PLATE] <- CA(object) |
|
3083 |
+#### CB(object)[, batch==PLATE] <- CB(object) |
|
3084 |
+## ##--------------------------------------------------------------------------- |
|
3085 |
+## ##update-the plate-specific parameters for copy number |
|
3086 |
+## object <- pr(object, "nuA", PLATE, getParam(object, "nuA", PLATE)) |
|
3087 |
+## object <- pr(object, "nuA.se", PLATE, getParam(object, "nuA.se", PLATE)) |
|
3088 |
+## object <- pr(object, "nuB", PLATE, getParam(object, "nuB", PLATE)) |
|
3089 |
+## object <- pr(object, "nuB.se", PLATE, getParam(object, "nuB.se", PLATE)) |
|
3090 |
+## object <- pr(object, "phiA", PLATE, getParam(object, "phiA", PLATE)) |
|
3091 |
+## object <- pr(object, "phiA.se", PLATE, getParam(object, "phiA.se", PLATE)) |
|
3092 |
+## object <- pr(object, "phiB", PLATE, getParam(object, "phiB", PLATE)) |
|
3093 |
+## object <- pr(object, "phiB.se", PLATE, getParam(object, "phiB.se", PLATE)) |
|
3094 |
+## object <- pr(object, "tau2A", PLATE, getParam(object, "tau2A", PLATE)) |
|
3095 |
+## object <- pr(object, "tau2B", PLATE, getParam(object, "tau2B", PLATE)) |
|
3096 |
+## object <- pr(object, "sig2A", PLATE, getParam(object, "sig2A", PLATE)) |
|
3097 |
+## object <- pr(object, "sig2B", PLATE, getParam(object, "sig2B", PLATE)) |
|
3098 |
+## object <- pr(object, "phiAX", PLATE, as.numeric(getParam(object, "phiAX", PLATE))) |
|
3099 |
+## object <- pr(object, "phiBX", PLATE, as.numeric(getParam(object, "phiBX", PLATE))) |
|
3100 |
+## object <- pr(object, "corr", PLATE, getParam(object, "corr", PLATE)) |
|
3101 |
+## object <- pr(object, "corrA.BB", PLATE, getParam(object, "corrA.BB", PLATE)) |
|
3102 |
+## object <- pr(object, "corrB.AA", PLATE, getParam(object, "corrB.AA", PLATE)) |
|
3103 |
+## ##object <- object[order(chromosome(object), position(object)), ] |
|
3104 |
+## if(cnOptions[["thresholdCopynumber"]]){ |
|
3105 |
+## object <- thresholdCopynumber(object) |
|
3106 |
+## } |
|
3107 |
+## return(object) |
|
3108 |
+##} |
|
3115 | 3109 |
|
3116 | 3110 |
|
3117 | 3111 |
|
... | ... |
@@ -3164,11 +3158,12 @@ constructIlluminaAssayData <- function(np, snp, object, storage.mode="environmen |
3164 | 3158 |
CB=emptyMatrix) |
3165 | 3159 |
} |
3166 | 3160 |
constructIlluminaCNSet <- function(crlmmResult, |
3161 |
+ path, |
|
3167 | 3162 |
snpFile, |
3168 | 3163 |
cnFile){ |
3169 |
- load(file.path(outdir, "snpFile.rda")) |
|
3164 |
+ load(file.path(path, "snpFile.rda")) |
|
3170 | 3165 |
res <- get("res") |
3171 |
- load(file.path(outdir, "cnFile.rda")) |
|
3166 |
+ load(file.path(path, "cnFile.rda")) |
|
3172 | 3167 |
cnAB <- get("cnAB") |
3173 | 3168 |
fD <- constructIlluminaFeatureData(c(res$gns, cnAB$gns), cdfName="human370v1c") |
3174 | 3169 |
new.order <- order(fD$chromosome, fD$position) |
... | ... |
@@ -3187,3 +3182,23 @@ constructIlluminaCNSet <- function(crlmmResult, |
3187 | 3182 |
|
3188 | 3183 |
|
3189 | 3184 |
|
3185 |
+ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){ |
|
3186 |
+ ubatch <- unique(batch(cnSet))[batch] |
|
3187 |
+ Nu <- nu(object, allele)[index, batch] |
|
3188 |
+ Phi <- phi(object, allele)[index, batch] |
|
3189 |
+ centers <- list(Nu, Nu+Phi, Nu+2*Phi) |
|
3190 |
+ if(log.it) |
|
3191 |
+ centers <- lapply(centers, log2) |
|
3192 |
+ myLabels <- function(allele){ |
|
3193 |
+ switch(allele, |
|
3194 |
+ A=c("BB", "AB", "AA"), |
|
3195 |
+ B=c("AA", "AB", "BB"), |
|
3196 |
+ stop("allele must be 'A' or 'B'") |
|
3197 |
+ ) |
|
3198 |
+ } |
|
3199 |
+ nms <- myLabels(allele) |
|
3200 |
+ names(centers) <- nms |
|
3201 |
+ fns <- featureNames(object)[index] |
|
3202 |
+ centers$fns <- fns |
|
3203 |
+ return(centers) |
|
3204 |
+} |
... | ... |
@@ -39,6 +39,7 @@ setReplaceMethod("lM", c("CNSetLM", "list_or_ffdf"), function(object, value){ |
39 | 39 |
|
40 | 40 |
setMethod("open", "CNSetLM", function(con,...){ |
41 | 41 |
callNextMethod(con,...) |
42 |
+ physical <- get("physical") |
|
42 | 43 |
lapply(physical(lM(con)), open) |
43 | 44 |
}) |
44 | 45 |
|
... | ... |
@@ -103,6 +104,7 @@ setMethod("computeCopynumber", "CNSet", |
103 | 104 |
} |
104 | 105 |
object |
105 | 106 |
}) |
107 |
+ |
|
106 | 108 |
setMethod("copyNumber", "CNSet", function(object){ |
107 | 109 |
I <- isSnp(object) |
108 | 110 |
ffIsLoaded <- class(calls(object))[[1]]=="ff" |
... | ... |
@@ -210,11 +212,14 @@ setMethod("nu", c("CNSetLM", "character"), function(object, allele){ |
210 | 212 |
val <- getValue(allele) |
211 | 213 |
class.lm <- class(lM(object)) |
212 | 214 |
if(class.lm == "ffdf"){ |
213 |
- return(physical(lM(object))[[val]]) |
|
215 |
+ physical <- get("physical") |
|
216 |
+ res <- physical(lM(object))[[val]] |
|
217 |
+ |
|
214 | 218 |
} else { |
215 |
- if(class.lm != "matrix") stop("lM() must be matrix or ffdf") |
|
216 |
- return(lM(object)[[val]]) |
|
219 |
+ if(class.lm != "list") stop("lM() must be matrix or ffdf") |
|
220 |
+ res <- lM(object)[[val]] |
|
217 | 221 |
} |
222 |
+ return(res) |
|
218 | 223 |
}) |
219 | 224 |
|
220 | 225 |
setMethod("phi", c("CNSetLM", "character"), function(object, allele){ |
... | ... |
@@ -227,11 +232,14 @@ setMethod("phi", c("CNSetLM", "character"), function(object, allele){ |
227 | 232 |
val <- getValue(allele) |
228 | 233 |
class.lm <- class(lM(object)) |
229 | 234 |
if(class.lm == "ffdf"){ |
230 |
- return(physical(lM(object))[[val]]) |
|
235 |
+ physical <- get("physical") |
|
236 |
+ res <- physical(lM(object))[[val]] |
|
237 |
+ |
|
231 | 238 |
} else { |
232 |
- if(class.lm != "matrix") stop("lM() must be matrix or ffdf") |
|
233 |
- return(lM(object)[[val]]) |
|
239 |
+ if(class.lm != "list") stop("lM() must be matrix or ffdf") |
|
240 |
+ res <- lM(object)[[val]] |
|
234 | 241 |
} |
242 |
+ return(res) |
|
235 | 243 |
}) |
236 | 244 |
|
237 | 245 |
setMethod("sigma2", c("CNSetLM", "character"), function(object, allele){ |
... | ... |
@@ -242,13 +250,16 @@ setMethod("sigma2", c("CNSetLM", "character"), function(object, allele){ |
242 | 250 |
stop("allele must be 'A' or 'B'")) |
243 | 251 |
} |
244 | 252 |
val <- getValue(allele) |
245 |
- class.lm <- class(lM(object)) |
|
253 |
+ class.lm <- class(lM(object)) |
|
246 | 254 |
if(class.lm == "ffdf"){ |
247 |
- return(physical(lM(object))[[val]]) |
|
255 |
+ physical <- get("physical") |
|
256 |
+ res <- physical(lM(object))[[val]] |
|
257 |
+ |
|
248 | 258 |
} else { |
249 |
- if(class.lm != "matrix") stop("lM() must be matrix or ffdf") |
|
250 |
- return(lM(object)[[val]]) |
|
259 |
+ if(class.lm != "list") stop("lM() must be matrix or ffdf") |
|
260 |
+ res <- lM(object)[[val]] |
|
251 | 261 |
} |
262 |
+ return(res) |
|
252 | 263 |
}) |
253 | 264 |
|
254 | 265 |
setMethod("tau2", c("CNSetLM", "character"), function(object, allele){ |
... | ... |
@@ -259,13 +270,16 @@ setMethod("tau2", c("CNSetLM", "character"), function(object, allele){ |
259 | 270 |
stop("allele must be 'A' or 'B'")) |
260 | 271 |
} |
261 | 272 |
val <- getValue(allele) |
262 |
- class.lm <- class(lM(object)) |
|
273 |
+ class.lm <- class(lM(object)) |
|
263 | 274 |
if(class.lm == "ffdf"){ |
264 |
- return(physical(lM(object))[[val]]) |
|
275 |
+ physical <- get("physical") |
|
276 |
+ res <- physical(lM(object))[[val]] |
|
277 |
+ |
|
265 | 278 |
} else { |
266 |
- if(class.lm != "matrix") stop("lM() must be matrix or ffdf") |
|
267 |
- return(lM(object)[[val]]) |
|
279 |
+ if(class.lm != "list") stop("lM() must be matrix or ffdf") |
|
280 |
+ res <- lM(object)[[val]] |
|
268 | 281 |
} |
282 |
+ return(res) |
|
269 | 283 |
}) |
270 | 284 |
|
271 | 285 |
setMethod("corr", c("CNSetLM", "character"), function(object, allele){ |
... | ... |
@@ -277,12 +291,15 @@ setMethod("corr", c("CNSetLM", "character"), function(object, allele){ |
277 | 291 |
stop("must be AA, AB, or BB")) |
278 | 292 |
} |
279 | 293 |
val <- getValue(allele) |
280 |
- class.lm <- class(lM(object)) |
|
294 |
+ class.lm <- class(lM(object)) |
|
281 | 295 |
if(class.lm == "ffdf"){ |
282 |
- return(physical(lM(object))[[val]]) |
|
296 |
+ physical <- get("physical") |
|
297 |
+ res <- physical(lM(object))[[val]] |
|
298 |
+ |
|
283 | 299 |
} else { |
284 |
- if(class.lm != "matrix") stop("lM() must be matrix or ffdf") |
|
285 |
- return(lM(object)[[val]]) |
|
300 |
+ if(class.lm != "list") stop("lM() must be matrix or ffdf") |
|
301 |
+ res <- lM(object)[[val]] |
|
286 | 302 |
} |
303 |
+ return(res) |
|
287 | 304 |
}) |
288 | 305 |
|
... | ... |
@@ -11,33 +11,19 @@ linesCNSetLM <- function(x, y, batch, copynumber, x.axis="A", ...){ |
11 | 11 |
ffIsLoaded <- calls.class[1] == "ff_matrix" | calls.class[1] == "ffdf" | calls.class[1]=="ff" |
12 | 12 |
column <- grep(batch, unique(batch(object))) |
13 | 13 |
stopifnot(length(column) == 1) |
14 |
- if(ffIsLoaded){ |
|
15 |
- nuA <- (physical(lM(object))[["nuA"]])[I, column , drop=TRUE] |
|
16 |
- nuB <- (physical(lM(object))[["nuB"]])[I, column , drop=TRUE] |
|
17 |
- phiA <- (physical(lM(object))[["phiA"]])[I, column ,drop=TRUE] |
|
18 |
- phiB <- (physical(lM(object))[["phiB"]])[I, column ,drop=TRUE] |
|
19 |
- tau2A <- (physical(lM(object))[["tau2A"]])[I, column ,drop=TRUE] |
|
20 |
- tau2B <- (physical(lM(object))[["tau2B"]])[I, column ,drop=TRUE] |
|
21 |
- sigma2A <- (physical(lM(object))[["sig2A"]])[I, column ,drop=TRUE] |
|
22 |
- sigma2B <- (physical(lM(object))[["sig2B"]])[I, column ,drop=TRUE] |
|
23 |
- corrAB <- (physical(lM(object))[["corrAB"]])[I, column ,drop=TRUE] |
|
24 |
- corrAA <- (physical(lM(object))[["corrAA"]])[I, column ,drop=TRUE] |
|
25 |
- corrBB <- (physical(lM(object))[["corrBB"]])[I, column ,drop=TRUE] |
|
26 |
- } else { |
|
27 |
- nuA <- lM(object)[["nuA"]][I, column , drop=TRUE] |
|
28 |
- nuB <- lM(object)[["nuB"]][I, column , drop=TRUE] |
|
29 |
- phiA <- lM(object)[["phiA"]][I, column ,drop=TRUE] |
|
30 |
- phiB <- lM(object)[["phiB"]][I, column ,drop=TRUE] |
|
31 |
- tau2A <- lM(object)[["tau2A"]][I, column ,drop=TRUE] |
|
32 |
- tau2B <- lM(object)[["tau2B"]][I, column ,drop=TRUE] |
|
33 |
- sigma2A <- lM(object)[["sig2A"]][I, column ,drop=TRUE] |
|
34 |
- sigma2B <- lM(object)[["sig2B"]][I, column ,drop=TRUE] |
|
35 |
- corrAB <- lM(object)[["corrAB"]][I, column ,drop=TRUE] |
|
36 |
- corrAA <- lM(object)[["corrAA"]][I, column ,drop=TRUE] |
|
37 |
- corrBB <- lM(object)[["corrBB"]][I, column ,drop=TRUE] |
|
38 |
- } |
|
14 |
+ nuA <- nu(object, "A")[I, column] |
|
15 |
+ nuB <- nu(object, "B")[I, column] |
|
16 |
+ phiA <- phi(object, "A")[I, column] |
|
17 |
+ phiB <- phi(object, "B")[I, column] |
|
18 |
+ tau2A <- tau2(object, "A")[I, column] |
|
19 |
+ tau2B <- tau2(object, "B")[I, column] |
|
20 |
+ sigma2A <- sigma2(object, "A")[I, column] |
|
21 |
+ sigma2B <- sigma2(object, "B")[I, column] |
|
22 |
+ corrAB <- corr(object, "AB")[I, column] |
|
23 |
+ corrAA <- corr(object, "AA")[I, column] |
|
24 |
+ corrBB <- corr(object, "BB")[I, column] |
|
39 | 25 |
if(all(is.na(nuA))) { |
40 |
- message("Parameter estimates for batch ", b, " not available") |
|
26 |
+ message("Parameter estimates for batch ", batch, " not available") |
|
41 | 27 |
next() |
42 | 28 |
} |
43 | 29 |
for(CN in copynumber){ |
... | ... |
@@ -29,11 +29,12 @@ intMedianSummaries <- function(mat, grps) |
29 | 29 |
## R CMD check |
30 | 30 |
|
31 | 31 |
isLoaded <- function(dataset, environ=.crlmmPkgEnv) |
32 |
- exists(dataset, envir=environ) |
|
32 |
+ exists(dataset, envir=environ) |
|
33 |
+ |
|
33 | 34 |
getVarInEnv <- function(dataset, environ=.crlmmPkgEnv){ |
34 |
- if (!isLoaded(dataset)) |
|
35 |
- stop("Variable ", dataset, " not found in .crlmmPkgEnv") |
|
36 |
- environ[[dataset]] |
|
35 |
+ if (!isLoaded(dataset)) |
|
36 |
+ stop("Variable ", dataset, " not found in .crlmmPkgEnv") |
|
37 |
+ environ[[dataset]] |
|
37 | 38 |
} |
38 | 39 |
|
39 | 40 |
list2SnpSet <- function(x, returnParams=FALSE){ |
... | ... |
@@ -50,18 +50,20 @@ Class \code{"\linkS4class{Versioned}"}, by class "CNSet", distance 6. |
50 | 50 |
\item{lines}{\code{signature(x="CNSetLM")}: for drawing prediction regions on A vs B scatterplots} |
51 | 51 |
\item{lM}{\code{signature(object = "CNSetLM")}: Extract list or |
52 | 52 |
ffdf object containing linear model parameters} |
53 |
- \item{nu}{\code{signature(object = "CNSetLM"), \code{signature(allele="character")}}: |
|
54 |
- intercept for linear model. See \code{\link{nu}}.} |
|
55 |
- \item{open}{\code{signature(con = "CNSetLM")}: opens file connects |
|
56 |
- to ff objects for assayData elements and linear model parameters} |
|
57 |
- \item{phi}{\code{signature(object = "CNSetLM"), \code{signature(allele="character")}}: |
|
58 |
- slope for linear model. See \code{\link{phi}}.} |
|
53 |
+ \item{nu}{\code{signature(object = "CNSetLM", allele="character")}: |
|
54 |
+ intercept for linear model. See \code{\link{nu}}} |
|
55 |
+ \item{open}{\code{signature(con = "CNSetLM")}: opens file connection |
|
56 |
+ to ff objects for assayData elements and linear model parameters} |
|
57 |
+ \item{phi}{\code{signature(object = "CNSetLM", allele="character")}: |
|
58 |
+ slope for linear model. See \code{\link{phi}}.} |
|
59 | 59 |
\item{show}{\code{signature(object = "CNSetLM")}: print method |
60 |
- for the class } |
|
61 |
- \item{sigma2}{\code{signature(object = "CNSetLM"), \code{signature(allele="character")}}: |
|
62 |
- The within genotype } |
|
63 |
- } |
|
60 |
+ for the class } |
|
61 |
+ \item{sigma2}{\code{signature(object = "CNSetLM", allele="character")}: |
|
62 |
+ Accessor for log2 intensity variance among subjects with genotype AA (allele 'A') and genotype BB (allele 'B') |
|
63 |
+ } |
|
64 | 64 |
} |
65 |
+} |
|
66 |
+ |
|
65 | 67 |
\author{ R. Scharpf} |
66 | 68 |
\seealso{ |
67 | 69 |
\code{\linkS4class{SnpSuperSet}}, \code{\linkS4class{CNSet}} |
68 | 70 |
deleted file mode 100644 |
... | ... |
@@ -1,21 +0,0 @@ |
1 |
-\name{CNSetLM-methods} |
|
2 |
-\alias{lM} |
|
3 |
- |
|
4 |
-\title{Methods for CNSetLM class} |
|
5 |
-\description{ Accessors for CNSetLM class} |
|
6 |
-\usage{ |
|
7 |
- lM(object) |
|
8 |
-} |
|
9 |
- |
|
10 |
-\arguments{ |
|
11 |
- \item{object}{\code{CNSetLM}} |
|
12 |
-} |
|
13 |
-\details{ |
|
14 |
- \code{lM} returns a list (or an ffdf object if large data support is enabled) of the parameters estimated from a linear model fit for each SNP. The parameters are batch and locus-specific. |
|
15 |
-} |
|
16 |
-\value{object of class \code{ffdf} or \code{list}} |
|
17 |
- |
|
18 |
-\author{R. Scharpf} |
|
19 |
- |
|
20 |
-\seealso{\code{\link{crlmmCopynumber}}, \code{\link{crlmmCopynumber2}}, \code{\link{CNSetLM-class}}} |
|
21 |
-\keyword{manip} |
22 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,44 @@ |
1 |
+\name{constructIlluminaCNSet} |
|
2 |
+\alias{constructIlluminaCNSet} |
|
3 |
+\title{ |
|
4 |
+ Construct an instance of CNSetLM after preprocessing Illumina files |
|
5 |
+} |
|
6 |
+\description{ |
|
7 |
+ |
|
8 |
+ Assemble the preprocessed data and genotype calls from |
|
9 |
+ \code{crlmmIllumina} to initialize a \code{CNSetLM} object. |
|
10 |
+ |
|
11 |
+} |
|
12 |
+\usage{ |
|
13 |
+constructIlluminaCNSet(crlmmResult, path, snpFile, cnFile) |
|
14 |
+} |
|
15 |
+\arguments{ |
|
16 |
+ \item{crlmmResult}{ |
|
17 |
+ |
|
18 |
+ A \code{SnpSet} object returned by function \code{crlmmIllumina} or |
|
19 |
+ \code{crlmmIllumina2}. |
|
20 |
+ |
|
21 |
+} |
|
22 |
+ |
|
23 |
+ \item{path}{path to files created by \code{crlmmIllumina}} |
|
24 |
+ |
|
25 |
+ \item{snpFile}{ |
|
26 |
+ The \code{snpFile} filename specified in \code{crlmmIllumina}. |
|
27 |
+} |
|
28 |
+ \item{cnFile}{ |
|
29 |
+ The \code{cnFile} filename specified in \code{crlmmIllumina}. |
|
30 |
+} |
|
31 |
+} |
|
32 |
+ |
|
33 |
+\value{ |
|
34 |
+ An object of class \code{CNSetLM}. |
|
35 |
+} |
|
36 |
+\author{ |
|
37 |
+R. Scharpf |
|
38 |
+} |
|
39 |
+\seealso{ |
|
40 |
+ \code{\link{CNSetLM-class}}, \code{\link{crlmmIllumina}} |
|
41 |
+} |
|
42 |
+ |
|
43 |
+\keyword{manip} |
|
44 |
+ |
... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
\name{crlmmIllumina} |
2 | 2 |
\alias{crlmmIllumina} |
3 |
+\alias{crlmmIllumina2} |
|
3 | 4 |
\title{Genotype Illumina Infinium II BeadChip data with CRLMM} |
4 | 5 |
\description{ |
5 | 6 |
Implementation of the CRLMM algorithm for |
... | ... |
@@ -14,6 +15,14 @@ crlmmIllumina(RG, XY, stripNorm=TRUE, |
14 | 15 |
snpFile, cnFile, mixtureSampleSize=10^5, |
15 | 16 |
eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
16 | 17 |
recallRegMin=1000, returnParams=FALSE, badSNP=0.7) |
18 |
+ |
|
19 |
+crlmmIllumina2(RG, XY, stripNorm=TRUE, |
|
20 |
+ useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
|
21 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, |
|
22 |
+ gender=NULL, seed=1, save.it=FALSE, load.it=FALSE, |
|
23 |
+ snpFile, cnFile, mixtureSampleSize=10^5, |
|
24 |
+ eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
|
25 |
+ recallRegMin=1000, returnParams=FALSE, badSNP=0.7) |
|
17 | 26 |
} |
18 | 27 |
|
19 | 28 |
\arguments{ |
... | ... |
@@ -64,11 +73,13 @@ crlmmIllumina(RG, XY, stripNorm=TRUE, |
64 | 73 |
} |
65 | 74 |
|
66 | 75 |
\details{ |
76 |
+ |
|
67 | 77 |
Note: The user should specify either the \code{RG} or \code{XY} |
68 | 78 |
intensities, not both. Alternatively if \code{crlmmIllumina} has been |
69 | 79 |
run already with \code{save.it=TRUE}, the preprocessed data can be |
70 | 80 |
loaded from file by specifying \code{load.it=TRUE} and |
71 | 81 |
\code{intensityFile} (\code{RG} or \code{XY} are not needed in this case). |
82 |
+ |
|
72 | 83 |
} |
73 | 84 |
|
74 | 85 |
\references{ |
75 | 86 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,31 @@ |
1 |
+%added to prevent warnings |
|
2 |
+\name{ffdf-class} |
|
3 |
+\Rdversion{1.1} |
|
4 |
+\docType{class} |
|
5 |
+\alias{ffdf-class} |
|
6 |
+\alias{list-class} |
|
7 |
+\title{Class "ffdf"} |
|
8 |
+\description{ |
|
9 |
+ object of class \code{ffdf} |
|
10 |
+} |
|
11 |
+\section{Objects from the Class}{A virtual Class: No objects may be created from it.} |
|
12 |
+\section{Slots}{ |
|
13 |
+ \describe{ |
|
14 |
+ \item{\code{.S3Class}:}{Object of class \code{"character"} ~~ } |
|
15 |
+ } |
|
16 |
+} |
|
17 |
+\section{Extends}{ |
|