... | ... |
@@ -549,6 +549,10 @@ function (which expects ff objects and supports parallel processing) |
549 | 549 |
** Added close() statements to readIdatFiles(), RGtoXY() and stripNormalize(). Added open() statement to stripNormalize(). Moved close() statement in preprocessInfinium2() |
550 | 550 |
** tidied up crlmm-illumina.R, removing commented out code. |
551 | 551 |
|
552 |
-2010-09-16 M. Ritchie 1.7.13 |
|
552 |
+2010-09-16 M. Ritchie 1.7.14 |
|
553 | 553 |
** added immuno chip 12 as one of the chip options for Illumina. |
554 | 554 |
|
555 |
+2010-09-28 M. Ritchie 1.7.15 |
|
556 |
+** added functions get.ProtocolData.Illumina(), construct.Illumina() and genotype.Illumina() to maintiain consistency with Rob's Affy functions |
|
557 |
+** removed 'save.it', 'load.it', 'snpFile', 'cnFile' arguments from crlmmIllumina(), crlmmIlluminaV2 and preprocessInfinium2(). Updated man pages to reflect these changes. |
|
558 |
+** added a switch so that both regular matrix/ff storage can be handled in each function, depending upon whether the ff package has been loaded. |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.7.14 |
|
5 |
-Date: 2010-09-16 |
|
4 |
+Version: 1.7.15 |
|
5 |
+Date: 2010-09-27 |
|
6 | 6 |
Author: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry |
7 | 7 |
Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms |
... | ... |
@@ -141,11 +141,12 @@ readIdatFiles <- function(sampleSheet=NULL, |
141 | 141 |
protocolData(RG)[["ScanDate"]] = dates$scan |
142 | 142 |
} |
143 | 143 |
storageMode(RG) = "lockedEnvironment" |
144 |
-# if(class(RG@assayData$R)[1]=="ff_matrix") { |
|
144 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
145 |
+ if(is.lds) { |
|
145 | 146 |
close(RG@assayData$R) |
146 | 147 |
close(RG@assayData$G) |
147 | 148 |
close(RG@assayData$zero) |
148 |
-# } |
|
149 |
+ } |
|
149 | 150 |
RG |
150 | 151 |
} |
151 | 152 |
|
... | ... |
@@ -466,6 +467,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
466 | 467 |
annotation=chipType, phenoData=RG@phenoData, |
467 | 468 |
protocolData=RG@protocolData, storage.mode="environment") |
468 | 469 |
featureNames(XY) = ids # featureNames(RG) |
470 |
+ sampleNames(XY) = sampleNames(RG) |
|
469 | 471 |
gc() |
470 | 472 |
# Need to initialize - matrices filled with NAs to begin with |
471 | 473 |
XY@assayData$X[1:nsnps,] = 0 |
... | ... |
@@ -499,11 +501,12 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
499 | 501 |
XY@assayData$zero[infI,] = 0 |
500 | 502 |
gc() |
501 | 503 |
|
502 |
-# if(class(XY@assayData$X)[1]=="ff_matrix") { |
|
504 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
505 |
+ if(is.lds) { |
|
503 | 506 |
close(XY@assayData$X) |
504 | 507 |
close(XY@assayData$Y) |
505 | 508 |
close(XY@assayData$zero) |
506 |
-# } |
|
509 |
+ } |
|
507 | 510 |
|
508 | 511 |
# storageMode(XY) = "lockedEnvironment" |
509 | 512 |
XY |
... | ... |
@@ -531,10 +534,12 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
531 | 534 |
message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.") |
532 | 535 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3) |
533 | 536 |
} |
534 |
-# if(class(XY@assayData$X)[1]=="ff_matrix") { |
|
537 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
538 |
+ |
|
539 |
+ if(is.lds) { |
|
535 | 540 |
open(XY@assayData$X) |
536 | 541 |
open(XY@assayData$Y) |
537 |
-# } |
|
542 |
+ } |
|
538 | 543 |
for(s in 1:max(stripnum)) { |
539 | 544 |
if(verbose) { |
540 | 545 |
if (getRversion() > '2.7.0') setTxtProgressBar(pb, s) |
... | ... |
@@ -554,10 +559,10 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
554 | 559 |
rm(subX, subY, tmp, sel) |
555 | 560 |
gc() |
556 | 561 |
} |
557 |
-# if(class(XY@assayData$X)[1]=="ff_matrix") { |
|
562 |
+ if(is.lds) { |
|
558 | 563 |
close(XY@assayData$X) |
559 | 564 |
close(XY@assayData$Y) |
560 |
-# } |
|
565 |
+ } |
|
561 | 566 |
|
562 | 567 |
if(verbose) |
563 | 568 |
cat("\n") |
... | ... |
@@ -573,10 +578,10 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
573 | 578 |
cdfName, |
574 | 579 |
sns, |
575 | 580 |
stripNorm=TRUE, |
576 |
- useTarget=TRUE, # ) { #, |
|
577 |
- save.it=FALSE, |
|
578 |
- snpFile, |
|
579 |
- cnFile) { |
|
581 |
+ useTarget=TRUE) { |
|
582 |
+# save.it=FALSE, |
|
583 |
+# snpFile, |
|
584 |
+# cnFile) { |
|
580 | 585 |
if(stripNorm) |
581 | 586 |
XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose) |
582 | 587 |
|
... | ... |
@@ -602,15 +607,18 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
602 | 607 |
theKnots <- getVarInEnv("theKnots") |
603 | 608 |
narrays = ncol(XY) |
604 | 609 |
|
605 |
- if(save.it & !missing(cnFile)) { |
|
610 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
611 |
+ |
|
612 |
+# if(save.it & !missing(cnFile)) { |
|
606 | 613 |
# separate out copy number probes |
607 | 614 |
npIndex = getVarInEnv("npProbesFid") |
608 | 615 |
nprobes = length(npIndex) |
609 | 616 |
if(length(nprobes)>0) { |
610 |
- open(XY@assayData$X) |
|
611 |
- open(XY@assayData$Y) |
|
612 |
- open(XY@assayData$zero) |
|
613 |
- |
|
617 |
+ if(is.lds) { |
|
618 |
+ open(XY@assayData$X) |
|
619 |
+ open(XY@assayData$Y) |
|
620 |
+ open(XY@assayData$zero) |
|
621 |
+ } |
|
614 | 622 |
A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays) |
615 | 623 |
B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays) |
616 | 624 |
|
... | ... |
@@ -622,13 +630,18 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
622 | 630 |
|
623 | 631 |
cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName) |
624 | 632 |
|
625 |
- t0 <- proc.time() |
|
626 |
- save(cnAB, file=cnFile) |
|
627 |
- t0 <- proc.time()-t0 |
|
628 |
- if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".") |
|
629 |
- rm(cnAB, B, zero) |
|
633 |
+# t0 <- proc.time() |
|
634 |
+# save(cnAB, file=cnFile) |
|
635 |
+# t0 <- proc.time()-t0 |
|
636 |
+# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".") |
|
637 |
+ rm(A, B, zero) |
|
638 |
+ if(is.lds) { |
|
639 |
+ close(XY@assayData$X) |
|
640 |
+ close(XY@assayData$Y) |
|
641 |
+ close(XY@assayData$zero) |
|
642 |
+ } |
|
630 | 643 |
} |
631 |
- } |
|
644 |
+# } |
|
632 | 645 |
|
633 | 646 |
# next process snp probes |
634 | 647 |
snpIndex = getVarInEnv("snpProbesFid") |
... | ... |
@@ -661,14 +674,16 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
661 | 674 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3) |
662 | 675 |
} |
663 | 676 |
|
664 |
- open(XY@assayData$X) |
|
665 |
- open(XY@assayData$Y) |
|
666 |
- open(XY@assayData$zero) |
|
667 |
- |
|
677 |
+ if(is.lds) { |
|
678 |
+ open(XY@assayData$X) |
|
679 |
+ open(XY@assayData$Y) |
|
680 |
+ open(XY@assayData$zero) |
|
681 |
+ } |
|
682 |
+ |
|
668 | 683 |
for(i in 1:narrays){ |
669 |
- A[,i] = exprs(channel(XY, "X"))[snpIndex,i] |
|
670 |
- B[,i] = exprs(channel(XY, "Y"))[snpIndex,i] |
|
671 |
- zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i] |
|
684 |
+ A[,i] = as.integer(exprs(channel(XY, "X"))[snpIndex,i]) |
|
685 |
+ B[,i] = as.integer(exprs(channel(XY, "Y"))[snpIndex,i]) |
|
686 |
+ zero[,i] = as.integer(exprs(channel(XY, "zero"))[snpIndex,i]) |
|
672 | 687 |
# SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3) |
673 | 688 |
SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3) |
674 | 689 |
if(fitMixture){ |
... | ... |
@@ -693,43 +708,43 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
693 | 708 |
if (!fitMixture) SNR <- mixtureParams <- NA |
694 | 709 |
## gns comes from preprocStuff.rda |
695 | 710 |
|
696 |
- if(save.it & !missing(snpFile)) { |
|
697 |
- t0 <- proc.time() |
|
698 |
- save(res, file=snpFile) |
|
699 |
- t0 <- proc.time()-t0 |
|
700 |
- if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
|
701 |
- } |
|
702 |
-# if(class(A)[1]=="ff_matrix") { |
|
711 |
+# if(save.it & !missing(snpFile)) { |
|
712 |
+# t0 <- proc.time() |
|
713 |
+# save(res, file=snpFile) |
|
714 |
+# t0 <- proc.time()-t0 |
|
715 |
+# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
|
716 |
+# } |
|
717 |
+ if(is.lds) { |
|
703 | 718 |
close(A) |
704 | 719 |
close(B) |
705 | 720 |
close(zero) |
706 | 721 |
close(SKW) |
707 | 722 |
close(mixtureParams) |
708 | 723 |
close(SNR) |
709 |
-# } |
|
724 |
+ } |
|
710 | 725 |
|
711 | 726 |
res = list(A=A, B=B, |
712 | 727 |
zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW, |
713 |
- mixtureParams=mixtureParams, cdfName=cdfName) |
|
714 |
- |
|
715 |
- open(res[["A"]]) |
|
716 |
- open(res[["B"]]) |
|
717 |
- open(res[["zero"]]) |
|
718 |
- open(res[["SNR"]]) |
|
719 |
- open(res[["mixtureParams"]]) |
|
720 |
- |
|
721 |
- if(save.it & !missing(snpFile)) { |
|
722 |
- t0 <- proc.time() |
|
723 |
- save(res, file=snpFile) |
|
724 |
- t0 <- proc.time()-t0 |
|
725 |
- if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
|
726 |
- } |
|
728 |
+ mixtureParams=mixtureParams, cdfName=cdfName, cnAB=cnAB) |
|
729 |
+ |
|
730 |
+# open(res[["A"]]) |
|
731 |
+# open(res[["B"]]) |
|
732 |
+# open(res[["zero"]]) |
|
733 |
+# open(res[["SNR"]]) |
|
734 |
+# open(res[["mixtureParams"]]) |
|
735 |
+ |
|
736 |
+# if(save.it & !missing(snpFile)) { |
|
737 |
+# t0 <- proc.time() |
|
738 |
+# save(res, file=snpFile) |
|
739 |
+# t0 <- proc.time()-t0 |
|
740 |
+# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
|
741 |
+# } |
|
727 | 742 |
|
728 |
- close(res[["A"]]) |
|
729 |
- close(res[["B"]]) |
|
730 |
- close(res[["zero"]]) |
|
731 |
- close(res[["SNR"]]) |
|
732 |
- close(res[["mixtureParams"]]) |
|
743 |
+# close(res[["A"]]) |
|
744 |
+# close(res[["B"]]) |
|
745 |
+# close(res[["zero"]]) |
|
746 |
+# close(res[["SNR"]]) |
|
747 |
+# close(res[["mixtureParams"]]) |
|
733 | 748 |
|
734 | 749 |
return(res) |
735 | 750 |
} |
... | ... |
@@ -738,32 +753,43 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
738 | 753 |
crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
739 | 754 |
row.names=TRUE, col.names=TRUE, |
740 | 755 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
741 |
- seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile, |
|
756 |
+ seed=1, # save.it=FALSE, load.it=FALSE, snpFile, cnFile, |
|
742 | 757 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
743 | 758 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
744 | 759 |
returnParams=FALSE, badSNP=.7) { |
745 |
- if (save.it & (missing(snpFile) | missing(cnFile))) |
|
746 |
- stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it") |
|
747 |
- if (load.it & missing(snpFile)) |
|
748 |
- stop("'snpFile' is missing and you chose to load.it") |
|
749 |
- if (!missing(snpFile)) |
|
750 |
- if (load.it & !file.exists(snpFile)){ |
|
751 |
- load.it <- FALSE |
|
752 |
- message("File ", snpFile, " does not exist.") |
|
753 |
- stop("Cannot load SNP data.") |
|
754 |
- } |
|
755 |
- if (!load.it){ |
|
760 |
+# if (save.it & (missing(snpFile) | missing(cnFile))) |
|
761 |
+# stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it") |
|
762 |
+# if (load.it & missing(snpFile)) |
|
763 |
+# stop("'snpFile' is missing and you chose to load.it") |
|
764 |
+# if (!missing(snpFile)) |
|
765 |
+# if (load.it & !file.exists(snpFile)){ |
|
766 |
+# load.it <- FALSE |
|
767 |
+# message("File ", snpFile, " does not exist.") |
|
768 |
+# stop("Cannot load SNP data.") |
|
769 |
+# } |
|
770 |
+# if (!load.it){ |
|
756 | 771 |
if(!missing(RG)) { |
757 | 772 |
if(missing(XY)) |
758 | 773 |
XY = RGtoXY(RG, chipType=cdfName) |
759 | 774 |
else |
760 | 775 |
stop("Both RG and XY specified - please use one or the other") |
761 | 776 |
} |
762 |
- if (missing(sns)) sns <- sampleNames(XY) #$X |
|
763 |
- |
|
777 |
+ if (missing(sns)) { |
|
778 |
+ sns = sampleNames(XY) #$X |
|
779 |
+ } |
|
764 | 780 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
765 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, |
|
766 |
- save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
781 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, |
|
782 |
+# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
783 |
+ |
|
784 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
785 |
+ |
|
786 |
+ if(is.lds) { |
|
787 |
+ open(res[["A"]]) |
|
788 |
+ open(res[["B"]]) |
|
789 |
+ open(res[["SNR"]]) |
|
790 |
+ open(res[["mixtureParams"]]) |
|
791 |
+ } |
|
792 |
+ |
|
767 | 793 |
# fD = featureData(XY) |
768 | 794 |
# phenD = XY@phenoData |
769 | 795 |
# protD = XY@protocolData |
... | ... |
@@ -782,13 +808,13 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
782 | 808 |
# pData(callSet)$SNR <- rep(NA, length(sns)) |
783 | 809 |
# pData(callSet)$gender <- rep(NA, length(sns)) |
784 | 810 |
|
785 |
- }else{ |
|
786 |
- if(verbose) message("Loading ", snpFile, ".") |
|
787 |
- obj <- load(snpFile) |
|
788 |
- if(verbose) message("Done.") |
|
789 |
- if(!any(obj == "res")) |
|
790 |
- stop("Object in ", snpFile, " seems to be invalid.") |
|
791 |
- } |
|
811 |
+# }else{ |
|
812 |
+# if(verbose) message("Loading ", snpFile, ".") |
|
813 |
+# obj <- load(snpFile) |
|
814 |
+# if(verbose) message("Done.") |
|
815 |
+# if(!any(obj == "res")) |
|
816 |
+# stop("Object in ", snpFile, " seems to be invalid.") |
|
817 |
+# } |
|
792 | 818 |
|
793 | 819 |
# rm(phenD, protD , fD) |
794 | 820 |
|
... | ... |
@@ -802,22 +828,47 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
802 | 828 |
if(row.names) row.names=res$gns else row.names=NULL |
803 | 829 |
if(col.names) col.names=res$sns else col.names=NULL |
804 | 830 |
|
805 |
- res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
|
806 |
- B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
|
807 |
- SNR=res[["SNR"]], # callSet$SNR, # [j], |
|
808 |
- mixtureParams=res[["mixtureParams"]], #, |
|
809 |
- cdfName=res[["cdfName"]], # annotation(callSet), |
|
810 |
- row.names=row.names, # featureNames(callSet)[snp.index], |
|
811 |
- col.names=col.names, # sampleNames(callSet), #[j], |
|
812 |
- probs=probs, |
|
813 |
- DF=DF, |
|
814 |
- SNRMin=SNRMin, |
|
815 |
- recallMin=recallMin, |
|
816 |
- recallRegMin=recallRegMin, |
|
817 |
- gender=gender, |
|
818 |
- verbose=verbose, |
|
819 |
- returnParams=returnParams, |
|
820 |
- badSNP=badSNP) |
|
831 |
+ FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT") |
|
832 |
+ ## genotyping |
|
833 |
+ crlmmGTfxn <- function(FUN,...){ |
|
834 |
+ switch(FUN, |
|
835 |
+ crlmmGT2=crlmmGT2(...), |
|
836 |
+ crlmmGT=crlmmGT(...)) |
|
837 |
+ } |
|
838 |
+ res2 <- crlmmGTfxn(FUN, |
|
839 |
+ A=res[["A"]], |
|
840 |
+ B=res[["B"]], |
|
841 |
+ SNR=res[["SNR"]], |
|
842 |
+ mixtureParams=res[["mixtureParams"]], |
|
843 |
+ cdfName=cdfName, |
|
844 |
+ row.names=row.names, |
|
845 |
+ col.names=col.names, |
|
846 |
+ probs=probs, |
|
847 |
+ DF=DF, |
|
848 |
+ SNRMin=SNRMin, |
|
849 |
+ recallMin=recallMin, |
|
850 |
+ recallRegMin=recallRegMin, |
|
851 |
+ gender=gender, |
|
852 |
+ verbose=verbose, |
|
853 |
+ returnParams=returnParams, |
|
854 |
+ badSNP=badSNP) |
|
855 |
+ |
|
856 |
+# res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
|
857 |
+# B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
|
858 |
+# SNR=res[["SNR"]], # callSet$SNR, # [j], |
|
859 |
+# mixtureParams=res[["mixtureParams"]], #, |
|
860 |
+# cdfName=res[["cdfName"]], # annotation(callSet), |
|
861 |
+# row.names=row.names, # featureNames(callSet)[snp.index], |
|
862 |
+# col.names=col.names, # sampleNames(callSet), #[j], |
|
863 |
+# probs=probs, |
|
864 |
+# DF=DF, |
|
865 |
+# SNRMin=SNRMin, |
|
866 |
+# recallMin=recallMin, |
|
867 |
+# recallRegMin=recallRegMin, |
|
868 |
+# gender=gender, |
|
869 |
+# verbose=verbose, |
|
870 |
+# returnParams=returnParams, |
|
871 |
+# badSNP=badSNP) |
|
821 | 872 |
# rm(res); gc() |
822 | 873 |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]]) |
823 | 874 |
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]) |
... | ... |
@@ -847,14 +898,12 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
847 | 898 |
sep="_", |
848 | 899 |
fileExt=list(green="Grn.idat", red="Red.idat"), |
849 | 900 |
saveDate=FALSE, |
850 |
-# save.rg=FALSE, |
|
851 |
-# rgFile, |
|
852 | 901 |
stripNorm=TRUE, |
853 | 902 |
useTarget=TRUE, |
854 | 903 |
row.names=TRUE, |
855 | 904 |
col.names=TRUE, |
856 | 905 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
857 |
- seed=1, save.it=FALSE, snpFile, cnFile, |
|
906 |
+ seed=1, # save.it=FALSE, snpFile, cnFile, |
|
858 | 907 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
859 | 908 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
860 | 909 |
returnParams=FALSE, badSNP=.7) { |
... | ... |
@@ -865,8 +914,8 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
865 | 914 |
|
866 | 915 |
# if (save.rg & missing(rgFile)) |
867 | 916 |
# stop("'rgFile' is missing, and you chose save.rg") |
868 |
- if (save.it & (missing(snpFile) | missing(cnFile))) |
|
869 |
- stop("'snpFile' or 'cnFile' is missing and you chose save.it") |
|
917 |
+# if (save.it & (missing(snpFile) | missing(cnFile))) |
|
918 |
+# stop("'snpFile' or 'cnFile' is missing and you chose save.it") |
|
870 | 919 |
# batches = NULL |
871 | 920 |
# if(!is.null(arrayNames)) |
872 | 921 |
# batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples()) |
... | ... |
@@ -892,13 +941,18 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
892 | 941 |
if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY) |
893 | 942 |
} # else subsns = sns[j] |
894 | 943 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
895 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, |
|
896 |
- save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
897 |
- open(res[["A"]]) |
|
898 |
- open(res[["B"]]) |
|
899 |
- open(res[["SNR"]]) |
|
900 |
- open(res[["mixtureParams"]]) |
|
944 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, |
|
945 |
+# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
946 |
+ |
|
947 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
901 | 948 |
|
949 |
+ if(is.lds) { |
|
950 |
+ open(res[["A"]]) |
|
951 |
+ open(res[["B"]]) |
|
952 |
+ open(res[["SNR"]]) |
|
953 |
+ open(res[["mixtureParams"]]) |
|
954 |
+ } |
|
955 |
+ |
|
902 | 956 |
# fD = featureData(XY) |
903 | 957 |
# phenD = XY@phenoData |
904 | 958 |
# protD = XY@protocolData |
... | ... |
@@ -951,22 +1005,48 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
951 | 1005 |
# rm(res); gc() |
952 | 1006 |
if(row.names) row.names=res$gns else row.names=NULL |
953 | 1007 |
if(col.names) col.names=res$sns else col.names=NULL |
954 |
- res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
|
955 |
- B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
|
956 |
- SNR=res[["SNR"]], # callSet$SNR, # [j], |
|
957 |
- mixtureParams=res[["mixtureParams"]], #, |
|
958 |
- cdfName=res[["cdfName"]], # annotation(callSet), |
|
959 |
- row.names=row.names, # featureNames(callSet)[snp.index], |
|
960 |
- col.names=col.names, # sampleNames(callSet), #[j], |
|
961 |
- probs=probs, |
|
962 |
- DF=DF, |
|
963 |
- SNRMin=SNRMin, |
|
964 |
- recallMin=recallMin, |
|
965 |
- recallRegMin=recallRegMin, |
|
966 |
- gender=gender, |
|
967 |
- verbose=verbose, |
|
968 |
- returnParams=returnParams, |
|
969 |
- badSNP=badSNP) |
|
1008 |
+ |
|
1009 |
+ FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT") |
|
1010 |
+ ## genotyping |
|
1011 |
+ crlmmGTfxn <- function(FUN,...){ |
|
1012 |
+ switch(FUN, |
|
1013 |
+ crlmmGT2=crlmmGT2(...), |
|
1014 |
+ crlmmGT=crlmmGT(...)) |
|
1015 |
+ } |
|
1016 |
+ res2 <- crlmmGTfxn(FUN, |
|
1017 |
+ A=res[["A"]], |
|
1018 |
+ B=res[["B"]], |
|
1019 |
+ SNR=res[["SNR"]], |
|
1020 |
+ mixtureParams=res[["mixtureParams"]], |
|
1021 |
+ cdfName=cdfName, |
|
1022 |
+ row.names=row.names, |
|
1023 |
+ col.names=col.names, |
|
1024 |
+ probs=probs, |
|
1025 |
+ DF=DF, |
|
1026 |
+ SNRMin=SNRMin, |
|
1027 |
+ recallMin=recallMin, |
|
1028 |
+ recallRegMin=recallRegMin, |
|
1029 |
+ gender=gender, |
|
1030 |
+ verbose=verbose, |
|
1031 |
+ returnParams=returnParams, |
|
1032 |
+ badSNP=badSNP) |
|
1033 |
+ |
|
1034 |
+# res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
|
1035 |
+# B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
|
1036 |
+# SNR=res[["SNR"]], # callSet$SNR, # [j], |
|
1037 |
+# mixtureParams=res[["mixtureParams"]], #, |
|
1038 |
+# cdfName=res[["cdfName"]], # annotation(callSet), |
|
1039 |
+# row.names=row.names, # featureNames(callSet)[snp.index], |
|
1040 |
+# col.names=col.names, # sampleNames(callSet), #[j], |
|
1041 |
+# probs=probs, |
|
1042 |
+# DF=DF, |
|
1043 |
+# SNRMin=SNRMin, |
|
1044 |
+# recallMin=recallMin, |
|
1045 |
+# recallRegMin=recallRegMin, |
|
1046 |
+# gender=gender, |
|
1047 |
+# verbose=verbose, |
|
1048 |
+# returnParams=returnParams, |
|
1049 |
+# badSNP=badSNP) |
|
970 | 1050 |
# rm(res); gc() |
971 | 1051 |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]]) |
972 | 1052 |
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]) |
... | ... |
@@ -1008,3 +1088,283 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1008 | 1088 |
# } |
1009 | 1089 |
# return(callSet) |
1010 | 1090 |
} |
1091 |
+ |
|
1092 |
+# Add function analogous to Rob's Affy functions to set up container |
|
1093 |
+getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat") { |
|
1094 |
+ narrays = length(filenames) |
|
1095 |
+ |
|
1096 |
+ headerInfo = list(nProbes = rep(NA, narrays), |
|
1097 |
+ Barcode = rep(NA, narrays), |
|
1098 |
+ ChipType = rep(NA, narrays), |
|
1099 |
+ Manifest = rep(NA, narrays), # not sure about this one - sometimes blank |
|
1100 |
+ Position = rep(NA, narrays)) # this may also vary a bit |
|
1101 |
+ |
|
1102 |
+ scanDates = data.frame(ScanDate=rep(NA, narrays), DecodeDate=rep(NA, narrays)) |
|
1103 |
+ rownames(scanDates) = gsub(paste(sep, fileExt, sep=""), "", filenames) |
|
1104 |
+ ## read in the data |
|
1105 |
+ for(i in seq_along(filenames)) { |
|
1106 |
+ cat("reading", filenames[i], "\n") |
|
1107 |
+ idsG = G = NULL |
|
1108 |
+ G = readIDAT(filenames[i]) |
|
1109 |
+ idsG = rownames(G$Quants) |
|
1110 |
+ headerInfo$nProbes[i] = G$nSNPsRead |
|
1111 |
+ headerInfo$Barcode[i] = G$Barcode |
|
1112 |
+ headerInfo$ChipType[i] = G$ChipType |
|
1113 |
+ headerInfo$Manifest[i] = G$Unknown$MostlyNull |
|
1114 |
+ headerInfo$Position[i] = G$Unknowns$MostlyA |
|
1115 |
+ if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) { |
|
1116 |
+ warning("Chips are not of the same type. Skipping ", basename(filenames[i])) |
|
1117 |
+ next() |
|
1118 |
+ } |
|
1119 |
+ scanDates$ScanDate[i] = G$RunInfo[1, 1] |
|
1120 |
+ scanDates$DecodeDate[i] = G$RunInfo[2, 1] |
|
1121 |
+ rm(G) |
|
1122 |
+ gc() |
|
1123 |
+ } |
|
1124 |
+ protocoldata = new("AnnotatedDataFrame", |
|
1125 |
+ data=scanDates, |
|
1126 |
+ varMetadata=data.frame(labelDescription=colnames(scanDates), |
|
1127 |
+ row.names=colnames(scanDates))) |
|
1128 |
+ return(protocoldata) |
|
1129 |
+} |
|
1130 |
+ |
|
1131 |
+ |
|
1132 |
+construct.Illumina <- function(sampleSheet=NULL, |
|
1133 |
+ arrayNames=NULL, |
|
1134 |
+ ids=NULL, |
|
1135 |
+ path=".", |
|
1136 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
1137 |
+ highDensity=FALSE, |
|
1138 |
+ sep="_", |
|
1139 |
+ fileExt=list(green="Grn.idat", |
|
1140 |
+ red="Red.idat"), |
|
1141 |
+ cdfName, |
|
1142 |
+ copynumber=TRUE, |
|
1143 |
+ verbose=TRUE, batch, fns, saveDate=TRUE){ |
|
1144 |
+ if(!is.null(arrayNames)) { |
|
1145 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
1146 |
+ } |
|
1147 |
+ if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
|
1148 |
+ if(is.null(arrayNames)) { |
|
1149 |
+ if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
1150 |
+ barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
1151 |
+ arrayNames=barcode |
|
1152 |
+ } |
|
1153 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
1154 |
+ position = sampleSheet[,arrayInfoColNames$position] |
|
1155 |
+ if(is.null(arrayNames)) |
|
1156 |
+ arrayNames=position |
|
1157 |
+ else |
|
1158 |
+ arrayNames = paste(arrayNames, position, sep=sep) |
|
1159 |
+ if(highDensity) { |
|
1160 |
+ hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
1161 |
+ for(i in names(hdExt)) |
|
1162 |
+ arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
1163 |
+ } |
|
1164 |
+ } |
|
1165 |
+ } |
|
1166 |
+ pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
1167 |
+ sampleNames(pd) <- basename(arrayNames) |
|
1168 |
+ } |
|
1169 |
+ if(is.null(arrayNames)) { |
|
1170 |
+ arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
1171 |
+ if(!is.null(sampleSheet)) { |
|
1172 |
+ sampleSheet=NULL |
|
1173 |
+ cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
1174 |
+ } |
|
1175 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
1176 |
+ } |
|
1177 |
+ |
|
1178 |
+ narrays = length(arrayNames) |
|
1179 |
+ |
|
1180 |
+ if(!missing(batch)) { |
|
1181 |
+ stopifnot(length(batch) == narrays) |
|
1182 |
+ } |
|
1183 |
+ if(missing(batch)) { |
|
1184 |
+ batch = as.factor(rep(1, narrays)) |
|
1185 |
+ } |
|
1186 |
+ |
|
1187 |
+ grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
|
1188 |
+ redfiles = paste(arrayNames, fileExt$red, sep=sep) |
|
1189 |
+ if(length(grnfiles)==0 || length(redfiles)==0) |
|
1190 |
+ stop("Cannot find .idat files") |
|
1191 |
+ if(length(grnfiles)!=length(redfiles)) |
|
1192 |
+ stop("Cannot find matching .idat files") |
|
1193 |
+ if(path[1] != "." & path[1] != ""){ |
|
1194 |
+ grnidats = file.path(path, grnfiles) |
|
1195 |
+ redidats = file.path(path, redfiles) |
|
1196 |
+ } else { |
|
1197 |
+ message("path arg not set. Assuming files are in local directory, or that complete path is provided") |
|
1198 |
+ grnidats = grnfiles |
|
1199 |
+ redidats = redfiles |
|
1200 |
+ } |
|
1201 |
+ if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files") |
|
1202 |
+ if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
1203 |
+ |
|
1204 |
+ if(verbose) message("Initializing container for copy number estimation") |
|
1205 |
+ featureData <- getFeatureData.Affy(cdfName, copynumber=copynumber) |
|
1206 |
+ if(!missing(fns)){ |
|
1207 |
+ index <- match(fns, featureNames(featureData)) |
|
1208 |
+ if(all(is.na(index))) stop("fns not in featureNames") |
|
1209 |
+ featureData <- featureData[index, ] |
|
1210 |
+ } |
|
1211 |
+ nr <- nrow(featureData); nc <- narrays |
|
1212 |
+ cnSet <- new("CNSet", |
|
1213 |
+ alleleA=initializeBigMatrix(name="A", nr, nc), |
|
1214 |
+ alleleB=initializeBigMatrix(name="B", nr, nc), |
|
1215 |
+ call=initializeBigMatrix(name="call", nr, nc), |
|
1216 |
+ callProbability=initializeBigMatrix(name="callPr", nr,nc), |
|
1217 |
+ annotation=cdfName, |
|
1218 |
+ batch=batch) |
|
1219 |
+ if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) { |
|
1220 |
+ sampleNames(cnSet) = sampleSheet$Sample_ID |
|
1221 |
+ } |
|
1222 |
+ else sampleNames(cnSet) = arrayNames |
|
1223 |
+ |
|
1224 |
+ if(saveDate){ |
|
1225 |
+ protocolData <- getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green) |
|
1226 |
+ } else{ |
|
1227 |
+ protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE) |
|
1228 |
+ } |
|
1229 |
+ rownames(pData(protocolData)) <- sampleNames(cnSet) # sns |
|
1230 |
+ protocolData(cnSet) <- protocolData |
|
1231 |
+ featureData(cnSet) <- featureData |
|
1232 |
+ featureNames(cnSet) <- featureNames(featureData) |
|
1233 |
+ pd <- data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) # sns) |
|
1234 |
+ colnames(pd)=c("SKW", "SNR", "gender") |
|
1235 |
+ phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd) |
|
1236 |
+ return(cnSet) |
|
1237 |
+} |
|
1238 |
+ |
|
1239 |
+genotype.Illumina <- function(sampleSheet=NULL, |
|
1240 |
+ arrayNames=NULL, |
|
1241 |
+ ids=NULL, |
|
1242 |
+ path=".", |
|
1243 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
1244 |
+ highDensity=FALSE, |
|
1245 |
+ sep="_", |
|
1246 |
+ fileExt=list(green="Grn.idat", |
|
1247 |
+ red="Red.idat"), |
|
1248 |
+ cdfName, |
|
1249 |
+ copynumber=TRUE, |
|
1250 |
+ batch, |
|
1251 |
+ fns, |
|
1252 |
+ saveDate=TRUE, |
|
1253 |
+ stripNorm=TRUE, |
|
1254 |
+ useTarget=TRUE, |
|
1255 |
+ mixtureSampleSize=10^5, |
|
1256 |
+ eps=0.1, |
|
1257 |
+ verbose=TRUE, |
|
1258 |
+ seed=1, |
|
1259 |
+ sns, |
|
1260 |
+ probs=rep(1/3, 3), |
|
1261 |
+ DF=6, |
|
1262 |
+ SNRMin=5, |
|
1263 |
+ recallMin=10, |
|
1264 |
+ recallRegMin=1000, |
|
1265 |
+ gender=NULL, |
|
1266 |
+ returnParams=TRUE, |
|
1267 |
+ badSNP=0.7) { |
|
1268 |
+ is.lds <- ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
1269 |
+ if(missing(cdfName)) stop("must specify cdfName") |
|
1270 |
+ if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
|
1271 |
+ callSet <- construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
1272 |
+ ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
1273 |
+ highDensity=highDensity, sep=sep, fileExt=fileExt, |
|
1274 |
+ cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, |
|
1275 |
+ fns=fns, saveDate=saveDate) |
|
1276 |
+ open(callSet) |
|
1277 |
+ mixtureParams <- matrix(NA, 4, nrow(callSet)) |
|
1278 |
+ is.snp <- isSnp(callSet) |
|
1279 |
+ snp.index <- which(is.snp) |
|
1280 |
+ |
|
1281 |
+ RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
1282 |
+ ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
1283 |
+ highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate) |
|
1284 |
+ |
|
1285 |
+ XY = RGtoXY(RG, chipType=cdfName) |
|
1286 |
+ rm(RG); gc() |
|
1287 |
+ if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY) |
|
1288 |
+ } # else subsns = sns[j] |
|
1289 |
+ |
|
1290 |
+ res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
1291 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) |
|
1292 |
+ # save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
1293 |
+ rm(XY); gc() |
|
1294 |
+ if(verbose) message("Finished preprocessing.") |
|
1295 |
+ if(is.lds){ |
|
1296 |
+ open(res[["A"]]) |
|
1297 |
+ open(res[["B"]]) |
|
1298 |
+ open(res[["SNR"]]) |
|
1299 |
+ open(res[["mixtureParams"]]) |
|
1300 |
+ bb = ocProbesets()*ncol(A)*8 |
|
1301 |
+ ffrowapply(A(callSet)[i1:i2, ] <- res[["A"]][i1:i2, ], X=res[["A"]], BATCHBYTES=bb) |
|
1302 |
+ ffrowapply(B(callSet)[i1:i2, ] <- res[["B"]][i1:i2, ], X=res[["B"]], BATCHBYTES=bb) |
|
1303 |
+ } else{ |
|
1304 |
+ A(callSet)[snp.index, ] <- res[["A"]] |
|
1305 |
+ B(callSet)[snp.index, ] <- res[["B"]] |
|
1306 |
+ } |
|
1307 |
+ pData(callSet)$SKW <- res[["SKW"]] |
|
1308 |
+ pData(callSet)$SNR <- res[["SNR"]] |
|
1309 |
+ mixtureParams <- res[["mixtureParams"]] |
|
1310 |
+# np.index <- which(!is.snp) |
|
1311 |
+# if(verbose) message("Normalizing nonpolymorphic markers") |
|
1312 |
+# FUN <- ifelse(is.lds, "cnrma2", "cnrma") |
|
1313 |
+ ## main purpose is to update 'alleleA' |
|
1314 |
+# cnrmaFxn <- function(FUN,...){ |
|
1315 |
+# switch(FUN, |
|
1316 |
+# cnrma=cnrma(...), |
|
1317 |
+# cnrma2=cnrma2(...)) |
|
1318 |
+# } |
|
1319 |
+ ## consider passing only A for NPs. |
|
1320 |
+# AA <- cnrmaFxn(FUN, A=A(callSet), |
|
1321 |
+# filenames=filenames, |
|
1322 |
+# row.names=featureNames(callSet)[np.index], |
|
1323 |
+# cdfName=cdfName, |
|
1324 |
+# sns=sns, |
|
1325 |
+# seed=seed, |
|
1326 |
+# verbose=verbose) |
|
1327 |
+# if(!is.lds) A(callSet) <- AA |
|
1328 |
+# rm(AA) |
|
1329 |
+ FUN <- ifelse(is.lds, "crlmmGT2", "crlmmGT") |
|
1330 |
+ ## genotyping |
|
1331 |
+ crlmmGTfxn <- function(FUN,...){ |
|
1332 |
+ switch(FUN, |
|
1333 |
+ crlmmGT2=crlmmGT2(...), |
|
1334 |
+ crlmmGT=crlmmGT(...)) |
|
1335 |
+ } |
|
1336 |
+ tmp <- crlmmGTfxn(FUN, |
|
1337 |
+ A=res[["A"]], |
|
1338 |
+ B=res[["B"]], |
|
1339 |
+ SNR=res[["SNR"]], |
|
1340 |
+ mixtureParams=res[["mixtureParams"]], |
|
1341 |
+ cdfName=cdfName, |
|
1342 |
+ row.names=NULL, |
|
1343 |
+ col.names=sampleNames(callSet), |
|
1344 |
+ probs=probs, |
|
1345 |
+ DF=DF, |
|
1346 |
+ SNRMin=SNRMin, |
|
1347 |
+ recallMin=recallMin, |
|
1348 |
+ recallRegMin=recallRegMin, |
|
1349 |
+ gender=gender, |
|
1350 |
+ verbose=verbose, |
|
1351 |
+ returnParams=returnParams, |
|
1352 |
+ badSNP=badSNP) |
|
1353 |
+ rm(res); gc() |
|
1354 |
+ if(verbose) message("Genotyping finished. Updating container with genotype calls and confidence scores.") |
|
1355 |
+ if(is.lds){ |
|
1356 |
+ open(tmp[["calls"]]) |
|
1357 |
+ open(tmp[["confs"]]) |
|
1358 |
+ ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb) |
|
1359 |
+ ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb) |
|
1360 |
+ close(tmp[["calls"]]) |
|
1361 |
+ close(tmp[["confs"]]) |
|
1362 |
+ } else { |
|
1363 |
+ calls(callSet)[snp.index, ] <- tmp[["calls"]] |
|
1364 |
+ snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]] |
|
1365 |
+ } |
|
1366 |
+ callSet$gender <- tmp$gender |
|
1367 |
+ close(callSet) |
|
1368 |
+ return(callSet) |
|
1369 |
+} |
|
1370 |
+ |
... | ... |
@@ -10,8 +10,7 @@ |
10 | 10 |
crlmmIllumina(RG, XY, stripNorm=TRUE, |
11 | 11 |
useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
12 | 12 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, |
13 |
- gender=NULL, seed=1, save.it=FALSE, load.it=FALSE, |
|
14 |
- snpFile, cnFile, mixtureSampleSize=10^5, |
|
13 |
+ gender=NULL, seed=1, mixtureSampleSize=10^5, |
|
15 | 14 |
eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
16 | 15 |
recallRegMin=1000, returnParams=FALSE, badSNP=0.7) |
17 | 16 |
} |
... | ... |
@@ -32,12 +31,6 @@ crlmmIllumina(RG, XY, stripNorm=TRUE, |
32 | 31 |
defining sex. (1 - male; 2 - female)} |
33 | 32 |
\item{seed}{'integer' scalar for random number generator (used to |
34 | 33 |
sample \code{mixtureSampleSize} SNPs for mixture model.} |
35 |
- \item{save.it}{'logical'. Save preprocessed SNP and copy number data?} |
|
36 |
- \item{load.it}{'logical'. Load preprocessed SNP data to speed up analysis?} |
|
37 |
- \item{snpFile}{'character' with filename of preprocessed SNP data to |
|
38 |
- be saved/loaded.} |
|
39 |
- \item{cnFile}{'character' with filename of preprocessed copy number |
|
40 |
- data to be saved.} |
|
41 | 34 |
\item{mixtureSampleSize}{'integer'. The number of SNP's to be used |
42 | 35 |
when fitting the mixture model.} |
43 | 36 |
\item{eps}{Minimum change for mixture model.} |
... | ... |
@@ -66,11 +59,7 @@ crlmmIllumina(RG, XY, stripNorm=TRUE, |
66 | 59 |
\details{ |
67 | 60 |
|
68 | 61 |
Note: The user should specify either the \code{RG} or \code{XY} |
69 |
- intensities, not both. Alternatively if \code{crlmmIllumina} has been |
|
70 |
- run already with \code{save.it=TRUE}, the preprocessed data can be |
|
71 |
- loaded from file by specifying \code{load.it=TRUE} and |
|
72 |
- \code{intensityFile} (\code{RG} or \code{XY} are not needed in this case). |
|
73 |
- |
|
62 |
+ intensities, not both. |
|
74 | 63 |
} |
75 | 64 |
|
76 | 65 |
\references{ |
... | ... |
@@ -12,8 +12,7 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
12 | 12 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), |
13 | 13 |
saveDate=FALSE, stripNorm=TRUE, useTarget=TRUE, row.names=TRUE, col.names=TRUE, |
14 | 14 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
15 |
- seed=1, save.ab=FALSE, snpFile, cnFile, |
|
16 |
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
15 |
+ seed=1, mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
17 | 16 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
18 | 17 |
returnParams=FALSE, badSNP=.7) |
19 | 18 |
} |
... | ... |
@@ -58,11 +57,6 @@ crlmmIlluminaV2(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
58 | 57 |
defining sex. (1 - male; 2 - female)} |
59 | 58 |
\item{seed}{'integer' scalar for random number generator (used to |
60 | 59 |
sample \code{mixtureSampleSize} SNPs for mixture model.} |
61 |
- \item{save.it}{'logical'. Save preprocessed SNP and copy number data?} |
|
62 |
- \item{snpFile}{'character' with filename of preprocessed SNP data to |
|
63 |
- be saved/loaded.} |
|
64 |
- \item{cnFile}{'character' with filename of preprocessed copy number |
|
65 |
- data to be saved.} |
|
66 | 60 |
\item{mixtureSampleSize}{'integer'. The number of SNP's to be used |
67 | 61 |
when fitting the mixture model.} |
68 | 62 |
\item{eps}{Minimum change for mixture model.} |