git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@37668 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,11 @@ |
1 |
+2009-03-01 Benilton Carvalho <bcarvalh@jhsph.edu> |
|
2 |
+ |
|
3 |
+* Added TODO file |
|
4 |
+ |
|
5 |
+* Added CHANGES file |
|
6 |
+ |
|
7 |
+* Reduced number of files in R/ by combining functions for a given methodology (crlmm / cnrma) |
|
8 |
+ |
|
9 |
+* Files with suffix XYZ-functions.R should have *functions* for methodology XYZ |
|
10 |
+ |
|
11 |
+* Files with suffix XYZ-methods.R should have *methods* for methodology XYZ |
... | ... |
@@ -9,15 +9,12 @@ Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays. |
9 | 9 |
Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase |
10 | 10 |
License: Artistic-2.0 |
11 | 11 |
Suggests: hapmapsnp5, genomewidesnp5Crlmm, genomewidesnp6Crlmm, methods |
12 |
-Collate: crlmmGT.R |
|
13 |
- crlmm.R |
|
12 |
+Collate: crlmm-functions.R |
|
14 | 13 |
fitAffySnpMixture56.R |
15 | 14 |
snprma.R |
16 | 15 |
utils.R |
17 | 16 |
zzz.R |
18 |
- crlmmGTnm.R |
|
19 |
- crlmmnm.R |
|
20 | 17 |
AllClasses.R |
21 |
- Methods.R |
|
18 |
+ crlmm-methods.R |
|
22 | 19 |
functions.R |
23 | 20 |
LazyLoad: yes |
27 | 24 |
similarity index 60% |
28 | 25 |
rename from R/crlmmGT.R |
29 | 26 |
rename to R/crlmm-functions.R |
... | ... |
@@ -1,3 +1,51 @@ |
1 |
+crlmm <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
2 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
3 |
+ save.it=FALSE, load.it=FALSE, intensityFile, |
|
4 |
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
5 |
+ cdfName, sns, recallMin=10, recallRegMin=1000, |
|
6 |
+ returnParams=FALSE, badSNP=.7){ |
|
7 |
+ if ((load.it | save.it) & missing(intensityFile)) |
|
8 |
+ stop("'intensityFile' is missing, and you chose either load.it or save.it") |
|
9 |
+ |
|
10 |
+ if (missing(sns)) sns <- basename(filenames) |
|
11 |
+ if (!missing(intensityFile)) |
|
12 |
+ if (load.it & !file.exists(intensityFile)){ |
|
13 |
+ load.it <- FALSE |
|
14 |
+ message("File ", intensityFile, " does not exist.") |
|
15 |
+ message("Not loading it, but running SNPRMA from scratch.") |
|
16 |
+ } |
|
17 |
+ if (!load.it){ |
|
18 |
+ res <- snprma(filenames, fitMixture=TRUE, |
|
19 |
+ mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
20 |
+ eps=eps, cdfName=cdfName, sns=sns) |
|
21 |
+ if(save.it){ |
|
22 |
+ t0 <- proc.time() |
|
23 |
+ save(res, file=intensityFile) |
|
24 |
+ t0 <- proc.time()-t0 |
|
25 |
+ if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".") |
|
26 |
+ } |
|
27 |
+ }else{ |
|
28 |
+ if (verbose) message("Loading ", intensityFile, ".") |
|
29 |
+ obj <- load(intensityFile) |
|
30 |
+ if (verbose) message("Done.") |
|
31 |
+ if (obj != "res") |
|
32 |
+ stop("Object in ", intensityFile, " seems to be invalid.") |
|
33 |
+ } |
|
34 |
+ if(row.names) row.names=res$gns else row.names=NULL |
|
35 |
+ if(col.names) col.names=res$sns else col.names=NULL |
|
36 |
+ res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
37 |
+ res[["mixtureParams"]], res[["cdfName"]], |
|
38 |
+ gender=gender, row.names=row.names, |
|
39 |
+ col.names=col.names, recallMin=recallMin, |
|
40 |
+ recallRegMin=1000, SNRMin=SNRMin, |
|
41 |
+ returnParams=returnParams, badSNP=badSNP, |
|
42 |
+ verbose=verbose) |
|
43 |
+ |
|
44 |
+ res2[["SNR"]] <- res[["SNR"]] |
|
45 |
+ res2[["SKW"]] <- res[["SKW"]] |
|
46 |
+ return(res2) |
|
47 |
+} |
|
48 |
+ |
|
1 | 49 |
crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
2 | 50 |
col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
3 | 51 |
SNRMin=5, recallMin=10, recallRegMin=1000, |
... | ... |
@@ -225,13 +273,121 @@ gtypeCallerR2 <- function(A, B, fIndex, mIndex, theCenters, theScales, |
225 | 273 |
|
226 | 274 |
|
227 | 275 |
|
276 |
+############################### |
|
277 |
+####### THIS IS TEMPORARY NOT OFFICIALLY USED |
|
278 |
+##################################### |
|
279 |
+crlmmNM <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
280 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
281 |
+ save.it=FALSE, load.it=FALSE, |
|
282 |
+ intensityFile="tmpcrlmmintensities.rda", |
|
283 |
+ desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
284 |
+ verbose=TRUE, cdfName, sns, recallMin=10, |
|
285 |
+ recallRegMin=1000, returnParams=FALSE){ |
|
286 |
+ |
|
287 |
+ if (missing(sns)) sns <- basename(filenames) |
|
288 |
+ if (load.it & !file.exists(intensityFile)){ |
|
289 |
+ load.it <- FALSE |
|
290 |
+ message("File ", intensityFile, " does not exist.") |
|
291 |
+ message("Not loading it, but running SNPRMA from scratch.") |
|
292 |
+ } |
|
293 |
+ if (!load.it){ |
|
294 |
+ res <- snprma(filenames, fitMixture=TRUE, |
|
295 |
+ mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
296 |
+ eps=eps, cdfName=cdfName, sns=sns) |
|
297 |
+ if(save.it){ |
|
298 |
+ t0 <- proc.time() |
|
299 |
+ save(res, file=intensityFile) |
|
300 |
+ t0 <- proc.time()-t0 |
|
301 |
+ message("Used ", t0[3], " seconds to save ", intensityFile, ".") |
|
302 |
+ } |
|
303 |
+ }else{ |
|
304 |
+ message("Loading ", intensityFile, ".") |
|
305 |
+ obj <- load(intensityFile) |
|
306 |
+ message("Done.") |
|
307 |
+ if (obj != "res") |
|
308 |
+ stop("Object in ", intensityFile, " seems to be invalid.") |
|
309 |
+ } |
|
310 |
+ if(row.names) row.names=res$gns else row.names=NULL |
|
311 |
+ if(col.names) col.names=res$sns else col.names=NULL |
|
312 |
+ |
|
313 |
+ res2 <- crlmmGTnm(res[["A"]], res[["B"]], res[["SNR"]], |
|
314 |
+ res[["mixtureParams"]], res[["cdfName"]], |
|
315 |
+ gender=gender, row.names=row.names, |
|
316 |
+ col.names=col.names, recallMin=recallMin, |
|
317 |
+ recallRegMin=1000, SNRMin=SNRMin, |
|
318 |
+ returnParams=returnParams) |
|
228 | 319 |
|
320 |
+ res2[["SNR"]] <- res[["SNR"]] |
|
321 |
+ |
|
322 |
+ return(res2) |
|
323 |
+} |
|
324 |
+ |
|
325 |
+crlmmTNoN <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
326 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
327 |
+ save.it=FALSE, load.it=FALSE, |
|
328 |
+ intensityFile="tmpcrlmmintensities.rda", |
|
329 |
+ desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
330 |
+ verbose=TRUE){ |
|
331 |
+ if (load.it & !file.exists(intensityFile)){ |
|
332 |
+ load.it <- FALSE |
|
333 |
+ message("File ", intensityFile, " does not exist.") |
|
334 |
+ message("Not loading it, but running SNPRMA from scratch.") |
|
335 |
+ } |
|
336 |
+ if (!load.it){ |
|
337 |
+ res <- snprma(filenames, fitMixture=TRUE, |
|
338 |
+ mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
339 |
+ eps=eps) |
|
340 |
+ if(save.it) save(res, file=intensityFile) |
|
341 |
+ }else{ |
|
342 |
+ message("Loading ", intensityFile, ".") |
|
343 |
+ obj <- load(intensityFile) |
|
344 |
+ message("Done.") |
|
345 |
+ if (obj != "res") |
|
346 |
+ stop("Object in ", intensityFile, " seems to be invalid.") |
|
347 |
+ } |
|
348 |
+ if(row.names) row.names=res$gns else row.names=NULL |
|
349 |
+ if(col.names) col.names=res$sns else col.names=NULL |
|
350 |
+ res2 <- crlmmGTTNoN(res[["A"]], res[["B"]], res[["SNR"]], |
|
351 |
+ res[["mixtureParams"]], res[["cdfName"]], |
|
352 |
+ gender=gender, row.names=row.names, |
|
353 |
+ col.names=col.names) |
|
354 |
+ res2[["SNR"]] <- res[["SNR"]] |
|
355 |
+ return(res2) |
|
356 |
+} |
|
357 |
+ |
|
358 |
+crlmmNormalNoN <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
359 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
360 |
+ save.it=FALSE, load.it=FALSE, |
|
361 |
+ intensityFile="tmpcrlmmintensities.rda", |
|
362 |
+ desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
363 |
+ verbose=TRUE){ |
|
364 |
+ if (load.it & !file.exists(intensityFile)){ |
|
365 |
+ load.it <- FALSE |
|
366 |
+ message("File ", intensityFile, " does not exist.") |
|
367 |
+ message("Not loading it, but running SNPRMA from scratch.") |
|
368 |
+ } |
|
369 |
+ if (!load.it){ |
|
370 |
+ res <- snprma(filenames, fitMixture=TRUE, |
|
371 |
+ mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
372 |
+ eps=eps) |
|
373 |
+ if(save.it) save(res, file=intensityFile) |
|
374 |
+ }else{ |
|
375 |
+ message("Loading ", intensityFile, ".") |
|
376 |
+ obj <- load(intensityFile) |
|
377 |
+ message("Done.") |
|
378 |
+ if (obj != "res") |
|
379 |
+ stop("Object in ", intensityFile, " seems to be invalid.") |
|
380 |
+ } |
|
381 |
+ if(row.names) row.names=res$gns else row.names=NULL |
|
382 |
+ if(col.names) col.names=res$sns else col.names=NULL |
|
383 |
+ res2 <- crlmmGTNormalNoN(res[["A"]], res[["B"]], res[["SNR"]], |
|
384 |
+ res[["mixtureParams"]], res[["cdfName"]], |
|
385 |
+ gender=gender, row.names=row.names, |
|
386 |
+ col.names=col.names) |
|
387 |
+ res2[["SNR"]] <- res[["SNR"]] |
|
388 |
+ return(res2) |
|
389 |
+} |
|
229 | 390 |
|
230 |
-################## |
|
231 |
-################## |
|
232 |
-### THIS IS TEMPORARY AND NOT OFFICIALLY USED |
|
233 |
-################## |
|
234 |
-#################### |
|
235 | 391 |
crlmmGTTNoN <- function(A, B, SNR, mixtureParams, cdfName, |
236 | 392 |
row.names=NULL, col.names=NULL, probs=c(1/3, |
237 | 393 |
1/3, 1/3), DF=6, SNRMin=6, gender=NULL, |
... | ... |
@@ -559,3 +715,210 @@ gtypeCallerR2NormalNoN <- function(A, B, fIndex, mIndex, theCenters, theScales, |
559 | 715 |
} |
560 | 716 |
|
561 | 717 |
|
718 |
+crlmmGTnm <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
|
719 |
+ col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
|
720 |
+ SNRMin=5, recallMin=10, recallRegMin=1000, |
|
721 |
+ gender=NULL, desctrucitve=FALSE, verbose=TRUE, |
|
722 |
+ returnParams=FALSE){ |
|
723 |
+ |
|
724 |
+ keepIndex <- which(SNR>SNRMin) |
|
725 |
+ if(length(keepIndex)==0) stop("No arrays above quality threshold!") |
|
726 |
+ |
|
727 |
+ NC <- ncol(A) |
|
728 |
+ NR <- nrow(A) |
|
729 |
+ |
|
730 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
731 |
+ if(!require(pkgname, character.only=TRUE)){ |
|
732 |
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
733 |
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
734 |
+ message(strwrap(msg)) |
|
735 |
+ stop("Package ", pkgname, " could not be found.") |
|
736 |
+ rm(suggCall, msg) |
|
737 |
+ } |
|
738 |
+ |
|
739 |
+ if(verbose) message("Loading annotations.") |
|
740 |
+ data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv) |
|
741 |
+ |
|
742 |
+ ## this is toget rid of the 'no visible binding' notes |
|
743 |
+ ## variable definitions |
|
744 |
+ XIndex <- getVarInEnv("XIndex") |
|
745 |
+ autosomeIndex <- getVarInEnv("autosomeIndex") |
|
746 |
+ YIndex <- getVarInEnv("YIndex") |
|
747 |
+ SMEDIAN <- getVarInEnv("SMEDIAN") |
|
748 |
+ theKnots <- getVarInEnv("theKnots") |
|
749 |
+ regionInfo <- getVarInEnv("regionInfo") |
|
750 |
+ |
|
751 |
+ ##IF gender not provide, we predict |
|
752 |
+ if(is.null(gender)){ |
|
753 |
+ if(verbose) message("Determining gender.") |
|
754 |
+ XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
755 |
+ if(sum(SNR>SNRMin)==1){ |
|
756 |
+ gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
|
757 |
+ }else{ |
|
758 |
+ gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]] |
|
759 |
+ } |
|
760 |
+ } |
|
761 |
+ |
|
762 |
+ Indexes <- list(autosomeIndex, XIndex, YIndex) |
|
763 |
+ cIndexes <- list(keepIndex, |
|
764 |
+ keepIndex[which(gender[keepIndex]==2)], |
|
765 |
+ keepIndex[which(gender[keepIndex]==1)]) |
|
766 |
+ |
|
767 |
+ if(verbose) cat("Calling", NR, "SNPs for recalibration") |
|
768 |
+ |
|
769 |
+ ## call C |
|
770 |
+ fIndex <- which(gender==2) |
|
771 |
+ mIndex <- which(gender==1) |
|
772 |
+ t0 <- proc.time() |
|
773 |
+ newparams <- gtypeCallerRnm(A, B, fIndex, mIndex, |
|
774 |
+ params[["centers"]], params[["scales"]], params[["N"]], |
|
775 |
+ Indexes, cIndexes, |
|
776 |
+ sapply(Indexes, length), sapply(cIndexes, length), |
|
777 |
+ SMEDIAN, theKnots, |
|
778 |
+ mixtureParams, DF, probs, 0.025) |
|
779 |
+ t0 <- proc.time()-t0 |
|
780 |
+ message("Part 1 took ", t0[3], " seconds.") |
|
781 |
+ gc(verbose=FALSE) |
|
782 |
+ names(newparams) <- c("centers", "scales", "N") |
|
783 |
+ |
|
784 |
+ if(verbose) message("Done.") |
|
785 |
+ if(verbose) message("Estimating recalibration parameters.") |
|
786 |
+ d <- newparams[["centers"]] - params$centers |
|
787 |
+ |
|
788 |
+ ##regression |
|
789 |
+ Index <- intersect(which(pmin(newparams[["N"]][, 1], |
|
790 |
+ newparams[["N"]][, 2], |
|
791 |
+ newparams[["N"]][, 3]) > recallMin & |
|
792 |
+ !apply(regionInfo, 1, any)), |
|
793 |
+ autosomeIndex) |
|
794 |
+ |
|
795 |
+ if(length(Index) < recallRegMin){ |
|
796 |
+ warning("Recallibration not possible.") |
|
797 |
+ newparams <- params |
|
798 |
+ dev <- vector("numeric", nrow(newparams[["centers"]])) |
|
799 |
+ SS <- matrix(Inf, 3, 3) |
|
800 |
+ }else{ |
|
801 |
+ data4reg <- as.data.frame(newparams[["centers"]][Index,]) |
|
802 |
+ names(data4reg) <- c("AA", "AB", "BB") |
|
803 |
+ regParams <- cbind( coef(lm(AA~AB*BB, data=data4reg)), |
|
804 |
+ c(coef(lm(AB~AA+BB, data=data4reg)), 0), |
|
805 |
+ coef(lm(BB~AA*AB, data=data4reg))) |
|
806 |
+ rownames(regParams) <- c("intercept", "X", "Y", "XY") |
|
807 |
+ rm(data4reg) |
|
808 |
+ |
|
809 |
+ minN <- 3 |
|
810 |
+ newparams[["centers"]][newparams[["N"]] < minN] <- NA |
|
811 |
+ Index <- setdiff(which(rowSums(is.na(newparams[["centers"]]))==1), YIndex) |
|
812 |
+ if(verbose) cat("Filling out empty centers") |
|
813 |
+ for(i in Index){ |
|
814 |
+ if(verbose) if(i%%10000==0)cat(".") |
|
815 |
+ mu <- newparams[["centers"]][i, ] |
|
816 |
+ j <- which(is.na(mu)) |
|
817 |
+ newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j] |
|
818 |
+ } |
|
819 |
+ |
|
820 |
+ ##remaing NAs are made like originals |
|
821 |
+ if(length(YIndex)>0){ |
|
822 |
+ noMoveIndex <- union(setdiff(which(rowSums(is.na(newparams[["centers"]]))>0), YIndex), |
|
823 |
+ YIndex[rowSums(is.na(newparams[["centers"]][YIndex, ])>1)]) |
|
824 |
+ } |
|
825 |
+ newparams[["centers"]][is.na(newparams[["centers"]])] <- params[["centers"]][is.na(newparams[["centers"]])] |
|
826 |
+ if(verbose) cat("\n") |
|
827 |
+ |
|
828 |
+ if(verbose) message("Calculating and standardizing size of shift.") |
|
829 |
+ DD <- newparams[["centers"]] - params[["centers"]] |
|
830 |
+ DD <- sweep(DD, 2, colMeans(DD[autosomeIndex, ])) |
|
831 |
+ SS <- cov(DD[autosomeIndex, ]) |
|
832 |
+ SSI <- solve(SS) |
|
833 |
+ dev <- vector("numeric", nrow(DD)) |
|
834 |
+ if(length(YIndex)){ |
|
835 |
+ dev[-YIndex] <- apply(DD[-YIndex, ], 1, function(x) x%*%SSI%*%x) |
|
836 |
+ dev[-YIndex] <- 1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev[-YIndex]) |
|
837 |
+ ##Now Y (only two params) |
|
838 |
+ SSY <- SS[c(1, 3), c(1, 3)] |
|
839 |
+ SSI <- solve(SSY) |
|
840 |
+ dev[YIndex] <- apply(DD[YIndex, c(1, 3)], 1, function(x) x%*%SSI%*%x) |
|
841 |
+ dev[YIndex] <- 1/sqrt( (2*pi)^2*det(SSY))*exp(-0.5*dev[YIndex]) |
|
842 |
+ } else { |
|
843 |
+ dev=apply(DD,1,function(x) x%*%SSI%*%x) |
|
844 |
+ dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev) |
|
845 |
+ } |
|
846 |
+ } |
|
847 |
+ |
|
848 |
+ ## BC: must keep SD |
|
849 |
+ params[-2] <- newparams[-2] |
|
850 |
+ |
|
851 |
+ rm(newparams);gc(verbose=FALSE) |
|
852 |
+ if(verbose) cat("Calling", NR, "SNPs") |
|
853 |
+ ## ################### |
|
854 |
+ ## ## MOVE TO C####### |
|
855 |
+ t0 <- proc.time() |
|
856 |
+ ImNull <- gtypeCallerR2nm(A, B, fIndex, mIndex, params[["centers"]], |
|
857 |
+ params[["scales"]], params[["N"]], Indexes, |
|
858 |
+ cIndexes, sapply(Indexes, length), |
|
859 |
+ sapply(cIndexes, length), SMEDIAN, theKnots, |
|
860 |
+ mixtureParams, DF, probs, 0.025, |
|
861 |
+ which(regionInfo[,2]), |
|
862 |
+ which(regionInfo[,1])) |
|
863 |
+ t0 <- proc.time()-t0 |
|
864 |
+ gc(verbose=FALSE) |
|
865 |
+ message("\n Part 2 took ", t0[3], " seconds.") |
|
866 |
+ ## END MOVE TO C####### |
|
867 |
+ ## ################## |
|
868 |
+ |
|
869 |
+ dev <- dev/(dev+1/383) |
|
870 |
+ if(!is.null(row.names)){ rownames(A) <- rownames(B) <- names(dev) <- row.names} |
|
871 |
+ if(!is.null(col.names)){ colnames(A) <- colnames(B) <- col.names} |
|
872 |
+ |
|
873 |
+ if(verbose) message("Done.") |
|
874 |
+ if (returnParams){ |
|
875 |
+ return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS)), params=params)) |
|
876 |
+ }else{ |
|
877 |
+ return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS)))) |
|
878 |
+ } |
|
879 |
+} |
|
880 |
+ |
|
881 |
+ |
|
882 |
+gtypeCallerRnm <- function(A, B, fIndex, mIndex, theCenters, theScales, |
|
883 |
+ theNs, Indexes, cIndexes, nIndexes, |
|
884 |
+ ncIndexes, SMEDIAN, knots, params, dft, |
|
885 |
+ probs, trim){ |
|
886 |
+ |
|
887 |
+ stopifnot(!missing(A), !missing(B), dim(A)==dim(B), |
|
888 |
+ nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales), |
|
889 |
+ nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4, |
|
890 |
+ ncol(params)==ncol(A), length(trim)==1, length(probs)==3) |
|
891 |
+ |
|
892 |
+ ## make code robust |
|
893 |
+ ## check types before passing to C |
|
894 |
+ |
|
895 |
+ .Call("gtypeCallerPart1nm", A, B, |
|
896 |
+ as.integer(fIndex), as.integer(mIndex), |
|
897 |
+ as.numeric(theCenters), as.numeric(theScales), |
|
898 |
+ as.integer(theNs), lapply(Indexes, as.integer), lapply(cIndexes, as.integer), as.integer(nIndexes), as.integer(ncIndexes), |
|
899 |
+ as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params), |
|
900 |
+ as.integer(dft), as.numeric(probs), as.numeric(trim), |
|
901 |
+ PACKAGE="crlmm") |
|
902 |
+ |
|
903 |
+} |
|
904 |
+ |
|
905 |
+gtypeCallerR2nm <- function(A, B, fIndex, mIndex, theCenters, theScales, |
|
906 |
+ theNs, Indexes, cIndexes, nIndexes, |
|
907 |
+ ncIndexes, SMEDIAN, knots, params, dft, |
|
908 |
+ probs, trim, noTraining, noInfo){ |
|
909 |
+ |
|
910 |
+ stopifnot(!missing(A), !missing(B), dim(A)==dim(B), |
|
911 |
+ nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales), |
|
912 |
+ nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4, |
|
913 |
+ ncol(params)==ncol(A), length(trim)==1, length(probs)==3) |
|
914 |
+ |
|
915 |
+ .Call("gtypeCallerPart2nm", A, B, |
|
916 |
+ as.integer(fIndex), as.integer(mIndex), |
|
917 |
+ as.numeric(theCenters), as.numeric(theScales), |
|
918 |
+ as.integer(theNs), Indexes, cIndexes, nIndexes, ncIndexes, |
|
919 |
+ as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params), |
|
920 |
+ as.integer(dft), as.numeric(probs), as.numeric(trim), |
|
921 |
+ as.integer(noTraining), as.integer(noInfo), PACKAGE="crlmm") |
|
922 |
+ |
|
923 |
+} |
|
924 |
+ |
565 | 928 |
deleted file mode 100644 |
... | ... |
@@ -1,118 +0,0 @@ |
1 |
-crlmm <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
2 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
3 |
- save.it=FALSE, load.it=FALSE, intensityFile, |
|
4 |
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
5 |
- cdfName, sns, recallMin=10, recallRegMin=1000, |
|
6 |
- returnParams=FALSE, badSNP=.7){ |
|
7 |
- if ((load.it | save.it) & missing(intensityFile)) |
|
8 |
- stop("'intensityFile' is missing, and you chose either load.it or save.it") |
|
9 |
- |
|
10 |
- if (missing(sns)) sns <- basename(filenames) |
|
11 |
- if (!missing(intensityFile)) |
|
12 |
- if (load.it & !file.exists(intensityFile)){ |
|
13 |
- load.it <- FALSE |
|
14 |
- message("File ", intensityFile, " does not exist.") |
|
15 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
16 |
- } |
|
17 |
- if (!load.it){ |
|
18 |
- res <- snprma(filenames, fitMixture=TRUE, |
|
19 |
- mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
20 |
- eps=eps, cdfName=cdfName, sns=sns) |
|
21 |
- if(save.it){ |
|
22 |
- t0 <- proc.time() |
|
23 |
- save(res, file=intensityFile) |
|
24 |
- t0 <- proc.time()-t0 |
|
25 |
- if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".") |
|
26 |
- } |
|
27 |
- }else{ |
|
28 |
- if (verbose) message("Loading ", intensityFile, ".") |
|
29 |
- obj <- load(intensityFile) |
|
30 |
- if (verbose) message("Done.") |
|
31 |
- if (obj != "res") |
|
32 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
33 |
- } |
|
34 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
35 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
36 |
- res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
37 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
38 |
- gender=gender, row.names=row.names, |
|
39 |
- col.names=col.names, recallMin=recallMin, |
|
40 |
- recallRegMin=1000, SNRMin=SNRMin, |
|
41 |
- returnParams=returnParams, badSNP=badSNP, |
|
42 |
- verbose=verbose) |
|
43 |
- |
|
44 |
- res2[["SNR"]] <- res[["SNR"]] |
|
45 |
- res2[["SKW"]] <- res[["SKW"]] |
|
46 |
- return(res2) |
|
47 |
-} |
|
48 |
- |
|
49 |
- |
|
50 |
-############################### |
|
51 |
-####### THIS IS TEMPORARY NOT OFFICIALLY USED |
|
52 |
-##################################### |
|
53 |
- |
|
54 |
-crlmmTNoN <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
55 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
56 |
- save.it=FALSE, load.it=FALSE, |
|
57 |
- intensityFile="tmpcrlmmintensities.rda", |
|
58 |
- desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
59 |
- verbose=TRUE){ |
|
60 |
- if (load.it & !file.exists(intensityFile)){ |
|
61 |
- load.it <- FALSE |
|
62 |
- message("File ", intensityFile, " does not exist.") |
|
63 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
64 |
- } |
|
65 |
- if (!load.it){ |
|
66 |
- res <- snprma(filenames, fitMixture=TRUE, |
|
67 |
- mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
68 |
- eps=eps) |
|
69 |
- if(save.it) save(res, file=intensityFile) |
|
70 |
- }else{ |
|
71 |
- message("Loading ", intensityFile, ".") |
|
72 |
- obj <- load(intensityFile) |
|
73 |
- message("Done.") |
|
74 |
- if (obj != "res") |
|
75 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
76 |
- } |
|
77 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
78 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
79 |
- res2 <- crlmmGTTNoN(res[["A"]], res[["B"]], res[["SNR"]], |
|
80 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
81 |
- gender=gender, row.names=row.names, |
|
82 |
- col.names=col.names) |
|
83 |
- res2[["SNR"]] <- res[["SNR"]] |
|
84 |
- return(res2) |
|
85 |
-} |
|
86 |
- |
|
87 |
-crlmmNormalNoN <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
88 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
89 |
- save.it=FALSE, load.it=FALSE, |
|
90 |
- intensityFile="tmpcrlmmintensities.rda", |
|
91 |
- desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
92 |
- verbose=TRUE){ |
|
93 |
- if (load.it & !file.exists(intensityFile)){ |
|
94 |
- load.it <- FALSE |
|
95 |
- message("File ", intensityFile, " does not exist.") |
|
96 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
97 |
- } |
|
98 |
- if (!load.it){ |
|
99 |
- res <- snprma(filenames, fitMixture=TRUE, |
|
100 |
- mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
101 |
- eps=eps) |
|
102 |
- if(save.it) save(res, file=intensityFile) |
|
103 |
- }else{ |
|
104 |
- message("Loading ", intensityFile, ".") |
|
105 |
- obj <- load(intensityFile) |
|
106 |
- message("Done.") |
|
107 |
- if (obj != "res") |
|
108 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
109 |
- } |
|
110 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
111 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
112 |
- res2 <- crlmmGTNormalNoN(res[["A"]], res[["B"]], res[["SNR"]], |
|
113 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
114 |
- gender=gender, row.names=row.names, |
|
115 |
- col.names=col.names) |
|
116 |
- res2[["SNR"]] <- res[["SNR"]] |
|
117 |
- return(res2) |
|
118 |
-} |
119 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,207 +0,0 @@ |
1 |
-crlmmGTnm <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
|
2 |
- col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6, |
|
3 |
- SNRMin=5, recallMin=10, recallRegMin=1000, |
|
4 |
- gender=NULL, desctrucitve=FALSE, verbose=TRUE, |
|
5 |
- returnParams=FALSE){ |
|
6 |
- |
|
7 |
- keepIndex <- which(SNR>SNRMin) |
|
8 |
- if(length(keepIndex)==0) stop("No arrays above quality threshold!") |
|
9 |
- |
|
10 |
- NC <- ncol(A) |
|
11 |
- NR <- nrow(A) |
|
12 |
- |
|
13 |
- pkgname <- getCrlmmAnnotationName(cdfName) |
|
14 |
- if(!require(pkgname, character.only=TRUE)){ |
|
15 |
- suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
16 |
- msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
17 |
- message(strwrap(msg)) |
|
18 |
- stop("Package ", pkgname, " could not be found.") |
|
19 |
- rm(suggCall, msg) |
|
20 |
- } |
|
21 |
- |
|
22 |
- if(verbose) message("Loading annotations.") |
|
23 |
- data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv) |
|
24 |
- |
|
25 |
- ## this is toget rid of the 'no visible binding' notes |
|
26 |
- ## variable definitions |
|
27 |
- XIndex <- getVarInEnv("XIndex") |
|
28 |
- autosomeIndex <- getVarInEnv("autosomeIndex") |
|
29 |
- YIndex <- getVarInEnv("YIndex") |
|
30 |
- SMEDIAN <- getVarInEnv("SMEDIAN") |
|
31 |
- theKnots <- getVarInEnv("theKnots") |
|
32 |
- regionInfo <- getVarInEnv("regionInfo") |
|
33 |
- |
|
34 |
- ##IF gender not provide, we predict |
|
35 |
- if(is.null(gender)){ |
|
36 |
- if(verbose) message("Determining gender.") |
|
37 |
- XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2 |
|
38 |
- if(sum(SNR>SNRMin)==1){ |
|
39 |
- gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5))) |
|
40 |
- }else{ |
|
41 |
- gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]] |
|
42 |
- } |
|
43 |
- } |
|
44 |
- |
|
45 |
- Indexes <- list(autosomeIndex, XIndex, YIndex) |
|
46 |
- cIndexes <- list(keepIndex, |
|
47 |
- keepIndex[which(gender[keepIndex]==2)], |
|
48 |
- keepIndex[which(gender[keepIndex]==1)]) |
|
49 |
- |
|
50 |
- if(verbose) cat("Calling", NR, "SNPs for recalibration") |
|
51 |
- |
|
52 |
- ## call C |
|
53 |
- fIndex <- which(gender==2) |
|
54 |
- mIndex <- which(gender==1) |
|
55 |
- t0 <- proc.time() |
|
56 |
- newparams <- gtypeCallerRnm(A, B, fIndex, mIndex, |
|
57 |
- params[["centers"]], params[["scales"]], params[["N"]], |
|
58 |
- Indexes, cIndexes, |
|
59 |
- sapply(Indexes, length), sapply(cIndexes, length), |
|
60 |
- SMEDIAN, theKnots, |
|
61 |
- mixtureParams, DF, probs, 0.025) |
|
62 |
- t0 <- proc.time()-t0 |
|
63 |
- message("Part 1 took ", t0[3], " seconds.") |
|
64 |
- gc(verbose=FALSE) |
|
65 |
- names(newparams) <- c("centers", "scales", "N") |
|
66 |
- |
|
67 |
- if(verbose) message("Done.") |
|
68 |
- if(verbose) message("Estimating recalibration parameters.") |
|
69 |
- d <- newparams[["centers"]] - params$centers |
|
70 |
- |
|
71 |
- ##regression |
|
72 |
- Index <- intersect(which(pmin(newparams[["N"]][, 1], |
|
73 |
- newparams[["N"]][, 2], |
|
74 |
- newparams[["N"]][, 3]) > recallMin & |
|
75 |
- !apply(regionInfo, 1, any)), |
|
76 |
- autosomeIndex) |
|
77 |
- |
|
78 |
- if(length(Index) < recallRegMin){ |
|
79 |
- warning("Recallibration not possible.") |
|
80 |
- newparams <- params |
|
81 |
- dev <- vector("numeric", nrow(newparams[["centers"]])) |
|
82 |
- SS <- matrix(Inf, 3, 3) |
|
83 |
- }else{ |
|
84 |
- data4reg <- as.data.frame(newparams[["centers"]][Index,]) |
|
85 |
- names(data4reg) <- c("AA", "AB", "BB") |
|
86 |
- regParams <- cbind( coef(lm(AA~AB*BB, data=data4reg)), |
|
87 |
- c(coef(lm(AB~AA+BB, data=data4reg)), 0), |
|
88 |
- coef(lm(BB~AA*AB, data=data4reg))) |
|
89 |
- rownames(regParams) <- c("intercept", "X", "Y", "XY") |
|
90 |
- rm(data4reg) |
|
91 |
- |
|
92 |
- minN <- 3 |
|
93 |
- newparams[["centers"]][newparams[["N"]] < minN] <- NA |
|
94 |
- Index <- setdiff(which(rowSums(is.na(newparams[["centers"]]))==1), YIndex) |
|
95 |
- if(verbose) cat("Filling out empty centers") |
|
96 |
- for(i in Index){ |
|
97 |
- if(verbose) if(i%%10000==0)cat(".") |
|
98 |
- mu <- newparams[["centers"]][i, ] |
|
99 |
- j <- which(is.na(mu)) |
|
100 |
- newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j] |
|
101 |
- } |
|
102 |
- |
|
103 |
- ##remaing NAs are made like originals |
|
104 |
- if(length(YIndex)>0){ |
|
105 |
- noMoveIndex <- union(setdiff(which(rowSums(is.na(newparams[["centers"]]))>0), YIndex), |
|
106 |
- YIndex[rowSums(is.na(newparams[["centers"]][YIndex, ])>1)]) |
|
107 |
- } |
|
108 |
- newparams[["centers"]][is.na(newparams[["centers"]])] <- params[["centers"]][is.na(newparams[["centers"]])] |
|
109 |
- if(verbose) cat("\n") |
|
110 |
- |
|
111 |
- if(verbose) message("Calculating and standardizing size of shift.") |
|
112 |
- DD <- newparams[["centers"]] - params[["centers"]] |
|
113 |
- DD <- sweep(DD, 2, colMeans(DD[autosomeIndex, ])) |
|
114 |
- SS <- cov(DD[autosomeIndex, ]) |
|
115 |
- SSI <- solve(SS) |
|
116 |
- dev <- vector("numeric", nrow(DD)) |
|
117 |
- if(length(YIndex)){ |
|
118 |
- dev[-YIndex] <- apply(DD[-YIndex, ], 1, function(x) x%*%SSI%*%x) |
|
119 |
- dev[-YIndex] <- 1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev[-YIndex]) |
|
120 |
- ##Now Y (only two params) |
|
121 |
- SSY <- SS[c(1, 3), c(1, 3)] |
|
122 |
- SSI <- solve(SSY) |
|
123 |
- dev[YIndex] <- apply(DD[YIndex, c(1, 3)], 1, function(x) x%*%SSI%*%x) |
|
124 |
- dev[YIndex] <- 1/sqrt( (2*pi)^2*det(SSY))*exp(-0.5*dev[YIndex]) |
|
125 |
- } else { |
|
126 |
- dev=apply(DD,1,function(x) x%*%SSI%*%x) |
|
127 |
- dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev) |
|
128 |
- } |
|
129 |
- } |
|
130 |
- |
|
131 |
- ## BC: must keep SD |
|
132 |
- params[-2] <- newparams[-2] |
|
133 |
- |
|
134 |
- rm(newparams);gc(verbose=FALSE) |
|
135 |
- if(verbose) cat("Calling", NR, "SNPs") |
|
136 |
- ## ################### |
|
137 |
- ## ## MOVE TO C####### |
|
138 |
- t0 <- proc.time() |
|
139 |
- ImNull <- gtypeCallerR2nm(A, B, fIndex, mIndex, params[["centers"]], |
|
140 |
- params[["scales"]], params[["N"]], Indexes, |
|
141 |
- cIndexes, sapply(Indexes, length), |
|
142 |
- sapply(cIndexes, length), SMEDIAN, theKnots, |
|
143 |
- mixtureParams, DF, probs, 0.025, |
|
144 |
- which(regionInfo[,2]), |
|
145 |
- which(regionInfo[,1])) |
|
146 |
- t0 <- proc.time()-t0 |
|
147 |
- gc(verbose=FALSE) |
|
148 |
- message("\n Part 2 took ", t0[3], " seconds.") |
|
149 |
- ## END MOVE TO C####### |
|
150 |
- ## ################## |
|
151 |
- |
|
152 |
- dev <- dev/(dev+1/383) |
|
153 |
- if(!is.null(row.names)){ rownames(A) <- rownames(B) <- names(dev) <- row.names} |
|
154 |
- if(!is.null(col.names)){ colnames(A) <- colnames(B) <- col.names} |
|
155 |
- |
|
156 |
- if(verbose) message("Done.") |
|
157 |
- if (returnParams){ |
|
158 |
- return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS)), params=params)) |
|
159 |
- }else{ |
|
160 |
- return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS)))) |
|
161 |
- } |
|
162 |
-} |
|
163 |
- |
|
164 |
- |
|
165 |
-gtypeCallerRnm <- function(A, B, fIndex, mIndex, theCenters, theScales, |
|
166 |
- theNs, Indexes, cIndexes, nIndexes, |
|
167 |
- ncIndexes, SMEDIAN, knots, params, dft, |
|
168 |
- probs, trim){ |
|
169 |
- |
|
170 |
- stopifnot(!missing(A), !missing(B), dim(A)==dim(B), |
|
171 |
- nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales), |
|
172 |
- nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4, |
|
173 |
- ncol(params)==ncol(A), length(trim)==1, length(probs)==3) |
|
174 |
- |
|
175 |
- ## make code robust |
|
176 |
- ## check types before passing to C |
|
177 |
- |
|
178 |
- .Call("gtypeCallerPart1nm", A, B, |
|
179 |
- as.integer(fIndex), as.integer(mIndex), |
|
180 |
- as.numeric(theCenters), as.numeric(theScales), |
|
181 |
- as.integer(theNs), lapply(Indexes, as.integer), lapply(cIndexes, as.integer), as.integer(nIndexes), as.integer(ncIndexes), |
|
182 |
- as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params), |
|
183 |
- as.integer(dft), as.numeric(probs), as.numeric(trim), |
|
184 |
- PACKAGE="crlmm") |
|
185 |
- |
|
186 |
-} |
|
187 |
- |
|
188 |
-gtypeCallerR2nm <- function(A, B, fIndex, mIndex, theCenters, theScales, |
|
189 |
- theNs, Indexes, cIndexes, nIndexes, |
|
190 |
- ncIndexes, SMEDIAN, knots, params, dft, |
|
191 |
- probs, trim, noTraining, noInfo){ |
|
192 |
- |
|
193 |
- stopifnot(!missing(A), !missing(B), dim(A)==dim(B), |
|
194 |
- nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales), |
|
195 |
- nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4, |
|
196 |
- ncol(params)==ncol(A), length(trim)==1, length(probs)==3) |
|
197 |
- |
|
198 |
- .Call("gtypeCallerPart2nm", A, B, |
|
199 |
- as.integer(fIndex), as.integer(mIndex), |
|
200 |
- as.numeric(theCenters), as.numeric(theScales), |
|
201 |
- as.integer(theNs), Indexes, cIndexes, nIndexes, ncIndexes, |
|
202 |
- as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params), |
|
203 |
- as.integer(dft), as.numeric(probs), as.numeric(trim), |
|
204 |
- as.integer(noTraining), as.integer(noInfo), PACKAGE="crlmm") |
|
205 |
- |
|
206 |
-} |
|
207 |
- |
208 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,116 +0,0 @@ |
1 |
-crlmmNM <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
2 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
3 |
- save.it=FALSE, load.it=FALSE, |
|
4 |
- intensityFile="tmpcrlmmintensities.rda", |
|
5 |
- desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
6 |
- verbose=TRUE, cdfName, sns, recallMin=10, |
|
7 |
- recallRegMin=1000, returnParams=FALSE){ |
|
8 |
- |
|
9 |
- if (missing(sns)) sns <- basename(filenames) |
|
10 |
- if (load.it & !file.exists(intensityFile)){ |
|
11 |
- load.it <- FALSE |
|
12 |
- message("File ", intensityFile, " does not exist.") |
|
13 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
14 |
- } |
|
15 |
- if (!load.it){ |
|
16 |
- res <- snprma(filenames, fitMixture=TRUE, |
|
17 |
- mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
18 |
- eps=eps, cdfName=cdfName, sns=sns) |
|
19 |
- if(save.it){ |
|
20 |
- t0 <- proc.time() |
|
21 |
- save(res, file=intensityFile) |
|
22 |
- t0 <- proc.time()-t0 |
|
23 |
- message("Used ", t0[3], " seconds to save ", intensityFile, ".") |
|
24 |
- } |
|
25 |
- }else{ |
|
26 |
- message("Loading ", intensityFile, ".") |
|
27 |
- obj <- load(intensityFile) |
|
28 |
- message("Done.") |
|
29 |
- if (obj != "res") |
|
30 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
31 |
- } |
|
32 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
33 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
34 |
- |
|
35 |
- res2 <- crlmmGTnm(res[["A"]], res[["B"]], res[["SNR"]], |
|
36 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
37 |
- gender=gender, row.names=row.names, |
|
38 |
- col.names=col.names, recallMin=recallMin, |
|
39 |
- recallRegMin=1000, SNRMin=SNRMin, |
|
40 |
- returnParams=returnParams) |
|
41 |
- |
|
42 |
- res2[["SNR"]] <- res[["SNR"]] |
|
43 |
- |
|
44 |
- return(res2) |
|
45 |
-} |
|
46 |
- |
|
47 |
- |
|
48 |
-############################### |
|
49 |
-####### THIS IS TEMPORARY NOT OFFICIALLY USED |
|
50 |
-##################################### |
|
51 |
- |
|
52 |
-crlmmTNoN <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
53 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
54 |
- save.it=FALSE, load.it=FALSE, |
|
55 |
- intensityFile="tmpcrlmmintensities.rda", |
|
56 |
- desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
57 |
- verbose=TRUE){ |
|
58 |
- if (load.it & !file.exists(intensityFile)){ |
|
59 |
- load.it <- FALSE |
|
60 |
- message("File ", intensityFile, " does not exist.") |
|
61 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
62 |
- } |
|
63 |
- if (!load.it){ |
|
64 |
- res <- snprma(filenames, fitMixture=TRUE, |
|
65 |
- mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
66 |
- eps=eps) |
|
67 |
- if(save.it) save(res, file=intensityFile) |
|
68 |
- }else{ |
|
69 |
- message("Loading ", intensityFile, ".") |
|
70 |
- obj <- load(intensityFile) |
|
71 |
- message("Done.") |
|
72 |
- if (obj != "res") |
|
73 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
74 |
- } |
|
75 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
76 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
77 |
- res2 <- crlmmGTTNoN(res[["A"]], res[["B"]], res[["SNR"]], |
|
78 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
79 |
- gender=gender, row.names=row.names, |
|
80 |
- col.names=col.names) |
|
81 |
- res2[["SNR"]] <- res[["SNR"]] |
|
82 |
- return(res2) |
|
83 |
-} |
|
84 |
- |
|
85 |
-crlmmNormalNoN <- function(filenames, row.names=TRUE, col.names=TRUE, |
|
86 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
87 |
- save.it=FALSE, load.it=FALSE, |
|
88 |
- intensityFile="tmpcrlmmintensities.rda", |
|
89 |
- desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1, |
|
90 |
- verbose=TRUE){ |
|
91 |
- if (load.it & !file.exists(intensityFile)){ |
|
92 |
- load.it <- FALSE |
|
93 |
- message("File ", intensityFile, " does not exist.") |
|
94 |
- message("Not loading it, but running SNPRMA from scratch.") |
|
95 |
- } |
|
96 |
- if (!load.it){ |
|
97 |
- res <- snprma(filenames, fitMixture=TRUE, |
|
98 |
- mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
99 |
- eps=eps) |
|
100 |
- if(save.it) save(res, file=intensityFile) |
|
101 |
- }else{ |
|
102 |
- message("Loading ", intensityFile, ".") |
|
103 |
- obj <- load(intensityFile) |
|
104 |
- message("Done.") |
|
105 |
- if (obj != "res") |
|
106 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
107 |
- } |
|
108 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
109 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
110 |
- res2 <- crlmmGTNormalNoN(res[["A"]], res[["B"]], res[["SNR"]], |
|
111 |
- res[["mixtureParams"]], res[["cdfName"]], |
|
112 |
- gender=gender, row.names=row.names, |
|
113 |
- col.names=col.names) |
|
114 |
- res2[["SNR"]] <- res[["SNR"]] |
|
115 |
- return(res2) |
|
116 |
-} |
117 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,45 +0,0 @@ |
1 |
-fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){ |
|
2 |
- ##56 stands for 5 and 6 arrays but will also work for Illumina |
|
3 |
- ##Note the unfortunate choice of numbering: |
|
4 |
- ##1 is BB, 2 AB, and 3 AA. Opposite to everything else! |
|
5 |
- ##this is legacy code I decided not to change. |
|
6 |
- ## this why at the end we report -coefs: F1 is the negative f |
|
7 |
- mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1) |
|
8 |
- sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3) |
|
9 |
- sigmas[2] <- sigmas[2]/2 |
|
10 |
- |
|
11 |
- weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2])) |
|
12 |
- previousF1 <- -Inf |
|
13 |
- change <- eps+1 |
|
14 |
- it <- 0 |
|
15 |
- |
|
16 |
- if(verbose) message("Max change must be under ", eps, ".") |
|
17 |
- matS <- stupidSplineBasis(S, knots) |
|
18 |
- while (change > eps & it < maxit){ |
|
19 |
- it <- it+1 |
|
20 |
- ## E |
|
21 |
- z <- sweep(weights, 2, probs, "*") |
|
22 |
- LogLik <- rowSums(z) |
|
23 |
- z <- sweep(z, 1, LogLik, "/") |
|
24 |
- probs <- colMeans(z) |
|
25 |
- |
|
26 |
- ## M |
|
27 |
- fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M)) |
|
28 |
- |
|
29 |
- fit2 <- sum(z[, 2]*M)/sum(z[, 2]) |
|
30 |
- F1 <- matS%*%fit1 |
|
31 |
- sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1])) |
|
32 |
- sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2])) |
|
33 |
- |
|
34 |
- weights[, 1] <- dnorm(M, F1, sigmas[1]) |
|
35 |
- weights[, 2] <- dnorm(M, fit2, sigmas[2]) |
|
36 |
- weights[, 3] <- dnorm(M, -F1, sigmas[3]) |
|
37 |
- |
|
38 |
- change <- max(abs(F1-previousF1)) |
|
39 |
- previousF1 <- F1 |
|
40 |
- if(verbose) message("Iter ", it, ": ", change, ".") |
|
41 |
- } |
|
42 |
- medF1 <- median(-F1) |
|
43 |
- return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2])) |
|
44 |
-} |
|
45 |
- |
46 | 0 |
similarity index 68% |
47 | 1 |
rename from R/snprma.R |
48 | 2 |
rename to R/snprma-functions.R |
... | ... |
@@ -83,3 +83,49 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, |
83 | 83 |
## gns comes from preprocStuff.rda |
84 | 84 |
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
85 | 85 |
} |
86 |
+ |
|
87 |
+fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){ |
|
88 |
+ ##56 stands for 5 and 6 arrays but will also work for Illumina |
|
89 |
+ ##Note the unfortunate choice of numbering: |
|
90 |
+ ##1 is BB, 2 AB, and 3 AA. Opposite to everything else! |
|
91 |
+ ##this is legacy code I decided not to change. |
|
92 |
+ ## this why at the end we report -coefs: F1 is the negative f |
|
93 |
+ mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1) |
|
94 |
+ sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3) |
|
95 |
+ sigmas[2] <- sigmas[2]/2 |
|
96 |
+ |
|
97 |
+ weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2])) |
|
98 |
+ previousF1 <- -Inf |
|
99 |
+ change <- eps+1 |
|
100 |
+ it <- 0 |
|
101 |
+ |
|
102 |
+ if(verbose) message("Max change must be under ", eps, ".") |
|
103 |
+ matS <- stupidSplineBasis(S, knots) |
|
104 |
+ while (change > eps & it < maxit){ |
|
105 |
+ it <- it+1 |
|
106 |
+ ## E |
|
107 |
+ z <- sweep(weights, 2, probs, "*") |
|
108 |
+ LogLik <- rowSums(z) |
|
109 |
+ z <- sweep(z, 1, LogLik, "/") |
|
110 |
+ probs <- colMeans(z) |
|
111 |
+ |
|
112 |
+ ## M |
|
113 |
+ fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M)) |
|
114 |
+ |
|
115 |
+ fit2 <- sum(z[, 2]*M)/sum(z[, 2]) |
|
116 |
+ F1 <- matS%*%fit1 |
|
117 |
+ sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1])) |
|
118 |
+ sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2])) |
|
119 |
+ |
|
120 |
+ weights[, 1] <- dnorm(M, F1, sigmas[1]) |
|
121 |
+ weights[, 2] <- dnorm(M, fit2, sigmas[2]) |
|
122 |
+ weights[, 3] <- dnorm(M, -F1, sigmas[3]) |
|
123 |
+ |
|
124 |
+ change <- max(abs(F1-previousF1)) |
|
125 |
+ previousF1 <- F1 |
|
126 |
+ if(verbose) message("Iter ", it, ": ", change, ".") |
|
127 |
+ } |
|
128 |
+ medF1 <- median(-F1) |
|
129 |
+ return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2])) |
|
130 |
+} |
|
131 |
+ |
86 | 132 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,13 @@ |
1 |
+##################################### |
|
2 |
+### FOR CRLMM |
|
3 |
+##################################### |
|
4 |
+- Decide on output format (BC vote: eSet-like) |
|
5 |
+- Helper to convert to snpMatrix |
|
6 |
+- Vignette with non-trivial use of crlmm (use Vince's) |
|
7 |
+- Add RS ids |
|
8 |
+- Allele plots |
|
9 |
+- M v S plots |
|
10 |
+ |
|
11 |
+##################################### |
|
12 |
+### FOR CNRMA |
|
13 |
+##################################### |