Browse code

Improving package organization

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@37668 bc3139a8-67e5-0310-9ffc-ced21a209358

Benilton Carvalho authored on 02/03/2009 14:22:41
Showing 11 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,11 @@
1
+2009-03-01  Benilton Carvalho <bcarvalh@jhsph.edu>
2
+
3
+* Added TODO file
4
+
5
+* Added CHANGES file
6
+
7
+* Reduced number of files in R/ by combining functions for a given methodology (crlmm / cnrma)
8
+
9
+* Files with suffix XYZ-functions.R should have *functions* for methodology XYZ
10
+
11
+* Files with suffix XYZ-methods.R should have *methods* for methodology XYZ
... ...
@@ -9,15 +9,12 @@ Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays.
9 9
 Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase
10 10
 License: Artistic-2.0
11 11
 Suggests: hapmapsnp5, genomewidesnp5Crlmm, genomewidesnp6Crlmm, methods
12
-Collate: crlmmGT.R
13
-         crlmm.R
12
+Collate: crlmm-functions.R
14 13
          fitAffySnpMixture56.R
15 14
          snprma.R
16 15
          utils.R
17 16
          zzz.R
18
-         crlmmGTnm.R
19
-         crlmmnm.R
20 17
          AllClasses.R
21
-         Methods.R
18
+         crlmm-methods.R
22 19
 	 functions.R
23 20
 LazyLoad: yes
24 21
similarity index 100%
25 22
rename from R/functions.R
26 23
rename to R/cnrma-functions.R
27 24
similarity index 60%
28 25
rename from R/crlmmGT.R
29 26
rename to R/crlmm-functions.R
... ...
@@ -1,3 +1,51 @@
1
+crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
2
+                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
3
+                  save.it=FALSE, load.it=FALSE, intensityFile,
4
+                  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
5
+                  cdfName, sns, recallMin=10, recallRegMin=1000,
6
+                  returnParams=FALSE, badSNP=.7){
7
+  if ((load.it | save.it) & missing(intensityFile))
8
+	  stop("'intensityFile' is missing, and you chose either load.it or save.it")
9
+  
10
+  if (missing(sns)) sns <- basename(filenames)
11
+  if (!missing(intensityFile))
12
+    if (load.it & !file.exists(intensityFile)){
13
+      load.it <- FALSE
14
+      message("File ", intensityFile, " does not exist.")
15
+      message("Not loading it, but running SNPRMA from scratch.")
16
+    }
17
+  if (!load.it){
18
+    res <- snprma(filenames, fitMixture=TRUE,
19
+                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
20
+                  eps=eps, cdfName=cdfName, sns=sns)
21
+    if(save.it){
22
+      t0 <- proc.time()
23
+      save(res, file=intensityFile)
24
+      t0 <- proc.time()-t0
25
+      if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".")
26
+    }
27
+  }else{
28
+    if (verbose) message("Loading ", intensityFile, ".")
29
+    obj <- load(intensityFile)
30
+    if (verbose) message("Done.")
31
+    if (obj != "res")
32
+      stop("Object in ", intensityFile, " seems to be invalid.")
33
+  }
34
+  if(row.names) row.names=res$gns else row.names=NULL
35
+  if(col.names) col.names=res$sns else col.names=NULL
36
+  res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
37
+                  res[["mixtureParams"]], res[["cdfName"]],
38
+                  gender=gender, row.names=row.names,
39
+                  col.names=col.names, recallMin=recallMin,
40
+                  recallRegMin=1000, SNRMin=SNRMin,
41
+                  returnParams=returnParams, badSNP=badSNP,
42
+                  verbose=verbose)
43
+
44
+  res2[["SNR"]] <- res[["SNR"]]
45
+  res2[["SKW"]] <- res[["SKW"]]
46
+  return(res2)
47
+}
48
+
1 49
 crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
2 50
                     col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6,
3 51
                     SNRMin=5, recallMin=10, recallRegMin=1000,
... ...
@@ -225,13 +273,121 @@ gtypeCallerR2 <- function(A, B, fIndex, mIndex, theCenters, theScales,
225 273
 
226 274
 
227 275
 
276
+###############################
277
+####### THIS IS TEMPORARY NOT OFFICIALLY USED
278
+#####################################
279
+crlmmNM <- function(filenames, row.names=TRUE, col.names=TRUE,
280
+                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
281
+                  save.it=FALSE, load.it=FALSE,
282
+                  intensityFile="tmpcrlmmintensities.rda",
283
+                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
284
+                  verbose=TRUE, cdfName, sns, recallMin=10,
285
+                  recallRegMin=1000, returnParams=FALSE){
286
+  
287
+  if (missing(sns)) sns <- basename(filenames)
288
+  if (load.it & !file.exists(intensityFile)){
289
+    load.it <- FALSE
290
+    message("File ", intensityFile, " does not exist.")
291
+    message("Not loading it, but running SNPRMA from scratch.")
292
+  }
293
+  if (!load.it){
294
+    res <- snprma(filenames, fitMixture=TRUE,
295
+                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
296
+                  eps=eps, cdfName=cdfName, sns=sns)
297
+    if(save.it){
298
+      t0 <- proc.time()
299
+      save(res, file=intensityFile)
300
+      t0 <- proc.time()-t0
301
+      message("Used ", t0[3], " seconds to save ", intensityFile, ".")
302
+    }
303
+  }else{
304
+    message("Loading ", intensityFile, ".")
305
+    obj <- load(intensityFile)
306
+    message("Done.")
307
+    if (obj != "res")
308
+      stop("Object in ", intensityFile, " seems to be invalid.")
309
+  }
310
+  if(row.names) row.names=res$gns else row.names=NULL
311
+  if(col.names) col.names=res$sns else col.names=NULL
312
+
313
+  res2 <- crlmmGTnm(res[["A"]], res[["B"]], res[["SNR"]],
314
+                  res[["mixtureParams"]], res[["cdfName"]],
315
+                  gender=gender, row.names=row.names,
316
+                  col.names=col.names, recallMin=recallMin,
317
+                  recallRegMin=1000, SNRMin=SNRMin,
318
+                  returnParams=returnParams)
228 319
 
320
+  res2[["SNR"]] <- res[["SNR"]]
321
+  
322
+  return(res2)
323
+}
324
+
325
+crlmmTNoN <- function(filenames, row.names=TRUE, col.names=TRUE,
326
+                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
327
+                  save.it=FALSE, load.it=FALSE,
328
+                  intensityFile="tmpcrlmmintensities.rda",
329
+                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
330
+                  verbose=TRUE){
331
+  if (load.it & !file.exists(intensityFile)){
332
+    load.it <- FALSE
333
+    message("File ", intensityFile, " does not exist.")
334
+    message("Not loading it, but running SNPRMA from scratch.")
335
+  }
336
+  if (!load.it){
337
+    res <- snprma(filenames, fitMixture=TRUE,
338
+                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
339
+                  eps=eps)
340
+    if(save.it) save(res, file=intensityFile)
341
+  }else{
342
+    message("Loading ", intensityFile, ".")
343
+    obj <- load(intensityFile)
344
+    message("Done.")
345
+    if (obj != "res")
346
+      stop("Object in ", intensityFile, " seems to be invalid.")
347
+  }
348
+  if(row.names) row.names=res$gns else row.names=NULL
349
+  if(col.names) col.names=res$sns else col.names=NULL
350
+  res2 <- crlmmGTTNoN(res[["A"]], res[["B"]], res[["SNR"]],
351
+                  res[["mixtureParams"]], res[["cdfName"]],
352
+                  gender=gender, row.names=row.names,
353
+                  col.names=col.names)
354
+  res2[["SNR"]] <- res[["SNR"]]
355
+  return(res2)
356
+}
357
+
358
+crlmmNormalNoN <- function(filenames, row.names=TRUE, col.names=TRUE,
359
+                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
360
+                  save.it=FALSE, load.it=FALSE,
361
+                  intensityFile="tmpcrlmmintensities.rda",
362
+                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
363
+                  verbose=TRUE){
364
+  if (load.it & !file.exists(intensityFile)){
365
+    load.it <- FALSE
366
+    message("File ", intensityFile, " does not exist.")
367
+    message("Not loading it, but running SNPRMA from scratch.")
368
+  }
369
+  if (!load.it){
370
+    res <- snprma(filenames, fitMixture=TRUE,
371
+                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
372
+                  eps=eps)
373
+    if(save.it) save(res, file=intensityFile)
374
+  }else{
375
+    message("Loading ", intensityFile, ".")
376
+    obj <- load(intensityFile)
377
+    message("Done.")
378
+    if (obj != "res")
379
+      stop("Object in ", intensityFile, " seems to be invalid.")
380
+  }
381
+  if(row.names) row.names=res$gns else row.names=NULL
382
+  if(col.names) col.names=res$sns else col.names=NULL
383
+  res2 <- crlmmGTNormalNoN(res[["A"]], res[["B"]], res[["SNR"]],
384
+                  res[["mixtureParams"]], res[["cdfName"]],
385
+                  gender=gender, row.names=row.names,
386
+                  col.names=col.names)
387
+  res2[["SNR"]] <- res[["SNR"]]
388
+  return(res2)
389
+}
229 390
 
230
-##################
231
-##################
232
-### THIS IS TEMPORARY AND NOT OFFICIALLY USED
233
-##################
234
-####################
235 391
 crlmmGTTNoN <- function(A, B, SNR, mixtureParams, cdfName,
236 392
                          row.names=NULL, col.names=NULL, probs=c(1/3,
237 393
                          1/3, 1/3), DF=6, SNRMin=6, gender=NULL,
... ...
@@ -559,3 +715,210 @@ gtypeCallerR2NormalNoN <- function(A, B, fIndex, mIndex, theCenters, theScales,
559 715
 }
560 716
 
561 717
 
718
+crlmmGTnm <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
719
+                    col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6,
720
+                    SNRMin=5, recallMin=10, recallRegMin=1000,
721
+                    gender=NULL, desctrucitve=FALSE, verbose=TRUE,
722
+                    returnParams=FALSE){
723
+  
724
+  keepIndex <- which(SNR>SNRMin)
725
+  if(length(keepIndex)==0) stop("No arrays above quality threshold!")
726
+  
727
+  NC <- ncol(A)
728
+  NR <- nrow(A)
729
+  
730
+  pkgname <- getCrlmmAnnotationName(cdfName)
731
+  if(!require(pkgname, character.only=TRUE)){
732
+    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
733
+    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
734
+    message(strwrap(msg))
735
+    stop("Package ", pkgname, " could not be found.")
736
+    rm(suggCall, msg)
737
+  }
738
+
739
+  if(verbose) message("Loading annotations.")
740
+  data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
741
+
742
+  ## this is toget rid of the 'no visible binding' notes
743
+  ## variable definitions
744
+  XIndex <- getVarInEnv("XIndex")
745
+  autosomeIndex <- getVarInEnv("autosomeIndex")
746
+  YIndex <- getVarInEnv("YIndex")
747
+  SMEDIAN <- getVarInEnv("SMEDIAN")
748
+  theKnots <- getVarInEnv("theKnots")
749
+  regionInfo <- getVarInEnv("regionInfo")
750
+  
751
+  ##IF gender not provide, we predict
752
+  if(is.null(gender)){
753
+    if(verbose) message("Determining gender.")
754
+    XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2
755
+    if(sum(SNR>SNRMin)==1){
756
+      gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
757
+    }else{
758
+      gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]]
759
+    }
760
+  }
761
+  
762
+  Indexes <- list(autosomeIndex, XIndex, YIndex)
763
+  cIndexes <- list(keepIndex, 
764
+                   keepIndex[which(gender[keepIndex]==2)], 
765
+                   keepIndex[which(gender[keepIndex]==1)])
766
+  
767
+  if(verbose) cat("Calling", NR, "SNPs for recalibration")
768
+
769
+  ## call C
770
+  fIndex <- which(gender==2)
771
+  mIndex <- which(gender==1)
772
+  t0 <- proc.time()
773
+  newparams <- gtypeCallerRnm(A, B, fIndex, mIndex,
774
+                            params[["centers"]], params[["scales"]], params[["N"]],
775
+                            Indexes, cIndexes,
776
+                            sapply(Indexes, length), sapply(cIndexes, length),
777
+                            SMEDIAN, theKnots,
778
+                            mixtureParams, DF, probs, 0.025)
779
+  t0 <- proc.time()-t0
780
+  message("Part 1 took ", t0[3], " seconds.")
781
+  gc(verbose=FALSE)
782
+  names(newparams) <- c("centers", "scales", "N")
783
+  
784
+  if(verbose) message("Done.")
785
+  if(verbose) message("Estimating recalibration parameters.")
786
+  d <- newparams[["centers"]] - params$centers
787
+
788
+  ##regression 
789
+  Index <- intersect(which(pmin(newparams[["N"]][, 1],
790
+                                newparams[["N"]][, 2],
791
+                                newparams[["N"]][, 3]) > recallMin &
792
+                                !apply(regionInfo, 1, any)),
793
+                                autosomeIndex)
794
+  
795
+  if(length(Index) < recallRegMin){
796
+    warning("Recallibration not possible.")
797
+    newparams <- params
798
+    dev <- vector("numeric", nrow(newparams[["centers"]]))
799
+    SS <- matrix(Inf, 3, 3)
800
+  }else{
801
+    data4reg <- as.data.frame(newparams[["centers"]][Index,])
802
+    names(data4reg) <- c("AA", "AB", "BB")
803
+    regParams <- cbind(  coef(lm(AA~AB*BB, data=data4reg)),
804
+                       c(coef(lm(AB~AA+BB, data=data4reg)), 0), 
805
+                       coef(lm(BB~AA*AB, data=data4reg)))
806
+    rownames(regParams) <- c("intercept", "X", "Y", "XY")
807
+    rm(data4reg)
808
+  
809
+    minN <- 3
810
+    newparams[["centers"]][newparams[["N"]] < minN] <- NA
811
+    Index <- setdiff(which(rowSums(is.na(newparams[["centers"]]))==1), YIndex)
812
+    if(verbose) cat("Filling out empty centers")
813
+    for(i in Index){
814
+      if(verbose) if(i%%10000==0)cat(".")
815
+      mu <- newparams[["centers"]][i, ]
816
+      j <- which(is.na(mu))
817
+      newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j]
818
+    }
819
+    
820
+    ##remaing NAs are made like originals
821
+    if(length(YIndex)>0){
822
+      noMoveIndex <- union(setdiff(which(rowSums(is.na(newparams[["centers"]]))>0), YIndex), 
823
+                           YIndex[rowSums(is.na(newparams[["centers"]][YIndex, ])>1)])
824
+    }
825
+    newparams[["centers"]][is.na(newparams[["centers"]])] <- params[["centers"]][is.na(newparams[["centers"]])]
826
+    if(verbose) cat("\n")
827
+  
828
+    if(verbose) message("Calculating and standardizing size of shift.")
829
+    DD <- newparams[["centers"]] - params[["centers"]]
830
+    DD <- sweep(DD, 2, colMeans(DD[autosomeIndex, ]))
831
+    SS <- cov(DD[autosomeIndex, ])
832
+    SSI <- solve(SS)
833
+    dev <- vector("numeric", nrow(DD))
834
+    if(length(YIndex)){
835
+      dev[-YIndex] <- apply(DD[-YIndex, ], 1, function(x) x%*%SSI%*%x)
836
+      dev[-YIndex] <- 1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev[-YIndex])
837
+      ##Now Y (only two params)
838
+      SSY <- SS[c(1, 3), c(1, 3)]
839
+      SSI <- solve(SSY) 
840
+      dev[YIndex] <- apply(DD[YIndex, c(1, 3)], 1, function(x) x%*%SSI%*%x)
841
+      dev[YIndex] <- 1/sqrt( (2*pi)^2*det(SSY))*exp(-0.5*dev[YIndex])
842
+    } else {
843
+      dev=apply(DD,1,function(x) x%*%SSI%*%x)
844
+      dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev)
845
+    }
846
+  }
847
+    
848
+  ## BC: must keep SD
849
+  params[-2] <- newparams[-2]
850
+  
851
+  rm(newparams);gc(verbose=FALSE)  
852
+  if(verbose) cat("Calling", NR, "SNPs")
853
+  ## ###################
854
+  ## ## MOVE TO C#######
855
+  t0 <- proc.time()
856
+  ImNull <- gtypeCallerR2nm(A, B, fIndex, mIndex, params[["centers"]],
857
+                          params[["scales"]], params[["N"]], Indexes,
858
+                          cIndexes, sapply(Indexes, length),
859
+                          sapply(cIndexes, length), SMEDIAN, theKnots,
860
+                          mixtureParams, DF, probs, 0.025,
861
+                          which(regionInfo[,2]),
862
+                          which(regionInfo[,1]))
863
+  t0 <- proc.time()-t0
864
+  gc(verbose=FALSE)
865
+  message("\n Part 2 took ", t0[3], " seconds.")
866
+  ##  END MOVE TO C#######
867
+  ## ##################
868
+  
869
+  dev <- dev/(dev+1/383)
870
+  if(!is.null(row.names)){ rownames(A) <- rownames(B) <- names(dev) <- row.names}
871
+  if(!is.null(col.names)){ colnames(A) <- colnames(B) <- col.names}
872
+  
873
+  if(verbose) message("Done.")
874
+  if (returnParams){
875
+    return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS)), params=params))
876
+  }else{
877
+    return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS))))
878
+  }
879
+}
880
+
881
+
882
+gtypeCallerRnm <- function(A, B, fIndex, mIndex, theCenters, theScales,
883
+                         theNs, Indexes, cIndexes, nIndexes,
884
+                         ncIndexes, SMEDIAN, knots, params, dft,
885
+                         probs, trim){
886
+
887
+  stopifnot(!missing(A), !missing(B), dim(A)==dim(B),
888
+            nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales),
889
+            nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4,
890
+            ncol(params)==ncol(A), length(trim)==1, length(probs)==3)
891
+
892
+  ## make code robust
893
+  ## check types before passing to C
894
+  
895
+  .Call("gtypeCallerPart1nm", A, B,
896
+        as.integer(fIndex), as.integer(mIndex),
897
+        as.numeric(theCenters), as.numeric(theScales),
898
+        as.integer(theNs), lapply(Indexes, as.integer), lapply(cIndexes, as.integer), as.integer(nIndexes), as.integer(ncIndexes),
899
+        as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params),
900
+        as.integer(dft), as.numeric(probs), as.numeric(trim),
901
+        PACKAGE="crlmm")
902
+  
903
+}
904
+
905
+gtypeCallerR2nm <- function(A, B, fIndex, mIndex, theCenters, theScales,
906
+                         theNs, Indexes, cIndexes, nIndexes,
907
+                         ncIndexes, SMEDIAN, knots, params, dft,
908
+                         probs, trim, noTraining, noInfo){
909
+
910
+  stopifnot(!missing(A), !missing(B), dim(A)==dim(B),
911
+            nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales),
912
+            nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4,
913
+            ncol(params)==ncol(A), length(trim)==1, length(probs)==3)
914
+
915
+  .Call("gtypeCallerPart2nm", A, B,
916
+        as.integer(fIndex), as.integer(mIndex),
917
+        as.numeric(theCenters), as.numeric(theScales),
918
+        as.integer(theNs), Indexes, cIndexes, nIndexes, ncIndexes,
919
+        as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params),
920
+        as.integer(dft), as.numeric(probs), as.numeric(trim),
921
+        as.integer(noTraining), as.integer(noInfo), PACKAGE="crlmm")
922
+  
923
+}
924
+
562 925
similarity index 100%
563 926
rename from R/Methods.R
564 927
rename to R/crlmm-methods.R
565 928
deleted file mode 100644
... ...
@@ -1,118 +0,0 @@
1
-crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
2
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
3
-                  save.it=FALSE, load.it=FALSE, intensityFile,
4
-                  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
5
-                  cdfName, sns, recallMin=10, recallRegMin=1000,
6
-                  returnParams=FALSE, badSNP=.7){
7
-  if ((load.it | save.it) & missing(intensityFile))
8
-	  stop("'intensityFile' is missing, and you chose either load.it or save.it")
9
-  
10
-  if (missing(sns)) sns <- basename(filenames)
11
-  if (!missing(intensityFile))
12
-    if (load.it & !file.exists(intensityFile)){
13
-      load.it <- FALSE
14
-      message("File ", intensityFile, " does not exist.")
15
-      message("Not loading it, but running SNPRMA from scratch.")
16
-    }
17
-  if (!load.it){
18
-    res <- snprma(filenames, fitMixture=TRUE,
19
-                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
20
-                  eps=eps, cdfName=cdfName, sns=sns)
21
-    if(save.it){
22
-      t0 <- proc.time()
23
-      save(res, file=intensityFile)
24
-      t0 <- proc.time()-t0
25
-      if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".")
26
-    }
27
-  }else{
28
-    if (verbose) message("Loading ", intensityFile, ".")
29
-    obj <- load(intensityFile)
30
-    if (verbose) message("Done.")
31
-    if (obj != "res")
32
-      stop("Object in ", intensityFile, " seems to be invalid.")
33
-  }
34
-  if(row.names) row.names=res$gns else row.names=NULL
35
-  if(col.names) col.names=res$sns else col.names=NULL
36
-  res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
37
-                  res[["mixtureParams"]], res[["cdfName"]],
38
-                  gender=gender, row.names=row.names,
39
-                  col.names=col.names, recallMin=recallMin,
40
-                  recallRegMin=1000, SNRMin=SNRMin,
41
-                  returnParams=returnParams, badSNP=badSNP,
42
-                  verbose=verbose)
43
-
44
-  res2[["SNR"]] <- res[["SNR"]]
45
-  res2[["SKW"]] <- res[["SKW"]]
46
-  return(res2)
47
-}
48
-
49
-
50
-###############################
51
-####### THIS IS TEMPORARY NOT OFFICIALLY USED
52
-#####################################
53
-
54
-crlmmTNoN <- function(filenames, row.names=TRUE, col.names=TRUE,
55
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
56
-                  save.it=FALSE, load.it=FALSE,
57
-                  intensityFile="tmpcrlmmintensities.rda",
58
-                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
59
-                  verbose=TRUE){
60
-  if (load.it & !file.exists(intensityFile)){
61
-    load.it <- FALSE
62
-    message("File ", intensityFile, " does not exist.")
63
-    message("Not loading it, but running SNPRMA from scratch.")
64
-  }
65
-  if (!load.it){
66
-    res <- snprma(filenames, fitMixture=TRUE,
67
-                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
68
-                  eps=eps)
69
-    if(save.it) save(res, file=intensityFile)
70
-  }else{
71
-    message("Loading ", intensityFile, ".")
72
-    obj <- load(intensityFile)
73
-    message("Done.")
74
-    if (obj != "res")
75
-      stop("Object in ", intensityFile, " seems to be invalid.")
76
-  }
77
-  if(row.names) row.names=res$gns else row.names=NULL
78
-  if(col.names) col.names=res$sns else col.names=NULL
79
-  res2 <- crlmmGTTNoN(res[["A"]], res[["B"]], res[["SNR"]],
80
-                  res[["mixtureParams"]], res[["cdfName"]],
81
-                  gender=gender, row.names=row.names,
82
-                  col.names=col.names)
83
-  res2[["SNR"]] <- res[["SNR"]]
84
-  return(res2)
85
-}
86
-
87
-crlmmNormalNoN <- function(filenames, row.names=TRUE, col.names=TRUE,
88
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
89
-                  save.it=FALSE, load.it=FALSE,
90
-                  intensityFile="tmpcrlmmintensities.rda",
91
-                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
92
-                  verbose=TRUE){
93
-  if (load.it & !file.exists(intensityFile)){
94
-    load.it <- FALSE
95
-    message("File ", intensityFile, " does not exist.")
96
-    message("Not loading it, but running SNPRMA from scratch.")
97
-  }
98
-  if (!load.it){
99
-    res <- snprma(filenames, fitMixture=TRUE,
100
-                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
101
-                  eps=eps)
102
-    if(save.it) save(res, file=intensityFile)
103
-  }else{
104
-    message("Loading ", intensityFile, ".")
105
-    obj <- load(intensityFile)
106
-    message("Done.")
107
-    if (obj != "res")
108
-      stop("Object in ", intensityFile, " seems to be invalid.")
109
-  }
110
-  if(row.names) row.names=res$gns else row.names=NULL
111
-  if(col.names) col.names=res$sns else col.names=NULL
112
-  res2 <- crlmmGTNormalNoN(res[["A"]], res[["B"]], res[["SNR"]],
113
-                  res[["mixtureParams"]], res[["cdfName"]],
114
-                  gender=gender, row.names=row.names,
115
-                  col.names=col.names)
116
-  res2[["SNR"]] <- res[["SNR"]]
117
-  return(res2)
118
-}
119 0
deleted file mode 100644
... ...
@@ -1,207 +0,0 @@
1
-crlmmGTnm <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
2
-                    col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6,
3
-                    SNRMin=5, recallMin=10, recallRegMin=1000,
4
-                    gender=NULL, desctrucitve=FALSE, verbose=TRUE,
5
-                    returnParams=FALSE){
6
-  
7
-  keepIndex <- which(SNR>SNRMin)
8
-  if(length(keepIndex)==0) stop("No arrays above quality threshold!")
9
-  
10
-  NC <- ncol(A)
11
-  NR <- nrow(A)
12
-  
13
-  pkgname <- getCrlmmAnnotationName(cdfName)
14
-  if(!require(pkgname, character.only=TRUE)){
15
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
16
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
17
-    message(strwrap(msg))
18
-    stop("Package ", pkgname, " could not be found.")
19
-    rm(suggCall, msg)
20
-  }
21
-
22
-  if(verbose) message("Loading annotations.")
23
-  data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
24
-
25
-  ## this is toget rid of the 'no visible binding' notes
26
-  ## variable definitions
27
-  XIndex <- getVarInEnv("XIndex")
28
-  autosomeIndex <- getVarInEnv("autosomeIndex")
29
-  YIndex <- getVarInEnv("YIndex")
30
-  SMEDIAN <- getVarInEnv("SMEDIAN")
31
-  theKnots <- getVarInEnv("theKnots")
32
-  regionInfo <- getVarInEnv("regionInfo")
33
-  
34
-  ##IF gender not provide, we predict
35
-  if(is.null(gender)){
36
-    if(verbose) message("Determining gender.")
37
-    XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2
38
-    if(sum(SNR>SNRMin)==1){
39
-      gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
40
-    }else{
41
-      gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]]
42
-    }
43
-  }
44
-  
45
-  Indexes <- list(autosomeIndex, XIndex, YIndex)
46
-  cIndexes <- list(keepIndex, 
47
-                   keepIndex[which(gender[keepIndex]==2)], 
48
-                   keepIndex[which(gender[keepIndex]==1)])
49
-  
50
-  if(verbose) cat("Calling", NR, "SNPs for recalibration")
51
-
52
-  ## call C
53
-  fIndex <- which(gender==2)
54
-  mIndex <- which(gender==1)
55
-  t0 <- proc.time()
56
-  newparams <- gtypeCallerRnm(A, B, fIndex, mIndex,
57
-                            params[["centers"]], params[["scales"]], params[["N"]],
58
-                            Indexes, cIndexes,
59
-                            sapply(Indexes, length), sapply(cIndexes, length),
60
-                            SMEDIAN, theKnots,
61
-                            mixtureParams, DF, probs, 0.025)
62
-  t0 <- proc.time()-t0
63
-  message("Part 1 took ", t0[3], " seconds.")
64
-  gc(verbose=FALSE)
65
-  names(newparams) <- c("centers", "scales", "N")
66
-  
67
-  if(verbose) message("Done.")
68
-  if(verbose) message("Estimating recalibration parameters.")
69
-  d <- newparams[["centers"]] - params$centers
70
-
71
-  ##regression 
72
-  Index <- intersect(which(pmin(newparams[["N"]][, 1],
73
-                                newparams[["N"]][, 2],
74
-                                newparams[["N"]][, 3]) > recallMin &
75
-                                !apply(regionInfo, 1, any)),
76
-                                autosomeIndex)
77
-  
78
-  if(length(Index) < recallRegMin){
79
-    warning("Recallibration not possible.")
80
-    newparams <- params
81
-    dev <- vector("numeric", nrow(newparams[["centers"]]))
82
-    SS <- matrix(Inf, 3, 3)
83
-  }else{
84
-    data4reg <- as.data.frame(newparams[["centers"]][Index,])
85
-    names(data4reg) <- c("AA", "AB", "BB")
86
-    regParams <- cbind(  coef(lm(AA~AB*BB, data=data4reg)),
87
-                       c(coef(lm(AB~AA+BB, data=data4reg)), 0), 
88
-                       coef(lm(BB~AA*AB, data=data4reg)))
89
-    rownames(regParams) <- c("intercept", "X", "Y", "XY")
90
-    rm(data4reg)
91
-  
92
-    minN <- 3
93
-    newparams[["centers"]][newparams[["N"]] < minN] <- NA
94
-    Index <- setdiff(which(rowSums(is.na(newparams[["centers"]]))==1), YIndex)
95
-    if(verbose) cat("Filling out empty centers")
96
-    for(i in Index){
97
-      if(verbose) if(i%%10000==0)cat(".")
98
-      mu <- newparams[["centers"]][i, ]
99
-      j <- which(is.na(mu))
100
-      newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j]
101
-    }
102
-    
103
-    ##remaing NAs are made like originals
104
-    if(length(YIndex)>0){
105
-      noMoveIndex <- union(setdiff(which(rowSums(is.na(newparams[["centers"]]))>0), YIndex), 
106
-                           YIndex[rowSums(is.na(newparams[["centers"]][YIndex, ])>1)])
107
-    }
108
-    newparams[["centers"]][is.na(newparams[["centers"]])] <- params[["centers"]][is.na(newparams[["centers"]])]
109
-    if(verbose) cat("\n")
110
-  
111
-    if(verbose) message("Calculating and standardizing size of shift.")
112
-    DD <- newparams[["centers"]] - params[["centers"]]
113
-    DD <- sweep(DD, 2, colMeans(DD[autosomeIndex, ]))
114
-    SS <- cov(DD[autosomeIndex, ])
115
-    SSI <- solve(SS)
116
-    dev <- vector("numeric", nrow(DD))
117
-    if(length(YIndex)){
118
-      dev[-YIndex] <- apply(DD[-YIndex, ], 1, function(x) x%*%SSI%*%x)
119
-      dev[-YIndex] <- 1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev[-YIndex])
120
-      ##Now Y (only two params)
121
-      SSY <- SS[c(1, 3), c(1, 3)]
122
-      SSI <- solve(SSY) 
123
-      dev[YIndex] <- apply(DD[YIndex, c(1, 3)], 1, function(x) x%*%SSI%*%x)
124
-      dev[YIndex] <- 1/sqrt( (2*pi)^2*det(SSY))*exp(-0.5*dev[YIndex])
125
-    } else {
126
-      dev=apply(DD,1,function(x) x%*%SSI%*%x)
127
-      dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev)
128
-    }
129
-  }
130
-    
131
-  ## BC: must keep SD
132
-  params[-2] <- newparams[-2]
133
-  
134
-  rm(newparams);gc(verbose=FALSE)  
135
-  if(verbose) cat("Calling", NR, "SNPs")
136
-  ## ###################
137
-  ## ## MOVE TO C#######
138
-  t0 <- proc.time()
139
-  ImNull <- gtypeCallerR2nm(A, B, fIndex, mIndex, params[["centers"]],
140
-                          params[["scales"]], params[["N"]], Indexes,
141
-                          cIndexes, sapply(Indexes, length),
142
-                          sapply(cIndexes, length), SMEDIAN, theKnots,
143
-                          mixtureParams, DF, probs, 0.025,
144
-                          which(regionInfo[,2]),
145
-                          which(regionInfo[,1]))
146
-  t0 <- proc.time()-t0
147
-  gc(verbose=FALSE)
148
-  message("\n Part 2 took ", t0[3], " seconds.")
149
-  ##  END MOVE TO C#######
150
-  ## ##################
151
-  
152
-  dev <- dev/(dev+1/383)
153
-  if(!is.null(row.names)){ rownames(A) <- rownames(B) <- names(dev) <- row.names}
154
-  if(!is.null(col.names)){ colnames(A) <- colnames(B) <- col.names}
155
-  
156
-  if(verbose) message("Done.")
157
-  if (returnParams){
158
-    return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS)), params=params))
159
-  }else{
160
-    return(list(calls=A, confs=B, SNPQC=dev, batchQC=mean(diag(SS))))
161
-  }
162
-}
163
-
164
-
165
-gtypeCallerRnm <- function(A, B, fIndex, mIndex, theCenters, theScales,
166
-                         theNs, Indexes, cIndexes, nIndexes,
167
-                         ncIndexes, SMEDIAN, knots, params, dft,
168
-                         probs, trim){
169
-
170
-  stopifnot(!missing(A), !missing(B), dim(A)==dim(B),
171
-            nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales),
172
-            nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4,
173
-            ncol(params)==ncol(A), length(trim)==1, length(probs)==3)
174
-
175
-  ## make code robust
176
-  ## check types before passing to C
177
-  
178
-  .Call("gtypeCallerPart1nm", A, B,
179
-        as.integer(fIndex), as.integer(mIndex),
180
-        as.numeric(theCenters), as.numeric(theScales),
181
-        as.integer(theNs), lapply(Indexes, as.integer), lapply(cIndexes, as.integer), as.integer(nIndexes), as.integer(ncIndexes),
182
-        as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params),
183
-        as.integer(dft), as.numeric(probs), as.numeric(trim),
184
-        PACKAGE="crlmm")
185
-  
186
-}
187
-
188
-gtypeCallerR2nm <- function(A, B, fIndex, mIndex, theCenters, theScales,
189
-                         theNs, Indexes, cIndexes, nIndexes,
190
-                         ncIndexes, SMEDIAN, knots, params, dft,
191
-                         probs, trim, noTraining, noInfo){
192
-
193
-  stopifnot(!missing(A), !missing(B), dim(A)==dim(B),
194
-            nrow(A)==nrow(theCenters), nrow(A)==nrow(theScales),
195
-            nrow(A) == nrow(theNs), length(knots)==3, nrow(params)==4,
196
-            ncol(params)==ncol(A), length(trim)==1, length(probs)==3)
197
-
198
-  .Call("gtypeCallerPart2nm", A, B,
199
-        as.integer(fIndex), as.integer(mIndex),
200
-        as.numeric(theCenters), as.numeric(theScales),
201
-        as.integer(theNs), Indexes, cIndexes, nIndexes, ncIndexes,
202
-        as.numeric(SMEDIAN), as.numeric(knots), as.numeric(params),
203
-        as.integer(dft), as.numeric(probs), as.numeric(trim),
204
-        as.integer(noTraining), as.integer(noInfo), PACKAGE="crlmm")
205
-  
206
-}
207
-
208 0
deleted file mode 100644
... ...
@@ -1,116 +0,0 @@
1
-crlmmNM <- function(filenames, row.names=TRUE, col.names=TRUE,
2
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
3
-                  save.it=FALSE, load.it=FALSE,
4
-                  intensityFile="tmpcrlmmintensities.rda",
5
-                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
6
-                  verbose=TRUE, cdfName, sns, recallMin=10,
7
-                  recallRegMin=1000, returnParams=FALSE){
8
-  
9
-  if (missing(sns)) sns <- basename(filenames)
10
-  if (load.it & !file.exists(intensityFile)){
11
-    load.it <- FALSE
12
-    message("File ", intensityFile, " does not exist.")
13
-    message("Not loading it, but running SNPRMA from scratch.")
14
-  }
15
-  if (!load.it){
16
-    res <- snprma(filenames, fitMixture=TRUE,
17
-                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
18
-                  eps=eps, cdfName=cdfName, sns=sns)
19
-    if(save.it){
20
-      t0 <- proc.time()
21
-      save(res, file=intensityFile)
22
-      t0 <- proc.time()-t0
23
-      message("Used ", t0[3], " seconds to save ", intensityFile, ".")
24
-    }
25
-  }else{
26
-    message("Loading ", intensityFile, ".")
27
-    obj <- load(intensityFile)
28
-    message("Done.")
29
-    if (obj != "res")
30
-      stop("Object in ", intensityFile, " seems to be invalid.")
31
-  }
32
-  if(row.names) row.names=res$gns else row.names=NULL
33
-  if(col.names) col.names=res$sns else col.names=NULL
34
-
35
-  res2 <- crlmmGTnm(res[["A"]], res[["B"]], res[["SNR"]],
36
-                  res[["mixtureParams"]], res[["cdfName"]],
37
-                  gender=gender, row.names=row.names,
38
-                  col.names=col.names, recallMin=recallMin,
39
-                  recallRegMin=1000, SNRMin=SNRMin,
40
-                  returnParams=returnParams)
41
-
42
-  res2[["SNR"]] <- res[["SNR"]]
43
-  
44
-  return(res2)
45
-}
46
-
47
-
48
-###############################
49
-####### THIS IS TEMPORARY NOT OFFICIALLY USED
50
-#####################################
51
-
52
-crlmmTNoN <- function(filenames, row.names=TRUE, col.names=TRUE,
53
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
54
-                  save.it=FALSE, load.it=FALSE,
55
-                  intensityFile="tmpcrlmmintensities.rda",
56
-                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
57
-                  verbose=TRUE){
58
-  if (load.it & !file.exists(intensityFile)){
59
-    load.it <- FALSE
60
-    message("File ", intensityFile, " does not exist.")
61
-    message("Not loading it, but running SNPRMA from scratch.")
62
-  }
63
-  if (!load.it){
64
-    res <- snprma(filenames, fitMixture=TRUE,
65
-                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
66
-                  eps=eps)
67
-    if(save.it) save(res, file=intensityFile)
68
-  }else{
69
-    message("Loading ", intensityFile, ".")
70
-    obj <- load(intensityFile)
71
-    message("Done.")
72
-    if (obj != "res")
73
-      stop("Object in ", intensityFile, " seems to be invalid.")
74
-  }
75
-  if(row.names) row.names=res$gns else row.names=NULL
76
-  if(col.names) col.names=res$sns else col.names=NULL
77
-  res2 <- crlmmGTTNoN(res[["A"]], res[["B"]], res[["SNR"]],
78
-                  res[["mixtureParams"]], res[["cdfName"]],
79
-                  gender=gender, row.names=row.names,
80
-                  col.names=col.names)
81
-  res2[["SNR"]] <- res[["SNR"]]
82
-  return(res2)
83
-}
84
-
85
-crlmmNormalNoN <- function(filenames, row.names=TRUE, col.names=TRUE,
86
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
87
-                  save.it=FALSE, load.it=FALSE,
88
-                  intensityFile="tmpcrlmmintensities.rda",
89
-                  desctrucitve=FALSE, mixtureSampleSize=10^5, eps=0.1,
90
-                  verbose=TRUE){
91
-  if (load.it & !file.exists(intensityFile)){
92
-    load.it <- FALSE
93
-    message("File ", intensityFile, " does not exist.")
94
-    message("Not loading it, but running SNPRMA from scratch.")
95
-  }
96
-  if (!load.it){
97
-    res <- snprma(filenames, fitMixture=TRUE,
98
-                  mixtureSampleSize=mixtureSampleSize, verbose=verbose,
99
-                  eps=eps)
100
-    if(save.it) save(res, file=intensityFile)
101
-  }else{
102
-    message("Loading ", intensityFile, ".")
103
-    obj <- load(intensityFile)
104
-    message("Done.")
105
-    if (obj != "res")
106
-      stop("Object in ", intensityFile, " seems to be invalid.")
107
-  }
108
-  if(row.names) row.names=res$gns else row.names=NULL
109
-  if(col.names) col.names=res$sns else col.names=NULL
110
-  res2 <- crlmmGTNormalNoN(res[["A"]], res[["B"]], res[["SNR"]],
111
-                  res[["mixtureParams"]], res[["cdfName"]],
112
-                  gender=gender, row.names=row.names,
113
-                  col.names=col.names)
114
-  res2[["SNR"]] <- res[["SNR"]]
115
-  return(res2)
116
-}
117 0
deleted file mode 100644
... ...
@@ -1,45 +0,0 @@
1
-fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
2
-  ##56 stands for 5 and 6 arrays but will also work for Illumina
3
-  ##Note the unfortunate choice of numbering:
4
-  ##1 is BB, 2 AB, and 3 AA. Opposite to everything else!
5
-  ##this is legacy code I decided not to change.
6
-  ## this why at the end we report -coefs: F1 is the negative f
7
-  mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1)
8
-  sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3)
9
-  sigmas[2] <- sigmas[2]/2
10
- 
11
-  weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2]))
12
-  previousF1 <- -Inf
13
-  change <- eps+1
14
-  it <- 0
15
- 
16
-  if(verbose) message("Max change must be under ", eps, ".")
17
-  matS <- stupidSplineBasis(S, knots)
18
-  while (change > eps & it < maxit){
19
-    it <- it+1
20
-    ## E
21
-    z <- sweep(weights, 2, probs, "*")
22
-    LogLik <- rowSums(z)
23
-    z <- sweep(z, 1, LogLik, "/")
24
-    probs <- colMeans(z)
25
- 
26
-    ## M
27
-    fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M))
28
- 
29
-    fit2 <- sum(z[, 2]*M)/sum(z[, 2])
30
-    F1 <- matS%*%fit1
31
-    sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1]))
32
-    sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2]))
33
- 
34
-    weights[, 1] <- dnorm(M, F1, sigmas[1])
35
-    weights[, 2] <- dnorm(M, fit2, sigmas[2])
36
-    weights[, 3] <- dnorm(M, -F1, sigmas[3])
37
-    
38
-    change <- max(abs(F1-previousF1))
39
-    previousF1 <- F1
40
-    if(verbose) message("Iter ", it, ": ", change, ".")
41
-  }
42
-  medF1 <- median(-F1)
43
- return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2]))
44
-}
45
-
46 0
similarity index 68%
47 1
rename from R/snprma.R
48 2
rename to R/snprma-functions.R
... ...
@@ -83,3 +83,49 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1,
83 83
   ## gns comes from preprocStuff.rda
84 84
   list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
85 85
 }
86
+
87
+fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
88
+  ##56 stands for 5 and 6 arrays but will also work for Illumina
89
+  ##Note the unfortunate choice of numbering:
90
+  ##1 is BB, 2 AB, and 3 AA. Opposite to everything else!
91
+  ##this is legacy code I decided not to change.
92
+  ## this why at the end we report -coefs: F1 is the negative f
93
+  mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1)
94
+  sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3)
95
+  sigmas[2] <- sigmas[2]/2
96
+ 
97
+  weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2]))
98
+  previousF1 <- -Inf
99
+  change <- eps+1
100
+  it <- 0
101
+ 
102
+  if(verbose) message("Max change must be under ", eps, ".")
103
+  matS <- stupidSplineBasis(S, knots)
104
+  while (change > eps & it < maxit){
105
+    it <- it+1
106
+    ## E
107
+    z <- sweep(weights, 2, probs, "*")
108
+    LogLik <- rowSums(z)
109
+    z <- sweep(z, 1, LogLik, "/")
110
+    probs <- colMeans(z)
111
+ 
112
+    ## M
113
+    fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M))
114
+ 
115
+    fit2 <- sum(z[, 2]*M)/sum(z[, 2])
116
+    F1 <- matS%*%fit1
117
+    sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1]))
118
+    sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2]))
119
+ 
120
+    weights[, 1] <- dnorm(M, F1, sigmas[1])
121
+    weights[, 2] <- dnorm(M, fit2, sigmas[2])
122
+    weights[, 3] <- dnorm(M, -F1, sigmas[3])
123
+    
124
+    change <- max(abs(F1-previousF1))
125
+    previousF1 <- F1
126
+    if(verbose) message("Iter ", it, ": ", change, ".")
127
+  }
128
+  medF1 <- median(-F1)
129
+ return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2]))
130
+}
131
+
86 132
new file mode 100644
... ...
@@ -0,0 +1,13 @@
1
+#####################################
2
+### FOR CRLMM
3
+#####################################
4
+- Decide on output format (BC vote: eSet-like)
5
+- Helper to convert to snpMatrix
6
+- Vignette with non-trivial use of crlmm (use Vince's)
7
+- Add RS ids
8
+- Allele plots
9
+- M v S plots
10
+
11
+#####################################
12
+### FOR CNRMA
13
+#####################################