Browse code

Numerous changed to copy number estimation detailed below.

Added the following functions:
o summarizeMaleXGenotypes
- impute genotype 'A' and genotype 'B' location when unobserved
- shrink within genotype variances
o shrinkGenotypeSummaries
- impute unobserved genotype 'AA', 'AB' and 'BB' genotypes
- shrink within genotype variances
o summarizeSnps
- within genotype location and scale. Genotype frequencies
o summarizeNps
- location and scale at nonpolymorphic loci
o genotypeSummary
- wrapper for summarizeSnps and summarizeNps

Extensively edited the following functions:
o fit.lm1 - fit.lm4
(these functions only fit the linear model to the within/genotype location scale)

Requires oligoClasses 1.11.7

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48959 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 21/08/2010 02:49:54
Showing 8 changed files

... ...
@@ -10,7 +10,7 @@ License: Artistic-2.0
10 10
 Depends: R (>= 2.11.0),
11 11
          methods,
12 12
          Biobase (>= 2.9.0),
13
-         oligoClasses (>= 1.11.6)
13
+         oligoClasses (>= 1.11.7)
14 14
 Imports: affyio (>= 1.15.2),
15 15
          ellipse,
16 16
          ff,
... ...
@@ -27,6 +27,7 @@ Suggests: hapmapsnp6,
27 27
 	  ellipse
28 28
 Collate: AllGenerics.R
29 29
 	 AllClasses.R
30
+	 methods-AssayData.R
30 31
 	 methods-CNSet.R
31 32
 	 methods-eSet.R
32 33
          methods-SnpSuperSet.R
... ...
@@ -29,7 +29,9 @@ importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet)
29 29
 importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
30 30
 		  "confs<-", cnConfidence, "cnConfidence<-", isSnp,
31 31
 		  chromosome, position, A, B,
32
-		  "A<-", "B<-", open, close, lM, "lM<-", flags)
32
+		  "A<-", "B<-", open, close, lM, flags,
33
+		  batchStatistics, "batchStatistics<-", updateObject)
34
+
33 35
 
34 36
 importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles,
35 37
            copyNumber, initializeBigMatrix, initializeBigVector)
... ...
@@ -55,7 +57,7 @@ importFrom(mvtnorm, dmvnorm)
55 57
 importFrom(ellipse, ellipse)
56 58
 
57 59
 importFrom(ff, ffdf, physical.ff, physical.ffdf)
58
-importClassesFrom(oligoClasses, ffdf)
60
+importClassesFrom(oligoClasses, ffdf, ff_matrix)
59 61
 
60 62
 exportMethods(lines)
61 63
 exportMethods(CA, CB)
... ...
@@ -79,8 +81,12 @@ export(computeCopynumber, ACN)
79 81
 export(totalCopyNumber)
80 82
 export(cnrma, cnrma2)
81 83
 
82
-exportMethods("A<-", "B<-", "nuA", "nuB", "phiA", "phiB", "nuA<-", "nuB<-", "phiA<-", "phiB<-", "numberGenotype<-")
83
-
84
+exportMethods("A<-", "B<-", "nuA", "nuB", "phiA", "phiB", "nuA<-", "nuB<-", "phiA<-", "phiB<-")
85
+exportMethods(Ns, medians, mads, tau2, corr)
86
+exportClasses(ff_or_matrix)
87
+export(genotypeSummary, summarizeSnps, summarizeNps, shrinkSummary, shrinkGenotypeSummaries, summarizeMaleXGenotypes,
88
+       indexComplete)
89
+export(Ns)
84 90
 ## For debugging
85 91
 ## exportPattern("^[^\\.]")
86 92
 
... ...
@@ -12,19 +12,84 @@ setGeneric("CB", function(object, ...) standardGeneric("CB"))
12 12
 ##setGeneric("totalCopyNumber", function(object, ...) standardGeneric("totalCopyNumber"))
13 13
 
14 14
 
15
+setGeneric("Ns", function(object, ...) standardGeneric("Ns"))
16
+setGeneric("corr", function(object, ...) standardGeneric("corr"))
17
+setGeneric("mads", function(object, ...) standardGeneric("mads"))
18
+setGeneric("medians", function(object, ...) standardGeneric("medians"))
19
+setGeneric("tau2", function(object, ...) standardGeneric("tau2"))
20
+
15 21
 ## The generics below are for internal use with copy number methods
16 22
 ## If we keep them in oligoClasses, we need to export and document
23
+setGeneric("N.AA", function(object) standardGeneric("N.AA"))
24
+setGeneric("N.AB", function(object) standardGeneric("N.AB"))
25
+setGeneric("N.BB", function(object) standardGeneric("N.BB"))
26
+setGeneric("N.AA<-", function(object, value) standardGeneric("N.AA<-"))
27
+setGeneric("N.AB<-", function(object, value) standardGeneric("N.AB<-"))
28
+setGeneric("N.BB<-", function(object, value) standardGeneric("N.BB<-"))
29
+
30
+setGeneric("medians", function(object, ...) standardGeneric("medians"))
31
+setGeneric("medianA.AA", function(object) standardGeneric("medianA.AA"))
32
+setGeneric("medianA.AB", function(object) standardGeneric("medianA.AB"))
33
+setGeneric("medianA.BB", function(object) standardGeneric("medianA.BB"))
34
+setGeneric("medianB.AA", function(object) standardGeneric("medianB.AA"))
35
+setGeneric("medianB.AB", function(object) standardGeneric("medianB.AB"))
36
+setGeneric("medianB.BB", function(object) standardGeneric("medianB.BB"))
37
+setGeneric("medianA.AA<-", function(object, value) standardGeneric("medianA.AA<-"))
38
+setGeneric("medianA.AB<-", function(object, value) standardGeneric("medianA.AB<-"))
39
+setGeneric("medianA.BB<-", function(object, value) standardGeneric("medianA.BB<-"))
40
+setGeneric("medianB.AA<-", function(object, value) standardGeneric("medianB.AA<-"))
41
+setGeneric("medianB.AB<-", function(object, value) standardGeneric("medianB.AB<-"))
42
+setGeneric("medianB.BB<-", function(object, value) standardGeneric("medianB.BB<-"))
43
+
44
+
45
+
46
+setGeneric("madA.AA", function(object) standardGeneric("madA.AA"))
47
+setGeneric("madA.AB", function(object) standardGeneric("madA.AB"))
48
+setGeneric("madA.BB", function(object) standardGeneric("madA.BB"))
49
+setGeneric("madB.AA", function(object) standardGeneric("madB.AA"))
50
+setGeneric("madB.AB", function(object) standardGeneric("madB.AB"))
51
+setGeneric("madB.BB", function(object) standardGeneric("madB.BB"))
52
+setGeneric("madA.AA<-", function(object, value) standardGeneric("madA.AA<-"))
53
+setGeneric("madA.AB<-", function(object, value) standardGeneric("madA.AB<-"))
54
+setGeneric("madA.BB<-", function(object, value) standardGeneric("madA.BB<-"))
55
+setGeneric("madB.AA<-", function(object, value) standardGeneric("madB.AA<-"))
56
+setGeneric("madB.AB<-", function(object, value) standardGeneric("madB.AB<-"))
57
+setGeneric("madB.BB<-", function(object, value) standardGeneric("madB.BB<-"))
58
+
59
+setGeneric("tau2A.AA", function(object) standardGeneric("tau2A.AA"))
60
+##setGeneric("tau2A.AB", function(object) standardGeneric("tau2A.AB"))
61
+setGeneric("tau2A.BB", function(object) standardGeneric("tau2A.BB"))
62
+setGeneric("tau2B.AA", function(object) standardGeneric("tau2B.AA"))
63
+##setGeneric("tau2B.AB", function(object) standardGeneric("tau2B.AB"))
64
+setGeneric("tau2B.BB", function(object) standardGeneric("tau2B.BB"))
65
+setGeneric("tau2A.AA<-", function(object, value) standardGeneric("tau2A.AA<-"))
66
+##setGeneric("tau2A.AB<-", function(object, value) standardGeneric("tau2A.AB<-"))
67
+setGeneric("tau2A.BB<-", function(object, value) standardGeneric("tau2A.BB<-"))
68
+setGeneric("tau2B.AA<-", function(object, value) standardGeneric("tau2B.AA<-"))
69
+##setGeneric("tau2B.AB<-", function(object, value) standardGeneric("tau2B.AB<-"))
70
+setGeneric("tau2B.BB<-", function(object, value) standardGeneric("tau2B.BB<-"))
71
+
72
+setGeneric("corrAA", function(object) standardGeneric("corrAA"))
73
+setGeneric("corrAB", function(object) standardGeneric("corrAB"))
74
+setGeneric("corrBB", function(object) standardGeneric("corrBB"))
75
+setGeneric("corrAA<-", function(object, value) standardGeneric("corrAA<-"))
76
+setGeneric("corrAB<-", function(object, value) standardGeneric("corrAB<-"))
77
+setGeneric("corrBB<-", function(object, value) standardGeneric("corrBB<-"))
78
+
79
+
80
+
17 81
 setGeneric("nuA", function(object) standardGeneric("nuA"))
18 82
 setGeneric("nuB", function(object) standardGeneric("nuB"))
19 83
 setGeneric("phiA", function(object) standardGeneric("phiA"))
20 84
 setGeneric("phiB", function(object) standardGeneric("phiB"))
85
+setGeneric("phiPrimeA", function(object) standardGeneric("phiPrimeA"))
86
+setGeneric("phiPrimeB", function(object) standardGeneric("phiPrimeB"))
87
+setGeneric("phiPrimeA<-", function(object, value) standardGeneric("phiPrimeA<-"))
88
+setGeneric("phiPrimeB<-", function(object, value) standardGeneric("phiPrimeB<-"))
21 89
 setGeneric("sigma2A", function(object) standardGeneric("sigma2A"))
22 90
 setGeneric("sigma2B", function(object) standardGeneric("sigma2B"))
23 91
 setGeneric("tau2A", function(object) standardGeneric("tau2A"))
24 92
 setGeneric("tau2B", function(object) standardGeneric("tau2B"))
25
-setGeneric("corrAA", function(object) standardGeneric("corrAA"))
26
-setGeneric("corrBB", function(object) standardGeneric("corrBB"))
27
-setGeneric("corrAB", function(object) standardGeneric("corrAB"))
28 93
 setGeneric("nuA<-", function(object, value) standardGeneric("nuA<-"))
29 94
 setGeneric("nuB<-", function(object, value) standardGeneric("nuB<-"))
30 95
 setGeneric("phiA<-", function(object, value) standardGeneric("phiA<-"))
... ...
@@ -33,11 +98,6 @@ setGeneric("sigma2A<-", function(object, value) standardGeneric("sigma2A<-"))
33 98
 setGeneric("sigma2B<-", function(object, value) standardGeneric("sigma2B<-"))
34 99
 setGeneric("tau2A<-", function(object, value) standardGeneric("tau2A<-"))
35 100
 setGeneric("tau2B<-", function(object, value) standardGeneric("tau2B<-"))
36
-setGeneric("corrAA<-", function(object, value) standardGeneric("corrAA<-"))
37
-setGeneric("corrAB<-", function(object, value) standardGeneric("corrAB<-"))
38
-setGeneric("corrBB<-", function(object, value) standardGeneric("corrBB<-"))
39 101
 setGeneric("flags<-", function(object, value) standardGeneric("flags<-"))
40 102
 
41
-setGeneric("numberGenotype<-", function(object, value, ...) standardGeneric("numberGenotype<-"))
42
-
43 103
 
... ...
@@ -362,13 +362,13 @@ rowCors <- function(x, y, ...){
362 362
 	return(covar/(sd.x*sd.y))
363 363
 }
364 364
 
365
-corByGenotype <- function(A, B, G, Ns, which.cluster=c(1,2,3)[1], DF.PRIOR){
365
+corByGenotype <- function(A, B, G, Ns, which.cluster=c(1,2,3)[1]){##, DF.PRIOR){
366 366
 	x <- A * (G == which.cluster)
367 367
 	x[x==0] <- NA
368 368
 	y <- B * (G == which.cluster)
369 369
 	res <- as.matrix(rowCors(x, y, na.rm=TRUE))
370
-	cors <- shrink(res, Ns[, which.cluster], DF.PRIOR)
371
-	cors
370
+##	cors <- shrink(res, Ns[, which.cluster], DF.PRIOR)
371
+	res
372 372
 }
373 373
 
374 374
 dqrlsWrapper <- function(x, y, wts, tol=1e-7){
... ...
@@ -382,24 +382,38 @@ dqrlsWrapper <- function(x, y, wts, tol=1e-7){
382 382
 		 work=double(2 * p), PACKAGE="base")[["coefficients"]]
383 383
 }
384 384
 
385
-fit.wls <- function(allele, Ystar, W, Ns, autosome=TRUE){
385
+
386
+##fit.wls <- function(allele, Ystar, W, Ns, autosome=TRUE){
387
+fit.wls <- function(NN, sigma, allele, Y, autosome, X){
388
+	##		Np <- NN
389
+##		Np[Np < 1] <- 1
390
+##		vA2 <- vA^2/Np
391
+##		vB2 <- vB^2/Np
392
+##		wA <- sqrt(1/vA2)
393
+##		wB <- sqrt(1/vB2)
394
+##		YA <- muA*wA
395
+##		YB <- muB*wB
396
+	Np <- NN
397
+	Np[Np < 1] <- 1
398
+	W <- (sigma/sqrt(Np))^-1
399
+	Ystar <- Y*W
386 400
 	complete <- which(rowSums(is.na(W)) == 0 & rowSums(is.na(Ystar)) == 0)
387 401
 ##	if(any(!is.finite(W))){## | any(!is.finite(V))){
388 402
 ##		i <- which(rowSums(!is.finite(W)) > 0)
389 403
 ##		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
390 404
 ##	}
391
-	NOHET <- mean(Ns[, 2], na.rm=TRUE) < 0.05
405
+##	NOHET <- mean(NN[, 2], na.rm=TRUE) < 0.05
392 406
 	if(missing(allele)) stop("must specify allele")
393
-	if(autosome){
407
+	if(autosome & missing(X)){
394 408
 		if(allele == "A") X <- cbind(1, 2:0)
395 409
 		if(allele == "B") X <- cbind(1, 0:2)
396
-		betahat <- matrix(NA, 2, nrow(Ystar))
397
-	} else {
410
+	}
411
+	if(!autosome & missing(X)){
398 412
 		if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
399 413
 		if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
400
-		betahat <- matrix(NA, 3, nrow(Ystar))
401 414
 	}
402
-	if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous
415
+	betahat <- matrix(NA, ncol(X), nrow(Ystar))
416
+##	if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous
403 417
 	##How to quickly generate Xstar, Xstar = diag(W) %*% X
404 418
 	##Xstar <- apply(W, 1, generateX, X)
405 419
 	ww <- rep(1, ncol(Ystar))
... ...
@@ -780,11 +794,136 @@ crlmmCopynumberLD <- function(object,
780 794
 }
781 795
 crlmmCopynumber2 <- crlmmCopynumberLD
782 796
 
783
-fit.lm1 <- function(strata.index,
797
+
798
+
799
+
800
+shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAMPLES, DF.PRIOR,
801
+				    verbose, is.lds){
802
+	if(is.lds) {physical <- get("physical"); open(object)}
803
+	if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
804
+	marker.index <- index.list[[strata]]
805
+	batches <- split(seq_along(batch(object)), as.character(batch(object)))
806
+	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
807
+	batchnames <- batchNames(object)
808
+	N.AA <- as.matrix(N.AA(object)[marker.index, ])
809
+	N.AB <- as.matrix(N.AB(object)[marker.index, ])
810
+	N.BB <- as.matrix(N.BB(object)[marker.index, ])
811
+	medianA.AA <- as.matrix(medianA.AA(object)[marker.index,])
812
+	medianA.AB <- as.matrix(medianA.AB(object)[marker.index,])
813
+	medianA.BB <- as.matrix(medianA.BB(object)[marker.index,])
814
+	medianB.AA <- as.matrix(medianB.AA(object)[marker.index,])
815
+	medianB.AB <- as.matrix(medianB.AB(object)[marker.index,])
816
+	medianB.BB <- as.matrix(medianB.BB(object)[marker.index,])
817
+	madA.AA <- as.matrix(madA.AA(object)[marker.index,])
818
+	madA.AB <- as.matrix(madA.AB(object)[marker.index,])
819
+	madA.BB <- as.matrix(madA.BB(object)[marker.index,])
820
+	madB.AA <- as.matrix(madB.AA(object)[marker.index,])
821
+	madB.AB <- as.matrix(madB.AB(object)[marker.index,])
822
+	madB.BB <- as.matrix(madB.BB(object)[marker.index,])
823
+	medianA <- medianB <- shrink.madB <- shrink.madA <- vector("list", length(batchnames))
824
+	shrink.tau2A.AA <- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index,])
825
+	shrink.tau2B.BB <- tau2B.BB <- as.matrix(tau2B.BB(object)[marker.index,])
826
+	shrink.tau2A.BB <- tau2A.BB <- as.matrix(tau2A.BB(object)[marker.index,])
827
+	shrink.tau2B.AA <- tau2B.AA <- as.matrix(tau2B.AA(object)[marker.index,])
828
+	shrink.corrAA <- corrAA <- as.matrix(corrAA(object)[marker.index, ])
829
+	shrink.corrAB <- corrAB <- as.matrix(corrAB(object)[marker.index, ])
830
+	shrink.corrBB <- corrBB <- as.matrix(corrBB(object)[marker.index, ])
831
+	flags <- as.matrix(flags(object)[marker.index, ])
832
+	for(k in seq(along=batches)){
833
+		B <- batches[[k]]
834
+		this.batch <- unique(as.character(batch(object)[B]))
835
+
836
+		medianA[[k]] <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
837
+		medianB[[k]] <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
838
+		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
839
+		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
840
+		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
841
+		##RS: estimate DF.PRIOR
842
+		shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR)
843
+		shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR)
844
+
845
+		## an estimate of the background variance is the MAD
846
+		## of the log2(allele A) intensities among subjects with
847
+		## genotypes BB
848
+		shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
849
+		shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
850
+		## an estimate of the signal variance is the MAD
851
+		## of the log2(allele A) intensities among subjects with
852
+		## genotypes AA
853
+		shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
854
+		shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
855
+		cor.AA <- corrAA[, k, drop=FALSE]
856
+		cor.AB <- corrAB[, k, drop=FALSE]
857
+		cor.BB <- corrBB[, k, drop=FALSE]
858
+		shrink.corrAA[, k] <- shrink(cor.AA, NN[, 1], DF.PRIOR)
859
+		shrink.corrAB[, k] <- shrink(cor.AB, NN[, 2], DF.PRIOR)
860
+		shrink.corrBB[, k] <- shrink(cor.BB, NN[, 3], DF.PRIOR)
861
+
862
+		##---------------------------------------------------------------------------
863
+		## SNPs that we'll use for imputing location/scale of unobserved genotypes
864
+		##---------------------------------------------------------------------------
865
+		index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
866
+
867
+		##---------------------------------------------------------------------------
868
+		## Impute sufficient statistics for unobserved genotypes (plate-specific)
869
+		##---------------------------------------------------------------------------
870
+		unobservedAA <- NN[, 1] < MIN.OBS
871
+		unobservedAB <- NN[, 2] < MIN.OBS
872
+		unobservedBB <- NN[, 3] < MIN.OBS
873
+		unobserved.index <- vector("list", 3)
874
+		unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS))
875
+		unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS))
876
+		unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
877
+		res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
878
+		medianA[[k]] <- res[[1]]
879
+		medianB[[k]] <- res[[2]]
880
+		rm(res)
881
+		##the NA's in 'medianA' and 'medianB' are monomorphic if MIN.OBS = 1
882
+
883
+		## RS: For Monomorphic SNPs a mixture model may be better
884
+		## RS: Further, we can improve estimation by borrowing strength across batch
885
+		unobserved.index[[1]] <- which(unobservedAA & unobservedAB)
886
+		unobserved.index[[2]] <- which(unobservedBB & unobservedAB)
887
+		unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange
888
+		res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]],
889
+						       index.complete,
890
+						       unobserved.index)
891
+		medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]]
892
+		rm(res)
893
+		negA <- rowSums(medianA[[k]] < 0) > 0
894
+		negB <- rowSums(medianB[[k]] < 0) > 0
895
+		flags[, k] <- as.integer(rowSums(NN == 0) > 0 | negA | negB)
896
+	}
897
+	flags(object)[marker.index, ] <- flags
898
+	medianA.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 1]))
899
+	medianA.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 2]))
900
+	medianA.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianA, function(x) x[, 3]))
901
+	medianB.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 1]))
902
+	medianB.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 2]))
903
+	medianB.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 3]))
904
+
905
+	madA.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1]))
906
+	madA.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2]))
907
+	madA.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3]))
908
+	madB.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1]))
909
+	madB.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2]))
910
+	madB.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3]))
911
+
912
+	corrAA(object)[marker.index, ] <- shrink.corrAA
913
+	corrAB(object)[marker.index, ] <- shrink.corrAB
914
+	corrBB(object)[marker.index, ] <- shrink.corrBB
915
+	tau2A.AA(object)[marker.index,] <- shrink.tau2A.AA
916
+	tau2A.BB(object)[marker.index,] <- shrink.tau2A.BB
917
+	tau2B.AA(object)[marker.index,] <- shrink.tau2B.AA
918
+	tau2B.BB(object)[marker.index,] <- shrink.tau2B.BB
919
+	if(is.lds) return(TRUE) else retrun(object)
920
+}
921
+
922
+
923
+
924
+fit.lm1 <- function(strata,
784 925
 		    index.list,
785
-		    marker.index,
786 926
 		    object,
787
-		    batchSize,
788 927
 		    SNRMin,
789 928
 		    MIN.SAMPLES,
790 929
 		    MIN.OBS,
... ...
@@ -793,142 +932,63 @@ fit.lm1 <- function(strata.index,
793 932
 		    THR.NU.PHI,
794 933
 		    MIN.NU,
795 934
 		    MIN.PHI,
796
-		    verbose,...){
797
-	if(isPackageLoaded("ff")) physical <- get("physical")
798
-	is.lds <- ifelse(is(calls(object), "ffdf") | is(calls(object), "ff_matrix"), TRUE, FALSE)
799
-	if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list))
800
-	snps <- index.list[[strata.index]]
801
-	batches <- split(seq_along(batch(object)), batch(object))
935
+		    verbose, is.lds,
936
+		    CHR.X, ...){
937
+	if(is.lds) {physical <- get("physical"); open(object)}
938
+	if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
939
+	snps <- index.list[[strata]]
940
+	batches <- split(seq_along(batch(object)), as.character(batch(object)))
802 941
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
803
-	open(object)
804
-	corrAB <- corrBB <- corrAA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batch(object))))
805
-	flags <- nuA <- nuB <- phiA <- phiB <- corrAB
806
-	## should the 'normal' indicator
807
-	NORM <- 1
808
-
809
-	Ns.names <- ls(numberGenotype(object))
810 942
 	batchnames <- batchNames(object)
811
-	GG <- as.matrix(calls(object)[snps, ])
812
-	CP <- as.matrix(snpCallProbability(object)[snps, ])
813
-	AA <- as.matrix(A(object)[snps, ])
814
-	BB <- as.matrix(B(object)[snps, ])
815
-	zzzz <- 1
816
-	for(k in batches){
817
-		this.batch <- unique(as.character(batch(object)[k]))
818
-		zzzz <- zzzz+1
819
-		G <- GG[, k]
820
-		##NORM <- normal.snps[, k]
821
-		xx <- CP[, k]
822
-		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
823
-		G <- G*highConf*NORM
824
-		A <- AA[, k]
825
-		B <- BB[, k]
826
-		##index <- GT.B <- GT.A <- vector("list", 3)
827
-		##names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
828
-		Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G)
829
-		muA <- applyByGenotype(A, rowMedians, G)
830
-		muB <- applyByGenotype(B, rowMedians, G)
831
-		vA <- applyByGenotype(A, rowMAD, G)
832
-		vB <- applyByGenotype(B, rowMAD, G)
833
-		vA <- shrink(vA, Ns, DF.PRIOR)
834
-		vB <- shrink(vB, Ns, DF.PRIOR)
835
-		##location and scale
836
-		J <- match(unique(batch(object)[k]), batchnames)##unique(batch(object)))
837
-		##background variance for alleleA
838
-		taus <- applyByGenotype(log2(A), rowMAD, G)^2
839
-		tau2A[, J] <- shrink(taus[, 3, drop=FALSE], Ns[, 3], DF.PRIOR)
840
-		sig2A[, J] <- shrink(taus[, 1, drop=FALSE], Ns[, 1], DF.PRIOR)
841
-		taus <- applyByGenotype(log2(B), rowMAD, G)^2
842
-		tau2B[, J] <- shrink(taus[, 3, drop=FALSE], Ns[, 1], DF.PRIOR)
843
-		sig2B[, J] <- shrink(taus[, 1, drop=FALSE], Ns[, 3], DF.PRIOR)
844
-
845
-		corrAB[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=2, DF.PRIOR)
846
-		corrAA[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=1, DF.PRIOR)
847
-		corrBB[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=3, DF.PRIOR)
848
-
849
-		##---------------------------------------------------------------------------
850
-		## Impute sufficient statistics for unobserved genotypes (plate-specific)
851
-		##---------------------------------------------------------------------------
852
-		index <- apply(Ns, 2, function(x, MIN.OBS) which(x > MIN.OBS), MIN.OBS)
853
-		correct.orderA <- muA[, 1] > muA[, 3]
854
-		correct.orderB <- muB[, 3] > muB[, 1]
855
-		index.complete <- intersect(which(correct.orderA & correct.orderB), intersect(index[[1]], intersect(index[[2]], index[[3]])))
856
-		size <- min(5000, length(index.complete))
857
-		if(size == 5000) index.complete <- sample(index.complete, 5000, replace=TRUE)
858
-		if(length(index.complete) < 200){
859
-			warning("fewer than 200 snps pass criteria for predicting the sufficient statistics")
860
-			return()
861
-		}
862
-		index <- vector("list", 3)
863
-		index[[1]] <- which(Ns[, 1] == 0 & (Ns[, 2] >= MIN.OBS & Ns[, 3] >= MIN.OBS))
864
-		index[[2]] <- which(Ns[, 2] == 0 & (Ns[, 1] >= MIN.OBS & Ns[, 3] >= MIN.OBS))
865
-		index[[3]] <- which(Ns[, 3] == 0 & (Ns[, 2] >= MIN.OBS & Ns[, 1] >= MIN.OBS))
866
-		res <- imputeCenter(muA, muB, index.complete, index)
867
-		muA <- res[[1]]
868
-		muB <- res[[2]]
869
-
870
-		## Monomorphic SNPs.  Mixture model may be better
871
-		## Improve estimation by borrowing strength across batch
872
-		noAA <- Ns[, 1] < MIN.OBS
873
-		noAB <- Ns[, 2] < MIN.OBS
874
-		noBB <- Ns[, 3] < MIN.OBS
875
-		index[[1]] <- noAA & noAB
876
-		index[[2]] <- noBB & noAB
877
-		index[[3]] <- noAA & noBB
878
-		cols <- c(3, 1, 2)
879
-		for(j in 1:3){
880
-			if(sum(index[[j]]) == 0) next()
881
-			kk <- cols[j]
882
-			X <- cbind(1, muA[index.complete, kk], muB[index.complete, kk])
883
-			Y <- cbind(muA[index.complete,  -kk],
884
-				   muB[index.complete,  -kk])
885
-			betahat <- solve(crossprod(X), crossprod(X,Y))
886
-			X <- cbind(1, muA[index[[j]],  kk], muB[index[[j]],  kk])
887
-			mus <- X %*% betahat
888
-			muA[index[[j]], -kk] <- mus[, 1:2]
889
-			muB[index[[j]], -kk] <- mus[, 3:4]
890
-		}
891
-		rm(betahat, X, Y, mus, index, noAA, noAB, noBB, res)
892
-		##gc()
893
-		negA <- rowSums(muA < 0) > 0
894
-		negB <- rowSums(muB < 0) > 0
895
-		flags[, J] <- rowSums(Ns == 0) > 0
896
-		##flags[, J] <- index[[1]] | index[[2]] | index[[3]] | rowSums(
943
+	N.AA <- as.matrix(N.AA(object)[snps, ])
944
+	N.AB <- as.matrix(N.AB(object)[snps, ])
945
+	N.BB <- as.matrix(N.BB(object)[snps, ])
946
+	medianA.AA <- as.matrix(medianA.AA(object)[snps,])
947
+	medianA.AB <- as.matrix(medianA.AB(object)[snps,])
948
+	medianA.BB <- as.matrix(medianA.BB(object)[snps,])
949
+	medianB.AA <- as.matrix(medianB.AA(object)[snps,])
950
+	medianB.AB <- as.matrix(medianB.AB(object)[snps,])
951
+	medianB.BB <- as.matrix(medianB.BB(object)[snps,])
952
+	madA.AA <- as.matrix(madA.AA(object)[snps,])
953
+	madA.AB <- as.matrix(madA.AB(object)[snps,])
954
+	madA.BB <- as.matrix(madA.BB(object)[snps,])
955
+	madB.AA <- as.matrix(madB.AA(object)[snps,])
956
+	madB.AB <- as.matrix(madB.AB(object)[snps,])
957
+	madB.BB <- as.matrix(madB.BB(object)[snps,])
958
+	tau2A.AA <- as.matrix(tau2A.AA(object)[snps,])
959
+	tau2B.BB <- as.matrix(tau2B.BB(object)[snps,])
960
+	tau2A.BB <- as.matrix(tau2A.BB(object)[snps,])
961
+	tau2B.AA <- as.matrix(tau2B.AA(object)[snps,])
962
+	corrAA <- as.matrix(corrAA(object)[snps, ])
963
+	corrAB <- as.matrix(corrAB(object)[snps, ])
964
+	corrBB <- as.matrix(corrBB(object)[snps, ])
965
+	nuA <- as.matrix(nuA(object)[snps, ])
966
+	phiA <- as.matrix(phiA(object)[snps, ])
967
+	nuB <- as.matrix(nuB(object)[snps, ])
968
+	phiB <- as.matrix(phiB(object)[snps, ])
969
+	flags <- as.matrix(flags(object)[snps, ])
970
+	for(k in seq(along=batches)){
971
+		B <- batches[[k]]
972
+		this.batch <- unique(as.character(batch(object)[B]))
973
+		medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
974
+		medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
975
+		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
976
+		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
977
+		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
897 978
 		## we're regressing on the medians using the standard errors (hence the division by N) as weights
898
-		##formerly coefs()
899
-		Np <- Ns
900
-		Np[Np < 1] <- 1
901
-		vA2 <- vA^2/Np
902
-		vB2 <- vB^2/Np
903
-		wA <- sqrt(1/vA2)
904
-		wB <- sqrt(1/vB2)
905
-		YA <- muA*wA
906
-		YB <- muB*wB
907
-		res <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns)
908
-##		} else{
909
-##			if(zzzz==1) message("currently, only weighted least squares (wls) is available... fitting wls")
910
-##			res <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns)
911
-##		}
912
-		nuA[, J] <- res[[1]]
913
-		phiA[, J] <- res[[2]]
914
-		res <- fit.wls(allele="B", Ystar=YB, W=wB, Ns=Ns)
915
-##		} else {
916
-##			if(zzzz==1) message("currently, only weighted least squares (wls) is available... fitting wls")
917
-##			res <- fit.wls(allele="B", Ystar=YB, W=wB, Ns=Ns)
918
-##		}
919
-		##nuB[, J] <- res[[1]]
920
-		nuB[, J] <- res[1, ]
921
-		##phiB[, J] <- res[[2]]
922
-		phiB[, J] <- res[2, ]
979
+		res <- fit.wls(NN=NN, sigma=madA, allele="A", Y=medianA, autosome=!CHR.X)
980
+		nuA[, k] <- res[1, ]
981
+		phiA[, k] <- res[2, ]
982
+		rm(res)
983
+		##res <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns)
984
+		##nuA[, J] <- res[[1]]
985
+		##phiA[, J] <- res[[2]]
986
+		res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
987
+		nuB[, k] <- res[1, ]
988
+		phiB[, k] <- res[2, ]
923 989
 ##		cA[, k] <- matrix((1/phiA[, J]*(A-nuA[, J])), nrow(A), ncol(A))
924 990
 ##		cB[, k] <- matrix((1/phiB[, J]*(B-nuB[, J])), nrow(B), ncol(B))
925
-		jj <- match(this.batch, Ns.names)
926
-		numberGenotype(object, this.batch)[snps, ] <- Ns
927
-		rm(G, A, B, wA, wB, YA,YB, res, negA, negB, Np, Ns)
928
-		##gc()
929 991
 	}
930
-	nGt <- lapply(nGt, function(x){ colnames(x) <- c("AA", "AB", "BB"); return(x)})
931
-
932 992
 	if(THR.NU.PHI){
933 993
 		nuA[nuA < MIN.NU] <- MIN.NU
934 994
 		nuB[nuB < MIN.NU] <- MIN.NU
... ...
@@ -943,34 +1003,21 @@ fit.lm1 <- function(strata.index,
943 1003
 ##	cB <- matrix(as.integer(cB*100), nrow(cB), ncol(cB))
944 1004
 ##	CA(object)[snps, ] <- cA
945 1005
 ##	CB(object)[snps, ] <- cB
946
-	if(is.lds) lapply(lM(object), open)
947
-	flags(object)[snps, ] <- flags
948
-	tau2A(object) <- tau2A
949
-	tau2B(object) <- tau2B
950
-	sigma2A(object) <- sig2A
951
-	sigma2B(object) <- sig2B
952
-	nuA(object) <- nuA
953
-	nuB(object) <- nuB
954
-	phiA(object) <- phiA
955
-	phiB(object) <- phiB
956
-	corrAA(object) <- corrAA
957
-	corrBB(object) <- corrBB
958
-	corrAB(object) <- corrAB
959
-	if(is.lds) {
960
-		lapply(assayData(object), close)
961
-		lapply(lM(object), close)
1006
+	nuA(object)[snps, ] <- nuA
1007
+	nuB(object)[snps, ] <- nuB
1008
+	phiA(object)[snps, ] <- phiA
1009
+	phiB(object)[snps, ] <- phiB
1010
+	if(is.lds){
1011
+		close(object)
962 1012
 		return(TRUE)
963 1013
 	} else{
964 1014
 		return(object)
965 1015
 	}
966 1016
 }
967 1017
 
968
-fit.lm2 <- function(strata.index,
1018
+fit.lm2 <- function(strata,
969 1019
 		    index.list,
970
-		    marker.index,
971 1020
 		    object,
972
-		    Ns,
973
-		    batchSize,
974 1021
 		    SNRMin,
975 1022
 		    MIN.SAMPLES,
976 1023
 		    MIN.OBS,
... ...
@@ -979,107 +1026,238 @@ fit.lm2 <- function(strata.index,
979 1026
 		    THR.NU.PHI,
980 1027
 		    MIN.NU,
981 1028
 		    MIN.PHI,
982
-		    verbose,...){
983
-	physical <- get("physical")
984
-	if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list))
985
-	snps <- index.list[[strata.index]]
986
-	batches <- split(seq(along=batch(object)), batch(object))
1029
+		    verbose, is.lds, CHR.X, ...){
1030
+	if(is.lds) {physical <- get("physical"); open(object)}
1031
+	if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
1032
+	marker.index <- index.list[[strata]]
1033
+	batches <- split(seq_along(batch(object)), as.character(batch(object)))
987 1034
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
988 1035
 
989
-	open(object)
990
-	open(snpflags)
991
-##	open(normal)
992 1036
 
993
-##	cA <- matrix(NA, length(snps), ncol(object))
994 1037
 	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
995
-	flags <- as.matrix(snpflags[,])
996
-	noflags <- rowSums(flags, na.rm=TRUE) == 0  ##NA's for unevaluated batches
997
-	## We do not want to write to discuss for each batch.  More efficient to
998
-	## write to disk after estimating these parameters for all batches.
999
-	nuA.np <- phiA.np <- sig2A.np <- matrix(NA, length(snps), length(unique(batch(object))))
1038
+	flags <- as.matrix(flags(object)[ii, ])
1039
+	fns <- featureNames(object)[ii]
1040
+	fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
1041
+	snp.index <- sample(match(fns.noflags, featureNames(object)), 5000)
1042
+
1043
+	##flags <- as.matrix(snpflags[,])
1044
+	##noflags <- rowSums(flags, na.rm=TRUE) == 0  ##NA's for unevaluated batches
1045
+
1046
+	nuA.np <- as.matrix(nuA(object)[marker.index, ])
1047
+	phiA.np <- as.matrix(phiA(object)[marker.index, ])
1048
+	tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, ])
1049
+
1050
+	##nuA.np <- phiA.np <- sig2A.np <- matrix(NA, length(marker.index), length(unique(batch(object))))
1000 1051
 	## for imputation, we need the corresponding parameters of the snps
1001
-	NN <- min(10e3, length(which(ii & noflags)))
1002
-	snp.ind <- sample(which(ii & noflags), NN)
1003
-	nnuA.snp <- as.matrix(physical(lM(object))$nuA[snp.ind,])
1004
-	pphiA.snp <- as.matrix(physical(lM(object))$phiA[snp.ind,])
1005
-	nnuB.snp <- as.matrix(physical(lM(object))$nuB[snp.ind,])
1006
-	pphiB.snp <- as.matrix(physical(lM(object))$phiB[snp.ind,])
1007
-
1008
-	AA.snp <- as.matrix(A(object)[snp.ind, ])
1009
-	BB.snp <- as.matrix(B(object)[snp.ind, ])
1052
+	##NN <- min(10e3, length(which(ii & noflags)))
1053
+	##snp.ind <- sample(which(ii & noflags), NN)
1054
+	nuA.snp <- as.matrix(nuA(object)[snp.index, ])
1055
+	nuB.snp <- as.matrix(nuB(object)[snp.index, ])
1056
+	phiA.snp <- as.matrix(phiA(object)[snp.index, ])
1057
+	phiB.snp <- as.matrix(phiB(object)[snp.index, ])
1058
+	medianA.AA <- as.matrix(medianA.AA(object)[snp.index,])
1059
+	medianB.BB <- as.matrix(medianB.BB(object)[snp.index,])
1060
+
1061
+
1062
+
1063
+	medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index,])
1064
+
1065
+##	nnuA.snp <- as.matrix(physical(lM(object))$nuA[snp.ind,])
1066
+##	pphiA.snp <- as.matrix(physical(lM(object))$phiA[snp.ind,])
1067
+##	nnuB.snp <- as.matrix(physical(lM(object))$nuB[snp.ind,])
1068
+##	pphiB.snp <- as.matrix(physical(lM(object))$phiB[snp.ind,])
1069
+
1070
+##	AA.snp <- as.matrix(A(object)[snp.ind, ])
1071
+##	BB.snp <- as.matrix(B(object)[snp.ind, ])
1010 1072
 ##	NNORM.snp <- as.matrix(normal[snp.ind, ])
1011 1073
 ##	NORM.np <- as.matrix(normal[snps, ])
1012
-	AA.np <- as.matrix(A(object)[snps, ])
1013
-	GG <- as.matrix(calls(object)[snp.ind, ])
1014
-	CP <- as.matrix(snpCallProbability(object)[snp.ind, ])
1015
-	for(k in batches){
1016
-		##if(verbose) message("SNP batch ", ii, " of ", length(batches))
1017
-		J <- match(unique(batch(object)[k]), unique(batch(object)))
1018
-##		snp.index <- snp.ind & nuA[, J] > 20 & nuB[, J] > 20 & phiA[, J] > 20 & phiB[, J] > 20
1019
-##		if(sum(snp.index) >= 5000){
1020
-##			snp.index <- sample(which(snp.index), 5000)
1021
-##		} else snp.index <- which(snp.index)
1022
-		phiA.snp <- pphiA.snp[, J]
1023
-		phiB.snp <- pphiB.snp[, J]
1024
-		A.snp <- AA.snp[, k]
1025
-		B.snp <- BB.snp[, k]
1026
-		NORM.snp <- NNORM.snp[, k]
1027
-		G <- GG[, k]
1028
-		xx <- CP[, k]
1029
-		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1030
-		G <- G*highConf*NORM.snp
1031
-		G[G==0] <- NA
1074
+##	AA.np <- as.matrix(A(object)[marker.index, ])
1075
+##	GG <- as.matrix(calls(object)[snp.ind, ])
1076
+##	CP <- as.matrix(snpCallProbability(object)[snp.ind, ])
1077
+	for(k in seq_along(batches)){
1078
+		B <- batches[[k]]
1079
+		this.batch <- unique(as.character(batch(object)[B]))
1080
+##		phiA.snp <- pphiA.snp[, J]
1081
+##		phiB.snp <- pphiB.snp[, J]
1082
+##		A.snp <- AA.snp[, k]
1083
+##		B.snp <- BB.snp[, k]
1084
+##		NORM.snp <- NNORM.snp[, k]
1085
+##		G <- GG[, k]
1086
+##		xx <- CP[, k]
1087
+##		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1088
+##		G <- G*highConf*NORM.snp
1089
+##		G[G==0] <- NA
1032 1090
 		##nonpolymorphic
1033
-		A.np <- AA.np[, k]
1034
-		Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G)
1035
-		muA <- applyByGenotype(A.snp, rowMedians, G)
1036
-		muB <- applyByGenotype(B.snp, rowMedians, G)
1037
-		muA <- muA[, 1]
1038
-		muB <- muB[, 3]
1039
-		X <- cbind(1, log2(c(muA, muB)))
1091
+##		A.np <- AA.np[, k]
1092
+##		Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G)
1093
+##		muA <- applyByGenotype(A.snp, rowMedians, G)
1094
+##		muB <- applyByGenotype(B.snp, rowMedians, G)
1095
+##		muA <- muA[, 1]
1096
+##		muB <- muB[, 3]
1097
+##		X <- cbind(1, log2(c(muA, muB)))
1098
+		X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k])))
1040 1099
 		Y <- log2(c(phiA.snp, phiB.snp))
1041 1100
 		betahat <- solve(crossprod(X), crossprod(X, Y))
1042 1101
 		##
1043
-		mus <- rowMedians(A.np * NORM.np[, k], na.rm=TRUE)
1044
-		crosshyb <- max(median(muA) - median(mus), 0)
1045
-		X <- cbind(1, log2(mus+crosshyb))
1102
+##		mus <- rowMedians(A.np * NORM.np[, k], na.rm=TRUE)
1103
+##		averaging across markers, is there a difference in the
1104
+##		typical AA intensity for SNPs and the AA intensity for
1105
+##		nonpolymorphic loci
1106
+##		crosshyb <- max(median(muA) - median(mus), 0)
1107
+		crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0)
1108
+##		X <- cbind(1, log2(mus+crosshyb))
1109
+		X <- cbind(1, log2(medianA.AA.np[, k] + crosshyb))
1046 1110
 		logPhiT <- X %*% betahat
1047
-		phiA.np[, J] <- 2^(logPhiT)
1048
-		nuA.np[, J] <- mus-2*phiA.np[, J]
1049
-		if(THR.NU.PHI){
1050
-			nuA.np[nuA.np[, J] < MIN.NU, J] <- MIN.NU
1051
-			phiA.np[phiA.np[, J] < MIN.PHI, J] <- MIN.PHI
1052
-		}
1111
+		phiA.np[, k] <- 2^(logPhiT)
1112
+		nuA.np[, k] <- medianA.AA.np[,k]-2*phiA.np[, k]
1053 1113
 ##		cA[, k] <- 1/phiA.np[, J] * (A.np - nuA.np[, J])
1054
-		sig2A.np[, J] <- rowMAD(log2(A.np*NORM.np[, k]), na.rm=TRUE)
1055
-		rm(NORM.snp, highConf, xx, G, Ns, A.np, X, Y, betahat, mus, logPhiT)
1056
-		gc()
1114
+##		sig2A.np[, J] <- rowMAD(log2(A.np*NORM.np[, k]), na.rm=TRUE)
1115
+##		rm(NORM.snp, highConf, xx, G, Ns, A.np, X, Y, betahat, mus, logPhiT)
1116
+##		gc()
1117
+	}
1118
+	if(THR.NU.PHI){
1119
+		nuA.np[nuA.np < MIN.NU] <- MIN.NU
1120
+		phiA.np[phiA.np < MIN.PHI] <- MIN.PHI
1057 1121
 	}
1058 1122
 ##	cA[cA < 0.05] <- 0.05
1059 1123
 ##	cA[cA > 5] <-  5
1060 1124
 ##	cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA))
1061
-##	CA(object)[snps, ] <- cA
1062
-	tmp <- physical(lM(object))$nuA
1063
-	tmp[snps, ] <- nuA.np
1064
-	lM(object)$nuA <- tmp
1065
-	tmp <- physical(lM(object))$sig2A
1066
-	tmp[snps, ] <- sig2A.np
1067
-	lM(object)$sig2A <- tmp
1068
-	tmp <- physical(lM(object))$phiA
1069
-	tmp[snps, ] <- phiA.np
1070
-	lM(object)$sig2A <- tmp
1071
-	lapply(assayData(object), close)
1072
-	lapply(lM(object), close)
1073
-	TRUE
1125
+	nuA(object)[marker.index, ] <- nuA.np
1126
+	phiA(object)[marker.index, ] <- phiA.np
1127
+	if(is.lds) { close(object); return(TRUE)}
1128
+	return(object)
1074 1129
 }
1075 1130
 
1131
+summarizeMaleXNps <- function(marker.index,
1132
+			      batches,
1133
+			      object, MIN.SAMPLES){
1134
+	nr <- length(marker.index)
1135
+	nc <- length(batchNames(object))
1136
+	NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
1137
+	gender <- object$gender
1138
+	AA <- as.matrix(A(object)[marker.index, gender==1])
1139
+	madA.AA <- medianA.AA <- matrix(NA, nr, nc)
1140
+	numberMenPerBatch <- rep(NA, nc)
1141
+	for(k in seq_along(batches)){
1142
+		B <- batches[[k]]
1143
+		this.batch <- unique(as.character(batch(object)[B]))
1144
+		gender <- object$gender[B]
1145
+		if(sum(gender==1) < MIN.SAMPLES) next()
1146
+		sns.batch <- sampleNames(object)[B]
1147
+		##subset GG apppriately
1148
+		sns <- colnames(AA)
1149
+		J <- sns%in%sns.batch
1150
+		numberMenPerBatch[k] <- length(J)
1151
+		medianA.AA[, k] <- rowMedians(AA[, J], na.rm=TRUE)
1152
+		madA.AA[, k] <- rowMAD(AA[, J], na.rm=TRUE)
1153
+	}
1154
+	return(list(medianA.AA=medianA.AA,
1155
+		    madA.AA=madA.AA))
1156
+}
1157
+
1158
+
1159
+summarizeMaleXGenotypes <- function(marker.index,
1160
+				    batches,
1161
+				    object,
1162
+				    GT.CONF.THR,
1163
+				    MIN.OBS,
1164
+				    MIN.SAMPLES,
1165
+				    verbose,
1166
+				    is.lds,
1167
+				    DF.PRIOR,...){
1168
+	nr <- length(marker.index)
1169
+	nc <- length(batchNames(object))
1170
+	NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
1171
+	gender <- object$gender
1172
+	GG <- as.matrix(calls(object)[marker.index, gender==1])
1173
+	CP <- as.matrix(snpCallProbability(object)[marker.index, gender==1])
1174
+	AA <- as.matrix(A(object)[marker.index, gender==1])
1175
+	BB <- as.matrix(B(object)[marker.index, gender==1])
1176
+	for(k in seq_along(batches)){
1177
+		B <- batches[[k]]
1178
+		this.batch <- unique(as.character(batch(object)[B]))
1179
+		gender <- object$gender[B]
1180
+		if(sum(gender==1) < MIN.SAMPLES) next()
1181
+		sns.batch <- sampleNames(object)[B]
1182
+		##subset GG apppriately
1183
+		sns <- colnames(GG)
1184
+		J <- sns%in%sns.batch
1185
+		G <- GG[, J]
1186
+		xx <- CP[, J]
1187
+		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1188
+		G <- G*highConf
1189
+		A <- AA[, J]
1190
+		B <- BB[, J]
1191
+		G.AA <- G==1
1192
+		G.AA[G.AA==FALSE] <- NA
1193
+		G.AB <- G==2
1194
+		G.AB[G.AB==FALSE] <- NA
1195
+		G.BB <- G==3
1196
+		G.BB[G.BB==FALSE] <- NA
1197
+		N.AA.M <- rowSums(G.AA, na.rm=TRUE)
1198
+		N.AB.M <- rowSums(G.AB, na.rm=TRUE)
1199
+		N.BB.M <- rowSums(G.BB, na.rm=TRUE)
1200
+		summaryStats <- function(X, INT, FUNS){
1201
+			tmp <- matrix(NA, nrow(X), length(FUNS))
1202
+			for(j in seq_along(FUNS)){
1203
+				FUN <- match.fun(FUNS[j])
1204
+				tmp[, j] <- FUN(X*INT, na.rm=TRUE)
1205
+			}
1206
+			tmp
1207
+		}
1208
+		statsA.AA <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
1209
+		statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
1210
+		statsA.BB <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
1211
+		statsB.AA <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
1212
+		statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
1213
+		statsB.BB <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
1214
+		medianA <- cbind(statsA.AA[, 1], statsA.AB[, 1], statsA.BB[, 1])
1215
+		medianB <- cbind(statsB.AA[, 1], statsB.AB[, 1], statsB.BB[, 1])
1216
+		madA <- cbind(statsA.AA[, 1], statsA.AB[, 1], statsA.BB[, 1])
1217
+		madB <- cbind(statsB.AA[, 1], statsB.AB[, 1], statsB.BB[, 1])
1218
+		rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB)
1219
+
1220
+##		A <- log2(A); B <- log2(B)
1221
+##		tau2A.AA <- summaryStats(G.AA, A, FUNS="rowMAD")^2
1222
+##		tau2A.BB <- summaryStats(G.BB, A, FUNS="rowMAD")^2
1223
+##		tau2B.AA <- summaryStats(G.AA, B, FUNS="rowMAD")^2
1224
+##		tau2B.BB <- summaryStats(G.BB, B, FUNS="rowMAD")^2
1225
+		##tau2A <- cbind(tau2A.AA, tau2A.BB)
1226
+		##tau2B <- cbind(tau2B.AA, tau2B.BB)
1227
+		NN.M <- cbind(N.AA.M, N.AB.M, N.BB.M)
1228
+		NN.Mlist[[k]] <- NN.M
1229
+
1230
+		shrink.madA[[k]] <- shrink(madA, NN.M, DF.PRIOR)
1231
+		shrink.madB[[k]] <- shrink(madB, NN.M, DF.PRIOR)
1232
+
1233
+##		shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN.M[, 3], DF.PRIOR)[, drop=FALSE]
1234
+##		shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN.M[, 1], DF.PRIOR)[, drop=FALSE]
1235
+##		shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN.M[, 1], DF.PRIOR)[, drop=FALSE]
1236
+##		shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN.M[, 3], DF.PRIOR)[, drop=FALSE]
1076 1237
 
1077
-fit.lm3 <- function(strata.index,
1238
+		##---------------------------------------------------------------------------
1239
+		## SNPs that we'll use for imputing location/scale of unobserved genotypes
1240
+		##---------------------------------------------------------------------------
1241
+		index.complete <- indexComplete(NN.M[, -2], medianA, medianB, MIN.OBS)
1242
+
1243
+		##---------------------------------------------------------------------------
1244
+		## Impute sufficient statistics for unobserved genotypes (plate-specific)
1245
+		##---------------------------------------------------------------------------
1246
+		res <- imputeCenterX(medianA, medianB, NN.M, index.complete, MIN.OBS)
1247
+		imputed.medianA[[k]] <- res[[1]]
1248
+		imputed.medianB[[k]] <- res[[2]]
1249
+	}
1250
+	return(list(madA=shrink.madA,
1251
+		    madB=shrink.madB,
1252
+		    NN.M=NN.Mlist,
1253
+		    medianA=imputed.medianA,
1254
+		    medianB=imputed.medianB))
1255
+}
1256
+
1257
+## X chromosome, SNPs
1258
+fit.lm3 <- function(strata,
1078 1259
 		    index.list,
1079
-		    marker.index,
1080 1260
 		    object,
1081
-		    Ns,
1082
-		    batchSize,
1083 1261
 		    SNRMin,
1084 1262
 		    MIN.SAMPLES,
1085 1263
 		    MIN.OBS,
... ...
@@ -1088,235 +1266,154 @@ fit.lm3 <- function(strata.index,
1088 1266
 		    THR.NU.PHI,
1089 1267
 		    MIN.NU,
1090 1268
 		    MIN.PHI,
1091
-		    verbose, ...){
1092
-	physical <- get("physical")
1093
-	if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list))
1094
-		snps <- index.list[[strata.index]]
1095
-	batches <- split(seq(along=batch(object)), batch(object))
1096
-	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1097
-
1098
-	open(snpflags)
1099
-	open(normal)
1100
-	open(object)
1101
-	corrAB <- corrBB <- corrAA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batch(object))))
1102
-	phiA2 <- phiB2 <- tau2A
1103
-	flags <- nuA <- nuB <- phiA <- phiB <- corrAB
1104
-##	cB <- cA <- matrix(NA, length(snps), ncol(object))
1269
+		    verbose, is.lds, CHR.X, ...){
1270
+	if(is.lds) {physical <- get("physical"); open(object)}
1271
+	if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
1105 1272
 	gender <- object$gender
1106
-	IX <- matrix(gender, length(snps), ncol(object))
1107
-	NORM <- normal[snps,]
1108
-	IX <- IX==2
1109
-
1110
-	GG <- as.matrix(calls(object)[snps, ])
1111
-	CP <- as.matrix(snpCallProbability(object)[snps,])
1112
-	AA <- as.matrix(A(object)[snps, ])
1113
-	BB <- as.matrix(B(object)[snps, ])
1114
-	for(k in batches){
1115
-		##if(verbose) message("SNP batch ", ii, " of ", length(batches))
1116
-		## within-genotype moments
1117
-		gender <- object$gender[k]
1118
-		G <- GG[, k]
1119
-		xx <- CP[, k]
1120
-		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1121
-		G <- G*highConf*NORM[, k]
1122
-		A <- AA[, k]
1123
-		B <- BB[, k]
1124
-		##index <- GT.B <- GT.A <- vector("list", 3)
1125
-		##names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
1126
-		Ns.F <- applyByGenotype(matrix(1, nrow(G), sum(gender==2)), rowSums, G[, gender==2])
1127
-		Ns.M <- applyByGenotype(matrix(1, nrow(G), sum(gender==1)), rowSums, G[, gender==1])
1128
-		Ns <- cbind(Ns.M[, 1], Ns.M[, 3], Ns.F)
1129
-		muA.F <- applyByGenotype(A[, gender==2], rowMedians, G[, gender==2])
1130
-		muA.M <- applyByGenotype(A[, gender==1], rowMedians, G[, gender==1])
1131
-		muB.F <- applyByGenotype(B[, gender==2], rowMedians, G[, gender==2])
1132
-		muB.M <- applyByGenotype(B[, gender==1], rowMedians, G[, gender==1])
1133
-		vA.F <- applyByGenotype(A[, gender==2], rowMAD, G[, gender==2])
1134
-		vB.F <- applyByGenotype(B[, gender==2], rowMAD, G[, gender==2])
1135
-		vA.M <- applyByGenotype(A[, gender==1], rowMAD, G[, gender==1])
1136
-		vB.M <- applyByGenotype(B[, gender==1], rowMAD, G[, gender==1])
1137
-		vA.F <- shrink(vA.F, Ns.F, DF.PRIOR)
1138
-		vA.M <- shrink(vA.M, Ns.M, DF.PRIOR)
1139
-		vB.F <- shrink(vB.F, Ns.F, DF.PRIOR)
1140
-		vB.M <- shrink(vB.M, Ns.M, DF.PRIOR)
1141
-		##location and scale
1142
-		J <- match(unique(batch(object)[k]), unique(batch(object)))
1143
-		##background variance for alleleA
1144
-		taus <- applyByGenotype(log2(A[, gender==2]), rowMAD, G[, gender==2])^2
1145
-		tau2A[, J] <- shrink(taus[, 3, drop=FALSE], Ns.F[, 3], DF.PRIOR)
1146
-		sig2A[, J] <- shrink(taus[, 1, drop=FALSE], Ns.F[, 1], DF.PRIOR)
1147
-		taus <- applyByGenotype(log2(B[, gender==2]), rowMAD, G[, gender==2])^2
1148
-		tau2B[, J] <- shrink(taus[, 3, drop=FALSE], Ns.F[, 1], DF.PRIOR)
1149
-		sig2B[, J] <- shrink(taus[, 1, drop=FALSE], Ns.F[, 3], DF.PRIOR)
1150
-		corrAB[, J] <- corByGenotype(A=A[, gender==2], B=B[, gender==2], G=G[, gender==2], Ns=Ns.F, which.cluster=2, DF.PRIOR)
1151
-		corrAA[, J] <- corByGenotype(A=A[, gender==2], B=B[, gender==2], G=G[, gender==2], Ns=Ns.F, which.cluster=1, DF.PRIOR)
1152
-		corrBB[, J] <- corByGenotype(A=A[, gender==2], B=B[, gender==2], G=G[, gender==2], Ns=Ns.F, which.cluster=3, DF.PRIOR)
1153
-		##formerly oneBatch()...
1154
-		##---------------------------------------------------------------------------
1155
-		## Impute sufficient statistics for unobserved genotypes (plate-specific)
1156
-		##---------------------------------------------------------------------------
1157
-		index <- apply(Ns.F, 2, function(x, MIN.OBS) which(x >= MIN.OBS), MIN.OBS)
1158
-		correct.orderA <- muA.F[, 1] > muA.F[, 3]
1159
-		correct.orderB <- muB.F[, 3] > muB.F[, 1]
1160
-		index.complete <- intersect(which(correct.orderA & correct.orderB), intersect(index[[1]], intersect(index[[2]], index[[3]])))
1161
-		size <- min(5000, length(index.complete))
1162
-		if(length(index.complete) < 200){
1163
-			warning("fewer than 200 snps pass criteria for predicting the sufficient statistics")
1164
-			return()
1273
+	enough.males <- sum(gender==1) > MIN.SAMPLES
1274
+	enough.females <- sum(gender==2) > MIN.SAMPLES
1275
+	if(!enough.males & !enough.females){
1276
+		message(paste("fewer than", MIN.SAMPLES, "men and women.  Copy number not estimated for CHR X"))
1277
+		return(object)
1278
+	}
1279
+	marker.index <- index.list[[strata]]
1280
+	batches <- split(seq_along(batch(object)), as.character(batch(object)))
1281
+	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1282
+	nuA <- as.matrix(nuA(object)[marker.index, ])
1283
+	nuB <- as.matrix(nuB(object)[marker.index, ])
1284
+	phiA <- as.matrix(phiA(object)[marker.index, ])
1285
+	phiB <- as.matrix(phiB(object)[marker.index, ])
1286
+	phiA2 <- as.matrix(phiPrimeA(object)[marker.index, ])
1287
+	phiB2 <- as.matrix(phiPrimeB(object)[marker.index, ])
1288
+	if(enough.males){
1289
+		res <- summarizeMaleXGenotypes(marker.index=marker.index, batches=batches,
1290
+					       object=object, GT.CONF.THR=GT.CONF.THR,
1291
+					       MIN.SAMPLES=MIN.SAMPLES,
1292
+					       MIN.OBS=MIN.OBS,
1293
+					       verbose=verbose, is.lds=is.lds,
1294
+					       DF.PRIOR=DF.PRIOR/2)
1295
+		madA.Mlist <- res[["madA"]]
1296
+		madB.Mlist <- res[["madB"]]
1297
+		medianA.Mlist <- res[["medianA"]]
1298
+		medianB.Mlist <- res[["medianB"]]
1299
+		NN.Mlist <- res[["NN.M"]]
1300
+		rm(res)
1301
+		## Need N, median, mad
1302
+	}
1303
+	if(enough.females){
1304
+		N.AA.F <- as.matrix(N.AA(object)[marker.index, ])
1305
+		N.AB.F <- as.matrix(N.AB(object)[marker.index, ])
1306
+		N.BB.F <- as.matrix(N.BB(object)[marker.index, ])
1307
+		medianA.AA <- as.matrix(medianA.AA(object)[marker.index,])
1308
+		medianA.AB <- as.matrix(medianA.AB(object)[marker.index,])
1309
+		medianA.BB <- as.matrix(medianA.BB(object)[marker.index,])
1310
+		medianB.AA <- as.matrix(medianB.AA(object)[marker.index,])
1311
+		medianB.AB <- as.matrix(medianB.AB(object)[marker.index,])
1312
+		medianB.BB <- as.matrix(medianB.BB(object)[marker.index,])
1313
+		madA.AA <- as.matrix(madA.AA(object)[marker.index,])
1314
+		madA.AB <- as.matrix(madA.AB(object)[marker.index,])
1315
+		madA.BB <- as.matrix(madA.BB(object)[marker.index,])
1316
+		madB.AA <- as.matrix(madB.AA(object)[marker.index,])
1317
+		madB.AB <- as.matrix(madB.AB(object)[marker.index,])
1318
+		madB.BB <- as.matrix(madB.BB(object)[marker.index,])
1319
+	}
1320
+	for(k in seq_along(batches)){
1321
+		B <- batches[[k]]
1322
+		this.batch <- unique(as.character(batch(object)[B]))
1323
+		gender <- object$gender[B]
1324
+		enough.men <- sum(gender==1) >= MIN.SAMPLES
1325
+		enough.women <- sum(gender==2) >= MIN.SAMPLES
1326
+		if(!enough.men & !enough.women) {
1327
+			if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
1328
+			next()
1165 1329
 		}
1166
-		if(size==5000) index.complete <- sample(index.complete, size)
1167
-		index <- vector("list", 3)
1168
-		index[[1]] <- which(Ns.F[, 1] == 0 & (Ns.F[, 2] >= MIN.OBS & Ns.F[, 3] >= MIN.OBS))
1169
-		index[[2]] <- which(Ns.F[, 2] == 0 & (Ns.F[, 1] >= MIN.OBS & Ns.F[, 3] >= MIN.OBS))
1170
-		index[[3]] <- which(Ns.F[, 3] == 0 & (Ns.F[, 2] >= MIN.OBS & Ns.F[, 1] >= MIN.OBS))
1171
-		res <- imputeCenter(muA.F, muB.F, index.complete, index)
1172
-		muA.F <- res[[1]]
1173
-		muB.F <- res[[2]]
1174
-		nobsA <- Ns.M[, 1] > MIN.OBS
1175
-		nobsB <- Ns.M[, 3] > MIN.OBS
1176
-		notMissing <- !(is.na(muA.M[, 1]) | is.na(muA.M[, 3]) | is.na(muB.M[, 1]) | is.na(muB.M[, 3]))
1177
-		complete <- list()
1178
-		complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
1179
-		complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here
1180
-		size <- min(5000, length(complete[[1]]))
1181
-		if(size > 5000) complete <- lapply(complete, function(x) sample(x, size))
1182
-		##
1183
-		res <- imputeCenterX(muA.M, muB.M, Ns.M, complete, MIN.OBS)
1184
-		muA.M <- res[[1]]
1185
-		muB.M <- res[[2]]
1186
-		##
1187
-		## Monomorphic SNPs.  Mixture model may be better
1188
-		## Improve estimation by borrowing strength across batch
1189
-		noAA <- Ns.F[, 1] < MIN.OBS
1190
-		noAB <- Ns.F[, 2] < MIN.OBS
1191
-		noBB <- Ns.F[, 3] < MIN.OBS
1192
-		index[[1]] <- noAA & noAB
1193
-		index[[2]] <- noBB & noAB
1194
-		index[[3]] <- noAA & noBB
1195
-		cols <- c(3, 1, 2)
1196
-		for(j in 1:3){
1197
-			if(sum(index[[j]]) == 0) next()
1198
-			kk <- cols[j]
1199
-			X <- cbind(1, muA.F[index.complete, kk], muB.F[index.complete, kk])
1200
-			Y <- cbind(muA.F[index.complete,  -kk],
1201
-				   muB.F[index.complete,  -kk])
1202
-			betahat <- solve(crossprod(X), crossprod(X,Y))
1203
-			X <- cbind(1, muA.F[index[[j]],  kk], muB.F[index[[j]],  kk])
1204
-			mus <- X %*% betahat
1205
-			muA.F[index[[j]], -kk] <- mus[, 1:2]
1206
-			muB.F[index[[j]], -kk] <- mus[, 3:4]
1330
+		if(enough.women){
1331
+			medianA.F <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
1332
+			medianB.F <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
1333
+			madA.F <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
1334
+			madB.F <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
1335
+			NN.F <- cbind(N.AA.F[, k], N.AB.F[, k], N.BB.F[, k])
1336
+		}
1337
+		if(enough.men){
1338
+			madA.M <- madA.Mlist[[k]]
1339
+			madB.M <- madB.Mlist[[k]]
1340
+			medianA.M <- medianA.Mlist[[k]]
1341
+			medianB.M <- medianB.Mlist[[k]]
1342
+			NN.M <- NN.Mlist[[k]]
1343
+		}
1344
+		if(enough.men & enough.women){
1345
+			betas <- fit.wls(cbind(NN.M[, c(1,3)], NN.F),
1346
+					 sigma=cbind(madA.M[, c(1,3)], madA.F),
1347
+					 allele="A",
1348
+					 Y=cbind(medianA.M[, c(1,3)], medianA.F),
1349
+					 autosome=FALSE)
1350
+			nuA[, k] <- betas[1, ]
1351
+			phiA[, k] <- betas[2, ]
1352
+			phiA2[, k] <- betas[3, ]
1353
+			betas <- fit.wls(cbind(NN.M[, c(1,3)], NN.F),
1354
+					 sigma=cbind(madB.M[, c(1,3)], madB.F),
1355
+					 allele="B",
1356
+					 Y=cbind(medianB.M[, c(1,3)], medianB.F),
1357
+					 autosome=FALSE)
1358
+			nuB[, k] <- betas[1, ]
1359
+			phiB[, k] <- betas[2, ]
1360
+			phiB2[, k] <- betas[3, ]
1207 1361
 		}
1208
-		negA <- rowSums(muA.F < 0) > 0
1209
-		negB <- rowSums(muB.F < 0) > 0
1210
-		flags[, J] <- rowSums(Ns.F == 0) > 0 | negA | negB
1211
-		##flags[, J] <- index[[1]] | index[[2]] | index[[3]] | rowSums(
1212
-		##formerly coefs()
1213
-		Np <- cbind(Ns.M[, c(1,3)], Ns.F)
1214
-		Np[Np < 1] <- 1
1215
-		vA <- cbind(vA.M[, c(1, 3)], vA.F)
1216
-		vB <- cbind(vB.M[, c(1, 3)], vB.F)
1217
-		muA <- cbind(muA.M[, c(1,3)], muA.F)
1218
-		muB <- cbind(muB.M[, c(1,3)], muB.F)
1219
-		vA2 <- vA^2/Np
1220
-		vB2 <- vB^2/Np
1221
-		wA <- sqrt(1/vA2)
1222
-		wB <- sqrt(1/vB2)
1223
-		YA <- muA*wA
1224
-		YB <- muB*wB
1225
-		##res <- nuphiAlleleX(allele="A", Ystar=YA, W=wA)
1226
-		betas <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns, autosome=FALSE)
1227
-		nuA[, J] <- betas[1, ]
1228
-		phiA[, J] <- betas[2, ]
1229
-		phiA2[, J] <- betas[3, ]
1230
-		rm(betas)
1231
-		betas <- fit.wls(allele="B", Ystar=YB, W=wB, Ns=Ns, autosome=FALSE)
1232
-		nuB[, J] <- betas[1, ]
1233
-		phiB[, J] <- betas[2, ]
1234
-		phiB2[, J] <- betas[3, ]
1235
-		if(THR.NU.PHI){
1236
-			nuA[nuA[, J] < MIN.NU, J] <- MIN.NU
1237
-			nuB[nuB[, J] < MIN.NU, J] <- MIN.NU
1238
-			phiA[phiA[, J] < MIN.PHI, J] <- MIN.PHI
1239
-			phiA2[phiA2[, J] < MIN.PHI, J] <- MIN.PHI
1240
-			phiB[phiB[, J] < MIN.PHI, J] <- MIN.PHI
1241
-			phiB2[phiB2[, J] < MIN.PHI, J] <- MIN.PHI
1362
+		if(enough.men & !enough.women){
1363
+			betas <- fit.wls(NN.M[, c(1,3)],
1364
+					 sigma=madA.M[, c(1,3)],
1365
+					 allele="A",
1366
+					 Y=medianA.M[, c(1,3)],
1367
+					 autosome=FALSE,
1368
+					 X=cbind(1, c(0, 1)))
1369
+			nuA[, k] <- betas[1, ]
1370
+			phiA[, k] <- betas[2, ]
1371
+			betas <- fit.wls(NN.M[, c(1,3)],
1372
+					 sigma=madB.M[, c(1,3)],
1373
+					 allele="B",
1374
+					 Y=medianB.M[, c(1,3)],
1375
+					 autosome=FALSE,
1376
+					 X=cbind(1, c(0, 1)))
1377
+			nuB[, k] <- betas[1, ]
1378
+			phiB[, k] <- betas[2, ]
1379
+		}
1380
+		if(!enough.men & enough.women){
1381
+			betas <- fit.wls(NN.F,
1382
+					 sigma=madA.F,
1383
+					 allele="A",
1384
+					 Y=medianA.F,
1385
+					 autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
1386
+			nuA[, k] <- betas[1, ]
1387
+			phiA[, k] <- betas[2, ]
1388
+			betas <- fit.wls(NN.F,
1389
+					 sigma=madB.F,
1390
+					 allele="B",
1391
+					 Y=medianB.F,
1392
+					 autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
1393
+			nuB[, k] <- betas[1, ]
1394
+			phiB[, k] <- betas[2, ]
1242 1395
 		}
1243
-		phistar <- phiB2[, J]/phiA[, J]
1244
-##		tmp <- (B-nuB[, J] - phistar*A + phistar*nuA[, J])/phiB[, J]
1245
-##		cB[, k] <- tmp/(1-phistar*phiA2[, J]/phiB[, J])
1246
-##		cA[, k] <- (A-nuA[, J]-phiA2[, J]*cB[, k])/phiA[, J]
1247
-		##some of the snps are called for the men, but not the women
1248
-		rm(YA, YB, wA, wB, res, phistar, A, B, G, index)
1249
-		##gc()
1250 1396
 	}
1251
-##	cA[cA < 0.05] <- 0.05
1252
-##	cB[cB < 0.05] <- 0.05
1253
-##	cA[cA > 5] <-  5
1254
-##	cB[cB > 5] <- 5
1255
-
1256
-	##--------------------------------------------------
1257
-	##RS: need to fix.  why are there NA's by coercion
1258
-##	cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA))
1259
-	##--------------------------------------------------
1260
-	##ii <- rowSums(is.na(cA)) > 0
1261
-	##these often arise at SNPs with low confidence scores
1262
-##	cB <- matrix(as.integer(cB*100), nrow(cB), ncol(cB))
1263
-##	CA(object)[snps, ] <- cA
1264
-##	CB(object)[snps, ] <- cB
1265
-	snpflags[snps, ] <- flags
1266
-	tmp <- physical(lM(object))$tau2A
1267
-	tmp[snps, ] <- tau2A
1268
-	lM(object)$tau2A <- tmp
1269
-	tmp <- physical(lM(object))$tau2B
1270
-	tmp[snps, ] <- tau2B
1271
-	lM(object)$tau2B <- tmp
1272
-	tmp <- physical(lM(object))$tau2B
1273
-	tmp[snps, ] <- tau2B
1274
-	lM(object)$tau2B <- tmp
1275
-	tmp <- physical(lM(object))$sig2A
1276
-	tmp[snps, ] <- sig2A
1277
-	lM(object)$sig2A <- tmp
1278
-	tmp <- physical(lM(object))$sig2B
1279
-	tmp[snps, ] <- sig2B
1280
-	lM(object)$sig2B <- tmp
1281
-	tmp <- physical(lM(object))$nuA
1282
-	tmp[snps, ] <- nuA
1283
-	lM(object)$nuA <- tmp
1284
-	tmp <- physical(lM(object))$nuB
1285
-	tmp[snps, ] <- nuB
1286
-	lM(object)$nuB <- tmp
1287
-	tmp <- physical(lM(object))$phiA
1288
-	tmp[snps, ] <- phiA
1289
-	lM(object)$phiA <- tmp
1290
-	tmp <- physical(lM(object))$phiB
1291
-	tmp[snps, ] <- phiB
1292
-	lM(object)$phiB <- tmp
1293
-	tmp <- physical(lM(object))$phiPrimeA
1294
-	tmp[snps, ] <- phiA2
1295
-	lM(object)$phiPrimeA <- tmp
1296
-	tmp <- physical(lM(object))$phiPrimeB
1297
-	tmp[snps, ] <- phiB2
1298
-	lM(object)$phiPrimeB <- tmp
1299
-
1300
-	tmp <- physical(lM(object))$corrAB
1301
-	tmp[snps, ] <- corrAB
1302
-	lM(object)$corrAB <- tmp
1303
-	tmp <- physical(lM(object))$corrAA
1304
-	tmp[snps, ] <- corrAA
1305
-	lM(object)$corrAA <- tmp
1306
-	tmp <- physical(lM(object))$corrBB
1307
-	tmp[snps, ] <- corrBB
1308
-	lM(object)$corrBB <- tmp
1309
-	lapply(assayData(object), close)
1310
-	lapply(lM(object), close)
1397
+	if(THR.NU.PHI){
1398
+		nuA[nuA < MIN.NU] <- MIN.NU
1399
+		nuB[nuB < MIN.NU] <- MIN.NU
1400
+		phiA[phiA < MIN.PHI] <- MIN.PHI
1401
+		phiA2[phiA2 < MIN.PHI] <- MIN.PHI
1402
+		phiB[phiB < MIN.PHI] <- MIN.PHI
1403
+		phiB2[phiB2 < MIN.PHI] <- MIN.PHI
1404
+	}
1405
+	nuA(object)[marker.index, ] <- nuA
1406
+	nuB(object)[marker.index, ] <- nuB
1407
+	phiA(object)[marker.index, ] <- phiA
1408
+	phiB(object)[marker.index, ] <- phiB
1409
+	phiPrimeA(object)[marker.index, ] <- phiA2
1410
+	phiPrimeB(object)[marker.index, ] <- phiB2
1311 1411
 	TRUE
1312 1412
 }
1313 1413
 
1314
-fit.lm4 <- function(strata.index,
1414
+fit.lm4 <- function(strata,
1315 1415
 		    index.list,
1316
-		    marker.index,
1317 1416
 		    object,
1318
-		    Ns,
1319
-		    batchSize,
1320 1417
 		    SNRMin,
1321 1418
 		    MIN.SAMPLES,
1322 1419
 		    MIN.OBS,
... ...
@@ -1325,142 +1422,104 @@ fit.lm4 <- function(strata.index,
1325 1422
 		    THR.NU.PHI,
1326 1423
 		    MIN.NU,
1327 1424
 		    MIN.PHI,
1328
-		    verbose, ...){
1329
-	physical <- get("physical")
1330
-	if(verbose) message("Probe stratum ", strata.index, " of ", length(index.list))
1331
-	open(object)
1332
-	open(normal)
1333
-	open(snpflags)
1334
-	snps <- index.list[[strata.index]]
1335
-	batches <- split(seq(along=batch(object)), batch(object))
1336
-	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1337
-	nuA <- phiA <- sig2A <- tau2A <- matrix(NA, length(snps), length(unique(batch(object))))
1338
-##	cA <- matrix(NA, length(snps), ncol(object))
1339
-	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
1340
-	flags <- snpflags[ii, , drop=FALSE]
1341
-	noflags <- rowSums(flags, na.rm=TRUE) == 0
1342
-	lapply(lM(object), open)
1343
-	nuIA <- physical(lM(object))$nuA[ii, ]
1344
-	nuIB <- physical(lM(object))$nuB[ii, ]
1345
-	phiIA <- physical(lM(object))$phiA[ii,]
1346
-	phiIB <- physical(lM(object))$phiB[ii,]
1347
-
1348
-	i1 <- rowSums(nuIA < 20, na.rm=TRUE) == 0
1349
-	i2 <- rowSums(nuIB < 20, na.rm=TRUE) == 0
1350
-	i3 <- rowSums(phiIA < 20, na.rm=TRUE) == 0
1351
-	i4 <- rowSums(phiIB < 20, na.rm=TRUE) == 0
1352
-
1353
-	snp.index <- which(i1 & i2 & i3 & i4 & noflags)
1354
-	if(length(snp.index) == 0){
1355
-		warning("No snps meet the following criteria: (1) nu and phi > 20 and (2) at least MIN.OBS in each genotype cluster. CN not estimated for nonpolymorphic loci on X")
1356
-		return(TRUE)
1357
-	}
1358
-	if(length(snp.index) >= 5000){
1359
-		snp.index <- sample(snp.index, 5000)
1360
-	}
1361
-	phiA.snp <- physical(lM(object))$phiA[snp.index, , drop=FALSE]
1362
-	phiB.snp <- physical(lM(object))$phiB[snp.index, , drop=FALSE]
1363
-	A.snp <- as.matrix(A(object)[snp.index, ])
1364
-	B.snp <- as.matrix(B(object)[snp.index, ])
1365
-	NORM.snp <- as.matrix(normal[snp.index, ])
1366
-	NORM.np <- as.matrix(normal[snps, ])
1425
+		    verbose, is.lds, ...){
1426
+	if(is.lds) {physical <- get("physical"); open(object)}
1367 1427
 	gender <- object$gender
1428
+	enough.males <- sum(gender==1) > MIN.SAMPLES
1429
+	enough.females <- sum(gender==2) > MIN.SAMPLES
1430
+	if(!enough.males & !enough.females){
1431
+		message(paste("fewer than", MIN.SAMPLES, "men and women.  Copy number not estimated for CHR X"))
1432
+		return(object)
1433
+	}
1434
+	if(verbose) message("Probe stratum ", strata, " of ", length(index.list))
1435
+	marker.index <- index.list[[strata]]
1436
+	batches <- split(seq_along(batch(object)), as.character(batch(object)))
1437
+	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1438
+	nc <- length(batchNames(object))
1368 1439
 
1440
+	if(enough.males){
1441
+		res <- summarizeMaleXNps(marker.index=marker.index,
1442
+					 batches=batches,
1443
+					 object=object, MIN.SAMPLES=MIN.SAMPLES)
1444
+		medianA.AA.M <- res[["medianA.AA"]]
1445
+		madA.AA.M <- res[["madA.AA"]]
1369 1446
 
1370
-	pseudoAR <- position(object)[snps] < 2709520 | (position(object)[snps] > 154584237 & position(object)[snps] < 154913754)
1371
-	pseudoAR[is.na(pseudoAR)] <- FALSE
1447
+	}
1448
+	medianA.AA.F <- as.matrix(medianA.AA(object)[marker.index, ]) ## median for women
1449
+	madA.AA.F <- as.matrix(madA.AA(object)[marker.index, ]) ## median for women
1450
+	split.gender <- split(gender, as.character(batch(object)))
1451
+	N.M <- sapply(split.gender, function(x) sum(x==1))
1452
+	N.F <- sapply(split.gender, function(x) sum(x==2))
1453
+	nuA <- as.matrix(nuA(object)[marker.index, ])
1454
+	nuB <- as.matrix(nuB(object)[marker.index, ])
1455
+	phiA <- as.matrix(phiA(object)[marker.index, ])
1456
+	phiB <- as.matrix(phiB(object)[marker.index, ])
1372 1457
 
1373
-	GG <- as.matrix(calls(object)[snp.index, ])
1374
-	CP <- as.matrix(snpCallProbability(object)[snp.index, ])
1375
-	AA.np <- as.matrix(A(object)[snps, ])
1376
-	##if(missing(which.batches)) which.batches <- seq(along=batches)
1377
-	##batches <- batches[which.batches]
1378
-	for(k in batches){
1379
-		##if(verbose) message("SNP batch ", ii, " of ", length(batches))
1380
-		G <- GG[, k]
1381
-		xx <- CP[, k]
1382
-		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1383
-		G <- G*highConf*NORM.snp[, k]
1384
-		##snps
1385
-		AA <- A.snp[, k]
1386
-		BB <- B.snp[, k]
1387
-
1388
-
1389
-		##index <- GT.B <- GT.A <- vector("list", 3)
1390