Browse code

Added citation to crlmmCopynumber. Deleted many of the commented functions in cnrma-functions.R

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48963 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 21/08/2010 02:50:15
Showing7 changed files

... ...
@@ -74,19 +74,12 @@ export(crlmm,
74 74
        genotype2, genotypeLD,
75 75
        crlmmCopynumber2, crlmmCopynumberLD, crlmmCopynumber)
76 76
 export(constructIlluminaCNSet)
77
-##export(linesCNSet)
78
-##export(fit.lm1, fit.lm2, fit.lm3, fit.lm4, construct,
79
-##       dqrlsWrapper, fit.wls)
80
-export(computeCopynumber, totalCopynumber)
77
+export(totalCopynumber)
81 78
 export(cnrma, cnrma2)
82 79
 exportMethods(A, B, nuA, nuB, phiA, phiB, corr, tau2, Ns)
83 80
 export(genotypeSummary,
84 81
        shrinkSummary,
85 82
        estimateCnParameters)
86
-##       summarizeSnps,
87
-##       summarizeNps,
88
-##       shrinkGenotypeSummaries,
89
-##       summarizeMaleXGenotypes
90 83
 
91 84
 ## For debugging
92 85
 ## exportPattern("^[^\\.]")
... ...
@@ -2,14 +2,14 @@
2 2
 ##setGeneric("getParam", function(object, name, batch) standardGeneric("getParam"))
3 3
 setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
4 4
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
5
-setGeneric("computeCopynumber", function(object, ...) standardGeneric("computeCopynumber"))
5
+##setGeneric("computeCopynumber", function(object, ...) standardGeneric("computeCopynumber"))
6 6
 ##setGeneric("pr", function(object, name, batch, value) standardGeneric("pr"))
7 7
 setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
8 8
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
9 9
 
10 10
 setGeneric("CA", function(object, ...) standardGeneric("CA"))
11 11
 setGeneric("CB", function(object, ...) standardGeneric("CB"))
12
-##setGeneric("totalCopyNumber", function(object, ...) standardGeneric("totalCopyNumber"))
12
+setGeneric("totalCopynumber", function(object, ...) standardGeneric("totalCopynumber"))
13 13
 
14 14
 
15 15
 setGeneric("Ns", function(object, ...) standardGeneric("Ns"))
... ...
@@ -91,97 +91,6 @@ construct <- function(filenames,
91 91
 	return(cnSet)
92 92
 }
93 93
 
94
-##genotype <- function(filenames,
95
-##		     cdfName,
96
-##		     batch,
97
-##		     mixtureSampleSize=10^5,
98
-##		     eps=0.1,
99
-##		     verbose=TRUE,
100
-##		     seed=1,
101
-##		     sns,
102
-##		     copynumber=TRUE,
103
-##		     probs=rep(1/3, 3),
104
-##		     DF=6,
105
-##		     SNRMin=5,
106
-##		     recallMin=10,
107
-##		     recallRegMin=1000,
108
-##		     gender=NULL,
109
-##		     returnParams=TRUE,
110
-##		     badSNP=0.7){
111
-##	if(missing(cdfName)) stop("must specify cdfName")
112
-##	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
113
-##	if(missing(sns)) sns <- basename(filenames)
114
-##	## callSet contains potentially very big matrices
115
-##	## More big matrices are created within snprma, that will then be removed.
116
-##	callSet <- construct(filenames=filenames,
117
-##			     cdfName=cdfName,
118
-##			     copynumber=copynumber,
119
-##			     sns=sns,
120
-##			     verbose=verbose,
121
-##			     batch=batch)
122
-##	##lM(callSet) <- initializeParamObject(list(featureNames(callSet), unique(protocolData(callSet)$batch)))
123
-##	mixtureParams <- matrix(NA, 4, length(filenames))
124
-##	snp.index <- which(isSnp(callSet)==1)
125
-##	sample.strata <- splitIndicesByLength(1:ncol(callSet), ocSamples())
126
-##	iter <- 1
127
-####	for(j in batches){
128
-##	j <- unlist(sample.strata)
129
-##		if(verbose) message("Batch ", iter, " of ", length(sample.strata))
130
-##		snprmaRes <- snprma(filenames=filenames[j],
131
-##				    mixtureSampleSize=mixtureSampleSize,
132
-##				    fitMixture=TRUE,
133
-##				    eps=eps,
134
-##				    verbose=verbose,
135
-##				    seed=seed,
136
-##				    cdfName=cdfName,
137
-##				    sns=sns[j])
138
-##		stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns))
139
-##		pData(callSet)$SKW[j] <- snprmaRes$SKW
140
-##		pData(callSet)$SNR[j] <- snprmaRes$SNR
141
-##		suppressWarnings(A(callSet)[snp.index, j] <- snprmaRes[["A"]])
142
-##		suppressWarnings(B(callSet)[snp.index, j] <- snprmaRes[["B"]])
143
-##		mixtureParams[, j] <- snprmaRes$mixtureParams
144
-##		rm(snprmaRes); ##gc()
145
-##		if(copynumber){
146
-##			np.index <- which(isSnp(callSet) == 0)
147
-##			cnrmaRes <- cnrma(filenames=filenames[j],
148
-##					  cdfName=cdfName,
149
-##					  row.names=featureNames(callSet)[np.index],
150
-##					  sns=sns[j],
151
-##					  seed=seed,
152
-##					  verbose=verbose)
153
-##			stopifnot(identical(featureNames(callSet)[np.index], rownames(cnrmaRes)))
154
-##			A(callSet)[np.index, j] <- cnrmaRes
155
-##			rm(cnrmaRes); ##gc()
156
-##		}
157
-##		## as.matrix needed when ffdf is used
158
-##		tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index, j]),
159
-##			       B=as.matrix(B(callSet)[snp.index, j]),
160
-##			       SNR=callSet$SNR[j],
161
-##			       mixtureParams=mixtureParams[, j],
162
-##			       cdfName=annotation(callSet),
163
-##			       row.names=featureNames(callSet)[snp.index],
164
-##			       col.names=sampleNames(callSet)[j],
165
-##			       probs=probs,
166
-##			       DF=DF,
167
-##			       SNRMin=SNRMin,
168
-##			       recallMin=recallMin,
169
-##			       recallRegMin=recallRegMin,
170
-##			       gender=gender,
171
-##			       verbose=verbose,
172
-##			       returnParams=returnParams,
173
-##			       badSNP=badSNP)
174
-##		snpCall(callSet)[snp.index, j] <- tmp[["calls"]]
175
-##		snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]
176
-##		callSet$gender[j] <- tmp$gender
177
-##		iter <- iter+1
178
-####	}
179
-##	return(callSet)
180
-##}
181
-
182
-
183
-
184
-##genotypeLargeData
185 94
 genotype <- function(filenames,
186 95
 		       cdfName,
187 96
 		       batch,
... ...
@@ -324,18 +233,6 @@ shrink <- function(x, Ns, DF.PRIOR){
324 233
 	return(x)
325 234
 }
326 235
 
327
-applyByGenotype <- function(x, FUN, G){
328
-	FUN <- match.fun(FUN)
329
-	tmp <- matrix(NA, nrow(x), 3)
330
-	for(j in 1:3){
331
-		GT <- G==j
332
-		GT[GT == FALSE] <- NA
333
-		gt.x <- GT*x
334
-		tmp[, j] <- FUN(gt.x, na.rm=TRUE)
335
-	}
336
-	tmp
337
-}
338
-
339 236
 rowCors <- function(x, y, ...){
340 237
 	N <- rowSums(!is.na(x))
341 238
 	x <- suppressWarnings(log2(x))
... ...
@@ -346,15 +243,6 @@ rowCors <- function(x, y, ...){
346 243
 	return(covar/(sd.x*sd.y))
347 244
 }
348 245
 
349
-corByGenotype <- function(A, B, G, Ns, which.cluster=c(1,2,3)[1]){##, DF.PRIOR){
350
-	x <- A * (G == which.cluster)
351
-	x[x==0] <- NA
352
-	y <- B * (G == which.cluster)
353
-	res <- as.matrix(rowCors(x, y, na.rm=TRUE))
354
-##	cors <- shrink(res, Ns[, which.cluster], DF.PRIOR)
355
-	res
356
-}
357
-
358 246
 dqrlsWrapper <- function(x, y, wts, tol=1e-7){
359 247
 	n <- NROW(y)
360 248
 	p <- ncol(x)
... ...
@@ -367,26 +255,12 @@ dqrlsWrapper <- function(x, y, wts, tol=1e-7){
367 255
 }
368 256
 
369 257
 
370
-##fit.wls <- function(allele, Ystar, W, Ns, autosome=TRUE){
371 258
 fit.wls <- function(NN, sigma, allele, Y, autosome, X){
372
-	##		Np <- NN
373
-##		Np[Np < 1] <- 1
374
-##		vA2 <- vA^2/Np
375
-##		vB2 <- vB^2/Np
376
-##		wA <- sqrt(1/vA2)
377
-##		wB <- sqrt(1/vB2)
378
-##		YA <- muA*wA
379
-##		YB <- muB*wB
380 259
 	Np <- NN
381 260
 	Np[Np < 1] <- 1
382 261
 	W <- (sigma/sqrt(Np))^-1
383 262
 	Ystar <- Y*W
384 263
 	complete <- which(rowSums(is.na(W)) == 0 & rowSums(is.na(Ystar)) == 0)
385
-##	if(any(!is.finite(W))){## | any(!is.finite(V))){
386
-##		i <- which(rowSums(!is.finite(W)) > 0)
387
-##		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
388
-##	}
389
-##	NOHET <- mean(NN[, 2], na.rm=TRUE) < 0.05
390 264
 	if(missing(allele)) stop("must specify allele")
391 265
 	if(autosome & missing(X)){
392 266
 		if(allele == "A") X <- cbind(1, 2:0)
... ...
@@ -397,9 +271,6 @@ fit.wls <- function(NN, sigma, allele, Y, autosome, X){
397 271
 		if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
398 272
 	}
399 273
 	betahat <- matrix(NA, ncol(X), nrow(Ystar))
400
-##	if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous
401
-	##How to quickly generate Xstar, Xstar = diag(W) %*% X
402
-	##Xstar <- apply(W, 1, generateX, X)
403 274
 	ww <- rep(1, ncol(Ystar))
404 275
 	for(i in complete){
405 276
 		betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww)
... ...
@@ -408,379 +279,6 @@ fit.wls <- function(NN, sigma, allele, Y, autosome, X){
408 279
 	return(betahat)
409 280
 }
410 281
 
411
-##nuphiAlleleX <- function(allele, Ystar, W, Ns, chrX=FALSE){
412
-##	complete <- rowSums(is.na(W)) == 0
413
-##	if(any(!is.finite(W))){## | any(!is.finite(V))){
414
-##		i <- which(rowSums(!is.finite(W)) > 0)
415
-##		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
416
-##	}
417
-##	##NOHET <- mean(Ns[, 2], na.rm=TRUE) < 0.05
418
-##	if(missing(allele)) stop("must specify allele")
419
-##	if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
420
-##	if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
421
-##	##if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous
422
-##	##How to quickly generate Xstar, Xstar = diag(W) %*% X
423
-##	Xstar <- apply(W, 1, generateX, X)
424
-##	IXTX <- apply(Xstar, 2, generateIXTX, nrow=nrow(X))
425
-##
426
-##	betahat <- matrix(NA, 3, nrow(Ystar))
427
-##	ses <- matrix(NA, 3, nrow(Ystar))
428
-##	##betahat <- matrix(NA, 2, nrow(Ystar))
429
-##	##ses <- matrix(NA, 2, nrow(Ystar))
430
-##	for(i in 1:nrow(Ystar)){
431
-##		betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ]))
432
-##		ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
433
-##		ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
434
-##	}
435
-##	nu <- betahat[1, ]
436
-##	phi <- betahat[2, ]
437
-##	phi2 <- betahat[3, ]
438
-##	return(list(nu, phi, phi2))
439
-##}
440
-
441
-
442
-##nuphiAllele <- function(object, allele, Ystar, W, tmp.objects, cnOptions){
443
-##	I <- isSnp(object)
444
-##	Ystar <- Ystar[I, ]
445
-##	rownames(Ystar) <- featureNames(object)[isSnp(object)]
446
-##	complete <- rowSums(is.na(W)) == 0 & I
447
-##	W <- W[I, ]
448
-##	if(any(!is.finite(W))){## | any(!is.finite(V))){
449
-##		i <- which(rowSums(!is.finite(W)) > 0)
450
-##		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
451
-##	}
452
-##	Ns <- tmp.objects[["Ns"]]
453
-##	Ns <- Ns[I, ]
454
-##	CHR <- unique(chromosome(object))
455
-##	batch <- unique(batch(object))
456
-##	nuA <- getParam(object, "nuA", batch)
457
-##	nuB <- getParam(object, "nuB", batch)
458
-##	phiA <- getParam(object, "phiA", batch)
459
-##	phiB <- getParam(object, "phiB", batch)
460
-##	if(CHR==23){
461
-##		phiAX <- getParam(object, "phiAX", batch)
462
-##		phiBX <- getParam(object, "phiBX", batch)
463
-##	}
464
-##	NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05
465
-##	if(missing(allele)) stop("must specify allele")
466
-##	if(CHR == 23){
467
-##		if(length(grep("AB", colnames(W))) > 0){
468
-##			if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
469
-##			if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
470
-##		} else{
471
-##		if(allele == "A") X <- cbind(1, c(1, 0, 2, 0), c(0, 1, 0, 2))
472
-##			if(allele == "B") X <- cbind(1, c(0, 1, 0, 2), c(1, 0, 2, 0))
473
-##		}
474
-##		betahat <- matrix(NA, 3, nrow(Ystar))
475
-##	} else {##autosome
476
-##		if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2)
477
-##		if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous
478
-##		betahat <- matrix(NA, 2, nrow(Ystar))
479
-##	}
480
-##	ww <- rep(1, ncol(Ystar))
481
-##	II <- which(rowSums(is.nan(Ystar)) == 0)
482
-##	for(i in II){
483
-##		betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww)
484
-##	}
485
-##	if(allele == "A"){
486
-##		nuA[complete] <- betahat[1, ]
487
-##		phiA[complete] <- betahat[2, ]
488
-##		object <- pr(object, "nuA", batch, nuA)
489
-##		object <- pr(object, "phiA", batch, phiA)
490
-##		if(CHR == 23){
491
-##			phiAX[complete] <- betahat[3, ]
492
-##			object <- pr(object, "phiAX", batch, phiAX)
493
-##		}
494
-##	}
495
-##	if(allele=="B"){
496
-##		nuB[complete] <- betahat[1, ]
497
-##		phiB[complete] <- betahat[2, ]
498
-##		object <- pr(object, "nuB", batch, nuB)
499
-##		object <- pr(object, "phiB", batch, phiB)
500
-##		if(CHR == 23){
501
-##			phiBX[complete] <- betahat[3, ]
502
-##			object <- pr(object, "phiBX", batch, phiBX)
503
-##		}
504
-##	}
505
-##	return(object)
506
-##}
507
-
508
-
509
-
510
-predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
511
-	pkgname <- paste(cdfName, "Crlmm", sep="")
512
-	path <- system.file("extdata", package=pkgname)
513
-        loader("23index.rda", .crlmmPkgEnv, pkgname)
514
-	index <- getVarInEnv("index")
515
-	##load(file.path(path, "23index.rda"))
516
-	XIndex <- index[[1]]
517
-	if(class(res) != "ABset"){
518
-		A <- res$A
519
-		B <- res$B
520
-	} else{
521
-		A <- res@assayData[["A"]]
522
-		B <- res@assayData[["B"]]
523
-	}
524
-	tmp <- which(rowSums(is.na(A)) > 0)
525
-	XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median, na.rm=TRUE)/2
526
-	SNR <- res$SNR
527
-	if(sum(SNR>SNRMin)==1){
528
-		gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
529
-	}else{
530
-		gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRMin]), max(XMedian[SNR>SNRMin])))[["cluster"]]
531
-	}
532
-	return(gender)
533
-}
534
-
535
-
536
-##replace with release/crlmm/R/cnrma-functions
537
-##crlmmCopynumber <- function(object,
538
-##			    chromosome=1:23,
539
-##			    which.batches,
540
-##			    MIN.SAMPLES=10,
541
-##			    SNRMin=5,
542
-##			    MIN.OBS=3,
543
-##			    DF.PRIOR=50,
544
-##			    bias.adj=FALSE,
545
-##			    prior.prob=rep(1/4,4),
546
-##			    seed=1,
547
-##			    verbose=TRUE,
548
-##			    GT.CONF.THR=0.99,
549
-##			    PHI.THR=2^6,
550
-##			    nHOM.THR=5,
551
-##			    MIN.NU=2^3,
552
-##			    MIN.PHI=2^3,
553
-##			    THR.NU.PHI=TRUE,
554
-##			    thresholdCopynumber=TRUE,
555
-##			    weighted.lm=TRUE){
556
-##	ffIsLoaded <- class(calls(object))[[1]] == "ff"
557
-##	if(ffIsLoaded){
558
-##		open(object)
559
-##		open(object$SKW)
560
-##		open(object$SNR)
561
-##	}
562
-##	stopifnot("batch" %in% varLabels(protocolData(object)))
563
-##	stopifnot("chromosome" %in% fvarLabels(object))
564
-##	stopifnot("position" %in% fvarLabels(object))
565
-##	stopifnot("isSnp" %in% fvarLabels(object))
566
-##	##batch <- object$batch
567
-##	batch <- batch(object)
568
-##	if(ffIsLoaded) {
569
-##		open(object$SNR)
570
-##		SNR <- object$SNR[, ]
571
-##	} else SNR <-  object$SNR
572
-##	batches <- split((1:ncol(object))[SNR > SNRMin], batch[SNR > SNRMin])
573
-##	if(any(sapply(batches, length) < MIN.SAMPLES)) message("Excluding batches with fewer than ", MIN.SAMPLES, " samples")
574
-##	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
575
-##	if(missing(which.batches)) which.batches <- seq(along=batches)
576
-##	for(i in chromosome){
577
-##		if(verbose) cat("Chromosome ", i, "\n")
578
-##		if(i >= 24) next()
579
-##		ii <- which.batches[1]
580
-##		for(j in batches[which.batches]){
581
-##			if(verbose) message("Batch ", ii, " of ", length(which.batches))
582
-##			row.index <- which(chromosome(object) == i)
583
-##			##Note that ffdf assayDataElements are data.frames after subsetting(not matrices)
584
-####			ca <- as.matrix(CA(object)[row.index, j])
585
-####			cb <- as.matrix(CB(object)[row.index, j])
586
-####			dimnames(ca) <- dimnames(cb) <- list(featureNames(object)[row.index], sampleNames(object)[j])
587
-##			tmp <- new("CNSet",
588
-##				   call=as.matrix(calls(object)[row.index, j]),
589
-##				   callProbability=as.matrix(snpCallProbability(object)[row.index, j]),
590
-##				   alleleA=as.matrix(A(object)[row.index, j]),
591
-##				   alleleB=as.matrix(B(object)[row.index, j]),
592
-####				   CA=ca, CB=cb,
593
-##				   phenoData=phenoData(object)[j, ],
594
-##				   annotation=annotation(object))
595
-##			featureData(tmp) <- addFeatureAnnotation(tmp)
596
-##			featureData(tmp) <- lm.parameters(tmp, batch=unique(batch[j]))
597
-##			tmp$batch <- batch(object)[j]
598
-##			tmp <- computeCopynumber(tmp,
599
-##						 MIN.OBS=MIN.OBS,
600
-##						 DF.PRIOR=DF.PRIOR,
601
-##						 bias.adj=bias.adj,
602
-##						 prior.prob=prior.prob,
603
-##						 seed=seed,
604
-##						 verbose=verbose,
605
-##						 GT.CONF.THR=GT.CONF.THR,
606
-##						 PHI.THR=PHI.THR,
607
-##						 nHOM.THR=nHOM.THR,
608
-##						 MIN.NU=MIN.NU,
609
-##						 MIN.PHI=MIN.PHI,
610
-##						 THR.NU.PHI=THR.NU.PHI,
611
-##						 thresholdCopynumber=thresholdCopynumber)
612
-##			fData(tmp) <- fData(tmp)[, -(1:3)]
613
-####			CA(tmp) <- matrix(as.integer(CA(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
614
-####					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
615
-####			CB(tmp) <- matrix(as.integer(CB(tmp)*100), nrow=nrow(tmp), ncol=ncol(tmp),
616
-####					  dimnames=list(featureNames(tmp), sampleNames(tmp)))
617
-####			CA(object)[row.index, j] <- CA(tmp)
618
-####			CB(object)[row.index, j] <- CB(tmp)
619
-##			##ad-hocery
620
-##			batchName <- unique(batch(object)[j])
621
-##			fvarLabels(tmp)[15:17] <- paste(c("corrAB", "corrBB", "corrAA"), batchName, sep=".")
622
-##			fvarLabels(tmp)[13:14] <- paste(c("phiPrimeA", "phiPrimeB"), batchName, sep=".")
623
-##			fvarLabels(tmp) <- gsub("_", ".", fvarLabels(tmp))
624
-##			##fvarLabels(tmp) <- gsub("\\.[1-9]", "", fvarLabels(tmp))
625
-##			if(ffIsLoaded){
626
-##				physical <- get("physical")
627
-##				fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% names(physical(lM(object)))]
628
-##				jj <- match(fvarLabels(tmp), names(lM(object)))
629
-##				lM(object)[row.index, jj] <- fData(tmp)
630
-##			} else {
631
-##				nms <- paste(names(lM(object)), batchName, sep=".")
632
-##				fData(tmp) <- fData(tmp)[, fvarLabels(tmp) %in% nms]
633
-##				for(k in seq_along(fvarLabels(tmp))){
634
-##					kk <- match(fvarLabels(tmp)[k], paste(names(lM(object)), batchName, sep="."))
635
-##					column <- match(batchName, colnames(lM(object)[[k]]))
636
-##					lM(object)[[k]][row.index, column] <- fData(tmp)[, k]
637
-##				}
638
-##			}
639
-##			rm(tmp); ##gc()
640
-##			ii <- ii+1
641
-##		}
642
-##	}
643
-##	return(object)
644
-##}
645
-
646
-crlmmCopynumberLD <- function(object,
647
-			     which.batches,
648
-			    MIN.SAMPLES=10,
649
-			    SNRMin=5,
650
-			    MIN.OBS=1,
651
-			    DF.PRIOR=50,
652
-			    bias.adj=FALSE,
653
-			    prior.prob=rep(1/4,4),
654
-			    seed=1,
655
-			    verbose=TRUE,
656
-			    GT.CONF.THR=0.99,
657
-			    PHI.THR=2^6,
658
-			    nHOM.THR=5,
659
-			    MIN.NU=2^3,
660
-			    MIN.PHI=2^3,
661
-			    THR.NU.PHI=TRUE,
662
-			    thresholdCopynumber=TRUE){
663
-	stopifnot("batch" %in% varLabels(protocolData(object)))
664
-	stopifnot("chromosome" %in% fvarLabels(object))
665
-	stopifnot("position" %in% fvarLabels(object))
666
-	stopifnot("isSnp" %in% fvarLabels(object))
667
-	##lM(object) <- initializeParamObject(list(featureNames(object), unique(protocolData(object)$batch)))
668
-	batch <- batch(object)
669
-	XIndex.snps <- which(chromosome(object) == 23 & isSnp(object) & !is.na(chromosome(object)))
670
-	##YIndex.snps <- (1:nrow(object))[chromosome(object) == 24 & isSnp(object)]
671
-	XIndex.nps <- which(chromosome(object) == 23 & !isSnp(object) & !is.na(chromosome(object)))
672
-	##autosomeIndex.snps <- (1:nrow(object))[chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object))]
673
-	autosomeIndex.snps <- which(chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object)))
674
-	autosomeIndex.nps <- which(chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object)))
675
-	##autosomeIndex.nps <- (1:nrow(object))[chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object))]
676
-
677
-	## Do chromosome X in batches
678
-	Ns <- initializeBigMatrix("Ns", nrow(object), 5)
679
-	colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
680
-	if(!file.exists(file.path(ldPath(), "normal.rda"))){
681
-		normal <- initializeBigMatrix("normal", nrow(object), ncol(object), vmode="integer")
682
-		normal[,] <- 1L
683
-		save(normal, file=file.path(ldPath(), "normal.rda"))
684
-	} else load(file.path(ldPath(), "normal.rda"))
685
-	if(!file.exists(file.path(ldPath(), "snpflags.rda"))){
686
-		snpflags <- initializeBigMatrix("snpflags", nrow(object), length(unique(batch(object))), vmode="integer")
687
-		snpflags[,] <- 0L
688
-		save(snpflags, file=file.path(ldPath(), "snpflags.rda"))
689
-	} else{
690
-		load(file.path(ldPath(), "snpflags.rda"))
691
-	}
692
-	if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
693
-	snpBatches <- splitIndicesByLength(autosomeIndex.snps, ocProbesets())
694
-	ocLapply(seq(along=snpBatches),
695
-		 fit.lm1,
696
-		 marker.index=autosomeIndex.snps,
697
-		 object=object,
698
-		 Ns=Ns,
699
-		 normal=normal,
700
-		 snpflags=snpflags,
701
-		 snpBatches=snpBatches,
702
-		 batchSize=ocProbesets(),
703
-		 SNRMin=SNRMin,
704
-		 MIN.SAMPLES=MIN.SAMPLES,
705
-		 MIN.OBS=MIN.OBS,
706
-		 DF=DF.PRIOR,
707
-		 GT.CONF.THR=GT.CONF.THR,
708
-		 THR.NU.PHI=THR.NU.PHI,
709
-		 MIN.NU=MIN.NU,
710
-		 MIN.PHI=MIN.PHI,
711
-		 verbose=verbose,
712
-		 neededPkgs="crlmm")
713
-	## autosomal NPs
714
-	snpBatches <- splitIndicesByLength(autosomeIndex.nps, ocProbesets())
715
-	if(verbose) message("Estimating total copy number at nonpolymorphic loci")
716
-	ocLapply(seq(along=snpBatches),
717
-			fit.lm2,
718
-			marker.index=autosomeIndex.nps,
719
-			object=object,
720
-			Ns=Ns,
721
-			normal=normal,
722
-			snpflags=snpflags,
723
-			snpBatches=snpBatches,
724
-			batchSize=ocProbesets(),
725
-			SNRMin=SNRMin,
726
-			MIN.SAMPLES=MIN.SAMPLES,
727
-			MIN.OBS=MIN.OBS,
728
-			DF=DF.PRIOR,
729
-			GT.CONF.THR=GT.CONF.THR,
730
-			THR.NU.PHI=THR.NU.PHI,
731
-			MIN.NU=MIN.NU,
732
-			MIN.PHI=MIN.PHI,
733
-			verbose=verbose,
734
-		 neededPkgs="crlmm")
735
-	snpBatches <- splitIndicesByLength(XIndex.snps, ocProbesets())
736
-	if(verbose) message("Estimating allele-specific copy number at polymorphic loci on chromosome X")
737
-	ocLapply(seq(along=snpBatches),
738
-		 fit.lm3,
739
-		 marker.index=XIndex.snps,
740
-		 object=object,
741
-		 Ns=Ns,
742
-		 normal=normal,
743
-		 snpflags=snpflags,
744
-		 snpBatches=snpBatches,
745
-		 batchSize=ocProbesets(),
746
-		 SNRMin=SNRMin,
747
-		 MIN.SAMPLES=MIN.SAMPLES,
748
-		 MIN.OBS=MIN.OBS,
749
-		 DF=DF.PRIOR,
750
-		 GT.CONF.THR=GT.CONF.THR,
751
-		 THR.NU.PHI=THR.NU.PHI,
752
-		 MIN.NU=MIN.NU,
753
-		 MIN.PHI=MIN.PHI,
754
-		 verbose=verbose,
755
-		 neededPkgs="crlmm")
756
-	if(verbose) message("Estimating total copy number for nonpolymorphic loci on chromosome X")
757
-	snpBatches <- splitIndicesByLength(XIndex.nps, ocProbesets())
758
-	ocLapply(seq(along=snpBatches),
759
-		 fit.lm4,
760
-		 marker.index=XIndex.nps,
761
-		 object=object,
762
-		 Ns=Ns,
763
-		 normal=normal,
764
-		 snpflags=snpflags,
765
-		 snpBatches=snpBatches,
766
-		 batchSize=ocProbesets(),
767
-		 SNRMin=SNRMin,
768
-		 MIN.SAMPLES=MIN.SAMPLES,
769
-		 MIN.OBS=MIN.OBS,
770
-		 DF=DF.PRIOR,
771
-		 GT.CONF.THR=GT.CONF.THR,
772
-		 THR.NU.PHI=THR.NU.PHI,
773
-		 MIN.NU=MIN.NU,
774
-		 MIN.PHI=MIN.PHI,
775
-		 verbose=verbose,
776
-		 neededPkgs="crlmm")
777
-	return(object)
778
-}
779
-crlmmCopynumber2 <- crlmmCopynumberLD
780
-
781
-
782
-
783
-
784 282
 shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAMPLES, DF.PRIOR,
785 283
 				    verbose, is.lds){
786 284
 	if(is.lds) {physical <- get("physical"); open(object)}
... ...
@@ -1129,24 +627,12 @@ summarizeMaleXGenotypes <- function(marker.index,
1129 627
 		madB <- cbind(statsB.AA[, 1], statsB.AB[, 1], statsB.BB[, 1])
1130 628
 		rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB)
1131 629
 
1132
-##		A <- log2(A); B <- log2(B)
1133
-##		tau2A.AA <- summaryStats(G.AA, A, FUNS="rowMAD")^2
1134
-##		tau2A.BB <- summaryStats(G.BB, A, FUNS="rowMAD")^2
1135
-##		tau2B.AA <- summaryStats(G.AA, B, FUNS="rowMAD")^2
1136
-##		tau2B.BB <- summaryStats(G.BB, B, FUNS="rowMAD")^2
1137
-		##tau2A <- cbind(tau2A.AA, tau2A.BB)
1138
-		##tau2B <- cbind(tau2B.AA, tau2B.BB)
1139 630
 		NN.M <- cbind(N.AA.M, N.AB.M, N.BB.M)
1140 631
 		NN.Mlist[[k]] <- NN.M
1141 632
 
1142 633
 		shrink.madA[[k]] <- shrink(madA, NN.M, DF.PRIOR)
1143 634
 		shrink.madB[[k]] <- shrink(madB, NN.M, DF.PRIOR)
1144 635
 
1145
-##		shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN.M[, 3], DF.PRIOR)[, drop=FALSE]
1146
-##		shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN.M[, 1], DF.PRIOR)[, drop=FALSE]
1147
-##		shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN.M[, 1], DF.PRIOR)[, drop=FALSE]
1148
-##		shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN.M[, 3], DF.PRIOR)[, drop=FALSE]
1149
-
1150 636
 		##---------------------------------------------------------------------------
1151 637
 		## SNPs that we'll use for imputing location/scale of unobserved genotypes
1152 638
 		##---------------------------------------------------------------------------
... ...
@@ -1590,304 +1076,6 @@ imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
1590 1076
 	list(muA, muB)
1591 1077
 }
1592 1078
 
1593
-##oneBatch <- function(object, cnOptions, tmp.objects){
1594
-##	muA <- tmp.objects[["muA"]]
1595
-##	muB <- tmp.objects[["muB"]]
1596
-##	Ns <- tmp.objects[["Ns"]]
1597
-##	CHR <- unique(chromosome(object))
1598
-##	##---------------------------------------------------------------------------
1599
-##	## Impute sufficient statistics for unobserved genotypes (plate-specific)
1600
-##	##---------------------------------------------------------------------------
1601
-##	MIN.OBS <- cnOptions$MIN.OBS
1602
-##	index.AA <- which(Ns[, "AA"] >= MIN.OBS)
1603
-##	index.AB <- which(Ns[, "AB"] >= MIN.OBS)
1604
-##	index.BB <- which(Ns[, "BB"] >= MIN.OBS)
1605
-##	correct.orderA <- muA[, "AA"] > muA[, "BB"]
1606
-##	correct.orderB <- muB[, "BB"] > muB[, "AA"]
1607
-##	##For chr X, this will ignore the males
1608
-##	nobs <- rowSums(Ns[, 3:5] >= MIN.OBS, na.rm=TRUE) == 3
1609
-##	index.complete <- which(correct.orderA & correct.orderB & nobs) ##be selective here
1610
-##	size <- min(5000, length(index.complete))
1611
-##	if(size == 5000) index.complete <- sample(index.complete, 5000)
1612
-##	if(length(index.complete) < 200){
1613
-##		stop("fewer than 200 snps pass criteria for predicting the sufficient statistics")
1614
-##	}
1615
-##	index <- tmp.objects[["index"]]
1616
-##	index[[1]] <- which(Ns[, "AA"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
1617
-##	index[[2]] <- which(Ns[, "AB"] == 0 & (Ns[, "AA"] >= MIN.OBS & Ns[, "BB"] >= MIN.OBS))
1618
-##	index[[3]] <- which(Ns[, "BB"] == 0 & (Ns[, "AB"] >= MIN.OBS & Ns[, "AA"] >= MIN.OBS))
1619
-##	mnA <- muA[, 3:5]
1620
-##	mnB <- muB[, 3:5]
1621
-##	for(j in 1:3){
1622
-##		if(length(index[[j]]) == 0) next()
1623
-##		X <- cbind(1, mnA[index.complete,  -j, drop=FALSE], mnB[index.complete,  -j, drop=FALSE])
1624
-##		Y <- cbind(mnA[index.complete, j], mnB[index.complete,  j])
1625
-##		betahat <- solve(crossprod(X), crossprod(X,Y))
1626
-##		X <- cbind(1, mnA[index[[j]],  -j, drop=FALSE],  mnB[index[[j]],  -j, drop=FALSE])
1627
-##		mus <- X %*% betahat
1628
-##		muA[index[[j]], j+2] <- mus[, 1]
1629
-##		muB[index[[j]], j+2] <- mus[, 2]
1630
-##	}
1631
-##	nobsA <- Ns[, "A"] > MIN.OBS
1632
-##	nobsB <- Ns[, "B"] > MIN.OBS
1633
-##	notMissing <- !(is.na(muA[, "A"]) | is.na(muA[, "B"]) | is.na(muB[, "A"]) | is.na(muB[, "B"]))
1634
-##	complete <- list()
1635
-##	complete[[1]] <- which(correct.orderA & correct.orderB & nobsA & notMissing) ##be selective here
1636
-##	complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here
1637
-##	size <- min(5000, length(complete[[1]]))
1638
-##	if(size > 5000) complete <- lapply(complete, function(x) sample(x, size))
1639
-##	if(CHR == 23){
1640
-##		index <- list()
1641
-##		index[[1]] <- which(Ns[, "A"] == 0)
1642
-##		index[[2]] <- which(Ns[, "B"] == 0)
1643
-##		cols <- 2:1
1644
-##		for(j in 1:2){
1645
-##			if(length(index[[j]]) == 0) next()
1646
-##			X <- cbind(1, muA[complete[[j]], cols[j]], muB[complete[[j]], cols[j]])
1647
-##			Y <- cbind(muA[complete[[j]], j], muB[complete[[j]], j])
1648
-##			betahat <- solve(crossprod(X), crossprod(X,Y))
1649
-##			X <- cbind(1, muA[index[[j]], cols[j]],  muB[index[[j]], cols[j]])
1650
-##			mus <- X %*% betahat
1651
-##			muA[index[[j]], j] <- mus[, 1]
1652
-##			muB[index[[j]], j] <- mus[, 2]
1653
-##		}
1654
-##	}
1655
-##	##missing two genotypes
1656
-##	noAA <- Ns[, "AA"] < MIN.OBS
1657
-##	noAB <- Ns[, "AB"] < MIN.OBS
1658
-##	noBB <- Ns[, "BB"] < MIN.OBS
1659
-##	index[[1]] <- noAA & noAB
1660
-##	index[[2]] <- noBB & noAB
1661
-##	index[[3]] <- noAA & noBB
1662
-####	snpflags <- envir[["snpflags"]]
1663
-##	snpflags <- index[[1]] | index[[2]] | index[[3]]
1664
-####	snpflags[, p] <- index[[1]] | index[[2]] | index[[3]]
1665
-##
1666
-##	##---------------------------------------------------------------------------
1667
-##	## Two genotype clusters not observed -- would sequence help? (didn't seem to)
1668
-##	## 1. extract index of complete data
1669
-##	## 2. Regress  mu_missing ~ sequence + mu_observed
1670
-##	## 3. solve for nu assuming the median is 2
1671
-##	##---------------------------------------------------------------------------
1672
-##	cols <- c(3, 1, 2)
1673
-##	for(j in 1:3){
1674
-##		if(sum(index[[j]]) == 0) next()
1675
-##		k <- cols[j]
1676
-##		X <- cbind(1, mnA[index.complete, k], mnB[index.complete, k])
1677
-##		Y <- cbind(mnA[index.complete,  -k],
1678
-##			   mnB[index.complete,  -k])
1679
-##		betahat <- solve(crossprod(X), crossprod(X,Y))
1680
-##		X <- cbind(1, mnA[index[[j]],  k], mnB[index[[j]],  k])
1681
-##		mus <- X %*% betahat
1682
-##		muA[index[[j]], -c(1, 2, k+2)] <- mus[, 1:2]
1683
-##		muB[index[[j]], -c(1, 2, k+2)] <- mus[, 3:4]
1684
-##	}
1685
-##	negA <- rowSums(muA < 0) > 0
1686
-##	negB <- rowSums(muB < 0) > 0
1687
-##	snpflags <- snpflags| negA | negB | rowSums(is.na(muA[, 3:5]), na.rm=TRUE) > 0
1688
-##	tmp.objects$snpflags <- snpflags
1689
-##	tmp.objects[["muA"]] <- muA
1690
-##	tmp.objects[["muB"]] <- muB
1691
-##	tmp.objects[["snpflags"]] <- snpflags
1692
-##	return(tmp.objects)
1693
-##}
1694
-##
1695
-##Estimate tau2, sigma2, and correlation (updates the object)
1696
-##locationAndScale <- function(object, cnOptions, tmp.objects){
1697
-##	DF.PRIOR <- cnOptions$DF.PRIOR
1698
-##	Ns <- tmp.objects[["Ns"]]
1699
-##	index <- tmp.objects[["index"]]
1700
-##	index.AA <- index[[1]]
1701
-##	index.AB <- index[[2]]
1702
-##	index.BB <- index[[3]]
1703
-##	rm(index); ##gc()
1704
-##
1705
-##	GT.A <- tmp.objects[["GT.A"]]
1706
-##	GT.B <- tmp.objects[["GT.B"]]
1707
-##	AA.A <- GT.A[[1]]
1708
-##	AB.A <- GT.A[[2]]
1709
-##	BB.A <- GT.A[[3]]
1710
-##
1711
-##	AA.B <- GT.B[[1]]
1712
-##	AB.B <- GT.B[[2]]
1713
-##	BB.B <- GT.B[[3]]
1714
-##	x <- BB.A[index.BB, ]
1715
-##	##batch <- unique(object$batch)
1716
-##	batch <- unique(batch(object))
1717
-##	tau2A <- getParam(object, "tau2A", batch)
1718
-##	tau2A[index.BB] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2
1719
-##	DF <- Ns[, "BB"]-1
1720
-##	DF[DF < 1] <- 1
1721
-##	med <- median(tau2A, na.rm=TRUE)
1722
-##	tau2A <- (tau2A * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1723
-##	tau2A[is.na(tau2A) & isSnp(object)] <- med
1724
-##	object <- pr(object, "tau2A", batch, tau2A)
1725
-##
1726
-##	sig2B <- getParam(object, "sig2B", batch)
1727
-##	x <- BB.B[index.BB, ]
1728
-##	sig2B[index.BB] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2
1729
-##	med <- median(sig2B, na.rm=TRUE)
1730
-##	sig2B <- (sig2B * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1731
-##	sig2B[is.na(sig2B) & isSnp(object)] <- med
1732
-##	object <- pr(object, "sig2B", batch, sig2B)
1733
-##
1734
-##	tau2B <- getParam(object, "tau2B", batch)
1735
-##	x <- AA.B[index.AA, ]
1736
-##	tau2B[index.AA] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2
1737
-##	DF <- Ns[, "AA"]-1
1738
-##	DF[DF < 1] <- 1
1739
-##	med <- median(tau2B, na.rm=TRUE)
1740
-##	tau2B <- (tau2B * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1741
-##	tau2B[is.na(tau2B) & isSnp(object)] <- med
1742
-##	object <- pr(object, "tau2B", batch, tau2B)
1743
-##
1744
-##	sig2A <- getParam(object, "sig2A", batch)
1745
-##	x <- AA.A[index.AA, ]
1746
-##	sig2A[index.AA] <- rowMAD(log2(x), log2(x), na.rm=TRUE)^2##var(log(IA)|AA)
1747
-##	med <- median(sig2A, na.rm=TRUE)
1748
-##	sig2A <- (sig2A * DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1749
-##	sig2A[is.na(sig2A) & isSnp(object)] <- med
1750
-##	object <- pr(object, "sig2A", batch, sig2A)
1751
-##
1752
-##	if(length(index.AB) > 0){ ##all homozygous is possible
1753
-##		corr <- getParam(object, "corr", batch)
1754
-##		x <- AB.A[index.AB, ]
1755
-##		y <- AB.B[index.AB, ]
1756
-##		corr[index.AB] <- rowCors(x, y, na.rm=TRUE)
1757
-##		corr[corr < 0] <- 0
1758
-##		DF <- Ns[, "AB"]-1
1759
-##		DF[DF<1] <- 1
1760
-##		med <- median(corr, na.rm=TRUE)
1761
-##		corr <- (corr*DF  +  med * DF.PRIOR)/(DF.PRIOR + DF)
1762
-##		corr[is.na(corr) & isSnp(object)] <- med
1763
-##		object <- pr(object, "corr", batch, corr)
1764
-##	}
1765
-##	corrB.AA <- getParam(object, "corrB.AA", batch)
1766
-##	backgroundB <- AA.B[index.AA, ]
1767
-##	signalA <- AA.A[index.AA, ]
1768
-##	corrB.AA[index.AA] <- rowCors(backgroundB, signalA, na.rm=TRUE)
1769
-##	DF <- Ns[, "AA"]-1
1770
-##	DF[DF < 1] <- 1
1771
-##	med <- median(corrB.AA, na.rm=TRUE)
1772
-##	corrB.AA <- (corrB.AA*DF + med*DF.PRIOR)/(DF.PRIOR + DF)
1773
-##	corrB.AA[is.na(corrB.AA) & isSnp(object)] <- med
1774
-##	object <- pr(object, "corrB.AA", batch, corrB.AA)
1775
-##
1776
-##	corrA.BB <- getParam(object, "corrA.BB", batch)
1777
-##	backgroundA <- BB.A[index.BB, ]
1778
-##	signalB <- BB.B[index.BB, ]
1779
-##	corrA.BB[index.BB] <- rowCors(backgroundA, signalB, na.rm=TRUE)
1780
-##	DF <- Ns[, "BB"]-1
1781
-##	DF[DF < 1] <- 1
1782
-##	med <- median(corrA.BB, na.rm=TRUE)
1783
-##	corrA.BB <- (corrA.BB*DF + med*DF.PRIOR)/(DF.PRIOR + DF)
1784
-##	corrA.BB[is.na(corrA.BB) & isSnp(object)] <- med
1785
-##	object <- pr(object, "corrA.BB", batch, corrA.BB)
1786
-##	return(object)
1787
-##}
1788
-##
1789
-##coefs <- function(object, cnOptions, tmp.objects){
1790
-##	##batch <- unique(object$batch)
1791
-##	batch <- unique(batch(object))
1792
-##	CHR <- unique(chromosome(object))
1793
-##	muA <- tmp.objects[["muA"]]
1794
-##	muB <- tmp.objects[["muB"]]
1795
-##	vA <- tmp.objects[["vA"]]
1796
-##	vB <- tmp.objects[["vB"]]
1797
-##	Ns <- tmp.objects[["Ns"]]
1798
-##	if(CHR != 23){
1799
-##		IA <- muA[, 3:5]
1800
-##		IB <- muB[, 3:5]
1801
-##		vA <- vA[, 3:5]
1802
-##		vB <- vB[, 3:5]
1803
-##		Np <- Ns[, 3:5]
1804
-##	} else {
1805
-##		NOHET <- is.na(median(vA[, "AB"], na.rm=TRUE))
1806
-##		if(NOHET){
1807
-##			IA <- muA[, -4]
1808
-##			IB <- muB[, -4]
1809
-##			vA <- vA[, -4]
1810
-##			vB <- vB[, -4]
1811
-##			Np <- Ns[, -4]
1812
-##		} else{
1813
-##			IA <- muA
1814
-##			IB <- muB
1815
-##			vA <- vA
1816
-##			vB <- vB
1817
-##			Np <- Ns
1818
-##		}
1819
-##
1820
-##	}
1821
-##	Np[Np < 1] <- 1
1822
-##	vA2 <- vA^2/Np
1823
-##	vB2 <- vB^2/Np
1824
-##	wA <- sqrt(1/vA2)
1825
-##	wB <- sqrt(1/vB2)
1826
-##	YA <- IA*wA
1827
-##	YB <- IB*wB
1828
-##	##update lm.coefficients stored in object
1829
-##	object <- nuphiAllele(object, allele="A", Ystar=YA, W=wA, tmp.objects=tmp.objects, cnOptions=cnOptions)
1830
-##	object <- nuphiAllele(object, allele="B", Ystar=YB, W=wB, tmp.objects=tmp.objects, cnOptions=cnOptions)
1831
-##	##---------------------------------------------------------------------------
1832
-##	##Estimate crosshyb using X chromosome and sequence information
1833
-##	##---------------------------------------------------------------------------
1834
-##	##browser()
1835
-##	####data(sequences, package="genomewidesnp6Crlmm")
1836
-##	##snpflags <- envir[["snpflags"]]
1837
-##	##muA <- envir[["muA"]][, p, 3:5]
1838
-##	##muB <- envir[["muB"]][, p, 3:5]
1839
-##	##Y <- envir[["phiAx"]]
1840
-##	##load("sequences.rda")
1841
-##	##seqA <- sequences[, "A", ][, 1]
1842
-##	##seqA <- seqA[match(snps, names(seqA))]
1843
-##	##X <- cbind(1, sequenceDesignMatrix(seqA))
1844
-##	##X <- cbind(X, nuA[, p], phiA[, p], nuB[, p], phiB[, p])
1845
-##	##missing <- rowSums(is.na(X)) > 0
1846
-##	##betahat <- solve(crossprod(X[!missing, ]), crossprod(X[!missing, ], Y[!missing]))
1847
-##	return(object)
1848
-##}
1849
-##
1850
-##polymorphic <- function(object, cnOptions, tmp.objects){
1851
-##	##batch <- unique(object$batch)
1852
-##	batch <- unique(batch(object))
1853
-##	CHR <- unique(chromosome(object))
1854
-##	vA <- tmp.objects[["vA"]]
1855
-##	vB <- tmp.objects[["vB"]]
1856
-##	Ns <- tmp.objects[["Ns"]]
1857
-##
1858
-##	nuA <- getParam(object, "nuA", batch)
1859
-##	nuB <- getParam(object, "nuB", batch)
1860
-####	nuA.se <- getParam(object, "nuA.se", batch)
1861
-####	nuB.se <- getParam(object, "nuB.se", batch)
1862
-##
1863
-##	phiA <- getParam(object, "phiA", batch)
1864
-##	phiB <- getParam(object, "phiB", batch)
1865
-####	phiA.se <- getParam(object, "phiA.se", batch)
1866
-####	phiB.se <- getParam(object, "phiB.se", batch)
1867
-##	A <- A(object)
1868
-##	B <- B(object)
1869
-##
1870
-##	NOHET <- mean(Ns[, "AB"], na.rm=TRUE) < 0.05
1871
-##	##---------------------------------------------------------------------------
1872
-##	## Estimate CA, CB
1873
-##	##---------------------------------------------------------------------------
1874
-##	if(CHR == 23){
1875
-##		phiAX <- getParam(object, "phiAX", batch)  ##nonspecific hybridization coef
1876
-##		phiBX <- getParam(object, "phiBX", batch)  ##nonspecific hybridization coef
1877
-##		phistar <- phiBX/phiA
1878
-####		tmp <- (B-nuB - phistar*A + phistar*nuA)/phiB
1879
-####		copyB <- tmp/(1-phistar*phiAX/phiB)
1880
-####		copyA <- (A-nuA-phiAX*copyB)/phiA
1881
-####		CB(object) <- copyB  ## multiplies by 100 and converts to integer
1882
-####		CA(object) <- copyA
1883
-##	} else{
1884
-####		CA(object) <- matrix((1/phiA*(A-nuA)), nrow(A), ncol(A))
1885
-####		CB(object) <- matrix((1/phiB*(B-nuB)), nrow(B), ncol(B))
1886
-##
1887
-##	}
1888
-##	return(object)
1889
-##}
1890
-##
1891 1079
 ##posteriorProbability.snps <- function(object, cnOptions, tmp.objects=list()){
1892 1080
 ##	I <- isSnp(object)
1893 1081
 ##	gender <- object$gender
... ...
@@ -2003,19 +1191,6 @@ imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
2003 1191
 ##	return(tmp.objects)
2004 1192
 ##}
2005 1193
 ##
2006
-
2007
-##bias1 <- function(strata.index,
2008
-##		  snpBatches,
2009
-##		  index,
2010
-##		  object,
2011
-##		  normal,
2012
-##		  emit,
2013
-##		  prior.prob,
2014
-##		  MIN.SAMPLES,
2015
-##		  verbose){
2016
-##
2017
-##}
2018
-
2019 1194
 ##bias2 <- function(strata.index,
2020 1195
 ##		  snpBatches,
2021 1196
 ##		  index,
... ...
@@ -2109,9 +1284,6 @@ imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
2109 1284
 ##		 MIN.SAMPLES=MIN.SAMPLES,
2110 1285
 ##		 verbose=verbose)
2111 1286
 ##}
2112
-
2113
-
2114
-##biasAdjNP <- function(plateIndex, envir, priorProb){
2115 1287
 ##biasAdjNP <- function(object, cnOptions, tmp.objects){
2116 1288
 ##	##batch <- unique(object$batch)
2117 1289
 ##	batch <- unique(batch(object))
... ...
@@ -2173,138 +1345,6 @@ imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
2173 1345
 ##}
2174 1346
 
2175 1347
 
2176
-##getParams <- function(object, batch){
2177
-##	##batch <- unique(object$batch)
2178
-##	batch <- unique(batch(object))
2179
-##	if(length(batch) > 1) stop("batch variable not unique")
2180
-##	nuA <- as.numeric(fData(object)[, match(paste("nuA", batch, sep="_"), fvarLabels(object))])
2181
-##	nuB <- as.numeric(fData(object)[, match(paste("nuB", batch, sep="_"), fvarLabels(object))])
2182
-##	phiA <- as.numeric(fData(object)[, match(paste("phiA", batch, sep="_"), fvarLabels(object))])
2183
-##	phiB <- as.numeric(fData(object)[, match(paste("phiB", batch, sep="_"), fvarLabels(object))])
2184
-##	tau2A <- as.numeric(fData(object)[, match(paste("tau2A", batch, sep="_"), fvarLabels(object))])
2185
-##	tau2B <- as.numeric(fData(object)[, match(paste("tau2B", batch, sep="_"), fvarLabels(object))])
2186
-##	sig2A <- as.numeric(fData(object)[, match(paste("sig2A", batch, sep="_"), fvarLabels(object))])
2187
-##	sig2B <- as.numeric(fData(object)[, match(paste("sig2B", batch, sep="_"), fvarLabels(object))])
2188
-##	corrA.BB <- as.numeric(fData(object)[, match(paste("corrA.BB", batch, sep="_"), fvarLabels(object))])
2189
-##	corrB.AA <- as.numeric(fData(object)[, match(paste("corrB.AA", batch, sep="_"), fvarLabels(object))])
2190
-##	corr <- as.numeric(fData(object)[, match(paste("corr", batch, sep="_"), fvarLabels(object))])
2191
-##	params <- list(nuA=nuA,
2192
-##		       nuB=nuB,
2193
-##		       phiA=phiA,
2194
-##		       phiB=phiB,
2195
-##		       tau2A=tau2A,
2196
-##		       tau2B=tau2B,
2197
-##		       sig2A=sig2A,
2198
-##		       sig2B=sig2B,
2199
-##		       corrA.BB=corrA.BB,
2200
-##		       corrB.AA=corrB.AA,
2201
-##		       corr=corr)
2202
-##	return(params)
2203
-##}
2204
-##
2205
-##
2206
-#### Constrain nu and phi to positive values
2207
-##thresholdModelParams <- function(object, cnOptions){
2208
-##	MIN.NU <- cnOptions$MIN.NU
2209
-##	MIN.PHI <- cnOptions$MIN.PHI
2210
-##	batch <- unique(object$batch)
2211
-##	nuA <- getParam(object, "nuA", batch)
2212
-##	nuA[nuA < MIN.NU] <- MIN.NU
2213
-##	object <- pr(object, "nuA", batch, nuA)
2214
-##	nuB <- getParam(object, "nuB", batch)
2215
-##	if(!all(is.na(nuB))){
2216
-##		nuB[nuB < MIN.NU] <- MIN.NU
2217
-##		object <- pr(object, "nuB", batch, nuB)
2218
-##	}
2219
-##	phiA <- getParam(object, "phiA", batch)
2220
-##	phiA[phiA < MIN.PHI] <- MIN.PHI
2221
-##	object <- pr(object, "phiA", batch, phiA)
2222
-##	phiB <- getParam(object, "phiB", batch)
2223
-##	if(!all(is.na(phiB))){
2224
-##		phiB[phiB < MIN.PHI] <- MIN.PHI
2225
-##		object <- pr(object, "phiB", batch, phiB)
2226
-##	}
2227
-##	phiAX <- as.numeric(getParam(object, "phiAX", batch))
2228
-##	if(!all(is.na(phiAX))){
2229
-##		phiAX[phiAX < MIN.PHI] <- MIN.PHI
2230
-##		object <- pr(object, "phiAX", batch, phiAX)
2231
-##	}
2232
-##	phiBX <- as.numeric(getParam(object, "phiBX", batch))
2233
-##	if(!all(is.na(phiBX))){
2234
-##		phiBX[phiBX < MIN.PHI] <- MIN.PHI
2235
-##		object <- pr(object, "phiBX", batch, phiBX)
2236
-##	}
2237
-##	return(object)
2238
-##}
2239
-##
2240
-##cnCNSet <- function(object, cnOptions){
2241
-##	PLATE <- unique(batch(object))
2242
-##	verbose <- cnOptions$verbose
2243
-##	tmp.objects <- instantiateObjects(object, cnOptions)
2244
-##	bias.adj <- cnOptions$bias.adj
2245
-##	if(bias.adj & ncol(object) <= 15){
2246
-##		warning(paste("bias.adj is TRUE, but too few samples to perform this step"))
2247
-##		cnOptions$bias.adj <- bias.adj <- FALSE
2248
-##	}
2249
-##	if(bias.adj){
2250
-##		if(verbose) message("Dropping samples with low posterior prob. of normal copy number (samples dropped is locus-specific)")
2251
-##		tmp.objects <- biasAdjNP(object, cnOptions, tmp.objects)
2252
-##		tmp.objects <- biasAdj(object, cnOptions, tmp.objects)
2253
-##		if(verbose) message("Recomputing location and scale parameters")
2254
-##	}
2255
-##	##update tmp.objects
2256
-##	tmp.objects <- withinGenotypeMoments(object,
2257
-##					     cnOptions=cnOptions,
2258
-##					     tmp.objects=tmp.objects)
2259
-##	object <- locationAndScale(object, cnOptions, tmp.objects)
2260
-##	tmp.objects <- oneBatch(object,
2261
-##				cnOptions=cnOptions,
2262
-##				tmp.objects=tmp.objects)
2263
-##	##coefs calls nuphiAllele.
2264
-##	object <- coefs(object, cnOptions, tmp.objects)
2265
-##	##nuA=getParam(object, "nuA", PLATE)
2266
-##	THR.NU.PHI <- cnOptions$THR.NU.PHI
2267
-##	if(THR.NU.PHI){
2268
-##		verbose <- cnOptions$verbose
2269
-##		##if(verbose) message("Thresholding nu and phi")
2270
-##		object <- thresholdModelParams(object, cnOptions)
2271
-##	}
2272
-##	##if(verbose) message("\nAllele specific copy number")
2273
-##	object <- polymorphic(object, cnOptions, tmp.objects)
2274
-##	if(any(!isSnp(object))){  ##there are nonpolymorphic probes
2275
-##		##if(verbose) message("\nCopy number for nonpolymorphic probes...")
2276
-##		object <- nonpolymorphic(object, cnOptions, tmp.objects)
2277
-##	}
2278
-##	##---------------------------------------------------------------------------
2279
-##	##Note: the replacement method multiples by 100
2280
-####	CA(object)[, batch==PLATE] <- CA(object)
2281
-####	CB(object)[, batch==PLATE] <- CB(object)
2282
-##	##---------------------------------------------------------------------------
2283
-##	##update-the plate-specific parameters for copy number
2284
-##	object <- pr(object, "nuA", PLATE, getParam(object, "nuA", PLATE))
2285
-##	object <- pr(object, "nuA.se", PLATE, getParam(object, "nuA.se", PLATE))
2286
-##	object <- pr(object, "nuB", PLATE, getParam(object, "nuB", PLATE))
2287
-##	object <- pr(object, "nuB.se", PLATE, getParam(object, "nuB.se", PLATE))
2288
-##	object <- pr(object, "phiA", PLATE, getParam(object, "phiA", PLATE))
2289
-##	object <- pr(object, "phiA.se", PLATE, getParam(object, "phiA.se", PLATE))
2290
-##	object <- pr(object, "phiB", PLATE, getParam(object, "phiB", PLATE))
2291
-##	object <- pr(object, "phiB.se", PLATE, getParam(object, "phiB.se", PLATE))
2292
-##	object <- pr(object, "tau2A", PLATE, getParam(object, "tau2A", PLATE))
2293
-##	object <- pr(object, "tau2B", PLATE, getParam(object, "tau2B", PLATE))
2294
-##	object <- pr(object, "sig2A", PLATE, getParam(object, "sig2A", PLATE))
2295
-##	object <- pr(object, "sig2B", PLATE, getParam(object, "sig2B", PLATE))
2296
-##	object <- pr(object, "phiAX", PLATE, as.numeric(getParam(object, "phiAX", PLATE)))
2297
-##	object <- pr(object, "phiBX", PLATE, as.numeric(getParam(object, "phiBX", PLATE)))
2298
-##	object <- pr(object, "corr", PLATE, getParam(object, "corr", PLATE))
2299
-##	object <- pr(object, "corrA.BB", PLATE, getParam(object, "corrA.BB", PLATE))
2300
-##	object <- pr(object, "corrB.AA", PLATE, getParam(object, "corrB.AA", PLATE))
2301
-##	##object <- object[order(chromosome(object), position(object)), ]
2302
-####	if(cnOptions[["thresholdCopynumber"]]){
2303
-####		object <- thresholdCopynumber(object)
2304
-####	}
2305
-##	return(object)
2306
-##}
2307
-##
2308 1348
 
2309 1349
 
2310 1350
 ## constructors for Illumina platform
... ...
@@ -2682,8 +1722,6 @@ crlmmCopynumber <- function(object,
2682 1722
 			    seed=1,
2683 1723
 			    verbose=TRUE,
2684 1724
 			    GT.CONF.THR=0.95,
2685
-			    PHI.THR=2^6,
2686
-			    nHOM.THR=5,
2687 1725
 			    MIN.NU=2^3,
2688 1726
 			    MIN.PHI=2^3,
2689 1727
 			    THR.NU.PHI=TRUE,
... ...
@@ -2764,6 +1802,11 @@ crlmmCopynumber <- function(object,
2764 1802
 	}
2765 1803
 	return(object)
2766 1804
 }
1805
+crlmmCopynumber2 <- function(){
1806
+	.Defunct(msg="The crlmmCopynumber2 function has been deprecated. The function crlmmCopynumber should be used instead.  crlmmCopynumber will support large data using ff provided that the ff package is loaded.")
1807
+}
1808
+crlmmCopynumberLD <- crlmmCopynumber2
1809
+
2767 1810
 
2768 1811
 estimateCnParameters <- function(object,
2769 1812
 				 type=c("SNP", "NP", "X.SNP", "X.NP"),
... ...
@@ -421,7 +421,9 @@ setMethod("CB", signature=signature(object="CNSet"),
421 421
 		  return(cb)
422 422
 	  })
423 423
 
424
-totalCopynumber <- function(object, ...){
424
+##totalCopynumber <- function(object, ...){
425
+setMethod("totalCopynumber", signature=signature(object="CNSet"),
426
+	  function(object, ...){
425 427
 	message("copy number at nonpolymorphic probes is not currently available")
426 428
 	ca <- CA(object, ...)
427 429
 	cb <- CB(object, ...)
... ...
@@ -436,7 +438,7 @@ totalCopynumber <- function(object, ...){
436 438
 		if(length(np.index) > 0) cb[np.index, ] <- 0
437 439
 	}
438 440
 	return(ca+cb)
439
-}
441
+})
440 442
 
441 443
 setReplaceMethod("snpCall", c("CNSet", "ff_or_matrix"),
442 444
                  function(object, ..., value){
... ...
@@ -80,9 +80,9 @@ cn4 <- totalCopynumber(cnSet, i=index, j=1:2)
80 80
 all.equal(cn4, cn2)
81 81
 
82 82
 ## markers 1-5, all samples
83
-cn5 <- totalCopyNumber(cnSet, i=1:5)
83
+cn5 <- totalCopynumber(cnSet, i=1:5)
84 84
 ## all markers, samples 1-5
85
-cn6 <- totalCopyNumber(cnSet, j=1:5)
85
+cn6 <- totalCopynumber(cnSet, j=1:5)
86 86
 
87 87
 ## NOTE: subsetting the object before extracting copy number
88 88
 ##       can be very inefficient when the data set is very large,
... ...
@@ -7,31 +7,17 @@
7 7
   Locus- and allele-specific estimation of copy number.
8 8
 }
9 9
 \usage{
10
-%crlmmCopynumber(object, chromosome = 1:23, which.batches, MIN.SAMPLES
11
-%= 10, SNRMin = 5, MIN.OBS = 3, DF.PRIOR = 50, bias.adj = FALSE,
12
-%prior.prob = rep(1/4, 4), seed = 1, verbose = TRUE, GT.CONF.THR =
13
-%0.99, PHI.THR = 2^6, nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3,
14
-%THR.NU.PHI = TRUE, thresholdCopynumber = TRUE)
15
-
16
-crlmmCopynumberLD(object, which.batches, MIN.SAMPLES = 10, SNRMin = 5,
17
-MIN.OBS = 1, DF.PRIOR = 50, bias.adj = FALSE, prior.prob = rep(1/4,
18
-4), seed = 1, verbose = TRUE, GT.CONF.THR = 0.99, PHI.THR = 2^6,
19
-nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3, THR.NU.PHI = TRUE,
20
-thresholdCopynumber = TRUE)
21
-
22
-}
23
-\arguments{
24
-  \item{object}{object of class \code{SnpSuperSet}.
25
-}
26
-
27
-  \item{which.batches}{ Character vector with length equal to the number of
28
-  samples.  Used to adjust for batch effects.  Chemistry plate or
29
-  date often work well.  See examples.
30
-
31
-  Ignored in crlmmCopynumberLD.
32
-
10
+crlmmCopynumber(object, MIN.SAMPLES=10, SNRMin = 5, MIN.OBS = 1, 
11
+	        DF.PRIOR = 50, bias.adj = FALSE,
12
+                prior.prob = rep(1/4, 4), seed = 1, verbose = TRUE,
13
+                GT.CONF.THR = 0.95, PHI.THR = 2^6, MIN.NU = 2^3, MIN.PHI = 2^3,
14
+                THR.NU.PHI = TRUE, type=c("SNP", "NP", "X.SNP", "X.NP"))
33 15
 }
34 16
 
17
+\arguments{
18
+  
19
+  \item{object}{object of class \code{CNSet}.}
20
+  
35 21
  \item{MIN.SAMPLES}{ 'Integer'.  The minimum number of samples in a
36 22
   batch.  Bathes with fewer than MIN.SAMPLES are skipped.  Therefore,
37 23
   samples in batches with fewer than MIN.SAMPLES have NA's for the
... ...
@@ -40,22 +26,22 @@ thresholdCopynumber = TRUE)
40 26
 
41 27
 }
42 28
 
43
-  \item{SNRMin}{ Samples with low signal to noise ratios are
44
-  excluded.  
45
-}
29
+  \item{SNRMin}{ Samples with low signal to noise ratios are  excluded.  }
46 30
 
47 31
   \item{MIN.OBS}{ 
48 32
 
49
-  For genotypes with fewer than \code{MIN.OBS}, the within-genotype
50
-  median is imputed from the observed genotypes.  For example, assume
51
-  at at a given SNP genotypes AA and AB were observed and BB is an
52
-  unobserved genotype.  For SNPs in which all 3 genotypes were
53
-  observed, we fit the model E(mean_BB) = beta0 + beta1*mean_AA +
54
-  beta2*mean_AB, obtaining estimates; of beta0, beta1, and beta2.  The
55
-  imputed mean at the SNP with unobserved BB is then beta0hat +
56
-  beta1hat * mean_AA of beta2hat * mean_AB.
33
+  For a SNP with with fewer than \code{MIN.OBS} of a genotype in a given
34
+  batch, the within-genotype median is imputed.  The imputation is based
35
+  on a regression using SNPs for which all three biallelic genotypes are
36
+  observed.  For example, assume at at a given SNP genotypes AA and AB
37
+  were observed and BB is an unobserved genotype.  For SNPs in which all
38
+  3 genotypes were observed, we fit the model E(mean_BB) = beta0 +
39
+  beta1*mean_AA + beta2*mean_AB, obtaining estimates; of beta0, beta1,
40
+  and beta2.  The imputed mean at the SNP with unobserved BB is then
41
+  beta0hat + beta1hat * mean_AA of beta2hat * mean_AB.
57 42
 
58 43
 }
44
+
59 45
   \item{DF.PRIOR}{
60 46
 
61 47
   The 2 x 2 covariance matrix of the background and signal variances
... ...
@@ -67,23 +53,25 @@ thresholdCopynumber = TRUE)
67 53
   estimate DF.PRIOR empirically.
68 54
 
69 55
 }
70
-  \item{bias.adj}{ 
71 56
 
72
-  If \code{TRUE}, initial estimates of the linear model are updated
73
-  after excluding samples that have a low posterior probability of
74
-  normal copy number.  Excluding samples that have a low posterior
75
-  probability can be helpful at loci in which a substantial fraction
76
-  of the samples have a copy number alteration.  For additional
77
-  information, see Scharpf et al., 2009.
57
+\item{bias.adj}{
78 58
 
79
-  This argument is ignored in crlmmCopynumberLD.
59
+  \code{bias.adj} is currently ignored (as well as the prior.prob
60
+  argument).  We plan to add this feature back to the crlmm package in
61
+  the near future. This feature, when \code{TRUE}, updated initial
62
+  estimates from the linear model after excluding samples with a low
63
+  posterior probability of normal copy number.  Excluding samples that
64
+  have a low posterior probability can be helpful at loci in which a
65
+  substantial fraction of the samples have a copy number alteration.
66
+  For additional information, see Scharpf et al., 2010.
80 67
 
81 68
 }
82 69
   \item{prior.prob}{
83 70
 
84
-  A numerical vector providing prior probabilities for copy number
85
-  states corresponding to homozygous deletion, hemizygous deletion,
86
-  normal copy number, and amplification, respectively.
71
+    This argument is currently ignored.  A numerical vector providing
72
+  prior probabilities for copy number states corresponding to homozygous
73
+  deletion, hemizygous deletion, normal copy number, and amplification,
74
+  respectively.
87 75
 
88 76
 }
89 77
   \item{seed}{ Seed for random number generation.}
... ...
@@ -94,56 +82,56 @@ thresholdCopynumber = TRUE)
94 82
 
95 83
     Confidence threshold for genotype calls (0, 1).  Calls with
96 84
     confidence scores below this theshold are not used to estimate the
97
-    within-genotype medians.
85
+    within-genotype medians. See Carvalho et al., 2007 for information
86
+    regarding confidence scores of biallelic genotypes.
98 87
 
99 88
 }
100 89
 
101
-  \item{PHI.THR}{ 
102
-    SNPs with slopes (phi values) below this value are flagged.
103
-    Flagged SNPs are not used in a regression to impute background and
104
-    slope coefficients at nonpolymorphic loci.
105
-}
106 90
 
107
-  \item{nHOM.THR}{ 
91
+  \item{MIN.NU}{ numeric. Minimum value for background
92
+    intensity. Ignored if \code{THR.NU.PHI} is \code{FALSE}. }
93
+  
94
+  \item{MIN.PHI}{numeric. Minimum value for slope. Ignored if
95
+    \code{THR.NU.PHI} is \code{FALSE}.}
96
+  
97
+  \item{THR.NU.PHI}{  If \code{THR.NU.PHI} is \code{FALSE},
98
+    \code{MIN.NU} and \code{MIN.PHI} are ignored. When TRUE, background
99
+    (nu) and slope (phi) coefficients below MIN.NU and MIN.PHI are set
100
+    to MIN.NU and MIN.PHI, respectively.}
101
+ }
102
+  
103
+ \references{
104
+   
105
+  Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
106
+  normalization, and genotype calls of high-density oligonucleotide SNP
107
+  array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec
108
+  22. PMID: 17189563.
108 109
 
109
-  If fewer than \code{nHOM.THR} homozygous genotypes (AA or BB) are
110
-    observed, the SNPs is flagged.  Flagged SNPs are not used in a
111
-    regression to impute background and slope coefficients at
112
-    nonpolymorphic loci.
113
-
114
-}
115
-
116
-  \item{MIN.NU}{ numeric. Minimum threshold for background. Ignored if \code{THR.NU.PHI} is \code{FALSE}.
117
-}
118
-  \item{MIN.PHI}{numeric. Minimum threshold for slope. Ignored if \code{THR.NU.PHI} is \code{FALSE}.
119
-}
120
-  \item{THR.NU.PHI}{
121
-  If \code{THR.NU.PHI} is \code{FALSE}, \code{MIN.NU} and
122
-  \code{MIN.PHI} are ignored.
123
-}
124
-  \item{thresholdCopynumber}{
125
-
126
-  If \code{TRUE}, allele-specific number estimates are truncated.
127
-  Values less than 0.05 are assigned the value 0.05; values exceeding
128
-  5 are assigned the value 5.  
129
-
130
-  Ignored in crlmmCopynumberLD.  Extreme values are automatically
131
-  truncated.
132
-
133
-}
110
+  Carvalho BS, Louis TA, Irizarry RA. 
111
+  Quantifying uncertainty in genotype calls.
112
+  Bioinformatics. 2010 Jan 15;26(2):242-9.
134 113
 
114
+  Scharpf RB, Ruczinski I, Carvalho B, Doan B, Chakravarti A, and
115
+  Irizarry RA, Biostatistics.  Biostatistics, Epub July 2010.
116
+  
135 117
 }
136 118
 
137 119
 \details{
138 120
 
139
-	The function crlmmCopynumber uses matrices instead of ff
140
-	objects if the ff library is not loaded.
141
-
142
-	The function crlmmCopynumberLD allows parallel processing via
143
-	and requires large data support via the ff package.
144
-
145
-	We plan to phase out crlmmCopynumber and replace this function
146
-	by crlmmCopynumberLD.
121
+	The function crlmmCopynumber uses matrices instead of ff objects
122
+	if the ff library is not loaded. When the ff package is loaded,
123
+	large data support is enabled.  Normalized intensities
124
+	(\code{alleleA} and \code{alleleB}), genotype calls and
125
+	confidence scores (\code{snpCall} and \code{snpCallProbability})
126
+	are stored in \code{assayData} slot.  Summary statistics for
127
+	each batch, including the linear model paramters for copy
128
+	number, are stored in the \code{batchStatistics} slot.  Both the
129
+	\code{assayData} and \code{batchStatistics} slot are of class
130
+	\code{AssayData} with elements that are ff objects (if ff
131
+	package is loaded) or matrices.
132
+
133
+	The functions \code{crlmmCopynumberLD} and
134
+	\code{crlmmCopynumber2} have been deprecated.
147 135
 
148 136
 }
149 137
 \author{R. Scharpf}
... ...
@@ -54,25 +54,6 @@ all(isCurrent(cnSet))
54 54
 	nuA(container2)[1:5, ] <- xx
55 55
 	invisible(close(nuA(container2)))
56 56
 }
57
-
58
-
59
-
60
-
61
-nms <- names(lM(sample.CNSetLMff))
62
-##names must be tau2A, tau2B, ...
63
-nms2 <- sapply(nms, function(x) strsplit(x, "\\.1")[[1]][[1]], USE.NAMES=FALSE)
64
-names(sample.CNSetLMff@lM) <- nms2
65
-cnSetff <- as(sample.CNSetLMff, "CNSet")
66
-## try replacement method without warning
67
-nuA(cnSetff)[1:10, ] <- matrix(1:10, 10, 1)
68
-
69
-## try subset method without warning
70
-
71
-
72
-
73
-## information on the features
74
-fvarLabels(cnSet)
75
-##
76 57
 ## --------------------------------------------------
77 58
 ## accessors for the feature-level info
78 59
 ## --------------------------------------------------
... ...
@@ -81,8 +62,6 @@ position(cnSet)[1:5]
81 62
 isSnp(cnSet)[1:5]
82 63
 ## 980 nonpolymorphic markers and 1139 polymoprhic markers
83 64
 table(isSnp(cnSet))
84
-## --------------------------------------------------
85
-
86 65
 ## --------------------------------------------------
87 66
 ## sample-level statistics computed by crlmm
88 67
 ## --------------------------------------------------
... ...
@@ -95,16 +74,14 @@ cnSet$SKW[1:5]
95 74
 ## the gender (gender is imputed unless specified in the call to crlmm)
96 75
 table(cnSet$gender)  ## 1=male, 2=female
97 76
 ## --------------------------------------------------
98
-
99
-
100
-## --------------------------------------------------
101
-## accessors for linear model parameters estimated from
102
-## the linear model for copy number
103
-## (note that the parameters have dimension R x C, where
104
-##  R corresponds to the number of features and C corresponds
105
-##  to the number of batches)
106 77
 ## --------------------------------------------------
107
-## estimate of background
78
+##
79
+## accessors for parameters estimated from the linear model for copy
80
+## number (note that the parameters have dimension R x C, where R
81
+## corresponds to the number of features and C corresponds to the
82
+## number of batches)
83
+## -------------------------------------------------- estimate of
84
+## background
108 85
 dim(nu(cnSet, "A"))
109 86
 ## background for the A allele in the 2 batches for the
110 87
 ## first 5 markers
... ...
@@ -146,15 +123,15 @@ cn2 <- CA(cnSet, i=index, j=1:2)
146 123
 ## note, ca+cb will give NAs at nonpolymorphic loci
147 124
 CA(cnSet, i=index, j=1:2) + cb
148 125
 ## A shortcut for total copy number
149
-cn3 <- totalCopyNumber(cnSet, i=1:5, j=1:2)
126
+cn3 <- totalCopynumber(cnSet, i=1:5, j=1:2)
150 127
 all.equal(cn3, cn1)
151
-cn4 <- totalCopyNumber(cnSet, i=index, j=1:2)
128
+cn4 <- totalCopynumber(cnSet, i=index, j=1:2)
152 129
 all.equal(cn4, cn2)
153 130
 
154 131
 ## markers 1-5, all samples
155
-cn5 <- totalCopyNumber(cnSet, i=1:5)
132
+cn5 <- totalCopynumber(cnSet, i=1:5)
156 133
 ## all markers, samples 1-5
157
-cn6 <- totalCopyNumber(cnSet, j=1:5)
134
+cn6 <- totalCopynumber(cnSet, j=1:5)
158 135
 
159 136
 ## NOTE: subsetting the object before extracting copy number
160 137
 ##       can be very inefficient when the data set is very large,