Browse code

added a few Rd files. Removed metaArray from NAMESPACE

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45865 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 11/04/2010 16:10:26
Showing9 changed files

... ...
@@ -509,3 +509,9 @@ then readIDAT() should work. Thanks to Pierre Cherel who reported this error.
509 509
 ** crlmmIllumina2() and crlmmIlluminaV2() now use the crlmmGT2() 
510 510
 function (which expects ff objects and supports parallel processing)
511 511
 ** updated crlmmIllumina.pdf vignette
512
+
513
+2010-04-11 R. Scharpf committed version 1.5.48
514
+** added a few .Rd files
515
+
516
+
517
+
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.47
4
+Version: 1.5.48
5 5
 Date: 2010-04-09
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <carvalho@bclab.org>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -23,8 +23,7 @@ Imports: affyio (>= 1.15.2),
23 23
          utils
24 24
 Suggests: hapmapsnp6,
25 25
           genomewidesnp6Crlmm (>= 1.0.2),
26
-          snpMatrix,
27
-          metaArray
26
+          snpMatrix
28 27
 Collate: AllGenerics.R
29 28
 	 AllClasses.R
30 29
 	 methods-CNSet.R
... ...
@@ -29,7 +29,7 @@ importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet)
29 29
 importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs,
30 30
 		  "confs<-", cnConfidence, "cnConfidence<-", isSnp,
31 31
 		  chromosome, position, CA, "CA<-", CB, "CB<-", A, B,
32
-		  "A<-", "B<-")
32
+		  "A<-", "B<-", open, close)
33 33
 
34 34
 importFrom(oligoClasses, chromosome2integer, celfileDate, list.celfiles,
35 35
            copyNumber, initializeBigMatrix, initializeBigVector)
... ...
@@ -56,25 +56,20 @@ importFrom(ellipse, ellipse)
56 56
 importFrom(ff, ffdf)
57 57
 
58 58
 exportClasses(CNSetLM)
59
-exportMethods(copyNumber, open, "[", show, lM, "lM<-")
59
+exportMethods(open, "[", show, lM)
60 60
 export(crlmm, 
61 61
        crlmmCopynumber, 
62 62
        crlmmIllumina, 
63
-       crlmmIlluminaRS, 
64
-       ellipse, 
63
+##       ellipse, 
65 64
        genotype, 
66
-       getParam, 
67 65
        readIdatFiles, 
68 66
        snprma,
69 67
        snprma2,
70 68
        crlmm2,
71 69
        genotype2,
72
-       cnrma2,
73
-       processCEL2,
74 70
        batch,
75 71
        crlmmCopynumber2)
76
-
77
-export(initializeParamObject)
72
+##export(initializeParamObject, biasAdjNP2)
78 73
 
79 74
 
80 75
 
... ...
@@ -75,25 +75,11 @@ construct <- function(filenames, cdfName, copynumber=FALSE,
75 75
 	sampleNames(callSet) <- sns
76 76
 	return(callSet)
77 77
 }
78
-setMethod("close", "AlleleSet", function(con, ...){
79
-	object <- con
80
-	names <- ls(assayData(object))
81
-	L <- length(names)
82
-	for(i in 1:L) close(eval(substitute(assayData(object)[[NAME]], list(NAME=names[i]))))
83
-	return()
84
-})
85
-setMethod("open", "AlleleSet", function(con, ...){
86
-	object <- con
87
-	names <- ls(assayData(object))
88
-	L <- length(names)
89
-	for(i in 1:L) open(eval(substitute(assayData(object)[[NAME]], list(NAME=names[i]))))
90
-	return()
91
-})
92
-##setReplaceMethod("calls", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "call", value))
93
-##setReplaceMethod("confs", "SnpSuperSet", function(object, value) assayDataElementReplace(object, "callProbability", value))
94
-##setMethod("confs", "SnpSuperSet", function(object) assayDataElement(object, "callProbability"))
78
+
79
+
95 80
 genotype <- function(filenames,
96 81
 		     cdfName,
82
+		     batch,
97 83
 		     mixtureSampleSize=10^5,
98 84
 		     eps=0.1,
99 85
 		     verbose=TRUE,
... ...
@@ -111,6 +97,12 @@ genotype <- function(filenames,
111 97
 	if(missing(cdfName)) stop("must specify cdfName")
112 98
 	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
113 99
 	if(missing(sns)) sns <- basename(filenames)
100
+	if(missing(batch)){
101
+		warning("The batch variable is not specified. The scan date of the array will be used as a surrogate for batch.  The batch variable does not affect the preprocessing or genotyping, but is important for copy number estimation.")
102
+	} else {
103
+		if(length(batch) != length(filenames))
104
+			stop("batch variable must be the same length as the filenames")
105
+	}	
114 106
 	## callSet contains potentially very big matrices
115 107
 	## More big matrices are created within snprma, that will then be removed.
116 108
 	callSet <- construct(filenames=filenames,
... ...
@@ -118,6 +110,9 @@ genotype <- function(filenames,
118 110
 			     copynumber=copynumber,
119 111
 			     sns=sns,
120 112
 			     verbose=verbose)
113
+	if(missing(batch)){
114
+		protocolData(callSet)$batch <- as.numeric(as.factor(protocolData(callSet)$ScanDate))
115
+	}
121 116
 	mixtureParams <- matrix(NA, 4, length(filenames))
122 117
 	snp.index <- which(isSnp(callSet)==1)
123 118
 	batches <- splitIndicesByLength(1:ncol(callSet), ocSamples())
... ...
@@ -193,13 +188,13 @@ genotype2 <- function(filenames,
193 188
 		      gender=NULL,
194 189
 		      returnParams=TRUE,
195 190
 		      badSNP=0.7){
191
+	if(!isPackageLoaded("ff")) stop("Must load package 'ff'")
196 192
 	if(!copynumber){
197 193
 		callSet <- crlmm2(filenames=filenames,
198 194
 				  cdfName=cdfName,
199 195
 				  mixtureSampleSize=mixtureSampleSize,
200 196
 				  eps=eps,
201 197
 				  verbose=verbose,
202
-				  seed=seed,
203 198
 				  sns=sns,
204 199
 				  probs=probs,
205 200
 				  DF=DF,
... ...
@@ -232,9 +227,7 @@ genotype2 <- function(filenames,
232 227
 	if(!missing(batch)) protocolData(callSet)$batch <- batch
233 228
 	mixtureParams <- matrix(NA, 4, length(filenames))
234 229
 	snp.index <- which(isSnp(callSet)==1)
235
-	snprmaRes <- snprma2(##A=A(callSet),
236
-			     ##B=B(callSet),
237
-			       filenames=filenames,
230
+	snprmaRes <- snprma2(filenames=filenames,
238 231
 			       mixtureSampleSize=mixtureSampleSize,
239 232
 			       fitMixture=TRUE,
240 233
 			       eps=eps,
... ...
@@ -249,13 +242,9 @@ genotype2 <- function(filenames,
249 242
 	open(snprmaRes[["SKW"]])
250 243
 	open(snprmaRes[["mixtureParams"]])
251 244
 	if(verbose) message("Updating elements of callSet")
252
-##	A(callSet) <- snprmaRes[["A"]]
253
-##	B(callSet) <- snprmaRes[["B"]]
254 245
 	bb = ocProbesets()*ncol(A)*8
255 246
 	ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]], BATCHBYTES=bb)
256 247
 	ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]], BATCHBYTES=bb)
257
-	##ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]])
258
-	##ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]])
259 248
 	if(verbose) message("Finished updating elements of callSet")
260 249
 	stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns))
261 250
 	pData(callSet)$SKW <- snprmaRes$SKW
... ...
@@ -289,435 +278,14 @@ genotype2 <- function(filenames,
289 278
 			   badSNP=badSNP)
290 279
 	open(tmp[["calls"]])
291 280
 	open(tmp[["confs"]])
292
-	##snpCall(callSet) <- tmp[["calls"]]
293
-	##snpCallProbability(callSet) <- tmp[["confs"]]
294 281
 	bb = ocProbesets()*ncol(A)*8
295 282
 	ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb)
296 283
 	ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb)
297
-##	ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]])
298
-##	ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]])
299 284
 	callSet$gender <- tmp$gender
300 285
 	cnSet <- as(callSet, "CNSetLM")
301 286
 	return(cnSet)
302 287
 }
303 288
 
304
-snprma2RS <- function(A, B,
305
-		      filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
306
-		      eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
307
-  if (missing(sns)) sns <- basename(filenames)
308
-  if (missing(cdfName))
309
-    cdfName <- read.celfile.header(filenames[1])[["cdfName"]]
310
-  pkgname <- getCrlmmAnnotationName(cdfName)
311
-  stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
312
-  
313
-  if(verbose) message("Loading annotations and mixture model parameters.")
314
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
315
-  pnsa <- getVarInEnv("pnsa")
316
-  pnsb <- getVarInEnv("pnsb")
317
-  gns <- getVarInEnv("gns")
318
-  
319
-  ##We will read each cel file, summarize, and run EM one by one
320
-  ##We will save parameters of EM to use later
321
-  if(verbose) message("Initializing objects.")
322
-  mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
323
-  SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double")
324
-  SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double")
325
-  
326
-  ## This is the sample for the fitting of splines
327
-  ## BC: I like better the idea of the user passing the seed,
328
-  ##     because this might intefere with other analyses
329
-  ##     (like what happened to GCRMA)
330
-  ##S will hold (A+B)/2 and M will hold A-B
331
-  ##NOTE: We actually dont need to save S. Only for pics etc...
332
-  ##f is the correction. we save to avoid recomputing
333
-##  A <- initializeBigMatrix("crlmmA-", length(pnsa), length(filenames), "integer")
334
-##  B <- initializeBigMatrix("crlmmB-", length(pnsb), length(filenames), "integer")
335
-
336
-  sampleBatches <- splitIndicesByNode(seq(along=filenames))
337
-
338
-  if(verbose) message("Processing ", length(filenames), " files.")
339
-
340
-  ocLapply(sampleBatches, processCEL_RS, filenames=filenames,
341
-           fitMixture=fitMixture, A=A, B=B, SKW=SKW, SNR=SNR,
342
-           mixtureParams=mixtureParams, eps=eps, seed=seed,
343
-           mixtureSampleSize=mixtureSampleSize, pkgname=pkgname,
344
-           neededPkgs=c("crlmm", pkgname))
345
-  
346
-  list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
347
-}
348
-
349
-processCEL_RS <- function(i, filenames, fitMixture, A, B, SKW, SNR,
350
-			  mixtureParams, eps, seed, mixtureSampleSize,
351
-			  pkgname){
352
-  
353
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
354
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
355
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
356
-  autosomeIndex <- getVarInEnv("autosomeIndex")
357
-  pnsa <- getVarInEnv("pnsa")
358
-  pnsb <- getVarInEnv("pnsb")
359
-  fid <- getVarInEnv("fid")
360
-  reference <- getVarInEnv("reference")
361
-  aIndex <- getVarInEnv("aIndex")
362
-  bIndex <- getVarInEnv("bIndex")
363
-  SMEDIAN <- getVarInEnv("SMEDIAN")
364
-  theKnots <- getVarInEnv("theKnots")
365
-  gns <- getVarInEnv("gns")
366
-
367
-  ## for mixture
368
-  set.seed(seed)
369
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
370
-  ##for skewness. no need to do everything
371
-  idx2 <- sample(length(fid), 10^5)
372
-
373
-  open(A)
374
-  open(B)
375
-  open(SKW)
376
-  open(mixtureParams)
377
-  open(SNR)
378
-  ##RS
379
-  iii <- seq(along=pnsa)
380
-
381
-  for (k in i){
382
-    y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
383
-    x <- log2(y[idx2])
384
-    SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3)
385
-    rm(x)
386
-    y <- normalize.quantiles.use.target(y, target=reference)
387
-    A[iii, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
388
-    B[iii, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
389
-    rm(y)
390
-    
391
-    if(fitMixture){
392
-      S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
393
-      M <- log2(A[idx, k])-log2(B[idx, k])
394
-      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
395
-      mixtureParams[, k] <- tmp[["coef"]]
396
-      SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
397
-    } else {
398
-      mixtureParams[, k] <- NA
399
-      SNR[k] <- NA
400
-    }
401
-  }
402
-  close(A)
403
-  close(B)
404
-  close(SKW)
405
-  close(mixtureParams)
406
-  close(SNR)
407
-  TRUE
408
-}
409
-
410
-crlmmGT2_RS<- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
411
-		       col.names=NULL, probs=c(1/3, 1/3, 1/3), DF=6,
412
-		       SNRMin=5, recallMin=10, recallRegMin=1000,
413
-		       gender=NULL, desctrucitve=FALSE, verbose=TRUE,
414
-		       returnParams=FALSE, badSNP=.7){
415
-  open(SNR)
416
-  open(A)
417
-  open(B)
418
-  open(mixtureParams)
419
-  ## expect objects to be ff
420
-  
421
-  keepIndex <- which( SNR[] > SNRMin)
422
-  if(length(keepIndex)==0) stop("No arrays above quality threshold!")
423
-  
424
-  NC <- ncol(A)
425
-  NR <- nrow(A)
426
-  
427
-  pkgname <- getCrlmmAnnotationName(cdfName)
428
-  stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
429
-
430
-  if(verbose) message("Loading annotations.")
431
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
432
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
433
-  ## this is toget rid of the 'no visible binding' notes
434
-  ## variable definitions
435
-  XIndex <- getVarInEnv("XIndex")
436
-  autosomeIndex <- getVarInEnv("autosomeIndex")
437
-  YIndex <- getVarInEnv("YIndex")
438
-  SMEDIAN <- getVarInEnv("SMEDIAN")
439
-  theKnots <- getVarInEnv("theKnots")
440
-  regionInfo <- getVarInEnv("regionInfo")
441
-  params <- getVarInEnv("params")
442
-  ##RS
443
-  pnsa <- getVarInEnv("pnsa")
444
-  NR <- length(pnsa)
445
-  
446
-  ##IF gender not provide, we predict
447
-  if(is.null(gender)){
448
-    if(verbose) message("Determining gender.")
449
-    XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2
450
-    if(sum(SNR[] > SNRMin)==1){
451
-      gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
452
-    }else{
453
-      gender <- kmeans(XMedian, c(min(XMedian[SNR[]>SNRMin]), max(XMedian[SNR[]>SNRMin])))[["cluster"]]
454
-    }
455
-  }
456
-  
457
-  Indexes <- list(autosomeIndex, XIndex, YIndex)
458
-  cIndexes <- list(keepIndex, 
459
-                   keepIndex[which(gender[keepIndex]==2)], 
460
-                   keepIndex[which(gender[keepIndex]==1)])
461
-  
462
-  if(verbose) cat("Calling", NR, "SNPs for recalibration... ")
463
-
464
-  ## call C
465
-  fIndex <- which(gender==2)
466
-  mIndex <- which(gender==1)
467
-
468
-  ## different here
469
-  ## use gtypeCallerR in batches
470
-  ##RS
471
-  snpBatches <- splitIndicesByLength(1:NR, ocProbesets())
472
-  newparamsBatch <- vector("list", length(snpBatches))
473
-
474
-  process1 <- function(idxBatch, snpBatches, autosomeIndex, XIndex,
475
-                       YIndex, A, B, mixtureParams, fIndex, mIndex,
476
-                       params, cIndexes, SMEDIAN, theKnots, DF, probs, batchSize){
477
-    open(A)
478
-    open(B)
479
-    open(mixtureParams)
480
-    snps <- snpBatches[[idxBatch]]
481
-    rSnps <- range(snps)
482
-    last <- (idxBatch-1)*batchSize
483
-    IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last,
484
-                         XIndex[XIndex %in% snps]-last,
485
-                         YIndex[YIndex %in% snps]-last)
486
-    IndexesBatch <- lapply(IndexesBatch, as.integer)
487
-    tmpA <- as.matrix(A[snps,])
488
-    tmpB <- as.matrix(B[snps,])
489
-    ## newparamsBatch[[idxBatch]]
490
-
491
-    tmp <- gtypeCallerR(tmpA, tmpB, fIndex, mIndex,
492
-                        params[["centers"]][snps,],
493
-                        params[["scales"]][snps,],
494
-                        params[["N"]][snps,],
495
-                        IndexesBatch, cIndexes,
496
-                        sapply(IndexesBatch, length),
497
-                        sapply(cIndexes, length), SMEDIAN,
498
-                        theKnots, mixtureParams[], DF, probs, 0.025)
499
-    
500
-    last <- rSnps[2]
501
-    rm(snps, rSnps, IndexesBatch, tmpA, tmpB)
502
-    close(A)
503
-    close(B)
504
-    close(mixtureParams)
505
-    tmp
506
-  }
507
-
508
-  newparamsBatch <- ocLapply(seq(along=snpBatches), process1,
509
-                             snpBatches=snpBatches,
510
-                             autosomeIndex=autosomeIndex, XIndex=XIndex,
511
-                             YIndex=YIndex, A=A, B=B,
512
-                             mixtureParams=mixtureParams, fIndex=fIndex,
513
-                             mIndex=mIndex, params=params,
514
-                             cIndexes=cIndexes, SMEDIAN=SMEDIAN,
515
-                             theKnots=theKnots, DF=DF, probs=probs,
516
-                             batchSize=ocProbesets())
517
-##   last <- 0
518
-##   for (idxBatch in seq(along=snpBatches)){
519
-##     snps <- snpBatches[[idxBatch]]
520
-##     rSnps <- range(snps)
521
-##     IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last,
522
-##                          XIndex[XIndex %in% snps]-last,
523
-##                          YIndex[YIndex %in% snps]-last)
524
-##     IndexesBatch <- lapply(IndexesBatch, as.integer)
525
-##     tmpA <- A[snps,]
526
-##     tmpB <- B[snps,]
527
-##     newparamsBatch[[idxBatch]] <- gtypeCallerR(tmpA, tmpB, fIndex, mIndex,
528
-##                                                params[["centers"]][snps,],
529
-##                                                params[["scales"]][snps,],
530
-##                                                params[["N"]][snps,],
531
-##                                                IndexesBatch, cIndexes,
532
-##                                                sapply(IndexesBatch, length),
533
-##                                                sapply(cIndexes, length),
534
-##                                                SMEDIAN, theKnots,
535
-##                                                mixtureParams[], DF, probs, 0.025)
536
-##     last <- rSnps[2]
537
-##     rm(snps, rSnps, IndexesBatch, tmpA, tmpB)
538
-##   }
539
-##   rm(last)
540
-  
541
-  newparams <- vector("list", 3)
542
-  names(newparams) <- c("centers", "scales", "N")
543
-  newparams[["centers"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 1))
544
-  newparams[["scales"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 2))
545
-  newparams[["N"]] <- do.call("rbind", lapply(newparamsBatch, "[[", 3))
546
-  rm(newparamsBatch)
547
-  if(verbose) message("Done.")
548
-  if(verbose) message("Estimating recalibration parameters.")
549
-  d <- newparams[["centers"]] - params$centers
550
-
551
-  ##regression 
552
-  Index <- intersect(which(pmin(newparams[["N"]][, 1],
553
-                                newparams[["N"]][, 2],
554
-                                newparams[["N"]][, 3]) > recallMin &
555
-                                !apply(regionInfo, 1, any)),
556
-                                autosomeIndex)
557
-  if(length(Index) < recallRegMin){
558
-    warning("Recalibration not possible. Possible cause: small sample size.")
559
-    newparams <- params
560
-    dev <- vector("numeric", nrow(newparams[["centers"]]))
561
-    SS <- matrix(Inf, 3, 3)
562
-    DD <- 0
563
-  }else{
564
-    data4reg <- as.data.frame(newparams[["centers"]][Index,])
565
-    names(data4reg) <- c("AA", "AB", "BB")
566
-    regParams <- cbind(  coef(lm(AA~AB*BB, data=data4reg)),
567
-                       c(coef(lm(AB~AA+BB, data=data4reg)), 0), 
568
-                       coef(lm(BB~AA*AB, data=data4reg)))
569
-    rownames(regParams) <- c("intercept", "X", "Y", "XY")
570
-    rm(data4reg)
571
-  
572
-    minN <- 3
573
-    newparams[["centers"]][newparams[["N"]] < minN] <- NA
574
-    Index <- setdiff(which(rowSums(is.na(newparams[["centers"]]))==1), YIndex)
575
-    if(verbose) cat("Filling out empty centers")
576
-    for(i in Index){
577
-      if(verbose) if(i%%10000==0)cat(".")
578
-      mu <- newparams[["centers"]][i, ]
579
-      j <- which(is.na(mu))
580
-      newparams[["centers"]][i, j] <- c(1, mu[-j], prod(mu[-j]))%*%regParams[, j]
581
-    }
582
-    
583
-    ##remaing NAs are made like originals
584
-    if(length(YIndex)>0){
585
-      noMoveIndex <- union(setdiff(which(rowSums(is.na(newparams[["centers"]]))>0), YIndex), 
586
-                           YIndex[rowSums(is.na(newparams[["centers"]][YIndex, ])>1)])
587
-    }
588
-    snps2ignore <- which(rowSums(is.na(newparams[["centers"]])) > 0)
589
-    snps2keep <- setdiff(autosomeIndex, snps2ignore)
590
-    rm(snps2ignore)
591
-    newparams[["centers"]][is.na(newparams[["centers"]])] <- params[["centers"]][is.na(newparams[["centers"]])]
592
-    if(verbose) cat("\n")
593
-  
594
-    if(verbose) message("Calculating and standardizing size of shift.")
595
-    GG <- DD <- newparams[["centers"]] - params[["centers"]]
596
-    DD <- sweep(DD, 2, colMeans(DD[autosomeIndex, ]))
597
-    SS <- cov(DD[autosomeIndex, ])
598
-    SSI <- solve(SS)
599
-    dev <- vector("numeric", nrow(DD))
600
-    if(length(YIndex)){
601
-      dev[-YIndex] <- apply(DD[-YIndex, ], 1, function(x) x%*%SSI%*%x)
602
-      dev[-YIndex] <- 1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev[-YIndex])
603
-      ##Now Y (only two params)
604
-      SSY <- SS[c(1, 3), c(1, 3)]
605
-      SSI <- solve(SSY) 
606
-      dev[YIndex] <- apply(DD[YIndex, c(1, 3)], 1, function(x) x%*%SSI%*%x)
607
-      dev[YIndex] <- 1/sqrt( (2*pi)^2*det(SSY))*exp(-0.5*dev[YIndex])
608
-    } else {
609
-      dev=apply(DD,1,function(x) x%*%SSI%*%x)
610
-      dev=1/sqrt( (2*pi)^3*det(SS))*exp(-0.5*dev)
611
-    }
612
-  }
613
-    
614
-  ## BC: must keep SD
615
-  params[-2] <- newparams[-2]
616
-  
617
-  rm(newparams);gc(verbose=FALSE)  
618
-  if(verbose) cat("Calling", NR, "SNPs... ")
619
-  ## ###################
620
-  ## ## MOVE TO C#######
621
-
622
-  ## running in batches
623
-  ## snpBatches <- splitIndicesByLength(1:nrow(A), ocProbesets())
624
-
625
-  process2 <- function(idxBatch, snpBatches, autosomeIndex, XIndex,
626
-                       YIndex, A, B, mixtureParams, fIndex, mIndex,
627
-                       params, cIndexes, SMEDIAN, theKnots, DF, probs,
628
-                       regionInfo, batchSize){
629
-    open(A)
630
-    open(B)
631
-    open(mixtureParams)
632
-    snps <- snpBatches[[idxBatch]]
633
-    tmpA <- as.matrix(A[snps,])
634
-    tmpB <- as.matrix(B[snps,])
635
-    rSnps <- range(snps)
636
-    last <- (idxBatch-1)*batchSize
637
-    IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last,
638
-                         XIndex[XIndex %in% snps]-last,
639
-                         YIndex[YIndex %in% snps]-last)
640
-    IndexesBatch <- lapply(IndexesBatch, as.integer)
641
-    ImNull <- gtypeCallerR2(tmpA, tmpB, fIndex, mIndex,
642
-                            params[["centers"]][snps,],
643
-                            params[["scales"]][snps,],
644
-                            params[["N"]][snps,],
645
-                            IndexesBatch, cIndexes,
646
-                            sapply(IndexesBatch, length),
647
-                            sapply(cIndexes, length),
648
-                            SMEDIAN, theKnots, mixtureParams[],
649
-                            DF, probs, 0.025,
650
-                            which(regionInfo[snps, 2]),
651
-                            which(regionInfo[snps, 1]))
652
-    A[snps,] <- tmpA
653
-    B[snps,] <- tmpB
654
-    last <- rSnps[2]
655
-    rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull)
656
-    close(A)
657
-    close(B)
658
-    close(mixtureParams)
659
-  }
660
-
661
-  ocLapply(seq(along=snpBatches), process2, snpBatches=snpBatches,
662
-           autosomeIndex=autosomeIndex, XIndex=XIndex, YIndex=YIndex,
663
-           A=A, B=B, mixtureParams=mixtureParams, fIndex=fIndex,
664
-           mIndex=mIndex, params=params, cIndexes=cIndexes,
665
-           SMEDIAN=SMEDIAN, theKnots=theKnots, DF=DF, probs=probs,
666
-           regionInfo=regionInfo, batchSize=ocProbesets())
667
-  
668
-##   last <- 0
669
-##   for (idxBatch in seq(along=snpBatches)){
670
-##     snps <- snpBatches[[idxBatch]]
671
-##     tmpA <- A[snps,]
672
-##     tmpB <- B[snps,]
673
-##     rSnps <- range(snps)
674
-##     IndexesBatch <- list(autosomeIndex[autosomeIndex %in% snps]-last,
675
-##                          XIndex[XIndex %in% snps]-last,
676
-##                          YIndex[YIndex %in% snps]-last)
677
-##     IndexesBatch <- lapply(IndexesBatch, as.integer)
678
-##     ImNull <- gtypeCallerR2(tmpA, tmpB, fIndex, mIndex,
679
-##                             params[["centers"]][snps,],
680
-##                             params[["scales"]][snps,],
681
-##                             params[["N"]][snps,],
682
-##                             IndexesBatch, cIndexes,
683
-##                             sapply(IndexesBatch, length),
684
-##                             sapply(cIndexes, length),
685
-##                             SMEDIAN, theKnots, mixtureParams[],
686
-##                             DF, probs, 0.025,
687
-##                             which(regionInfo[snps, 2]),
688
-##                             which(regionInfo[snps, 1]))
689
-##     A[snps,] <- tmpA
690
-##     B[snps,] <- tmpB
691
-##     last <- rSnps[2]
692
-##     rm(tmpA, tmpB, snps, rSnps, IndexesBatch, ImNull)
693
-##   }
694
-##   
695
-##   gc(verbose=FALSE)
696
-  ##  END MOVE TO C#######
697
-  ## ##################
698
-  
699
-  dev <- dev/(dev+1/383)
700
-  if(!is.null(row.names)){ rownames(A) <- rownames(B) <- names(dev) <- row.names}
701
-  if(!is.null(col.names)){ colnames(A) <- colnames(B) <- col.names}
702
-
703
-  if(length(Index) >= recallRegMin){
704
-   tmp4batchQC <- DD[autosomeIndex,]/(params[["N"]][autosomeIndex,]+1)
705
-   tmpSnpQc <- dev[autosomeIndex]
706
-   SS <- cov(tmp4batchQC[tmpSnpQc < badSNP,])
707
-   batchQC <- mean(diag(SS))
708
-  }else{
709
-    SS <- matrix(0, 3, 3)
710
-    batchQC <- Inf
711
-  }
712
-  
713
-  if(verbose) message("Done.")
714
-  if (returnParams){
715
-    return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, params=params, DD=DD, covDD=SS, gender=gender, pkgname=pkgname))
716
-  }else{
717
-    return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, DD=DD, covDD=SS, gender=gender, pkgname=pkgname))
718
-  }
719
-}
720
-
721 289
 
722 290
 ##---------------------------------------------------------------------------
723 291
 ##---------------------------------------------------------------------------
... ...
@@ -853,17 +421,13 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
853 421
 		##  Here, I'm just using the # of rows returned from the above function
854 422
 		if(k == 1){
855 423
 			if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
856
-			load.obj <- loadObject("callSet", load.it)
857
-			if(!load.obj){
858
-				callSet <- new("SnpSuperSet",
859
-					       alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
860
-					       alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
861
-					       call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
862
-					       callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
863
-					       annotation=cdfName)
864
-				sampleNames(callSet) <- sns
865
-				save(callSet, file=file.path(ldPath(), "callSet.rda"))
866
-			} else load(file.path(ldPath(), "callSet.rda"))
424
+			callSet <- new("SnpSuperSet",
425
+				       alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
426
+				       alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
427
+				       call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
428
+				       callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
429
+				       annotation=cdfName)
430
+			sampleNames(callSet) <- sns
867 431
 			phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
868 432
 							   arrayNames=sns,
869 433
 							   arrayInfoColNames=arrayInfoColNames)
... ...
@@ -878,6 +442,7 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
878 442
 			##pData(callSet)$SNR <- rep(NA, length(sns))
879 443
 			pData(callSet)$gender <- rep(NA, length(sns))
880 444
 			mixtureParams <- initializeBigMatrix("crlmmMixt-", nr=4, nc=ncol(callSet), vmode="double")
445
+			save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda"))
881 446
 			if(missing(batch)){
882 447
 				protocolData(callSet)$batch <- rep(NA, length(sns))
883 448
 			} else{
... ...
@@ -907,6 +472,16 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
907 472
 		rm(res); gc()
908 473
 		k <- k+1
909 474
 	}
475
+	save(callSet, file=file.path(ldPath(), "callSet.rda"))
476
+	##otherwise, A and B get overwritten
477
+	##AA <- initializeBigMatrix("crlmmA", nrow(callSet), ncol(callSet), "integer")
478
+	##BB <- initializeBigMatrix("crlmmB", nrow(callSet), ncol(callSet), "integer")
479
+	##bb = ocProbesets()*ncol(A)*8
480
+	AA <- clone(A(callSet))
481
+	BB <- clone(B(callSet))
482
+	##ffrowapply(AA[i1:i2, ] <- A(callSet)[i1:i2, ], X=A(callSet), BATCHBYTES=bb)
483
+	##ffrowapply(BB[i1:i2, ] <- B(callSet)[i1:i2, ], X=B(callSet), BATCHBYTES=bb)
484
+	##crlmmGT2 overwrites A and B.
910 485
 	tmp <- crlmmGT2(A=A(callSet),
911 486
 			B=B(callSet),
912 487
 			SNR=callSet$SNR,
... ...
@@ -925,27 +500,13 @@ crlmmIlluminaRS <- function(sampleSheet=NULL,
925 500
 			badSNP=badSNP)
926 501
 	open(tmp[["calls"]])
927 502
 	open(tmp[["confs"]])
503
+	A(callSet) <- AA
504
+	B(callSet) <- BB
928 505
 	snpCall(callSet) <- tmp[["calls"]]
929 506
 	## MR: many zeros in the conf. scores (?)
930 507
 	snpCallProbability(callSet) <- tmp[["confs"]]
931 508
 	callSet$gender <- tmp$gender
932
-	if(copynumber){
933
-		load.obj <- loadObject("cnSet", load.it)
934
-		if(!load.obj){
935
-			cnSet <- as(callSet, "CNSetLM")
936
-		} else {
937
-			load(file.path(ldPath(), "cnSet.rda"))
938
-			A(cnSet) <- A(callSet)
939
-			B(cnSet) <- B(callSet)
940
-			snpCall(cnSet) <- snpCall(callSet)
941
-			snpCallProbability(cnSet) <- snpCallProbability(callSet)
942
-			annotation(cnSet) <- annotation(callSet)
943
-			featureData(cnSet) <- featureData(callSet)
944
-			protocolData(cnSet) <- protocolData(callSet)
945
-			phenoData(cnSet) <- phenoData(callSet)
946
-			experimentData(cnSet) <- experimentData(callSet)
947
-		}
948
-	}
509
+	if(copynumber) cnSet <- as(callSet, "CNSetLM")
949 510
 	close(mixtureParams)
950 511
 	rm(tmp); gc()
951 512
 	return(cnSet)
... ...
@@ -1298,7 +859,7 @@ crlmmCopynumber <- function(object,
1298 859
 }
1299 860
 
1300 861
 crlmmCopynumber2 <- function(object,
1301
-			    which.batches,
862
+			     which.batches,
1302 863
 			    MIN.SAMPLES=10,
1303 864
 			    SNRMin=5,
1304 865
 			    MIN.OBS=1,
... ...
@@ -1313,8 +874,7 @@ crlmmCopynumber2 <- function(object,
1313 874
 			    MIN.NU=2^3,
1314 875
 			    MIN.PHI=2^3,
1315 876
 			    THR.NU.PHI=TRUE,
1316
-			    thresholdCopynumber=TRUE,
1317
-			     load.it=TRUE){
877
+			    thresholdCopynumber=TRUE){
1318 878
 	stopifnot("batch" %in% varLabels(protocolData(object)))
1319 879
 	stopifnot("chromosome" %in% fvarLabels(object))
1320 880
 	stopifnot("position" %in% fvarLabels(object))
... ...
@@ -1325,9 +885,7 @@ crlmmCopynumber2 <- function(object,
1325 885
 	XIndex.nps <- (1:nrow(object))[chromosome(object) == 23 & !isSnp(object) & !is.na(chromosome(object))]
1326 886
 	autosomeIndex.snps <- (1:nrow(object))[chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object))]
1327 887
 	autosomeIndex.nps <- (1:nrow(object))[chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object))]	
1328
-	##Indexes <- list(autosomeIndex, XIndex, YIndex)
1329
-	##snpBatches <- splitIndicesByLength(1:nrow(object), ocProbesets())
1330
-	snpBatches <- splitIndicesByLength(autosomeIndex.snps, ocProbesets())
888
+
1331 889
 	## Do chromosome X in batches
1332 890
 	Ns <- initializeBigMatrix("Ns", nrow(object), 5)
1333 891
 	colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
... ...
@@ -1345,25 +903,26 @@ crlmmCopynumber2 <- function(object,
1345 903
 				
1346 904
 	} 
1347 905
 	if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
906
+	snpBatches <- splitIndicesByLength(autosomeIndex.snps, ocProbesets())
1348 907
 	ocLapply(seq(along=snpBatches),
1349
-			fit.lm1,
1350
-			autosomeIndex=autosomeIndex.snps,
1351
-			object=object,
1352
-			Ns=Ns,
1353
-			normal=normal,
1354
-			snpflags=snpflags,
1355
-			snpBatches=snpBatches,
1356
-			batchSize=ocProbesets(),
1357
-			SNRMin=SNRMin,
1358
-			MIN.SAMPLES=MIN.SAMPLES,
1359
-			MIN.OBS=MIN.OBS,
1360
-			DF=DF.PRIOR,
1361
-			GT.CONF.THR=GT.CONF.THR,
1362
-			THR.NU.PHI=THR.NU.PHI,
1363
-			MIN.NU=MIN.NU,
1364
-			MIN.PHI=MIN.PHI,
1365
-			verbose=verbose,
1366
-			neededPkgs="crlmm")
908
+		 fit.lm1,
909
+		 autosomeIndex=autosomeIndex.snps,
910
+		 object=object,
911
+		 Ns=Ns,
912
+		 normal=normal,
913
+		 snpflags=snpflags,
914
+		 snpBatches=snpBatches,
915
+		 batchSize=ocProbesets(),
916
+		 SNRMin=SNRMin,
917
+		 MIN.SAMPLES=MIN.SAMPLES,
918
+		 MIN.OBS=MIN.OBS,
919
+		 DF=DF.PRIOR,
920
+		 GT.CONF.THR=GT.CONF.THR,
921
+		 THR.NU.PHI=THR.NU.PHI,
922
+		 MIN.NU=MIN.NU,
923
+		 MIN.PHI=MIN.PHI,
924
+		 verbose=verbose,
925
+		 neededPkgs="crlmm")
1367 926
 	## autosomal NPs
1368 927
 	snpBatches <- splitIndicesByLength(autosomeIndex.nps, ocProbesets())
1369 928
 	if(verbose) message("Estimating total copy number at nonpolymorphic loci")
... ...
@@ -1387,7 +946,7 @@ crlmmCopynumber2 <- function(object,
1387 946
 			verbose=verbose,
1388 947
 		 neededPkgs="crlmm")
1389 948
 	snpBatches <- splitIndicesByLength(XIndex.snps, ocProbesets())
1390
-	if(verbose) message("Estimating total copy number at polymorphic loci on chromosome X")
949
+	if(verbose) message("Estimating allele-specific copy number at polymorphic loci on chromosome X")
1391 950
 	ocLapply(seq(along=snpBatches),
1392 951
 			fit.lm3,
1393 952
 			autosomeIndex=XIndex.snps,
... ...
@@ -1409,7 +968,6 @@ crlmmCopynumber2 <- function(object,
1409 968
 		 neededPkgs="crlmm")
1410 969
 	if(verbose) message("Estimating total copy number for nonpolymorphic loci on chromosome X")
1411 970
 	snpBatches <- splitIndicesByLength(XIndex.nps, ocProbesets())
1412
-	if(verbose) message("Estimating total copy number at nonpolymorphic loci on chromosome X")
1413 971
 	tmp <- ocLapply(seq(along=snpBatches),
1414 972
 			fit.lm4,
1415 973
 			XIndex=XIndex.nps,
... ...
@@ -1432,6 +990,7 @@ crlmmCopynumber2 <- function(object,
1432 990
 	return(object)
1433 991
 }
1434 992
 
993
+
1435 994
 fit.lm1 <- function(idxBatch,
1436 995
 		    snpBatches,
1437 996
 		    autosomeIndex,
... ...
@@ -1449,25 +1008,32 @@ fit.lm1 <- function(idxBatch,
1449 1008
 		    MIN.NU,
1450 1009
 		    MIN.PHI,
1451 1010
 		    verbose, ...){
1452
-			##   which.batches, ...){
1453 1011
 	if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches))
1012
+	snps <- snpBatches[[idxBatch]]
1013
+	batches <- split(seq(along=batch(object)), batch(object))
1014
+	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1015
+	
1454 1016
 	open(object)
1455 1017
 	open(snpflags)
1456 1018
 	open(normal)
1457
-	snps <- snpBatches[[idxBatch]]
1458
-		batches <- split(seq(along=batch(object)), batch(object))
1459
-	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1460
-	corr <- corrA.BB <- corrB.AA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batches)))
1019
+
1020
+	corr <- corrA.BB <- corrB.AA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batch(object))))
1461 1021
 	flags <- nuA <- nuB <- phiA <- phiB <- corr
1022
+
1023
+	normal.snps <- normal[snps, ]
1462 1024
 	cB <- cA <- matrix(NA, length(snps), ncol(object))
1025
+	GG <- as.matrix(calls(object)[snps, ])
1026
+	CP <- as.matrix(snpCallProbability(object)[snps, ])
1027
+	AA <- as.matrix(A(object)[snps, ])
1028
+	BB <- as.matrix(B(object)[snps, ])
1463 1029
 	for(k in batches){
1464
-		G <- calls(object)[snps, k]
1465
-		NORM <- normal[snps, k]
1466
-		xx <- snpCallProbability(object)[snps, k]
1030
+		G <- GG[, k]
1031
+		NORM <- normal.snps[, k]
1032
+		xx <- CP[, k]
1467 1033
 		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1468 1034
 		G <- G*highConf*NORM
1469
-		A <- A(object)[snps, k]
1470
-		B <- B(object)[snps, k]
1035
+		A <- AA[, k]
1036
+		B <- BB[, k]
1471 1037
 		##index <- GT.B <- GT.A <- vector("list", 3)
1472 1038
 		##names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
1473 1039
 		Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G)
... ...
@@ -1490,7 +1056,7 @@ fit.lm1 <- function(idxBatch,
1490 1056
 		corr[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=2, DF.PRIOR)
1491 1057
 		corrB.AA[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=1, DF.PRIOR)
1492 1058
 		corrA.BB[, J] <- corByGenotype(A=A, B=B, G=G, Ns=Ns, which.cluster=3, DF.PRIOR)
1493
-		##formerly oneBatch()...
1059
+
1494 1060
 		##---------------------------------------------------------------------------
1495 1061
 		## Impute sufficient statistics for unobserved genotypes (plate-specific)
1496 1062
 		##---------------------------------------------------------------------------
... ...
@@ -1532,6 +1098,8 @@ fit.lm1 <- function(idxBatch,
1532 1098
 			muA[index[[j]], -kk] <- mus[, 1:2]
1533 1099
 			muB[index[[j]], -kk] <- mus[, 3:4]
1534 1100
 		}
1101
+		rm(betahat, X, Y, mus, index, noAA, noAB, noBB, res)
1102
+		gc()
1535 1103
 		negA <- rowSums(muA < 0) > 0
1536 1104
 		negB <- rowSums(muB < 0) > 0
1537 1105
 		flags[, J] <- rowSums(Ns == 0) > 0
... ...
@@ -1561,21 +1129,53 @@ fit.lm1 <- function(idxBatch,
1561 1129
 		## formerly polymorphic():  calculate copy number
1562 1130
 		cA[, k] <- matrix((1/phiA[, J]*(A-nuA[, J])), nrow(A), ncol(A))
1563 1131
 		cB[, k] <- matrix((1/phiB[, J]*(B-nuB[, J])), nrow(B), ncol(B))
1132
+		rm(G, A, B, NORM, wA, wB, YA,YB, res, negA, negB, Np, Ns)
1133
+		gc()
1564 1134
 	}
1135
+
1136
+	cA[cA < 0.05] <- 0.05
1137
+	cB[cB < 0.05] <- 0.05
1138
+	cA[cA > 5] <-  5
1139
+	cB[cB > 5] <- 5
1565 1140
 	cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA))
1566 1141
 	cB <- matrix(as.integer(cB*100), nrow(cB), ncol(cB))
1142
+
1143
+
1144
+	
1567 1145
 	CA(object)[snps, ] <- cA
1568 1146
 	CB(object)[snps, ] <- cB
1147
+
1148
+	
1569 1149
 	snpflags[snps, ] <- flags
1570 1150
 	lapply(lM(object), open)
1571
-	lM(object)$tau2A[snps, ] <- tau2A
1572
-	lM(object)$tau2B[snps, ] <- tau2B
1573
-	lM(object)$sig2A[snps, ] <- sig2A
1574
-	lM(object)$sig2B[snps, ] <- sig2B
1575
-	lM(object)$nuA[snps, ] <- nuA
1576
-	lM(object)$nuB[snps, ] <- nuB
1577
-	lM(object)$phiA[snps, ] <- phiA
1578
-	lM(object)$phiB[snps, ] <- phiB
1151
+	
1152
+	tmp <- physical(lM(object))$tau2A
1153
+	tmp[snps, ] <- tau2A
1154
+	lM(object)$tau2A <- tmp
1155
+	tmp <- physical(lM(object))$tau2B
1156
+	tmp[snps, ] <- tau2B
1157
+	lM(object)$tau2B <- tmp
1158
+	tmp <- physical(lM(object))$tau2B
1159
+	tmp[snps, ] <- tau2B
1160
+	lM(object)$tau2B <- tmp
1161
+	tmp <- physical(lM(object))$sig2A
1162
+	tmp[snps, ] <- sig2A
1163
+	lM(object)$sig2A <- tmp
1164
+	tmp <- physical(lM(object))$sig2B
1165
+	tmp[snps, ] <- sig2B
1166
+	lM(object)$sig2B <- tmp
1167
+	tmp <- physical(lM(object))$nuA
1168
+	tmp[snps, ] <- nuA
1169
+	lM(object)$nuA <- tmp
1170
+	tmp <- physical(lM(object))$nuB
1171
+	tmp[snps, ] <- nuB
1172
+	lM(object)$nuB <- tmp
1173
+	tmp <- physical(lM(object))$phiA
1174
+	tmp[snps, ] <- phiA
1175
+	lM(object)$phiA <- tmp
1176
+	tmp <- physical(lM(object))$phiB
1177
+	tmp[snps, ] <- phiB
1178
+	lM(object)$phiB <- tmp
1579 1179
 	lapply(assayData(object), close)
1580 1180
 	lapply(lM(object), close)
1581 1181
 	TRUE
... ...
@@ -1600,45 +1200,56 @@ fit.lm2 <- function(idxBatch,
1600 1200
 		    verbose, ...){
1601 1201
 			##   which.batches, ...){
1602 1202
 	if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches))
1603
-	open(object)
1604
-	open(snpflags)
1605
-	open(normal)
1606 1203
 	snps <- snpBatches[[idxBatch]]
1607 1204
 	batches <- split(seq(along=batch(object)), batch(object))
1608 1205
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1206
+	
1207
+	open(object)
1208
+	open(snpflags)
1209
+	open(normal)
1210
+
1211
+	
1609 1212
 	cA <- matrix(NA, length(snps), ncol(object))
1610 1213
 	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
1611
-	flags <- snpflags[, ]
1612
-	noflags <- rowSums(flags) == 0
1214
+	flags <- as.matrix(snpflags[,])  
1215
+	noflags <- rowSums(flags, na.rm=TRUE) == 0  ##NA's for unevaluated batches
1613 1216
 	## We do not want to write to discuss for each batch.  More efficient to
1614 1217
 	## write to disk after estimating these parameters for all batches.
1615
-	nuA.np <- phiA.np <- sig2A.np <- matrix(NA, length(snps), length(unique(batches)))
1218
+	nuA.np <- phiA.np <- sig2A.np <- matrix(NA, length(snps), length(unique(batch(object))))
1616 1219
 	## for imputation, we need the corresponding parameters of the snps
1617
-	nuA <- lM(object)$nuA
1618
-	phiA <- lM(object)$phiA
1619
-	nuB <- lM(object)$nuB
1620
-	phiB <- lM(object)$phiB
1621
-	snp.ind <- ii & noflags
1622
-	NORM.np <- normal[snps, ]
1220
+	NN <- min(10e3, length(which(ii & noflags)))
1221
+	snp.ind <- sample(which(ii & noflags), NN)
1222
+	nnuA.snp <- as.matrix(physical(lM(object))$nuA[snp.ind,])
1223
+	pphiA.snp <- as.matrix(physical(lM(object))$phiA[snp.ind,])
1224
+	nnuB.snp <- as.matrix(physical(lM(object))$nuB[snp.ind,])
1225
+	pphiB.snp <- as.matrix(physical(lM(object))$phiB[snp.ind,])
1226
+
1227
+	AA.snp <- as.matrix(A(object)[snp.ind, ])
1228
+	BB.snp <- as.matrix(B(object)[snp.ind, ])
1229
+	NNORM.snp <- as.matrix(normal[snp.ind, ])
1230
+	NORM.np <- as.matrix(normal[snps, ])
1231
+	AA.np <- as.matrix(A(object)[snps, ])
1232
+	GG <- as.matrix(calls(object)[snp.ind, ])
1233
+	CP <- as.matrix(snpCallProbability(object)[snp.ind, ])
1623 1234
 	for(k in batches){
1624 1235
 		##if(verbose) message("SNP batch ", ii, " of ", length(batches))
1625 1236
 		J <- match(unique(batch(object)[k]), unique(batch(object)))
1626
-		snp.index <- snp.ind & nuA[, J] > 20 & nuB[, J] > 20 & phiA[, J] > 20 & phiB[, J] > 20
1627
-		if(sum(snp.index) >= 5000){
1628
-			snp.index <- sample(which(snp.index), 5000)
1629
-		}
1630
-		phiA.snp <- phiA[snp.index, J]
1631
-		phiB.snp <- phiB[snp.index, J]
1632
-		A.snp <- A(object)[snp.index, k]
1633
-		B.snp <- B(object)[snp.index, k]
1634
-		NORM.snp <- normal[snp.index, k]
1635
-		G <- calls(object)[snp.index, k]
1636
-		xx <- snpCallProbability(object)[snp.index, k]
1237
+##		snp.index <- snp.ind & nuA[, J] > 20 & nuB[, J] > 20 & phiA[, J] > 20 & phiB[, J] > 20
1238
+##		if(sum(snp.index) >= 5000){
1239
+##			snp.index <- sample(which(snp.index), 5000)
1240
+##		} else snp.index <- which(snp.index)
1241
+		phiA.snp <- pphiA.snp[, J]
1242
+		phiB.snp <- pphiB.snp[, J]
1243
+		A.snp <- AA.snp[, k]
1244
+		B.snp <- BB.snp[, k]
1245
+		NORM.snp <- NNORM.snp[, k]
1246
+		G <- GG[, k]
1247
+		xx <- CP[, k]
1637 1248
 		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1638
-		G <- G*highConf*NORM.snp[, k]
1249
+		G <- G*highConf*NORM.snp
1639 1250
 		G[G==0] <- NA
1640 1251
 		##nonpolymorphic
1641
-		A.np <- A(object)[snps, k]
1252
+		A.np <- AA.np[, k]
1642 1253
 		Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G)
1643 1254
 		muA <- applyByGenotype(A.snp, rowMedians, G)
1644 1255
 		muB <- applyByGenotype(B.snp, rowMedians, G)
... ...
@@ -1660,13 +1271,27 @@ fit.lm2 <- function(idxBatch,
1660 1271
 		}
1661 1272
 		cA[, k] <- 1/phiA.np[, J] * (A.np - nuA.np[, J])
1662 1273
 		sig2A.np[, J] <- rowMAD(log2(A.np*NORM.np[, k]), na.rm=TRUE)
1274
+		rm(NORM.snp, highConf, xx, G, Ns, A.np, X, Y, betahat, mus, logPhiT)
1275
+		gc()
1663 1276
 	}
1277
+
1278
+	cA[cA < 0.05] <- 0.05
1279
+	cA[cA > 5] <-  5
1664 1280
 	cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA))
1665 1281
 	CA(object)[snps, ] <- cA
1666 1282
 	##open(lM(object))
1667
-	lM(object)$sig2A[snps, ] <- sig2A.np
1668
-	lM(object)$nuA[snps, ] <- nuA.np
1669
-	lM(object)$phiA[snps, ] <- phiA.np
1283
+	tmp <- physical(lM(object))$nuA
1284
+	tmp[snps, ] <- nuA.np
1285
+	lM(object)$nuA <- tmp
1286
+	tmp <- physical(lM(object))$sig2A
1287
+	tmp[snps, ] <- sig2A.np
1288
+	lM(object)$sig2A <- tmp
1289
+	tmp <- physical(lM(object))$phiA
1290
+	tmp[snps, ] <- phiA.np
1291
+	lM(object)$sig2A <- tmp
1292
+##	lM(object)$sig2A[snps, ] <- sig2A.np
1293
+##	lM(object)$nuA[snps, ] <- nuA.np
1294
+##	lM(object)$phiA[snps, ] <- phiA.np
1670 1295
 	lapply(assayData(object), close)
1671 1296
 	lapply(lM(object), close)
1672 1297
 	TRUE
... ...
@@ -1692,13 +1317,14 @@ fit.lm3 <- function(idxBatch,
1692 1317
 		    verbose, ...){
1693 1318
 			##   which.batches, ...){
1694 1319
 	if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches))
1320
+		snps <- snpBatches[[idxBatch]]
1321
+	batches <- split(seq(along=batch(object)), batch(object))
1322
+	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1323
+	
1695 1324
 	open(snpflags)
1696 1325
 	open(normal)
1697 1326
 	open(object)
1698
-	snps <- snpBatches[[idxBatch]]
1699
-	batches <- split(seq(along=batch(object)), batch(object))
1700
-	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1701
-	corrAB <- corrBB <- corrAA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batches)))
1327
+	corrAB <- corrBB <- corrAA <- sig2B <- sig2A <- tau2B <- tau2A <- matrix(NA, length(snps), length(unique(batch(object))))
1702 1328
 	phiA2 <- phiB2 <- tau2A
1703 1329
 	flags <- nuA <- nuB <- phiA <- phiB <- corrAB
1704 1330
 	cB <- cA <- matrix(NA, length(snps), ncol(object))
... ...
@@ -1706,15 +1332,21 @@ fit.lm3 <- function(idxBatch,
1706 1332
 	IX <- matrix(gender, length(snps), ncol(object))
1707 1333
 	NORM <- normal[snps,]
1708 1334
 	IX <- IX==2
1335
+
1336
+	GG <- as.matrix(calls(object)[snps, ])
1337
+	CP <- as.matrix(snpCallProbability(object)[snps,])
1338
+	AA <- as.matrix(A(object)[snps, ])
1339
+	BB <- as.matrix(B(object)[snps, ])
1709 1340
 	for(k in batches){
1710 1341
 		##if(verbose) message("SNP batch ", ii, " of ", length(batches)) 
1711 1342
 		## within-genotype moments
1712
-		G <- calls(object)[snps, k]
1713
-		xx <- snpCallProbability(object)[snps, k]
1343
+		gender <- object$gender[k]
1344
+		G <- GG[, k]
1345
+		xx <- CP[, k]
1714 1346
 		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1715 1347
 		G <- G*highConf*NORM[, k]
1716
-		A <- A(object)[snps, k]
1717
-		B <- B(object)[snps, k]
1348
+		A <- AA[, k]
1349
+		B <- BB[, k]
1718 1350
 		##index <- GT.B <- GT.A <- vector("list", 3)
1719 1351
 		##names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB")
1720 1352
 		Ns.F <- applyByGenotype(matrix(1, nrow(G), sum(gender==2)), rowSums, G[, gender==2])
... ...
@@ -1753,10 +1385,10 @@ fit.lm3 <- function(idxBatch,
1753 1385
 		correct.orderB <- muB.F[, 3] > muB.F[, 1]
1754 1386
 		index.complete <- intersect(which(correct.orderA & correct.orderB), intersect(index[[1]], intersect(index[[2]], index[[3]])))
1755 1387
 		size <- min(5000, length(index.complete))
1756
-		if(size == 5000) index.complete <- sample(index.complete, 5000, replace=TRUE)
1757 1388
 		if(length(index.complete) < 200){
1758 1389
 			stop("fewer than 200 snps pass criteria for predicting the sufficient statistics")
1759 1390
 		}
1391
+		if(size==5000) index.complete <- sample(index.complete, size)
1760 1392
 		index <- vector("list", 3)
1761 1393
 		index[[1]] <- which(Ns.F[, 1] == 0 & (Ns.F[, 2] >= MIN.OBS & Ns.F[, 3] >= MIN.OBS))
1762 1394
 		index[[2]] <- which(Ns.F[, 2] == 0 & (Ns.F[, 1] >= MIN.OBS & Ns.F[, 3] >= MIN.OBS))
... ...
@@ -1772,10 +1404,11 @@ fit.lm3 <- function(idxBatch,
1772 1404
 		complete[[2]] <- which(correct.orderA & correct.orderB & nobsB & notMissing) ##be selective here	
1773 1405
 		size <- min(5000, length(complete[[1]]))
1774 1406
 		if(size > 5000) complete <- lapply(complete, function(x) sample(x, size))
1407
+		##
1775 1408
 		res <- imputeCenterX(muA.M, muB.M, Ns.M, complete, MIN.OBS)
1776 1409
 		muA.M <- res[[1]]
1777 1410
 		muB.M <- res[[2]]
1778
-
1411
+		##
1779 1412
 		## Monomorphic SNPs.  Mixture model may be better
1780 1413
 		## Improve estimation by borrowing strength across batch
1781 1414
 		noAA <- Ns.F[, 1] < MIN.OBS
... ...
@@ -1833,9 +1466,17 @@ fit.lm3 <- function(idxBatch,
1833 1466
 		phistar <- phiB2[, J]/phiA[, J]
1834 1467
 		tmp <- (B-nuB[, J] - phistar*A + phistar*nuA[, J])/phiB[, J]
1835 1468
 		cB[, k] <- tmp/(1-phistar*phiA2[, J]/phiB[, J])
1836
-		cA[, k] <- (A-nuA[, J]-phiA2[, J]*cB)/phiA[, J]
1469
+		cA[, k] <- (A-nuA[, J]-phiA2[, J]*cB[, k])/phiA[, J]
1837 1470
 		##some of the snps are called for the men, but not the women
1471
+		rm(YA, YB, wA, wB, res, tmp, phistar, A, B, G, index)
1472
+		gc()
1838 1473
 	}
1474
+
1475
+	cA[cA < 0.05] <- 0.05
1476
+	cB[cB < 0.05] <- 0.05
1477
+	cA[cA > 5] <-  5
1478
+	cB[cB > 5] <- 5
1479
+	
1839 1480
 	##--------------------------------------------------
1840 1481
 	##RS: need to fix.  why are there NA's by coercion
1841 1482
 	cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA))
... ...
@@ -1846,16 +1487,49 @@ fit.lm3 <- function(idxBatch,
1846 1487
 	CA(object)[snps, ] <- cA
1847 1488
 	CB(object)[snps, ] <- cB
1848 1489
 	snpflags[snps, ] <- flags
1849
-	lM(object)$tau2A[snps, ] <- tau2A
1850
-	lM(object)$tau2B[snps, ] <- tau2B
1851
-	lM(object)$sig2A[snps, ] <- sig2A
1852
-	lM(object)$sig2B[snps, ] <- sig2B
1853
-	lM(object)$nuA[snps, ] <- nuA
1854
-	lM(object)$nuB[snps, ] <- nuB
1855
-	lM(object)$phiA[snps, ] <- phiA
1856
-	lM(object)$phiB[snps, ] <- phiB
1857
-	lM(object)$phiPrimeA[snps, ] <- phiA2
1858
-	lM(object)$phiPrimeB[snps, ] <- phiB2
1490
+	tmp <- physical(lM(object))$tau2A
1491
+	tmp[snps, ] <- tau2A
1492
+	lM(object)$tau2A <- tmp
1493
+	tmp <- physical(lM(object))$tau2B
1494
+	tmp[snps, ] <- tau2B
1495
+	lM(object)$tau2B <- tmp
1496
+	tmp <- physical(lM(object))$tau2B
1497
+	tmp[snps, ] <- tau2B
1498
+	lM(object)$tau2B <- tmp
1499
+	tmp <- physical(lM(object))$sig2A
1500
+	tmp[snps, ] <- sig2A
1501
+	lM(object)$sig2A <- tmp
1502
+	tmp <- physical(lM(object))$sig2B
1503
+	tmp[snps, ] <- sig2B
1504
+	lM(object)$sig2B <- tmp
1505
+	tmp <- physical(lM(object))$nuA
1506
+	tmp[snps, ] <- nuA
1507
+	lM(object)$nuA <- tmp
1508
+	tmp <- physical(lM(object))$nuB
1509
+	tmp[snps, ] <- nuB
1510
+	lM(object)$nuB <- tmp
1511
+	tmp <- physical(lM(object))$phiA
1512
+	tmp[snps, ] <- phiA
1513
+	lM(object)$phiA <- tmp
1514
+	tmp <- physical(lM(object))$phiB
1515
+	tmp[snps, ] <- phiB
1516
+	lM(object)$phiB <- tmp
1517
+	tmp <- physical(lM(object))$phiPrimeA
1518
+	tmp[snps, ] <- phiA2
1519
+	lM(object)$phiPrimeA <- tmp
1520
+	tmp <- physical(lM(object))$phiPrimeB
1521
+	tmp[snps, ] <- phiB2
1522
+	lM(object)$phiPrimeB <- tmp
1523
+##	lM(object)$tau2A[snps, ] <- tau2A
1524
+##	lM(object)$tau2B[snps, ] <- tau2B
1525
+##	lM(object)$sig2A[snps, ] <- sig2A
1526
+##	lM(object)$sig2B[snps, ] <- sig2B
1527
+##	lM(object)$nuA[snps, ] <- nuA
1528
+##	lM(object)$nuB[snps, ] <- nuB
1529
+##	lM(object)$phiA[snps, ] <- phiA
1530
+##	lM(object)$phiB[snps, ] <- phiB
1531
+##	lM(object)$phiPrimeA[snps, ] <- phiA2
1532
+##	lM(object)$phiPrimeB[snps, ] <- phiB2
1859 1533
 	lapply(assayData(object), close)
1860 1534
 	lapply(lM(object), close)
1861 1535
 	TRUE
... ...
@@ -1885,39 +1559,51 @@ fit.lm4 <- function(idxBatch,
1885 1559
 	snps <- snpBatches[[idxBatch]]
1886 1560
 	batches <- split(seq(along=batch(object)), batch(object))
1887 1561
 	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
1888
-	nuA <- phiA <- sig2A <- tau2A <- matrix(NA, length(snps), length(unique(batches)))
1562
+	nuA <- phiA <- sig2A <- tau2A <- matrix(NA, length(snps), length(unique(batch(object))))
1889 1563
 	cA <- matrix(NA, length(snps), ncol(object))
1890 1564
 	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
1891 1565
 	flags <- snpflags[ii, , drop=FALSE]
1892
-	noflags <- rowSums(flags) == 0
1566
+	noflags <- rowSums(flags, na.rm=TRUE) == 0
1893 1567
 	lapply(lM(object), open)
1894
-	nuIA <- lM(object)$nuA[ii, ]
1895
-	nuIB <- lM(object)$nuB[ii, ]
1896
-	phiIA <- lM(object)$phiA[ii,]
1897
-	phiIB <- lM(object)$phiB[ii,]
1898
-
1899
-	snp.index <- which(nuIA > 20 & nuIB & 20 & phiIA > 20 & phiIB > 20 & noflags)
1568
+	nuIA <- physical(lM(object))$nuA[ii, ]
1569
+	nuIB <- physical(lM(object))$nuB[ii, ]
1570
+	phiIA <- physical(lM(object))$phiA[ii,]
1571
+	phiIB <- physical(lM(object))$phiB[ii,]
1572
+
1573
+	i1 <- rowSums(nuIA < 20, na.rm=TRUE) == 0
1574
+	i2 <- rowSums(nuIB < 20, na.rm=TRUE) == 0
1575
+	i3 <- rowSums(phiIA < 20, na.rm=TRUE) == 0
1576
+	i4 <- rowSums(phiIB < 20, na.rm=TRUE) == 0
1577
+	
1578
+	snp.index <- which(i1 & i2 & i3 & i4 & noflags)
1579
+	if(length(snp.index) == 0){
1580
+		warning("No snps meet the following criteria: (1) nu and phi > 20 and (2) at least MIN.OBS in each genotype cluster. CN not estimated for nonpolymorphic loci on X")
1581
+		return(TRUE)
1582
+	}
1900 1583
 	if(length(snp.index) >= 5000){
1901 1584
 		snp.index <- sample(snp.index, 5000)
1902 1585
 	}
1903
-	phiA.snp <- lM(object)$phiA[snp.index, , drop=FALSE]
1904
-	phiB.snp <- lM(object)$phiB[snp.index, , drop=FALSE]
1905
-	A.snp <- A(object)[snp.index, ]
1906
-	B.snp <- B(object)[snp.index, ]
1907
-	NORM.snp <- normal[snp.index, ]
1908
-	NORM.np <- normal[snps, ]
1586
+	phiA.snp <- physical(lM(object))$phiA[snp.index, , drop=FALSE]
1587
+	phiB.snp <- physical(lM(object))$phiB[snp.index, , drop=FALSE]
1588
+	A.snp <- as.matrix(A(object)[snp.index, ])
1589
+	B.snp <- as.matrix(B(object)[snp.index, ])
1590
+	NORM.snp <- as.matrix(normal[snp.index, ])
1591
+	NORM.np <- as.matrix(normal[snps, ])
1909 1592
 	gender <- object$gender
1910 1593
 
1911 1594
 
1912 1595
 	pseudoAR <- position(object)[snps] < 2709520 | (position(object)[snps] > 154584237 & position(object)[snps] < 154913754)
1913 1596
 	pseudoAR[is.na(pseudoAR)] <- FALSE
1914
-	
1597
+
1598
+	GG <- as.matrix(calls(object)[snp.index, ])
1599
+	CP <- as.matrix(snpCallProbability(object)[snp.index, ])
1600
+	AA.np <- as.matrix(A(object)[snps, ])
1915 1601
 	##if(missing(which.batches)) which.batches <- seq(along=batches)
1916 1602
 	##batches <- batches[which.batches]
1917 1603
 	for(k in batches){
1918 1604
 		##if(verbose) message("SNP batch ", ii, " of ", length(batches)) 
1919
-		G <- calls(object)[snp.index, k]
1920
-		xx <- snpCallProbability(object)[snp.index, k]
1605
+		G <- GG[, k]
1606
+		xx <- CP[, k]
1921 1607
 		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
1922 1608
 		G <- G*highConf*NORM.snp[, k]
1923 1609
 		##snps
... ...
@@ -1947,7 +1633,7 @@ fit.lm4 <- function(idxBatch,
1947 1633
 		
1948 1634
 
1949 1635
 		##nonpolymorphic
1950
-		A <- A(object)[snps, k]
1636
+		A <- AA.np[, k]
1951 1637
 		gend <- gender[k]
1952 1638
 		A.M <- A[, gend==1]
1953 1639
 		mu1 <- rowMedians(A.M, na.rm=TRUE)
... ...
@@ -1985,185 +1671,32 @@ fit.lm4 <- function(idxBatch,
1985 1671
 		tmp[, gend==1] <- CT1
1986 1672
 		tmp[, gend==2] <- CT2
1987 1673
 		cA[, k] <- tmp
1674
+		rm(tmp, CT1, CT2, A.F, normal.f, G, AA, BB, Y, X, Ns)
1675
+		gc()
1988 1676
 	}
1677
+
1678
+	cA[cA < 0.05] <- 0.05
1679
+	cA[cA > 5] <-  5
1989 1680
 	cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA))
1990 1681
 	CA(object)[snps, ] <- cA
1991 1682
 	open(lM(object))
1992
-	lM(object)$sig2A[snps, ] <- sig2A
1993
-	lM(object)$nuA[snps, ] <- nuA
1994
-	lM(object)$phiA[snps, ] <- phiA
1683
+	tmp <- physical(lM(object))$nuA
1684
+	tmp[snps, ] <- nuA
1685
+	lM(object)$nuA <- tmp
1686
+	tmp <- physical(lM(object))$sig2A
1687
+	tmp[snps, ] <- sig2A
1688
+	lM(object)$sig2A <- tmp
1689
+	tmp <- physical(lM(object))$phiA
1690
+	tmp[snps, ] <- phiA
1691
+	lM(object)$sig2A <- tmp
1692
+##	lM(object)$sig2A[snps, ] <- sig2A
1693
+##	lM(object)$nuA[snps, ] <- nuA
1694
+##	lM(object)$phiA[snps, ] <- phiA
1995 1695
 	lapply(assayData(object), close)
1996 1696
 	lapply(lM(object), close)
1997 1697
 	TRUE
1998 1698
 }
1999 1699
 
2000
-
2001
-crlmmWrapper <- function(filenames, cnOptions, ...){
2002
-	cdfName <- cnOptions[["cdfName"]]
2003
-	load.it <- cnOptions[["load.it"]]
2004
-	save.it <- cnOptions[["save.it"]]
2005
-	splitByChr <- cnOptions[["splitByChr"]]
2006
-	crlmmFile <- cnOptions[["crlmmFile"]]
2007
-	intensityFile=cnOptions[["intensityFile"]]
2008
-	rgFile=cnOptions[["rgFile"]]
2009
-	##use.ff=cnOptions[["use.ff"]]
2010
-	outdir <- cnOptions[["outdir"]]
2011
-	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")
2012
-	platform <- whichPlatform(cdfName)
2013
-	if(!(platform %in% c("affymetrix", "illumina"))){
2014
-		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
2015
-	} else {
2016
-		message("Checking whether annotation package for the ", platform, " platform is available")
2017
-		if(!isValidCdfName(cdfName)){
2018
-			cat("FALSE\n")
2019
-			stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
2020
-		} else cat("TRUE\n")
2021
-	}
2022
-	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
2023
-	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
2024
-	if(platform == "illumina"){
2025
-		if(missing(rgFile)){
2026
-			##stop("must specify 'rgFile'.")
2027
-			rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
2028
-			message("rgFile not specified.  Using ", rgFile)
2029
-		}
2030
-		if(!load.it){
2031
-			RG <- readIdatFiles(...)
2032
-			if(save.it) save(RG, file=rgFile)
2033
-		}
2034
-		if(load.it & !file.exists(rgFile)){
2035
-			message("load.it is TRUE, but rgFile not present.  Attempting to read the idatFiles.")
2036
-			RG <- readIdatFiles(...)
2037
-			if(save.it) save(RG, file=rgFile)
2038
-		}
2039
-		if(load.it & file.exists(rgFile)){
2040
-			message("Loading RG file")
2041
-			load(rgFile)
2042
-			RG <- get("RG")
2043
-		}
2044
-	}
2045
-	if(!(file.exists(dirname(crlmmFile)))) stop(dirname(crlmmFile), " does not exist.")
2046
-	if(!(file.exists(dirname(intensityFile)))) stop(dirname(intensityFile), " does not exist.")
2047
-	##---------------------------------------------------------------------------
2048
-	## FIX
2049
-	outfiles <- file.path(dirname(crlmmFile), paste("crlmmSetList_", 1:24, ".rda", sep=""))
2050
-	if(load.it & all(file.exists(outfiles))){
2051
-		load(outfiles[1])
2052
-		crlmmSetList <- get("crlmmSetList")
2053
-		if(!all(sampleNames(crlmmSetList) == basename(filenames))){
2054
-			stop("load.it is TRUE, but sampleNames(crlmmSetList != basename(filenames))")
2055
-		} else{
2056
-			return("load.it is TRUE and 'crlmmSetList_<CHR>.rda' objects found. Nothing to do...")
2057
-		}
2058
-	}
2059
-	if(load.it){
2060
-		if(!file.exists(crlmmFile)){
2061
-			message("load.it is TRUE, but ", crlmmFile, " does not exist.  Rerunning the genotype calling algorithm") 
2062
-			load.it <- FALSE
2063
-		}
2064
-	}
2065
-	if(platform == "affymetrix"){
2066
-		if(!file.exists(crlmmFile) | !load.it){
2067
-			callSet <- crlmm(filenames=filenames,
2068
-					     cdfName=cdfName,
2069
-					     save.it=TRUE,
2070
-					     load.it=load.it,
2071
-					     intensityFile=intensityFile)
2072
-			message("Quantile normalizing the copy number probes...")		
2073
-			cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName, outdir=outdir)
2074
-			if(save.it){
2075
-				message("Saving callSet and cnrmaResult to", crlmmFile)
2076
-				save(callSet, cnrmaResult, file=crlmmFile)
2077
-			}
2078
-		} else {
2079
-			message("Loading ", crlmmFile, "...")
2080
-			load(intensityFile)				
2081
-			load(crlmmFile)
2082
-			callSet <- get("callSet")
2083
-			cnrmaResult <- get("cnrmaResult")
2084
-		}
2085
-		scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
2086
-		protocolData(callSet) <- new("AnnotatedDataFrame",
2087
-					     data=scanDates,
2088
-					     varMetadata=data.frame(labelDescription=colnames(scanDates),
2089
-					     row.names=colnames(scanDates)))
2090
-	}
2091
-	if(platform == "illumina"){
2092
-		if(!file.exists(crlmmFile) | !load.it){		
2093
-			callSet <- crlmmIllumina(RG=RG,
2094
-						 cdfName=cdfName,
2095
-						 sns=sampleNames(RG),
2096
-						 returnParams=TRUE,
2097
-						 save.it=TRUE,
2098
-						 intensityFile=intensityFile)
2099
-			if(save.it) save(callSet, file=crlmmFile)
2100
-		} else {
2101
-			message("Loading ", crlmmFile, "...")
2102
-			load(crlmmFile)
2103
-			callSet <- get("callSet")
2104
-		}
2105
-		protocolData(callSet) <- protocolData(RG)
2106
-	}
2107
-	if(platform=="affymetrix") {
2108
-		protocolData(callSet)[["ScanDate"]] <- as.character(celDates(filenames))
2109
-		sampleNames(protocolData(callSet)) <- sampleNames(callSet)
2110
-	}	
2111
-	load(intensityFile)
2112
-	snprmaResult <- get("res")
2113
-	if(platform=="illumina"){
2114
-		if(exists("cnAB")){
2115
-			np.A <- as.integer(cnAB$A)
2116
-			np.B <- as.integer(cnAB$B)
2117
-			np <- ifelse(np.A > np.B, np.A, np.B)
2118
-			np <- matrix(np, nrow(cnAB$A), ncol(cnAB$A))
2119
-			rownames(np) <- cnAB$gns
2120
-			colnames(np) <- cnAB$sns
2121
-			cnAB$NP <- np
2122
-			##sampleNames(callSet) <- res$sns
2123
-			sampleNames(callSet) <- cnAB$sns
2124
-			cnrmaResult <- get("cnAB")
2125
-		} else cnrmaResult <- NULL
2126
-	}
2127
-	if(platform=="affymetrix"){
2128
-		if(exists("cnrmaResult")){
2129
-			cnrmaResult <- get("cnrmaResult")
2130
-		} else cnrmaResult <- NULL
2131
-	}
2132
-	crlmmResults <- list(snprmaResult=snprmaResult,
2133
-			     cnrmaResult=cnrmaResult,
2134
-			     callSet=callSet)
2135
-
2136
-	if(!save.it){
2137
-		message("Cleaning up")		
2138
-		unlink(intensityFile)
2139
-	}
2140
-	return(crlmmResults)
2141
-}
2142
-
2143
-## NOTE: THIS IS ALSO IN UTILS.R
2144
-## validCdfNames <- function(){
2145
-## 	c("genomewidesnp6",
2146
-## 	  "genomewidesnp5",
2147
-## 	  "human370v1c",
2148
-## 	  "human370quadv3c",
2149
-## 	  "human550v3b",
2150
-## 	  "human650v3a",
2151
-## 	  "human610quadv1b",
2152
-## 	  "human660quadv1a",
2153
-## 	  "human1mduov3b")
2154
-## }
2155
-
2156
-## NOTE: THIS IS ALSO IN UTILS.R
2157
-## isValidCdfName <- function(cdfName){
2158
-## 	chipList <- validCdfNames()
2159
-## 	result <- cdfName %in% chipList	
2160
-## 	if(!(result)){
2161
-## 		warning("cdfName must be one of the following: ",
2162
-## 			chipList)
2163
-## 	}
2164
-## 	return(result)
2165
-## }
2166
-
2167 1700
 whichPlatform <- function(cdfName){
2168 1701
 	index <- grep("genomewidesnp", cdfName)
2169 1702
 	if(length(index) > 0){
... ...
@@ -2316,36 +1849,6 @@ thresholdCopynumber <- function(object){
2316 1849
 	return(object)
2317 1850
 }
2318 1851
 
2319
-cnOptions <- function(
2320
-		      MIN.OBS=3,
2321
-		      DF.PRIOR=50,
2322
-		      bias.adj=FALSE,
2323
-		      prior.prob=rep(1/4, 4),
2324
-		      seed=123,
2325
-		      verbose=TRUE,
2326
-		      GT.CONF.THR=0.99,
2327
-		      PHI.THR=2^6,##used in nonpolymorphic fxn for training
2328
-		      nHOM.THR=5, ##used in nonpolymorphic fxn for training
2329
-		      MIN.NU=2^3,
2330
-		      MIN.PHI=2^3,
2331
-		      THR.NU.PHI=TRUE,
2332
-		      thresholdCopynumber=TRUE){
2333
-	list(
2334
-	     MIN.OBS=MIN.OBS,
2335
-	     DF.PRIOR=DF.PRIOR,
2336
-	     GT.CONF.THR=GT.CONF.THR,
2337
-	     bias.adj=bias.adj,
2338
-	     prior.prob=prior.prob,
2339
-	     seed=seed,
2340
-	     verbose=verbose,
2341
-	     PHI.THR=PHI.THR,
2342
-	     nHOM.THR=nHOM.THR,
2343
-	     MIN.NU=MIN.NU,
2344
-	     MIN.PHI=MIN.PHI,
2345
-	     THR.NU.PHI=THR.NU.PHI,
2346
-	     thresholdCopynumber=thresholdCopynumber)
2347
-}
2348
-
2349 1852
 ##linear model parameters
2350 1853
 lm.parameters <- function(object, batch){##cnOptions){
2351 1854
 	fD <- fData(object)
... ...
@@ -2661,23 +2164,27 @@ imputeCenter <- function(muA, muB, index.complete, index.missing){
2661 2164
 
2662 2165
 imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
2663 2166
 	index1 <- which(Ns[, 1] == 0 & Ns[, 3] > MIN.OBS)
2664
-	X <- cbind(1, muA[index.complete[[1]], 3], muB[index.complete[[1]], 3])
2665
-	Y <- cbind(1, muA[index.complete[[1]], 1], muB[index.complete[[1]], 1])
2666
-	betahat <- solve(crossprod(X), crossprod(X,Y))
2667
-	##now with the incomplete SNPs
2668
-	X <- cbind(1, muA[index1, 3], muB[index1, 3])
2669
-	mus <- X %*% betahat
2670
-	muA[index1, 1] <- mus[, 2]
2671
-	muB[index1, 1] <- mus[, 3]
2167
+	if(length(index1) > 0){
2168
+		X <- cbind(1, muA[index.complete[[1]], 3], muB[index.complete[[1]], 3])
2169
+		Y <- cbind(1, muA[index.complete[[1]], 1], muB[index.complete[[1]], 1])
2170
+		betahat <- solve(crossprod(X), crossprod(X,Y))
2171
+		##now with the incomplete SNPs
2172
+		X <- cbind(1, muA[index1, 3], muB[index1, 3])
2173
+		mus <- X %*% betahat
2174
+		muA[index1, 1] <- mus[, 2]
2175
+		muB[index1, 1] <- mus[, 3]
2176
+	}
2672 2177
 	index1 <- which(Ns[, 3] == 0)
2673
-	X <- cbind(1, muA[index.complete[[2]], 1], muB[index.complete[[2]], 1])
2674
-	Y <- cbind(1, muA[index.complete[[2]], 3], muB[index.complete[[2]], 3])
2675
-	betahat <- solve(crossprod(X), crossprod(X,Y))
2676
-	##now with the incomplete SNPs
2677
-	X <- cbind(1, muA[index1, 1], muB[index1, 1])
2678
-	mus <- X %*% betahat
2679
-	muA[index1, 3] <- mus[, 2]
2680
-	muB[index1, 3] <- mus[, 3]	
2178
+	if(length(index1) > 0){
2179
+		X <- cbind(1, muA[index.complete[[2]], 1], muB[index.complete[[2]], 1])
2180
+		Y <- cbind(1, muA[index.complete[[2]], 3], muB[index.complete[[2]], 3])
2181
+		betahat <- solve(crossprod(X), crossprod(X,Y))
2182
+		##now with the incomplete SNPs
2183
+		X <- cbind(1, muA[index1, 1], muB[index1, 1])
2184
+		mus <- X %*% betahat
2185
+		muA[index1, 3] <- mus[, 2]
2186
+		muB[index1, 3] <- mus[, 3]
2187
+	}
2681 2188
 	list(muA, muB)
2682 2189
 }
2683 2190
 
... ...
@@ -3095,6 +2602,113 @@ biasAdj <- function(object, cnOptions, tmp.objects){
3095 2602
 }
3096 2603
 
3097 2604
 
2605
+bias1 <- function(idxBatch,
2606
+		  snpBatches,
2607
+		  index,
2608
+		  object,
2609
+		  normal,
2610
+		  emit,
2611
+		  prior.prob,
2612
+		  MIN.SAMPLES,
2613
+		  verbose){
2614
+	
2615
+}
2616
+
2617
+bias2 <- function(idxBatch,
2618
+		  snpBatches,
2619
+		  index,
2620
+		  object,
2621
+		  normal,
2622
+		  prior.prob,
2623
+		  MIN.SAMPLES,
2624
+		  verbose){
2625
+	open(object)
2626
+	open(normal)
2627
+
2628
+	nps <- snpBatches[[idxBatch]]
2629
+	nuA <- lM(object)$nuA[nps, , drop=FALSE]
2630
+	phiA <- lM(object)$phiA[nps, , drop=FALSE]
2631
+	sig2A <- lM(object)$sig2A[nps, , drop=FALSE]
2632
+	AA <- as.matrix(A(object)[nps, ])
2633
+	batches <- split(seq(along=batch(object)), batch(object))
2634
+	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
2635
+
2636
+	cn.lik <- matrix(NA, length(nps)*ncol(object), 4)
2637
+	argmax.cn <- emit[nps, ]
2638
+	norm <- matrix(1L, length(nps), ncol(object))
2639
+
2640
+	for(k in batches){
2641
+		J <- match(unique(batch(object)[k]), unique(batch(object)))
2642
+		lT <- log2(AA[, k])
2643
+		counter <- 1 ##state counter
2644
+		for(CT in c(0, 1.5, 2, 2.5)){
2645
+			##sds <- sqrt(sig2A[, J]*(CT==0) + sig2A[ , J]*(CT > 0))
2646
+			sds <- sqrt(sig2A[, J])
2647
+			means <- suppressWarnings(log2(nuA[, J]+CT*phiA[, J]))
2648
+			lik <- log(dnorm(lT, mean=means, sd=sds))
2649
+			##emit[[counter]][nps, ] <- tmp
2650
+			cn.lik[, counter] <- as.numeric(lik)
2651
+			counter <- counter+1
2652
+		}
2653
+		outlier <- matrix(rowSums(cn.lik < -10) == 4, length(nps), ncol(object))
2654
+		argmax.cn.lik <- apply(cn.lik, 1, function(x) order(x, decreasing=TRUE)[1])
2655
+		argmax.cn <- matrix(argmax.cn.lik, length(nps), length(k))
2656
+
2657
+		isUp <- argmax.cn > 3
2658
+		prUp <- rowMeans(isUp)
2659
+
2660
+		isDn <- argmax.cn < 3
2661
+		prDn <- rowMeans(isDn)
2662
+
2663
+		index <- which(prUp > 0.05 & prUp > prDn)
2664
+		##if proportion up greater than 5%, trim the high cn est.
2665
+		norm[index, k] <- argmax.cn[index, ] > 3 
2666
+
2667
+		index <- which(prDn > 0.05 & prDn > prUp)
2668
+		norm[index, k] <- argmax.cn[index, ] < 3
2669
+		norm[index, k] <- norm[index, k]*!outlier
2670
+	}
2671
+	normal[nps, ] <- norm
2672
+	TRUE
2673
+}
2674
+
2675
+
2676
+biasAdjust <- function(object, prior.prob=rep(1/4, 4), MIN.SAMPLES=10, verbose=TRUE){
2677
+	load(file.path(ldPath(), "normal.rda"))
2678
+	autosomeIndex.nps <- (1:nrow(object))[chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object))]
2679
+
2680
+##	emit <- initializeBigMatrix("emit",
2681
+##				     nrow(object),
2682
+##				     ncol(object),
2683
+##				     vmode="double")
2684
+	if(verbose) message("Bias adjustment for nonpolymorphic loci on chromosomes 1-22.")
2685
+	snpBatches <- splitIndicesByLength(autosomeIndex.nps, ocProbesets())
2686
+	ocLapply(seq(along=snpBatches),
2687
+		 bias2,
2688
+		 index=autosomeIndex.nps,
2689
+		 snpBatches=snpBatches,
2690
+		 object=object,
2691
+		 normal=normal,
2692
+		 prior.prob=prior.prob,
2693
+		 MIN.SAMPLES=MIN.SAMPLES,
2694
+		 verbose=verbose)
2695
+
2696
+	if(verbose) message("Bias adjustment for polymorphic loci on chromosomes 1-22.")
2697
+	autosomeIndex.snps <- (1:nrow(object))[chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object))]
2698
+	snpBatches <- splitIndicesByLength(autosomeIndex.snps, ocProbesets())
2699
+	ocLapply(seq(along=snpBatches),
2700
+		 bias1,
2701
+		 index=autosomeIndex.snps,
2702
+		 snpBatches=snpBatches,
2703
+		 object=object,
2704
+		 normal=normal,
2705
+		 prior.prob=prior.prob,
2706
+		 emit=emit,
2707
+		 MIN.SAMPLES=MIN.SAMPLES,
2708
+		 verbose=verbose)	
2709
+}
2710
+
2711
+
3098 2712
 ##biasAdjNP <- function(plateIndex, envir, priorProb){
3099 2713
 biasAdjNP <- function(object, cnOptions, tmp.objects){
3100 2714
 	##batch <- unique(object$batch)
... ...
@@ -16,6 +16,7 @@ setMethod("[", "CNSetLM", function(x, i, j, ..., drop=FALSE){
16 16
 	x
17 17
 })
18 18
 
19
+
19 20
 setMethod("lM", "CNSetLM", function(object) object@lM)
20 21
 setReplaceMethod("lM", c("CNSetLM", "list_or_ffdf"), function(object, value){
21 22
 	object@lM <- value
22 23
new file mode 100644
... ...
@@ -0,0 +1,62 @@
1
+\name{CNSetLM-class}
2
+\Rdversion{1.1}
3
+\docType{class}
4
+\alias{CNSetLM-class}
5
+\alias{[,CNSetLM-method}
6
+\alias{lM,CNSetLM-method}
7
+\alias{lM<-,CNSetLM,list_or_ffdf-method}
8
+\alias{open,CNSetLM-method}
9
+\alias{show,CNSetLM-method}
10
+
11
+\title{CNSetLM class}
12
+\description{Container for allele-specific copy number and linear model
13
+parameters}
14
+\section{Objects from the Class}{
15
+		 Objects from the class can be created by calls of the
16
+		 form \code{ new("CNSetLM", CA=matrix(), CB=matrix(), alleleA=matrix(), alleleB=matrix(), call=matrix(), callProbability=matrix())}
17
+		 
18
+}
19
+\section{Slots}{
20
+  \describe{
21
+    \item{\code{lM}:}{Object of class \code{"list_or_ffdf"}}
22
+    \item{\code{assayData}:}{Object of class \code{"AssayData"} }
23
+    \item{\code{phenoData}:}{Object of class \code{"AnnotatedDataFrame"} }
24
+    \item{\code{featureData}:}{Object of class \code{"AnnotatedDataFrame"} }
25
+    \item{\code{experimentData}:}{Object of class \code{"MIAME"} }
26
+    \item{\code{annotation}:}{Object of class \code{"character"}  }
27
+    \item{\code{protocolData}:}{Object of class \code{"AnnotatedDataFrame"} }
28
+    \item{\code{.__classVersion__}:}{Object of class \code{"Versions"} }
29
+  }
30
+}
31
+\section{Extends}{
32
+Class \code{"\linkS4class{CNSet}"}, directly.
33
+Class \code{"\linkS4class{SnpSuperSet}"}, by class "CNSet", distance 2.
34
+Class \code{"\linkS4class{AlleleSet}"}, by class "CNSet", distance 3.
35
+Class \code{"\linkS4class{SnpSet}"}, by class "CNSet", distance 3.
36
+Class \code{"\linkS4class{eSet}"}, by class "CNSet", distance 4.
37
+Class \code{"\linkS4class{VersionedBiobase}"}, by class "CNSet", distance 5.
38
+Class \code{"\linkS4class{Versioned}"}, by class "CNSet", distance 6.
39
+}
40
+\section{Methods}{
41
+  \describe{
42
+    \item{[}{\code{signature(x = "CNSetLM")}: subset \code{CNSetLM} objects}
43
+    \item{lM}{\code{signature(object = "CNSetLM")}: Extract list or
44
+    ffdf object containing linear model parameters}
45
+##    \item{lM<-}{\code{signature(object = "CNSetLM", value = "list_or_ffdf")}: ... }
46
+    \item{open}{\code{signature(con = "CNSetLM")}: opens file connects
47
+    to ff objects for assayData elements and linear model parameters}
48
+    \item{show}{\code{signature(object = "CNSetLM")}: print method
49
+    for the class }
50
+	 }
51
+}
52
+\author{ R. Scharpf}
53
+\seealso{
54
+	\code{\linkS4class{SnpSuperSet}}, \code{\linkS4class{CNSet}}
55
+}
56
+%	~~objects to See Also as \code{\link{~~fun~~}}, ~~~
57
+%	or \code{\linkS4class{CLASSNAME}} for links to other classes
58
+%}
59
+\examples{
60
+showClass("CNSetLM")
61
+}
62
+\keyword{classes}
0 63
new file mode 100644
... ...
@@ -0,0 +1,45 @@
1
+\name{batch}
2
+\alias{batch}
3
+\alias{batch,eSet-method}
4
+\title{
5
+	Function to extract batch information.
6
+}
7
+\description{
8
+	Checks the phenoData and protocolData for a variable named
9
+	batch and, if present, returns the vector.
10
+}
11
+\usage{
12
+batch(object)
13
+}
14
+\arguments{
15
+  \item{object}{
16
+  An object extending the \code{eSet} class.
17
+}
18
+}
19
+\details{
20
+
21
+	For copy number estimation, a batch variable must be
22
+	specified.  Currently, we suggest storing this variable in the
23
+	protocolData.
24
+
25
+	Batch represents groups of samples that were processed (DNA
26
+	preparation and collection, PCR amplification, scan date) at
27
+	similar times. Often, the 96 well chemistry plate or scan date
28
+	is a useful surrogate for batch.
29
+
30
+}
31
+\value{
32
+
33
+	Vector indicating batch.
34
+
35
+}
36
+
37
+\author{
38
+R. Scharpf
39
+}
40
+
41
+\seealso{
42
+	\code{\link{genotype}}, \code{\link{genotype2}}
43
+}
44
+\keyword{manip}
45
+
... ...
@@ -1,29 +1,53 @@
1 1
 \name{crlmmCopynumber}
2 2
 \alias{crlmmCopynumber}
3
+\alias{crlmmCopynumber2}
3 4
 \title{Locus- and allele-specific estimation of copy number}
4 5
 \description{
5 6
   Locus- and allele-specific estimation of copy number.
6 7
 }
7 8
 \usage{
8
-crlmmCopynumber(object, chromosome = 1:23, which.batches, MIN.SAMPLES = 10, SNRMin = 5, MIN.OBS = 3, DF.PRIOR = 50, bias.adj = FALSE, prior.prob = rep(1/4, 4), seed = 1, verbose = TRUE, GT.CONF.THR = 0.99, PHI.THR = 2^6, nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3, THR.NU.PHI = TRUE, thresholdCopynumber = TRUE)
9
+crlmmCopynumber(object, chromosome = 1:23, which.batches, MIN.SAMPLES
10
+= 10, SNRMin = 5, MIN.OBS = 3, DF.PRIOR = 50, bias.adj = FALSE,
11
+prior.prob = rep(1/4, 4), seed = 1, verbose = TRUE, GT.CONF.THR =
12
+0.99, PHI.THR = 2^6, nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3,
13
+THR.NU.PHI = TRUE, thresholdCopynumber = TRUE)
14
+
15
+crlmmCopynumber2(object, which.batches, MIN.SAMPLES = 10, SNRMin = 5,
16
+MIN.OBS = 1, DF.PRIOR = 50, bias.adj = FALSE, prior.prob = rep(1/4,
17
+4), seed = 1, verbose = TRUE, GT.CONF.THR = 0.99, PHI.THR = 2^6,
18
+nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3, THR.NU.PHI = TRUE,
19
+thresholdCopynumber = TRUE)
20
+
9 21
 }
10 22
 \arguments{
11 23
   \item{object}{object of class \code{SnpSuperSet}.
12 24
 }
25
+
13 26
   \item{which.batches}{ Character vector with length equal to the number of
14 27
   samples.  Used to adjust for batch effects.  Chemistry plate or
15 28
   date often work well.  See examples.
29
+
30
+  Ignored in crlmmCopynumber2.
31
+
16 32
 }
33
+
17 34
  \item{chromosome}{Numeric vector indicating which chromosomes to
18 35
  process (length <= 23). For chromosome X, use 23. A copy number
19 36
  method for chromosome Y is not yet available.
20 37
 }
38
+
21 39
   \item{MIN.SAMPLES}{ 'Integer'.  The minimum number of samples in a
22
-  batch.  Bathes with fewer than MIN.SAMPLES are skipped.
40
+  batch.  Bathes with fewer than MIN.SAMPLES are skipped.  Therefore,
41
+  samples in batches with fewer than MIN.SAMPLES have NA's for the
42
+  allele-specific copy number and NA's for the linear model
43
+  parameters.
44
+
23 45
 }
46
+
24 47
   \item{SNRMin}{ Samples with low signal to noise ratios are
25 48
   excluded.  
26 49
 }
50
+
27 51
   \item{MIN.OBS}{ 
28 52
 
29 53
   For genotypes with fewer than \code{MIN.OBS}, the within-genotype
... ...
@@ -56,6 +80,8 @@ crlmmCopynumber(object, chromosome = 1:23, which.batches, MIN.SAMPLES = 10, SNRM
56 80
   of the samples have a copy number alteration.  For additional
57 81
   information, see Scharpf et al., 2009.
58 82
 
83
+  This argument is ignored in crlmmCopynumber2.
84
+
59 85
 }
60 86
   \item{prior.prob}{
61 87
 
... ...
@@ -64,10 +90,9 @@ crlmmCopynumber(object, chromosome = 1:23, which.batches, MIN.SAMPLES = 10, SNRM
64 90
   normal copy number, and amplification, respectively.
65 91
 
66 92
 }
67
-  \item{seed}{ Seed for sampling.
68
-}
69
-  \item{verbose}{ Logical. 
70
-}
93
+  \item{seed}{ Seed for random number generation.}
94
+
95
+  \item{verbose}{ Logical. }
71 96
 
72 97
   \item{GT.CONF.THR}{ 
73 98
 
... ...
@@ -101,20 +126,39 @@ crlmmCopynumber(object, chromosome = 1:23, which.batches, MIN.SAMPLES = 10, SNRM
101 126
   \code{MIN.PHI} are ignored.
102 127
 }
103 128
   \item{thresholdCopynumber}{
129
+
104 130
   If \code{TRUE}, allele-specific number estimates are truncated.
105 131
   Values less than 0.05 are assigned the value 0.05; values exceeding
106 132
   5 are assigned the value 5.  
133
+
134
+  Ignored in crlmmCopynumber2.  Extreme values are automatically
135
+  truncated.
136
+
137
+}
107 138
 }
139
+
140
+\details{
141
+
142
+	The function crlmmCopynumber uses matrices instead of ff
143
+	objects if the ff library is not loaded.
144
+
145
+	The function crlmmCopynumber2 allows parallel processing via
146
+	and requires large data support via the ff package.
147
+
148
+	We plan to phase out crlmmCopynumber and replace this function
149
+	by crlmmCopynumber2.
150
+
108 151
 }
109 152
 \author{R. Scharpf}
110 153
 \examples{
111 154
 ## data(example.callSet)
112
-## cnSet <- crlmmCopynumber(example.callSet)
155
+## cnSet <- crlmmCopynumber2(example.callSet)
113 156
 ## total copy number
114 157
 ## cn <- copyNumber(cnSet)
115 158
 ## allele-specific copy number
116
-## ca <- CA(cnSet) ## A dosage
117
-## cb <- CB(cnSet) ## B dosage
159
+## ca <- CA(cnSet)/100 ## A dosage
160
+## cb <- CB(cnSet)/100 ## B dosage
118 161
 }
119 162
 % Add one or more standard keywords, see file 'KEYWORDS' in the
120 163
 % R documentation directory.
164
+\keyword{manip}
... ...
@@ -1,5 +1,6 @@
1 1
 \name{genotype}
2 2
 \alias{genotype}
3
+\alias{genotype2}
3 4
 \title{
4 5
 	Preprocessing and genotyping of Affymetrix arrays.
5 6
 }
... ...
@@ -7,11 +8,18 @@
7 8
 	Preprocessing and genotyping of Affymetrix arrays.	
8 9
 }
9 10
 \usage{
10
-genotype(filenames, cdfName, mixtureSampleSize = 10^5, eps = 0.1, verbose = TRUE, seed = 1, sns, copynumber = FALSE, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
11
+genotype(filenames, cdfName, batch, mixtureSampleSize = 10^5, eps =
12
+0.1, verbose = TRUE, seed = 1, sns, copynumber = FALSE, probs =
13
+rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000,
14
+gender = NULL, returnParams = TRUE, badSNP = 0.7)
15
+
16
+genotype2(filenames, cdfName, batch, mixtureSampleSize = 10^5, eps = 0.1, verbose = TRUE, seed = 1, sns, copynumber = FALSE, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
11 17
 }
12 18
 \arguments{
13 19
   \item{filenames}{ complete path to CEL files}
14
-  \item{cdfName}{  annotation package  (see also \code{validCdfNames})}
20
+  \item{cdfName}{  annotation package  (see also
21
+  \code{validCdfNames})}
22
+  \item{batch}{ batch variable. See details. }
15 23
   \item{mixtureSampleSize}{    Sample size to be use when fitting the mixture model.}
16 24
  \item{eps}{   Stop criteria.}
17 25
   \item{verbose}{  Logical.  Whether to print descriptive messages during processing.}
... ...
@@ -30,6 +38,23 @@ genotype(filenames, cdfName, mixtureSampleSize = 10^5, eps = 0.1, verbose = TRUE
30 38
   \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
31 39
 }
32 40
 
41
+\details{
42
+
43
+	For large datasets it is important to utilize the large data
44
+	support by installing and loading the ff package before
45
+	calling the genotype or genotype2 function.
46
+
47
+	Currently, two functions are provided for preprocessing and
48
+	genotyping Affymetrix platforms: genotype and genotype2. For
49
+	small datasets, genotype and genotype2 are identical.  For
50
+	large datasets, genotype2 provides large data support (via ff)
51
+	and permits the use of clusters or multiple cores (via snow
52
+	package) to speed up genotyping (similar to \code{crlmm2}).
53
+	The \code{genotype} function will be phased out in the future
54
+	and replaced by \code{genotype2}.
55
+
56
+}
57
+
33 58
 \value{	A \code{SnpSuperSet} instance.}
34 59
 \references{
35 60
 
... ...
@@ -51,15 +76,33 @@ this will greatly reduce the RAM required for big jobs.  See
51 76
 \seealso{
52 77
 	\code{\link{snprma}}, \code{\link{crlmm}},
53 78
 	\code{\link[oligoClasses]{ocSamples}},
54
-	\code{\link[oligoClasses]{ldOpts}}
79
+	\code{\link[oligoClasses]{ldOpts}},
80
+	\code{\link{batch}},
81
+	\code{\link{crlmmCopynumber}}
55 82
 }
56 83
 \examples{
57
-if (require(genomewidesnp6Crlmm) & require(hapmapsnp6)){
84
+if (require(ff) & require(genomewidesnp6Crlmm) & require(hapmapsnp6)){
85
+
58 86
   path <- system.file("celFiles", package="hapmapsnp6")
59 87
   ## the filenames with full path...
60 88
   ## very useful when genotyping samples not in the working directory
61 89
   cels <- list.celfiles(path, full.names=TRUE)
62
-  (crlmmOutput <- genotype(cels, cdfName="genomewidesnp6"))
90
+
91
+  ## To use less RAM, specify a smaller argument to ocProbesets
92
+  ocProbesets(50e3)
93
+  (cnSet <- genotype2(cels, cdfName="genomewidesnp6",
94
+  copynumber=TRUE))
95
+
96
+  dim(cnSet)
97
+  table(isSnp(cnSet))
98
+
99
+## The above is a trivial example.  Typically you may have a large
100
+##    number of cel files, many of which were processed at different
101
+##    times. For such datasets, it is important to set a batch
102
+##    variable.  If not specified, the scan date of the file is used
103
+##    as the batch variable.  
104
+      batch(cnSet)
105
+      protocolData(cnSet)$ScanDate
63 106
 }
64 107
 }
65 108
 \keyword{ classif }