Browse code

merging from collab

* collab:
add warning in vignette about NAs with BafLrrSetList function
Added Human Omni Express Exome 8 v1.1b as a supported chip
updated version number of pacakge and man pages to reflect these changes
skeleton for krlmm capability added. genotype.Illumina() can now take and XY object as input
update copynumber.Rnw to use BafLrrSetList
updates to vignettes
update namespace

# Please enter a commit message to explain why this merge is necessary,
# especially if it merges an updated upstream into a topic branch.
#
# Lines starting with '#' will be ignored, and an empty message aborts
# the commit.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@79138 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 31/07/2013 01:37:34
Showing14 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.19.2
4
+Version: 1.19.6
5 5
 Author: Benilton S Carvalho, Robert Scharpf, Matt Ritchie, Ingo Ruczinski, Rafael A Irizarry
6 6
 Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -22,7 +22,7 @@ Imports: methods,
22 22
 	 lattice,
23 23
 	 ff,
24 24
 	 foreach,
25
-         RcppEigen,
25
+         RcppEigen (>= 0.3.1.2.1),
26 26
 	 matrixStats
27 27
 Suggests: hapmapsnp6,
28 28
           genomewidesnp6Crlmm (>= 1.0.7),
... ...
@@ -43,6 +43,7 @@ Collate: AllGenerics.R
43 43
          crlmm-functions.R
44 44
 	 crlmmGT2.R
45 45
          crlmm-illumina.R
46
+         krlmm.R
46 47
 	 snprma-functions.R
47 48
          utils.R
48 49
          zzz.R
... ...
@@ -63,7 +63,7 @@ importFrom(oligoClasses, celfileDate, chromosome2integer, i2p,
63 63
 	   parStatus,
64 64
            splitIndicesByLength, splitIndicesByNode, AssayDataList)
65 65
 
66
-importFrom(preprocessCore, normalize.quantiles,
66
+importFrom(preprocessCore, normalize.quantiles, normalize.quantiles.determine.target,
67 67
            normalize.quantiles.use.target, subColSummarizeMedian)
68 68
 
69 69
 importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile,
... ...
@@ -2787,12 +2787,12 @@ calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
2787 2787
 	centersMatrix <- lapply(centers, function(x, i, j) x[i, j], j=j, i=feature.index)
2788 2788
 	lapply(centers, close)
2789 2789
 	centersMatrix <- do.call(cbind, centersMatrix)
2790
-	centersA <- centersMatrix[, 1:3]
2791
-	centersB <- centersMatrix[, 4:6]
2790
+	centersA <- centersMatrix[, 1:3, drop=FALSE]
2791
+	centersB <- centersMatrix[, 4:6, drop=FALSE]
2792 2792
 	rm(centers, centersMatrix); gc()
2793 2793
 	centersA[centersA < 64] <- 64
2794 2794
 	centersB[centersB < 64] <- 64
2795
-	theta <- atan2(centersB, centersA) * 2/pi
2795
+	theta <- as.matrix(atan2(centersB, centersA) * 2/pi)
2796 2796
 	centersA[is.na(centersA)] <- 0
2797 2797
 	centersB[is.na(centersB)] <- 0
2798 2798
 	if(length(index.np) > 0) theta[index.np, ] <- 1
... ...
@@ -2844,3 +2844,5 @@ calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
2844 2844
 	lrr <- integerMatrix(lrr, 100)
2845 2845
 	return(list(baf=bf, lrr=lrr))
2846 2846
 }
2847
+
2848
+truncate <- func
... ...
@@ -1,4 +1,4 @@
1
-					# function below works OK provided all .idat files are in the current working directory
1
+# function below works OK provided all .idat files are in the current working directory
2 2
 # - could add an option to allow files in Illumina directory structure to be handled
3 3
 # or to use the optional 'Path' column in sampleSheet
4 4
 # - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet
... ...
@@ -243,10 +243,12 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
243 243
 	  "human1mduov3b",          # 1M Duo
244 244
 	  "humanomni1quadv1b",      # Omni1 quad
245 245
 	  "humanomni25quadv1b",     # Omni2.5 quad
246
-	  "humanomni258v1a",        # Omni2.5 8 sample
246
+	  "humanomni258v1a",        # Omni2.5 8
247
+          "humanomni5quadv1b",      # Omni5 quad  
247 248
 	  "humanomniexpress12v1b",  # Omni express 12
248 249
 	  "humanimmuno12v1b",       # Immuno chip 12
249
-          "humancytosnp12v2p1h")    # CytoSNP 12
250
+          "humancytosnp12v2p1h",    # CytoSNP 12
251
+          "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
250 252
 	  ## RS: added cleancdfname()
251 253
 	  if(missing(chipType)){
252 254
 		  chipType = match.arg(cleancdfname(annotation(RG)), chipList)
... ...
@@ -353,12 +355,13 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
353 355
 }
354 356
 
355 357
 
356
-stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
357
-	if(useTarget){
358
+stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE, quantile.method="between") {
359
+        if(quantile.method=="between") {
360
+	  if(useTarget){
358 361
 		objectsNeeded <- c("stripnum", "reference")
359
-	} else objectsNeeded <- "stripnum"
360
-	needToLoad <- !all(sapply(objectsNeeded, isLoaded))
361
-	if(needToLoad){
362
+	  } else objectsNeeded <- "stripnum"
363
+	  needToLoad <- !all(sapply(objectsNeeded, isLoaded))
364
+	  if(needToLoad){
362 365
 		pkgname = getCrlmmAnnotationName(annotation(XY))
363 366
 		if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
364 367
 			suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
... ...
@@ -369,36 +372,86 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
369 372
 		}
370 373
 		if(verbose) message("Loading strip and reference normalization information.")
371 374
 		loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
372
-	}
373
-	stripnum = getVarInEnv("stripnum")
374
-	if(useTarget)
375
+	  }
376
+	  stripnum = getVarInEnv("stripnum")
377
+	  if(useTarget)
375 378
 		targetdist = getVarInEnv("reference")
376
-	if(verbose){
379
+	  if(verbose){
377 380
 		message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
378 381
 		if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=max(stripnum), style=3)
379
-	}
380
-  for(s in 1:max(stripnum)) {
381
-    if(verbose) {
382
-      if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
383
-      else cat(".")
384
-    }
385
-    sel = stripnum==s
386
-    ##RS: faster to access data directly
387
-    ##subX = as.matrix(exprs(channel(XY, "X"))[sel,])
388
-    ##subY = as.matrix(exprs(channel(XY, "Y"))[sel,])
389
-    subX <- as.matrix(assayData(XY)[["X"]][sel, ])
390
-    subY <- as.matrix(assayData(XY)[["Y"]][sel, ])
391
-    if(useTarget)
392
-      tmp = normalize.quantiles.use.target(cbind(subX, subY), targetdist[[s]])
393
-    else
394
-      tmp = normalize.quantiles(cbind(subX, subY))
395
-    XY@assayData$X[sel,] = matrix(as.integer(tmp[,1:(ncol(tmp)/2)]+16), nrow(tmp), ncol(tmp)/2)
396
-    XY@assayData$Y[sel,] = matrix(as.integer(tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16), nrow(tmp), ncol(tmp)/2)
397
-    rm(subX, subY, tmp, sel)
398
-    gc(verbose=FALSE)
399
-  }
400
-  if(verbose)
401
-    cat("\n")
382
+	  }
383
+          for(s in 1:max(stripnum)) {
384
+            if(verbose) {
385
+              if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
386
+              else cat(".")
387
+            }
388
+            sel = stripnum==s
389
+            ##RS: faster to access data directly
390
+            ##subX = as.matrix(exprs(channel(XY, "X"))[sel,])
391
+            ##subY = as.matrix(exprs(channel(XY, "Y"))[sel,])
392
+            subX <- as.matrix(assayData(XY)[["X"]][sel, ])
393
+            subY <- as.matrix(assayData(XY)[["Y"]][sel, ])
394
+            if(useTarget)
395
+              tmp = normalize.quantiles.use.target(cbind(subX, subY), targetdist[[s]])
396
+            else
397
+              tmp = normalize.quantiles(cbind(subX, subY))
398
+            XY@assayData$X[sel,] = matrix(as.integer(tmp[,1:(ncol(tmp)/2)]+16), nrow(tmp), ncol(tmp)/2)
399
+            XY@assayData$Y[sel,] = matrix(as.integer(tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16), nrow(tmp), ncol(tmp)/2)
400
+            rm(subX, subY, tmp, sel)
401
+            gc(verbose=FALSE)
402
+          }
403
+          if(verbose)
404
+            cat("\n")
405
+        }
406
+        if(quantile.method=="within") {  # ignore strip information
407
+	  if(useTarget){
408
+		objectsNeeded <- c("Xref", "Yref")
409
+	  } else objectsNeeded <- ""
410
+	  needToLoad <- !all(sapply(objectsNeeded, isLoaded))
411
+	  if(needToLoad){
412
+		pkgname = getCrlmmAnnotationName(annotation(XY))
413
+		if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
414
+			suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
415
+			msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
416
+			message(strwrap(msg))
417
+			stop("Package ", pkgname, " could not be found.")
418
+			rm(suggCall, msg)
419
+		}
420
+		if(verbose) message("Loading reference normalization information.")
421
+		loader("targetXY.rda", .crlmmPkgEnv, pkgname)
422
+	  }
423
+	  if(useTarget) {
424
+                Xref = getVarInEnv("Xref")
425
+	        Yref = getVarInEnv("Yref")
426
+          } else{
427
+                Xref = normalize.quantiles.determine.target(as.matrix(assayData(XY)[["X"]]))
428
+                gc(verbose=FALSE)
429
+                Yref = normalize.quantiles.determine.target(as.matrix(assayData(XY)[["Y"]]))
430
+                gc(verbose=FALSE)
431
+          }
432
+	  if(verbose){
433
+		message("Quantile normalizing ", ncol(XY), " arrays, one at a time.")
434
+		if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=ncol(XY), style=3)
435
+	  }
436
+          for(s in 1:ncol(XY)) {
437
+            if(verbose) {
438
+              if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
439
+              else cat(".")
440
+            }
441
+            ##RS: faster to access data directly
442
+            ##subX = as.matrix(exprs(channel(XY, "X"))[sel,])
443
+            ##subY = as.matrix(exprs(channel(XY, "Y"))[sel,])
444
+            subX <- as.matrix(assayData(XY)[["X"]][,s])
445
+            subY <- as.matrix(assayData(XY)[["Y"]][,s])
446
+            tmpX = normalize.quantiles.use.target(subX, as.integer(Xref))
447
+            tmpY = normalize.quantiles.use.target(subY, as.integer(Yref))              
448
+            XY@assayData$X[,s] = as.integer(tmpX+16)
449
+            XY@assayData$Y[,s] = as.integer(tmpY+16)
450
+            rm(subX, subY, tmpX, tmpY)
451
+          }
452
+          if(verbose)
453
+            cat("\n")
454
+        }
402 455
   XY
403 456
 }
404 457
 
... ...
@@ -411,13 +464,14 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
411 464
 				cdfName,
412 465
 				sns,
413 466
 				stripNorm=TRUE,
414
-				useTarget=TRUE) { #,
467
+				useTarget=TRUE,
468
+                                quantile.method="between") { #,
415 469
 #               outdir=".") {
416 470
 #				save.it=FALSE,
417 471
 #				snpFile,
418 472
 #				cnFile) {
419 473
 	if(stripNorm)
420
-		XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
474
+		XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose, quantile.method=quantile.method)
421 475
 	## MR: the code below is mostly straight from snprma.R
422 476
 	if (missing(sns)) sns = sampleNames(XY) #$X
423 477
 	if(missing(cdfName))
... ...
@@ -437,14 +491,17 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
437 491
 		}
438 492
 		if(verbose) message("Loading snp annotation and mixture model parameters.")
439 493
 		loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
440
-		loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
494
+                if(fitMixture)
495
+ 		  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
441 496
 		loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
442 497
 		loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
443 498
 	}
444 499
 	autosomeIndex = getVarInEnv("autosomeIndex")
445
-	SMEDIAN = getVarInEnv("SMEDIAN")
446
-	theKnots = getVarInEnv("theKnots")
447
-	npIndex = getVarInEnv("npProbesFid")
500
+        if(fitMixture) {
501
+          SMEDIAN = getVarInEnv("SMEDIAN")
502
+          theKnots = getVarInEnv("theKnots")
503
+        }
504
+        npIndex = getVarInEnv("npProbesFid")
448 505
 	snpIndex = getVarInEnv("snpProbesFid")
449 506
 	narrays = ncol(XY)
450 507
 	nprobes = length(npIndex)
... ...
@@ -490,7 +547,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
490 547
   B = matrix(NA, nprobes, narrays)
491 548
   zero = matrix(NA, nprobes, narrays)
492 549
 
493
-  if(verbose){
550
+  if(verbose && fitMixture){
494 551
      message("Calibrating ", narrays, " arrays.")
495 552
      if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=narrays, style=3)
496 553
   }
... ...
@@ -510,15 +567,15 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
510 567
 		  tmp = fitAffySnpMixture56(S, M, theKnots, eps=eps)
511 568
 		  mixtureParams[, i] = tmp[["coef"]]
512 569
 		  SNR[i] = tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
513
-	  }
514
-	  if(verbose) {
515
-		  if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
516
-		  else cat(".")
570
+	          if(verbose) {
571
+		    if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
572
+		    else cat(".")
573
+                  }
517 574
 	  }
518 575
 	  ## run garbage collection every now and then
519 576
 	  if(i %% 100 == 0) gc(verbose=FALSE);
520 577
   }
521
-  if (verbose) {
578
+  if (verbose && fitMixture) {
522 579
 	  if (getRversion() > '2.7.0') close(pb)
523 580
 	  else cat("\n")
524 581
   }
... ...
@@ -800,15 +857,17 @@ constructInf <- function(sampleSheet=NULL,
800 857
 			 highDensity=FALSE,
801 858
 			 sep="_",
802 859
 			 fileExt=list(green="Grn.idat", red="Red.idat"),
860
+                         XY,
803 861
 			 cdfName,
804 862
 			 verbose=FALSE,
805
-			 batch, #fns,
863
+			 batch=NULL, #fns,
806 864
 			 saveDate=TRUE) { #, outdir="."){
807 865
 	verbose <- FALSE
808
-	if(!is.null(arrayNames)) {
866
+        if(is.null(XY)) {
867
+  	  if(!is.null(arrayNames)) {
809 868
 		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
810
-	}
811
-	if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
869
+	  }
870
+	  if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
812 871
 		if(is.null(arrayNames)) {
813 872
 			if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
814 873
 				barcode = sampleSheet[,arrayInfoColNames$barcode]
... ...
@@ -826,61 +885,62 @@ constructInf <- function(sampleSheet=NULL,
826 885
 						arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
827 886
 				}
828 887
 			}
829
-		}
830
-		pd = new("AnnotatedDataFrame", data = sampleSheet)
831
-		sampleNames(pd) = make.unique(basename(arrayNames))
832
-	}
833
-	if(is.null(arrayNames)) {
888
+		  }
889
+		  pd = new("AnnotatedDataFrame", data = sampleSheet)
890
+		  sampleNames(pd) = make.unique(basename(arrayNames))
891
+	  }
892
+	  if(is.null(arrayNames)) {
834 893
 		arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
835 894
 		if(!is.null(sampleSheet)) {
836 895
 			sampleSheet=NULL
837 896
 			cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
838 897
 		}
839 898
 		pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
840
-	}
841
-	narrays = length(arrayNames)
899
+  	  }
900
+	  narrays = length(arrayNames)
842 901
 
843
-	if(!missing(batch)) {
902
+	  if(!is.null(batch)) {
844 903
 		stopifnot(length(batch) == narrays)
845
-	}
846
-	if(missing(batch)) {
847
-                stop("Must specify 'batch'") # batch = as.factor(rep(1, narrays))
848
-	}
904
+	  }
905
+ 	  if(is.null(batch)) {
906
+                batch = rep("batch1", narrays) # assume only one batch stop("Must specify 'batch'")
907
+	  }
908
+          if(is(batch, "factor")) batch = as.character(batch)
849 909
 
850
-	grnfiles = paste(arrayNames, fileExt$green, sep=sep)
851
-	redfiles = paste(arrayNames, fileExt$red, sep=sep)
852
-	if(length(grnfiles)==0 || length(redfiles)==0)
910
+	  grnfiles = paste(arrayNames, fileExt$green, sep=sep)
911
+	  redfiles = paste(arrayNames, fileExt$red, sep=sep)
912
+	  if(length(grnfiles)==0 || length(redfiles)==0)
853 913
 		stop("Cannot find .idat files")
854
-	if(length(grnfiles)!=length(redfiles))
914
+	  if(length(grnfiles)!=length(redfiles))
855 915
 		stop("Cannot find matching .idat files")
856
-	if(path[1] != "." & path[1] != ""){
916
+	  if(path[1] != "." & path[1] != ""){
857 917
 		grnidats = file.path(path, grnfiles)
858 918
 		redidats = file.path(path, redfiles)
859
-	}  else {
919
+	  }  else {
860 920
 		message("path arg not set.  Assuming files are in local directory, or that complete path is provided")
861 921
 		grnidats = grnfiles
862 922
 		redidats = redfiles
863
-	}
864
-	if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
865
-	if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
866
-
867
-	if(verbose) message("Initializing container for genotyping and copy number estimation")
868
-	pkgname <- getCrlmmAnnotationName(cdfName)
869
-	path <- system.file("extdata", package=pkgname)
870
-	genome <- getAvailableIlluminaGenomeBuild(path)
871
-	featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
872
-	nr = nrow(featureData); nc = narrays
873
-	sns <-  if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
923
+    	  }
924
+	  if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
925
+	  if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
926
+
927
+	  if(verbose) message("Initializing container for genotyping and copy number estimation")
928
+	  pkgname <- getCrlmmAnnotationName(cdfName)
929
+	  path <- system.file("extdata", package=pkgname)
930
+	  genome <- getAvailableIlluminaGenomeBuild(path)
931
+	  featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
932
+	  nr = nrow(featureData); nc = narrays
933
+	  sns <-  if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
874 934
 		make.unique(sampleSheet$Sample_ID)
875
-        } else{
935
+          } else{
876 936
 		make.unique(basename(arrayNames))
877
-	}
878
-	biga <- initializeBigMatrix(name="A", nr, nc)
879
-	bigb <- initializeBigMatrix(name="B", nr, nc)
880
-	bigc <- initializeBigMatrix(name="call", nr, nc)
881
-	bigd <- initializeBigMatrix(name="callPr", nr,nc)
882
-	colnames(biga) <- colnames(bigb) <- colnames(bigc) <- colnames(bigd) <- sns
883
-	cnSet <- new("CNSet",
937
+          }
938
+	  biga <- initializeBigMatrix(name="A", nr, nc)
939
+	  bigb <- initializeBigMatrix(name="B", nr, nc)
940
+	  bigc <- initializeBigMatrix(name="call", nr, nc)
941
+	  bigd <- initializeBigMatrix(name="callPr", nr,nc)
942
+	  colnames(biga) <- colnames(bigb) <- colnames(bigc) <- colnames(bigd) <- sns
943
+	  cnSet <- new("CNSet",
884 944
 		     alleleA=biga,
885 945
 		     alleleB=bigb,
886 946
 		     call=bigc,
... ...
@@ -894,25 +954,63 @@ constructInf <- function(sampleSheet=NULL,
894 954
 ##        } else{
895 955
 ##		sampleNames(cnSet) <- make.unique(basename(arrayNames))
896 956
 ##	}
897
-	if(saveDate){
957
+  	  if(saveDate){
898 958
 		protocolData = getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green, verbose=verbose)
899
-	} else{
959
+	  } else{
900 960
 		protocolData = annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
901
-	}
902
-	rownames(pData(protocolData)) = sampleNames(cnSet)
903
-	protocolData(cnSet) = protocolData
961
+	  }
962
+	  rownames(pData(protocolData)) = sampleNames(cnSet)
963
+	  protocolData(cnSet) = protocolData
904 964
 	##featureData(cnSet) = featureData
905
-	featureNames(cnSet) = featureNames(featureData)
906
-	cnSet$gender <- initializeBigVector("gender", ncol(cnSet), vmode="integer")
907
-	cnSet$SNR = initializeBigVector("crlmmSNR-", ncol(cnSet), "double")
908
-	cnSet$SKW = initializeBigVector("crlmmSKW-", ncol(cnSet), "double")
965
+	  featureNames(cnSet) = featureNames(featureData)
966
+	  cnSet$gender <- initializeBigVector("gender", ncol(cnSet), vmode="integer")
967
+	  cnSet$SNR = initializeBigVector("crlmmSNR-", ncol(cnSet), "double")
968
+	  cnSet$SKW = initializeBigVector("crlmmSKW-", ncol(cnSet), "double")
909 969
 	##sampleNames(cnSet) <- basename(sampleNames(cnSet))
970
+        } else{ # if XY specified, easier set-up of cnSet
971
+          narrays = ncol(XY)
972
+          if(verbose) message("Initializing container for genotyping and copy number estimation")           
973
+          if(!is.null(batch)) {
974
+              stopifnot(length(batch) == narrays)
975
+          }
976
+          if(is.null(batch)) {
977
+              batch = rep("batch1", narrays) # assume only one batch stop("Must specify 'batch'")
978
+          }
979
+          if(is(batch, "factor")) batch = as.character(batch)
980
+          pkgname <- getCrlmmAnnotationName(cdfName)
981
+          path <- system.file("extdata", package=pkgname)
982
+          genome <- getAvailableIlluminaGenomeBuild(path)
983
+          featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
984
+          nr = nrow(featureData); nc = narrays
985
+          sns <-  sampleNames(XY)
986
+          biga <- initializeBigMatrix(name="A", nr, nc)
987
+          bigb <- initializeBigMatrix(name="B", nr, nc)
988
+          bigc <- initializeBigMatrix(name="call", nr, nc)
989
+          bigd <- initializeBigMatrix(name="callPr", nr,nc)
990
+          colnames(biga) <- colnames(bigb) <- colnames(bigc) <- colnames(bigd) <- sns
991
+          cnSet <- new("CNSet",
992
+                       alleleA=biga,
993
+                       alleleB=bigb,
994
+                       call=bigc,
995
+                       callProbability=bigd,
996
+                       annotation=cdfName,
997
+                       featureData=featureData,
998
+                       batch=batch,
999
+                       genome=genome)
1000
+          protocolData = annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
1001
+          rownames(pData(protocolData)) = sampleNames(cnSet)
1002
+          protocolData(cnSet) = protocolData
1003
+          featureNames(cnSet) = featureNames(featureData)
1004
+          cnSet$gender = initializeBigVector("gender", ncol(cnSet), vmode="integer")
1005
+          cnSet$SNR = initializeBigVector("crlmmSNR-", ncol(cnSet), "double")
1006
+          cnSet$SKW = initializeBigVector("crlmmSKW-", ncol(cnSet), "double")
1007
+        }
910 1008
 	return(cnSet)
911 1009
 }
912 1010
 construct.Illumina <- constructInf
913 1011
 
914 1012
 preprocessInf <- function(cnSet,
915
-			  sampleSheet=NULL,
1013
+		       sampleSheet=NULL,
916 1014
 		       arrayNames=NULL,
917 1015
 		       ids=NULL,
918 1016
 		       path=".",
... ...
@@ -920,11 +1018,13 @@ preprocessInf <- function(cnSet,
920 1018
 		       highDensity=TRUE,
921 1019
 		       sep="_",
922 1020
 		       fileExt=list(green="Grn.idat", red="Red.idat"),
1021
+                       XY,
923 1022
 		       saveDate=TRUE,
924 1023
 		       stripNorm=TRUE,
925 1024
 		       useTarget=TRUE,
926 1025
 		       mixtureSampleSize=10^5,
927
-			  fitMixture=TRUE,
1026
+		       fitMixture=TRUE,
1027
+                       quantile.method="between",
928 1028
 		       eps=0.1,
929 1029
 		       verbose=TRUE,
930 1030
 		       seed=1, cdfName){
... ...
@@ -949,6 +1049,7 @@ preprocessInf <- function(cnSet,
949 1049
 		 highDensity=highDensity,
950 1050
 		 sep=sep,
951 1051
 		 fileExt=fileExt,
1052
+                 XY=XY,
952 1053
 		 saveDate=saveDate,
953 1054
 		 verbose=verbose,
954 1055
 		 mixtureSampleSize=mixtureSampleSize,
... ...
@@ -959,6 +1060,7 @@ preprocessInf <- function(cnSet,
959 1060
 		 sns=sns,
960 1061
 		 stripNorm=stripNorm,
961 1062
 		 useTarget=useTarget,
1063
+                 quantile.method=quantile.method,                 
962 1064
 		 A=A(cnSet),
963 1065
 		 B=B(cnSet),
964 1066
 		 GT=calls(cnSet),
... ...
@@ -981,7 +1083,8 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3),
981 1083
 			badSNP=0.7,
982 1084
 			gender=NULL,
983 1085
 			DF=6,
984
-			cdfName){
1086
+			cdfName,
1087
+                        call.method="crlmm"){
985 1088
 	is.snp = isSnp(cnSet)
986 1089
 	snp.index = which(is.snp)
987 1090
 ##	narrays = ncol(cnSet)
... ...
@@ -998,7 +1101,8 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3),
998 1101
 ##	message("Writing complete.  Begin genotyping...")
999 1102
 ##	close(A(cnSet))
1000 1103
 ##	close(B(cnSet))
1001
-	tmp <- crlmmGT2(A=A(cnSet),
1104
+        if(call.method=="crlmm") {
1105
+  	  tmp <- crlmmGT2(A=A(cnSet),
1002 1106
 			B=B(cnSet),
1003 1107
 			SNR=cnSet$SNR,
1004 1108
 			mixtureParams=mixtureParams,
... ...
@@ -1016,11 +1120,14 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3),
1016 1120
 			callsGt=calls(cnSet),
1017 1121
 			callsPr=snpCallProbability(cnSet))#,
1018 1122
 	##RS:  I changed the API for crlmmGT2 to be consistent with crlmmGT
1123
+           open(cnSet$gender)
1124
+           cnSet$gender[,] = tmp$gender
1125
+           close(cnSet$gender)
1126
+        }
1127
+        if(call.method=="krlmm")
1128
+          tmp <- krlmm(cnSet, cdfName, gender=gender) # new function required...  cnSet, cdfName and gender are probably the only arguments you need...
1019 1129
 	## snp.names=featureNames(cnSet)[snp.index])
1020
-	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
1021
-	open(cnSet$gender)
1022
-	cnSet$gender[,] = tmp$gender
1023
-	close(cnSet$gender)
1130
+	if(verbose) message("Genotyping finished.") # Updating container with genotype calls and confidence scores.")
1024 1131
 	TRUE
1025 1132
 }
1026 1133
 
... ...
@@ -1033,12 +1140,15 @@ genotype.Illumina <- function(sampleSheet=NULL,
1033 1140
 			      highDensity=FALSE,
1034 1141
 			      sep="_",
1035 1142
 			      fileExt=list(green="Grn.idat", red="Red.idat"),
1143
+                              XY=NULL,
1144
+                              call.method="crlmm",                              
1036 1145
 			      cdfName,
1037 1146
 			      copynumber=TRUE,
1038
-			      batch,
1147
+			      batch=NULL,
1039 1148
 			      saveDate=TRUE,
1040 1149
 			      stripNorm=TRUE,
1041 1150
 			      useTarget=TRUE,
1151
+                              quantile.method="between",                             
1042 1152
 			      mixtureSampleSize=10^5,
1043 1153
 			      fitMixture=TRUE,
1044 1154
 			      eps=0.1,
... ...
@@ -1053,25 +1163,58 @@ genotype.Illumina <- function(sampleSheet=NULL,
1053 1163
 			      gender=NULL,
1054 1164
 			      returnParams=TRUE,
1055 1165
 			      badSNP=0.7) {
1166
+        krlmm.supported = c("humanomni1quadv1b",      # Omni1 quad
1167
+	                    "humanomni25quadv1b",     # Omni2.5 quad
1168
+	                    "humanomni258v1a",        # Omni2.5 8
1169
+                            "humanomni5quadv1b")      # Omni5 quad
1170
+        crlmm.supported = c("human370v1c",            # 370CNV
1171
+	                    "human650v3a",            # 650Y
1172
+	                    "human610quadv1b",        # 610 quad
1173
+	                    "human660quadv1a",        # 660 quad
1174
+	                    "human370quadv3c",        # 370CNV quad
1175
+	                    "human550v3b",            # 550K
1176
+	                    "human1mduov3b",          # 1M Duo
1177
+                            "humanomni1quadv1b",      # Omni1 quad
1178
+	                    "humanomniexpress12v1b",  # Omni express 12
1179
+	                    "humanimmuno12v1b",       # Immuno chip 12
1180
+                            "humancytosnp12v2p1h",    # CytoSNP 12
1181
+                            "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
1182
+        if(call.method=="krlmm") {
1183
+          if(!any(cdfName==krlmm.supported))
1184
+             stop(cdfName, " platform not supported by krlmm.  Consider setting call.method=\'crlmm\'")           
1185
+          fitMixture=FALSE
1186
+          quantile.method="within"
1187
+        }
1188
+        if(call.method=="crlmm") {
1189
+          if(!any(cdfName==crlmm.supported))
1190
+             stop(cdfName, " platform not supported by crlmm.  Consider setting call.method=\'krlmm\'")
1191
+          fitMixture=TRUE
1192
+          # quantile.method="between"
1193
+        }
1056 1194
 	is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
1057 1195
 	stopifnot(is.lds)
1196
+        if(!is.null(XY) && missing(cdfName))
1197
+          cdfName = getCrlmmAnnotationName(annotation(cnSet))
1058 1198
 	if(missing(cdfName)) stop("must specify cdfName")
1059 1199
 	if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
1060
-	stopifnot(!missing(batch))
1061
-	if(is(batch, "factor")) batch <- as.character(batch)
1200
+#	stopifnot(!missing(batch))
1201
+#	if(is(batch, "factor")) batch <- as.character(batch)
1062 1202
         pkgname <- getCrlmmAnnotationName(cdfName)
1063
-	message("Instantiate CNSet container.")
1064
-	cnSet <- constructInf(sampleSheet=sampleSheet,
1203
+#        if(is.null(cnSet)) {
1204
+  	  message("Instantiate CNSet container.")
1205
+	  cnSet <- constructInf(sampleSheet=sampleSheet,
1065 1206
 				    arrayNames=arrayNames,
1066 1207
 				    path=path,
1067 1208
 				    arrayInfoColNames=arrayInfoColNames,
1068 1209
 				    highDensity=highDensity,
1069 1210
 				    sep=sep,
1070 1211
 				    fileExt=fileExt,
1212
+                                    XY=XY,
1071 1213
 				    cdfName=cdfName,
1072 1214
 				    verbose=verbose,
1073 1215
 				    batch=batch,
1074 1216
 				    saveDate=saveDate)
1217
+#        }
1075 1218
 	mixtureParams <- preprocessInf(cnSet=cnSet,
1076 1219
 				    sampleSheet=sampleSheet,
1077 1220
 				    arrayNames=arrayNames,
... ...
@@ -1082,32 +1225,33 @@ genotype.Illumina <- function(sampleSheet=NULL,
1082 1225
 				    sep=sep,
1083 1226
 				    fileExt=fileExt,
1084 1227
 				    saveDate=saveDate,
1228
+                                    XY=XY,
1085 1229
 				    stripNorm=stripNorm,
1086 1230
 				    useTarget=useTarget,
1087 1231
 				    mixtureSampleSize=mixtureSampleSize,
1088 1232
 				    fitMixture=fitMixture,
1233
+                                    quantile.method=quantile.method,
1089 1234
 				    eps=eps,
1090 1235
 				    verbose=verbose,
1091 1236
 				    seed=seed,
1092
-				       cdfName=cdfName)
1237
+				    cdfName=cdfName)
1093 1238
 	message("Preprocessing complete.  Begin genotyping...")
1094 1239
 	genotypeInf(cnSet=cnSet, mixtureParams=mixtureParams,
1095 1240
 		    probs=probs,
1096
-		   SNRMin=SNRMin,
1097
-		   recallMin=recallMin,
1098
-		   recallRegMin=recallRegMin,
1099
-		   verbose=verbose,
1100
-		   returnParams=returnParams,
1101
-		   badSNP=badSNP,
1102
-		   gender=gender,
1103
-		   DF=DF,
1104
-		    cdfName=cdfName)
1241
+		    SNRMin=SNRMin,
1242
+		    recallMin=recallMin,
1243
+		    recallRegMin=recallRegMin,
1244
+		    verbose=verbose,
1245
+		    returnParams=returnParams,
1246
+		    badSNP=badSNP,
1247
+		    gender=gender,
1248
+		    DF=DF,
1249
+		    cdfName=cdfName,
1250
+                    call.method=call.method)
1105 1251
 	return(cnSet)
1106 1252
 }
1107 1253
 
1108 1254
 
1109
-
1110
-
1111 1255
 processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1112 1256
 			arrayNames=NULL,
1113 1257
 			ids=NULL,
... ...
@@ -1116,6 +1260,7 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1116 1260
 			highDensity=FALSE,
1117 1261
 			sep="_",
1118 1262
 			fileExt=list(green="Grn.idat", red="Red.idat"),
1263
+                        XY,
1119 1264
 			saveDate=FALSE,
1120 1265
 			verbose=TRUE,
1121 1266
 			mixtureSampleSize=10^5,
... ...
@@ -1126,6 +1271,7 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1126 1271
 			sns,
1127 1272
 			stripNorm=TRUE,
1128 1273
 			useTarget=TRUE,
1274
+                        quantile.method=quantile.method,                        
1129 1275
 			A, B,
1130 1276
 			GT,
1131 1277
 			GTP,
... ...
@@ -1133,16 +1279,23 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1133 1279
 	message("Processing sample stratum ", stratum, " of ", length(sampleBatches))
1134 1280
 	sel <- sampleBatches[[stratum]]
1135 1281
         if(length(path)>= length(sel)) path = path[sel]
1136
-        RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
1282
+        if(is.null(XY)) {
1283
+          RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
1137 1284
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1138 1285
                        highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose)
1139
-        XY = RGtoXY(RG, chipType=cdfName)
1140
-        rm(RG)
1141
-        gc(verbose=FALSE)
1142
-        if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
1143
-        res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1144
-                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir)
1145
-        rm(XY)
1286
+          XY = RGtoXY(RG, chipType=cdfName)
1287
+          rm(RG)
1288
+          gc(verbose=FALSE)
1289
+          if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
1290
+          res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1291
+                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
1292
+                               quantile.method=quantile.method) #, outdir=outdir)
1293
+          rm(XY)
1294
+        }else{ #XY already available
1295
+          res = preprocessInfinium2(XY[,sel], mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1296
+                                           seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
1297
+                                           quantile.method=quantile.method) 
1298
+        }
1146 1299
         gc(verbose=FALSE)
1147 1300
 	if(verbose) message("Finished preprocessing.")
1148 1301
         snp.index = which(is.snp)
1149 1302
new file mode 100644
... ...
@@ -0,0 +1,10 @@
1
+krlmm = function(cnSet, cdfName, gender) { # Doesn't do anything as yet - Jenny/Cynthia to add functionality based on their code
2
+  if(is.null(gender)) {
3
+    gender=rep(1, ncol(cnSet)) # Need to impute gender if not specified - Cynthia is working on this using Y chr SNPs
4
+  }
5
+  open(cnSet$gender)
6
+  cnSet$gender[,] = gender
7
+  close(cnSet$gender)
8
+
9
+  TRUE
10
+}
... ...
@@ -166,10 +166,12 @@ illuminaCdfNames <- function(){
166 166
 	  "human1mduov3b",          # 1M Duo
167 167
 	  "humanomni1quadv1b",      # Omni1 quad
168 168
 	  "humanomni25quadv1b",     # Omni2.5 quad
169
-	  "humanomni258v1a",        # Omni2.5 8 sample
169
+	  "humanomni258v1a",        # Omni2.5 8
170
+          "humanomni5quadv1b",      # Omni5 quad          
170 171
 	  "humanomniexpress12v1b",  # Omni express 12
171 172
 	  "humanimmuno12v1b",       # Immuno chip 12
172
-	  "humancytosnp12v2p1h")    # CytoSNP 12
173
+	  "humancytosnp12v2p1h",    # CytoSNP 12
174
+          "humanomniexpexome8v1p1b")
173 175
 }
174 176
 
175 177
 affyCdfNames <- function(){
... ...
@@ -194,16 +194,6 @@ table(c("male", "female")[cnSet$gender[]])
194 194
 if(any(is.na(cnSet$gender[]))) stop("missing genders")
195 195
 @
196 196
 
197
-%Parameters for determining log R ratios and B allele frequencies are
198
-%estimated using the function \Rfunction{crlmmCopynumber}:
199
-%
200
-%<<crlmmCopynumber>>=
201
-%crlmmCopynumber(cnSet, fit.linearModel=FALSE)
202
-%@
203
-%<<LDS_genotype, cache=TRUE>>=
204
-%cnSet <- genotype(celFiles, batch=plates, cdfName=cdfName, genome="hg19")
205
-%@
206
-
207 197
 The normalized intensities, genotype calls, and confidence scores are
208 198
 stored as \verb+ff+ objects in the \verb+assayData+ slot.  A concise
209 199
 summary of this object can be obtained throught the \Rfunction{print}
... ...
@@ -220,10 +210,6 @@ genotype calls are stored on disk rather than in active memory.
220 210
 print(object.size(cnSet), units="Mb")
221 211
 @
222 212
 
223
-%Users can proceed to the \verb+copynumber+ vignette for copy number
224
-%analyses.  See the \verb+Infrastructure+ vignette for additional
225
-%details on the \Robject{CNSet} class, including an overview of the
226
-%available accessors.
227 213
 \SweaveInput{copynumber}
228 214
 
229 215
 A sample-specific estimate of the signal to noise ratio (SNR)
230 216
Binary files a/inst/scripts/AffyGW.pdf and b/inst/scripts/AffyGW.pdf differ
... ...
@@ -238,6 +238,7 @@ cnSet <- genotype.Illumina(sampleSheet=samplesheet,
238 238
 			   arrayInfoColNames=arrayInfo,
239 239
 			   cdfName="human370v1c",
240 240
 			   batch=batch)
241
+stop()
241 242
 @
242 243
 
243 244
 
... ...
@@ -53,15 +53,24 @@ print(histogram(~snr,
53 53
 
54 54
 \section{Copy number estimation}
55 55
 
56
-As described in \cite{Scharpf2011}, the CRLMM-CopyNumber algorithm
57
-fits a linear model to the normalized intensities stratified by the
58
-diallic genotype call.  The intercept and slope from the linear model
59
-are both SNP- and batch-specific.  The implementation in the \crlmm{}
60
-package is encapsulated by the function \Rfunction{crlmmCopynumber}
61
-that, using the default settings, can be called by passing a single
62
-object of class \Rclass{CNSet}.  See the appropriate
63
-preprocessing/genotyping vignette for the construction of an object of
64
-class \Rclass{CNSet}.
56
+There are two ways to obtain marker-level estimates of copy number
57
+that are supported by \Rpackage{crlmm}. One approach is to fit a
58
+linear model to the normalized intensities stratified by the diallelic
59
+genotype call at each SNP, as described in \cite{Scharpf2011}.
60
+Another alternative is to compute log R ratio and B allele
61
+frequencies. The latter is often better supported by downstream hidden
62
+Markov models such as those in the \Rpackage{VanillaICE} package. We
63
+describe each approach in the following two sections.
64
+
65
+\subsection{Linear model for normalized intensities}
66
+
67
+In this section, we fit a linear model to the normalized intensities
68
+stratified by the diallic genotype call.  The intercept and slope from
69
+the linear model are both SNP- and batch-specific.  The implementation
70
+in the \crlmm{} package is encapsulated by the function
71
+\Rfunction{crlmmCopynumber} that, using the default settings, can be
72
+called by passing a single object of class \Rclass{CNSet}.
73
+
65 74
 <<LDS_copynumber,cache=TRUE>>=
66 75
 crlmmCopynumber(cnSet)
67 76
 @
... ...
@@ -105,12 +114,7 @@ described in the \R{} package \Rpackage{ff}. The value returned by
105 114
 \Rfunction{crlmmCopynumber} is \verb+TRUE+, indicating that the files
106 115
 on disk have been successfully updated.  Note that while the
107 116
 \Robject{cnSet} object is unchanged, the values on disk are different.
108
-On the other hand, subsetting the \Robject{cnSet} with the `[' method
109
-coerces all of the elements to class \Rclass{matrix}. The
110
-batch-specific summaries are now ordinary matrices stored in RAM. The
111
-object returned by \Robject{crlmmCopynumber} is an object of class
112
-\Rclass{CNSet} with the matrices in the \verb+batchStatistics+ slot
113
-updated.
117
+
114 118
 
115 119
 <<chr1index>>=
116 120
 chr1.index <- which(chromosome(cnSet) == 1)
... ...
@@ -127,20 +131,10 @@ for(i in seq_along(nms)) cls[i] <- class(batchStatistics(cnSet2)[[nms[i]]])[1]
127 131
 all(cls == "matrix")
128 132
 @
129 133
 
130
-<<matrix_copynumber,cache=TRUE>>=
131
-cnSet3 <- crlmmCopynumber(cnSet2)
132
-class(cnSet3)
133
-@
134
-
135 134
 <<clean, echo=FALSE, results=hide>>=
136 135
 rm(cnSet2); gc()
137 136
 @
138 137
 
139
-\subsection{Marker-specific estimates}
140
-%\subsection{Raw copy number}
141
-
142
-%\paragraph{Log R ratios and B allele frequencies.}
143
-
144 138
 \paragraph{Raw total copy number.}
145 139
 Several functions are available that will compute relatively quickly
146 140
 the allele-specific, \emph{raw} copy number estimates. At allele $k$,
... ...
@@ -186,38 +180,49 @@ cb <- CB(cnSet, i=snp.index, j=1:2)
186 180
 
187 181
 
188 182
 \subsection{Container for log R ratios and B allele frequencies}
183
+\label{sec:lrrBaf}
189 184
 
190 185
 A useful container for storing the \crlmm{} genotypes, genotype
191 186
 confidence scores, and the total or relative copy number at each
192
-marker is the \Rclass{oligoSetList} class.  Coercion of a
193
-\Rclass{CNSet} object to a \Rfunction{oligoSnpSet} object can be
194
-acheived by using the function \Rfunction{constructOligoSetFrom} as
187
+marker is the \Rclass{BafLrrSetList} class.  Coercion of a
188
+\Rclass{CNSet} object to a \Rfunction{BafLrrSetList} object can be
189
+acheived by the function \Rfunction{BafLrrSetList} as
195 190
 illustrated below. Users should note that if the \verb+assayData+
196 191
 elements in the \Rclass{CNSet} instance are \Rpackage{ff} objects, the
197
-\verb+assayData+ elements of each element in the \Rclass{oligoSetList}
192
+\verb+assayData+ elements of each element in the \Rclass{BafLrrSetList}
198 193
 object will be \Rpackage{ff}-dervied objects (a new
199 194
 \verb+total_cn*.ff+ file will be created in the \verb+ldPath()+
200
-directory).
195
+directory). The following code-chunk is not evalutated.
196
+
197
+Estimation of log R ratios and B allele frequencies from an object of class \Rclass{CNSet} instantiated from genotyping Affymetrix CEL files or Illumina iDat files requires running the \R{} function \Rfunction{crlmmCopynumber} as a preliminary step.  Specifying \texttt{fit.linearModel=FALSE} will speed up computation as this function will by default compute summary statistics unnecessary for computing BAFs and log R ratios.
201 198
 
202
-<<oligoSnpSet>>=
199
+<<callingCrlmmCopynumber,eval=FALSE>>=
200
+crlmmCopynumber(cnSet, fit.linearModel=FALSE)
201
+@
202
+
203
+<<oligoSnpSet,eval=FALSE>>=
203 204
 library(VanillaICE)
204
-open(cnSet3)
205
-oligoSetList <- constructOligoSetListFrom(cnSet3, batch.name=batch(cnSet3)[1])
206
-close(cnSet3)
205
+open(cnSet)
206
+oligoSetList <- BafLrrSetList(cnSet)
207
+close(cnSet)
207 208
 show(oligoSetList)
208 209
 class(oligoSetList)
209 210
 ## oligoSnpSet of first chromosome
210 211
 oligoSetList[[1]]
211 212
 @
212 213
 
213
-\noindent Note that log R ratios stored in the \Robject{oligoSnpSet}
214
-object can be retrieved by the \Rfunction{copyNumber} accessor.  B
215
-allele frequences are retrieved by the \Rfunction{baf} accessor.
214
+\noindent Log R ratios and B allele frequences can be retrieved by the
215
+accessors \Rfunction{lrr} and \Rfunction{baf}, respectively.
216 216
 
217
-<<testEqual>>=
218
-lrrList <- copyNumber(oligoSetList)
217
+<<testEqual,eval=FALSE>>=
218
+lrrList <- lrr(oligoSetList)
219 219
 class(lrrList)
220 220
 dim(lrrList[[1]]) ## log R ratios for chromosome 1.
221 221
 bafList <- baf(oligoSetList)
222 222
 dim(bafList[[1]]) ## B allele frequencies for chromosome 1
223 223
 @
224
+
225
+If the \Rfunction{crlmmCopynumber} function is not run prior to the
226
+\Rfunction{BafLrrSetList} construction, the log R ratios and BAFs will
227
+be initialized as NAs.
228
+
... ...
@@ -9,7 +9,9 @@
9 9
 	\code{batchStatistics} will be \code{ff} objects. See details.
10 10
 }
11 11
 \usage{
12
-constructInf(sampleSheet = NULL, arrayNames = NULL, path = ".", arrayInfoColNames = list(barcode = "SentrixBarcode_A", position = "SentrixPosition_A"), highDensity = FALSE, sep = "_", fileExt = list(green = "Grn.idat", red = "Red.idat"), cdfName, verbose = FALSE, batch, saveDate = TRUE)
12
+constructInf(sampleSheet = NULL, arrayNames = NULL, path = ".",
13
+       arrayInfoColNames = list(barcode="SentrixBarcode_A",position="SentrixPosition_A"), highDensity = FALSE, sep = "_",
14
+       fileExt = list(green = "Grn.idat", red = "Red.idat"), XY, cdfName, verbose = FALSE, batch=NULL, saveDate = TRUE)
13 15
 }
14 16
 \arguments{
15 17
   \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet
... ...
@@ -35,6 +37,7 @@ constructInf(sampleSheet = NULL, arrayNames = NULL, path = ".", arrayInfoColName
35 37
     names.}
36 38
   \item{fileExt}{list containing elements 'Green' and 'Red' which
37 39
     specify the .idat file extension for the Cy3 and Cy5 channels.}
40
+  \item{XY}{an \code{NChannelSet} containing X and Y intensities.}
38 41
   \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
39 42
   \item{verbose}{  'logical.'  Whether to print descriptive messages
40 43
   during processing.}
... ...
@@ -10,11 +10,11 @@
10 10
 \usage{
11 11
 genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
12 12
       arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
13
-      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"),
14
-      cdfName, copynumber=TRUE, batch, saveDate=TRUE, stripNorm=TRUE, useTarget=TRUE,
15
-      mixtureSampleSize=10^5, fitMixture=TRUE, eps =0.1, verbose = TRUE, seed = 1,
16
-      sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000,
17
-      gender = NULL, returnParams = TRUE, badSNP = 0.7)
13
+      highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), XY=NULL,
14
+      call.method="crlmm", cdfName, copynumber=TRUE, batch=NULL, saveDate=TRUE, stripNorm=TRUE, 
15
+      useTarget=TRUE, quantile.method="between", mixtureSampleSize=10^5, fitMixture=TRUE,                               
16
+      eps =0.1, verbose = TRUE, seed = 1, sns, probs = rep(1/3, 3), DF = 6, SNRMin = 5, 
17
+      recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
18 18
 }
19 19
 \arguments{
20 20
   \item{sampleSheet}{\code{data.frame} containing Illumina sample sheet
... ...
@@ -41,6 +41,8 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
41 41
     names.}
42 42
   \item{fileExt}{list containing elements 'Green' and 'Red' which
43 43
     specify the .idat file extension for the Cy3 and Cy5 channels.}
44
+  \item{XY}{\code{NChannelSet} containing X and Y intensities.}
45
+  \item{call.method}{character string specifying the genotype calling algorithm to use ('crlmm' or 'krlmm').}
44 46
   \item{cdfName}{ annotation package  (see also \code{validCdfNames})}
45 47
   \item{copynumber}{ 'logical.' Whether to store copy number intensities with SNP output.}
46 48
   \item{batch}{ character vector indicating the batch variable. Must be
... ...
@@ -50,6 +52,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
50 52
   \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
51 53
   \item{useTarget}{'logical' (only used when \code{stripNorm=TRUE}).
52 54
     Should the reference HapMap intensities be used in strip-level normalization?}
55
+  \item{quantile.method}{character string specifying the quantile normalization method to use ('within' or 'between' channels).}
53 56
   \item{mixtureSampleSize}{ Sample size to be use when fitting the mixture model.}
54 57
   \item{fitMixture}{ 'logical.' Whether to fit per-array mixture model.}
55 58
   \item{eps}{   Stop criteria.}
... ...
@@ -88,8 +91,7 @@ genotype.Illumina(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
88 91
 	Rather, \code{batch} is required in order to initialize a
89 92
 	\code{CNSet} container with the appropriate dimensions.
90 93
 
91
-
92
-
94
+        The new 'krlmm' option is available for certain chip types.
93 95
       }
94 96
 
95 97
 \value{	A \code{SnpSuperSet} instance.}
... ...
@@ -9,7 +9,7 @@
9 9
 \description{
10 10
 
11 11
 	Genotyping of Illumina Infinium II arrays. This function
12
-	provides CRLMM genotypes and confidence scores for the the
12
+	provides CRLMM/KRLMM genotypes and confidence scores for the the
13 13
 	polymorphic markers and is a required step prior to copy
14 14
 	number estimation.
15 15
 
... ...
@@ -17,7 +17,7 @@
17 17
 \usage{
18 18
 genotypeInf(cnSet, mixtureParams, probs = rep(1/3, 3), SNRMin = 5,
19 19
 recallMin = 10, recallRegMin = 1000, verbose = TRUE, returnParams =
20
-TRUE, badSNP = 0.7, gender = NULL, DF = 6, cdfName)
20
+TRUE, badSNP = 0.7, gender = NULL, DF = 6, cdfName, call.method="crlmm")
21 21
 }
22 22
 %- maybe also 'usage' for other objects documented here.
23 23
 \arguments{
... ...
@@ -43,11 +43,12 @@ TRUE, badSNP = 0.7, gender = NULL, DF = 6, cdfName)
43 43
     t-distribution.}
44 44
   \item{cdfName}{\code{character} string indicating which annotation
45 45
     package to load.}
46
+  \item{call.method}{character string specifying the genotype calling algorithm to use ('crlmm' or 'krlmm').}
46 47
 }
47 48
 
48 49
 \details{
49 50
 
50
-	The CRLMM genotype calls and confidence scores are written to
51
+	The genotype calls and confidence scores are written to
51 52
 	file using \code{ff} protocols for I/O.  For the most part,
52 53
 	the calls and confidence scores can be accessed as though the
53 54
 	data is in memory through the methods \code{snpCall} and
... ...
@@ -19,9 +19,9 @@
19 19
 preprocessInf(cnSet, sampleSheet=NULL, arrayNames = NULL, ids = NULL,
20 20
 path = ".", arrayInfoColNames = list(barcode = "SentrixBarcode_A",
21 21
 position = "SentrixPosition_A"), highDensity = TRUE, sep = "_", fileExt
22
-= list(green = "Grn.idat", red = "Red.idat"), saveDate = TRUE, stripNorm
23
-= TRUE, useTarget = TRUE, mixtureSampleSize = 10^5, fitMixture = TRUE,
24
-eps = 0.1, verbose = TRUE, seed = 1, cdfName)
22
+= list(green = "Grn.idat", red = "Red.idat"), XY, saveDate = TRUE, stripNorm
23
+= TRUE, useTarget = TRUE, mixtureSampleSize = 10^5, fitMixture = TRUE, 
24
+quantile.method="between", eps = 0.1, verbose = TRUE, seed = 1, cdfName)
25 25
 }
26 26
 
27 27
 \arguments{
... ...
@@ -58,6 +58,7 @@ eps = 0.1, verbose = TRUE, seed = 1, cdfName)
58 58
     names.}
59 59
   \item{fileExt}{list containing elements 'Green' and 'Red' which
60 60
     specify the .idat file extension for the Cy3 and Cy5 channels.}
61
+  \item{XY}{an \code{NChannelSet} object containing X and Y intensities.}
61 62
   \item{saveDate}{'logical'.  Should the dates from each .idat be saved
62 63
     with sample information?}
63 64
   \item{stripNorm}{'logical'.  Should the data be strip-level normalized?}
... ...
@@ -65,7 +66,8 @@ eps = 0.1, verbose = TRUE, seed = 1, cdfName)
65 66
     Should the reference HapMap intensities be used in strip-level normalization?}
66 67
   \item{mixtureSampleSize}{ Sample size to be use when fitting the mixture model.}
67 68
   \item{fitMixture}{ 'logical.' Whether to fit per-array mixture
68
-    model. }
69
+    model.}
70
+  \item{quantile.method}{character string specifying the quantile normalization method to use ('within' or 'between' channels).}
69 71
   \item{eps}{   Stop criteria.}
70 72
   \item{verbose}{  'logical.'  Whether to print descriptive messages during processing.}
71 73
   \item{seed}{ Seed to be used when sampling. Useful for