Browse code

fixed bug in cnrma (dimnames(NP) error)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45177 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 11/03/2010 19:03:38
Showing5 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.5.35
4
+Version: 1.5.36
5 5
 Date: 2010-02-05
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -1,4 +1,9 @@
1 1
 setOldClass("ffdf")
2 2
 setOldClass("ff_matrix")
3 3
 ##setClassUnion("matrix_or_ff", c("matrix", "ff_matrix"))
4
-##setClassUnion("list_or_ffdf", c("list", "ffdf"))
4
+setClassUnion("list_or_ffdf", c("list", "ffdf"))
5
+setClass("CNSetLM", contains="CNSet", representation(lM="list_or_ffdf"))
6
+setMethod("initialize", "CNSetLM", function(.Object, CA=new("matrix"), CB=new("matrix"), lM=new("list"), ...){
7
+	.Object <- callNextMethod(.Object, CA=CA, CB=CB, lM=lM, ...)
8
+	.Object
9
+})
... ...
@@ -92,7 +92,6 @@ genotype <- function(filenames, cdfName, mixtureSampleSize=10^5,
92 92
 		     fitMixture=TRUE,
93 93
 		     eps=0.1, verbose=TRUE,
94 94
 		     seed=1, sns, copynumber=FALSE,
95
-		     batchSize=1000,
96 95
 		     probs=rep(1/3, 3),
97 96
 		     DF=6,
98 97
 		     SNRMin=5,
... ...
@@ -138,7 +137,7 @@ genotype <- function(filenames, cdfName, mixtureSampleSize=10^5,
138 137
 		cnrmaRes <- cnrma(filenames=filenames[j],
139 138
 				  cdfName=cdfName,
140 139
 				  row.names=featureNames(callSet)[np.index],				  
141
-				  sns=sns,
140
+				  sns=sns[j],
142 141
 				  seed=seed,
143 142
 				  verbose=verbose)
144 143
 		stopifnot(identical(featureNames(callSet)[np.index], rownames(cnrmaRes)))
... ...
@@ -381,85 +380,40 @@ harmonizeDimnamesTo <- function(object1, object2){
381 380
 	return(object1)
382 381
 }
383 382
 
384
-crlmmCopynumber <- function(filenames, cnOptions, object, ...){
385
-	if(!missing(object)){
386
-		stopifnot(class(object) == "CNSet")
387
-		createIntermediateObjects <- FALSE
388
-	} else {
389
-		createIntermediateObjects <- TRUE
390
-		crlmmResults <- crlmmWrapper(filenames, cnOptions, ...)
391
-		snprmaResult <- crlmmResults[["snprmaResult"]]
392
-		cnrmaResult <- crlmmResults[["cnrmaResult"]]
393
-		callSet <- crlmmResults[["callSet"]]
394
-		rm(crlmmResults); gc()
395
-		annotation(callSet) <- cnOptions[["cdfName"]]
396
-		stopifnot(identical(featureNames(callSet), snprmaResult[["gns"]]))
397
-		path <- system.file("extdata", package=paste(annotation(callSet), "Crlmm", sep=""))	
398
-		load(file.path(path, "snpProbes.rda"))
399
-		snpProbes <- get("snpProbes")
400
-		load(file.path(path, "cnProbes.rda"))
401
-		cnProbes <- get("cnProbes")	
402
-		k <- grep("chr", colnames(snpProbes))
403
-		if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
404
-	}
405
-	set.seed(cnOptions[["seed"]])  ##for reproducibility
383
+
384
+
385
+crlmmCopynumber <- function(object, cnOptions, ...){
406 386
 	chromosome <- cnOptions[["chromosome"]]
407
-	SNRmin <- cnOptions[["SNRmin"]]
408
-	for(CHR in chromosome){
409
-		cat("Chromosome ", CHR, "\n")
410
-		if(createIntermediateObjects){
411
-			snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
412
-			cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
413
-			index.snps <- match(snps, featureNames(callSet))
414
-			index.nps <- match(cnps, rownames(cnrmaResult[["NP"]]))
415
-			if(!is.null(cnrmaResult)){
416
-				npI <- cnrmaResult$NP[index.nps, ]
417
-			} else npI <- NULL
418
-			snpI <- list(A=snprmaResult$A[index.snps, ],
419
-				     B=snprmaResult$B[index.snps, ],
420
-				     sns=snprmaResult$sns,
421
-				     gns=snprmaResult$gns[index.snps],
422
-				     SNR=snprmaResult$SNR,
423
-				     SKW=snprmaResult$SKW,
424
-				     mixtureParams=snprmaResult$mixtureParams,
425
-				     cdfName=snprmaResult$cdfName)
426
-			cnOptions[["batch"]] <- cnOptions[["batch"]][snpI[["SNR"]]  >= SNRmin]
427
-			cnSet <- combineIntensities(res=snpI,
428
-						    NP=npI,
429
-						    callSet=callSet[index.snps, ])
430
-			if(any(cnSet$SNR > SNRmin)){
431
-				message(paste("Excluding samples with SNR < ", SNRmin))
432
-				cnSet <- cnSet[, cnSet$SNR >= SNRmin]
433
-			}
434
-			rm(snpI, npI, snps, cnps, index.snps, index.nps); gc()
435
-			pData(cnSet)$batch <- cnOptions[["batch"]]
436
-			featureData(cnSet) <- lm.parameters(cnSet, cnOptions)
437
-		} else {
438
-			cnSet <- object
439
-		}
440
-		if(CHR != 24){		
441
-			cnSet <- computeCopynumber(cnSet, cnOptions)
442
-		} else{
443
-			message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
444
-			alleleSet <- cnSet
445
-			save(alleleSet, file=file.path(cnOptions[["outdir"]], paste("alleleSet_", CHR, ".rda", sep="")))
446
-			rm(cnSet, alleleSet); gc()
447
-			next()
448
-		}
449
-		if(length(chromosome) == 1){
450
-			if(cnOptions[["save.cnset"]]){
451
-				save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
452
-			}
453
-		}
454
-		if(length(chromosome) > 1){
455
-			save(cnSet, file=file.path(cnOptions[["outdir"]], paste("cnSet_", CHR, ".rda", sep="")))
456
-			rm(cnSet); gc()
457
-		} else {
458
-			return(cnSet)
387
+	which.batch <- cnOptions[["whichbatch"]]
388
+	SNRMin <- cnOptions[["SNRMin"]]
389
+	cnSet <- new("CNSetLM",
390
+		     alleleA=A(object),
391
+		     alleleB=B(object),
392
+		     call=calls(object),
393
+		     callProbability=confs(object),
394
+		     CA=initializeBigMatrix("CA", nrow(object), ncol(object)),
395
+		     CB=initializeBigMatrix("CB", nrow(object), ncol(object)),
396
+		     annotation=annotation(object),
397
+		     featureData=featureData(object),
398
+		     experimentData=experimentData(object),
399
+		     phenoData=phenoData(object))
400
+	rm(object); gc()
401
+	if(any(cnSet$SNR < SNRMin)){
402
+		message("Excluding ", sum(cnSet$SNR < SNRMin), " samples with SNR below ", SNRMin)
403
+		cnSet <- cnSet[, cnSet$SNR > SNRMin]
404
+	}
405
+	batches <- split(1:ncol(cnSet), cnSet$batch)
406
+	if(any(sapply(batches, length) < MIN.SAMPLES)) message("Excluding batches with fewer than ", MIN.SAMPLES, " samples")
407
+	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
408
+	for(i in chromosome){
409
+		cat("Chromosome ", i, "\n")
410
+		for(j in batches){
411
+			if(i >= 24) next()
412
+			row.index <- which(chromosome(cnSet) == i)
413
+			tmp <- computeCopynumber(cnSet[row.index, j], cnOptions)
414
+			##message("Copy number estimates not available for chromosome Y.  Saving only the 'callSet' object for this chromosome")
459 415
 		}
460 416
 	}
461
-	saved.objects <- list.files(cnOptions[["outdir"]], pattern="cnSet", full.names=TRUE)		
462
-	return(saved.objects)
463 417
 }
464 418
 
465 419
 
... ...
@@ -731,15 +685,16 @@ thresholdCopynumber <- function(object){
731 685
 ##
732 686
 ##}
733 687
 
734
-cnOptions <- function(outdir="./",
735
-		      cdfName,
736
-		      crlmmFile,
737
-		      intensityFile,
738
-		      rgFile="rgFile.rda",
739
-		      save.it=TRUE,
740
-		      save.cnset=TRUE,
741
-		      load.it=TRUE,
742
-		      splitByChr=TRUE,
688
+cnOptions <- function(
689
+##		      outdir="./",
690
+##		      cdfName,
691
+##		      crlmmFile,
692
+##		      intensityFile,
693
+##		      rgFile="rgFile.rda",
694
+##		      save.it=TRUE,
695
+##		      save.cnset=TRUE,
696
+##		      load.it=TRUE,
697
+##		      splitByChr=TRUE,
743 698
 		      MIN.OBS=3,
744 699
 		      MIN.SAMPLES=10,
745 700
 		      batch=NULL,
... ...
@@ -763,32 +718,33 @@ cnOptions <- function(outdir="./",
763 718
 ##		      cbsOpts=NULL,
764 719
 		      ##hmmOpts=NULL,
765 720
 		      ...){
766
-	if(missing(cdfName)) stop("must specify cdfName")
767
-	if(!file.exists(outdir)){
768
-		message(outdir, " does not exist.  Trying to create it.")
769
-		dir.create(outdir, recursive=TRUE)
770
-	}
771
-	stopifnot(isValidCdfName(cdfName))
772
-##	if(hiddenMarkovModel){
773
-##		hmmOpts <- hmmOptions(...)
721
+##	if(missing(cdfName)) stop("must specify cdfName")
722
+##	if(!file.exists(outdir)){
723
+##		message(outdir, " does not exist.  Trying to create it.")
724
+##		dir.create(outdir, recursive=TRUE)
774 725
 ##	}
775
-	if(missing(crlmmFile)){
776
-		crlmmFile <- file.path(outdir, "snpsetObject.rda")
777
-	}
778
-	if(missing(intensityFile)){
779
-		intensityFile <- file.path(outdir, "normalizedIntensities.rda")
780
-	}
781
-	if(is.null(batch))
782
-		stop("must specify batch -- should be the same length as the number of files")
783
-	list(outdir=outdir,
784
-	     cdfName=cdfName,
785
-	     crlmmFile=crlmmFile,
786
-	     intensityFile=intensityFile,
787
-	     rgFile=file.path(outdir, rgFile),
788
-	     save.it=save.it,
789
-	     save.cnset=save.cnset,
790
-	     load.it=load.it,
791
-	     splitByChr=splitByChr,
726
+##	stopifnot(isValidCdfName(cdfName))
727
+####	if(hiddenMarkovModel){
728
+####		hmmOpts <- hmmOptions(...)
729
+####	}
730
+##	if(missing(crlmmFile)){
731
+##		crlmmFile <- file.path(outdir, "snpsetObject.rda")
732
+##	}
733
+##	if(missing(intensityFile)){
734
+##		intensityFile <- file.path(outdir, "normalizedIntensities.rda")
735
+##	}
736
+##	if(is.null(batch))
737
+##		stop("must specify batch -- should be the same length as the number of files")
738
+	list(
739
+##	     outdir=outdir,
740
+##	     cdfName=cdfName,
741
+##	     crlmmFile=crlmmFile,
742
+##	     intensityFile=intensityFile,
743
+##	     rgFile=file.path(outdir, rgFile),
744
+##	     save.it=save.it,
745
+##	     save.cnset=save.cnset,
746
+##	     load.it=load.it,
747
+##	     splitByChr=splitByChr,
792 748
 	     MIN.OBS=MIN.OBS,
793 749
 	     MIN.SAMPLES=MIN.SAMPLES,
794 750
 	     batch=batch,
... ...
@@ -1,20 +1,45 @@
1
-setAs("SnpSuperSet", "CNSet",
2
-      function(from, to){
3
-	      CA <- CB <- matrix(NA, nrow(from), ncol(from))
4
-	      dimnames(CA) <- dimnames(CB) <- list(featureNames(from), sampleNames(from))		  
5
-	      new("CNSet",
6
-		  call=calls(from),
7
-		  callProbability=assayData(from)[["callProbability"]],  ##confs(from) returns 1-exp(-x/1000)
8
-		  alleleA=A(from),
9
-		  alleleB=B(from),
10
-		  CA=CA,
11
-		  CB=CB,
12
-		  phenoData=phenoData(from),
13
-		  experimentData=experimentData(from),
14
-		  annotation=annotation(from),
15
-		  protocolData=protocolData(from),
16
-		  featureData=featureData(from))
17
-      })
1
+setMethod("show", "CNSet", function(object){
2
+	callNextMethod(object)
3
+	cat("lM: ", length(lM(object)), " elements \n")
4
+	print(names(lM(object)))
5
+})
6
+setMethod("[", "CNSet", function(x, i, j, ..., drop=FALSE){
7
+	x <- callNextMethod(x, i, j, ..., drop=drop)
8
+	if(!missing(i)){
9
+		if(class(lM(x)) == "ffdf"){
10
+			lM(x) <- lapply(physical(lM(x)), function(x, i){open(x); x[i, ]}, i=i)
11
+		} else {
12
+			lM(x) <- lapply(lM(x), function(x, i) x[i, , drop=FALSE], i=i)
13
+		}
14
+	}
15
+	x
16
+})
17
+setGeneric("lM", function(object) standardGeneric("lM"))
18
+setGeneric("lM<-", function(object, value) standardGeneric("lM<-"))
19
+setMethod("lM", "CNSet", function(object) object@lM)
20
+##setMethod("linearModelParam", "AffymetrixCNSet", function(object) object@linearModelParam)
21
+setReplaceMethod("lM", c("CNSet", "list_or_ffdf"), function(object, value){
22
+	object@lM <- value
23
+	object
24
+})
25
+
26
+##setAs("SnpSuperSet", "CNSet",
27
+##      function(from, to){
28
+##	      CA <- CB <- matrix(NA, nrow(from), ncol(from))
29
+##	      dimnames(CA) <- dimnames(CB) <- list(featureNames(from), sampleNames(from))		  
30
+##	      new("CNSet",
31
+##		  call=calls(from),
32
+##		  callProbability=assayData(from)[["callProbability"]],  ##confs(from) returns 1-exp(-x/1000)
33
+##		  alleleA=A(from),
34
+##		  alleleB=B(from),
35
+##		  CA=CA,
36
+##		  CB=CB,
37
+##		  phenoData=phenoData(from),
38
+##		  experimentData=experimentData(from),
39
+##		  annotation=annotation(from),
40
+##		  protocolData=protocolData(from),
41
+##		  featureData=featureData(from))
42
+##      })
18 43
 
19 44
 setMethod("computeCopynumber", "CNSet", function(object, cnOptions){
20 45
 	## to do the bias adjustment, initial estimates of the parameters are needed
... ...
@@ -151,39 +176,3 @@ ellipse.CNSet <- function(x, copynumber, batch, ...){
151 176
 }
152 177
 
153 178
 
154
-
155
-
156
-
157
-
158
-setMethod("show", "CNSet", function(object){
159
-	callNextMethod(object)
160
-##	cat("emissionPr\n")
161
-##	cat("   array:", nrow(object), "features,", ncol(object), "samples,", dim(emissionPr(object))[3], "states\n")
162
-##	cat("   hidden states:\n")
163
-##	cat("      ", dimnames(emissionPr(object))[[3]], "\n")
164
-##	cat("   Missing values:", sum(is.na(emissionPr(object))), "\n")
165
-##	if(!all(is.na(emissionPr(object)))){
166
-##		cat("   minimum value:", min(emissionPr(object), na.rm=TRUE), "\n")
167
-##	} else  cat("   minimum value: NA (all missing)\n")
168
-##	cat("rangedData:  ")
169
-##	cat("    ", show(rangedData(object)), "\n")
170
-})
171
-##setMethod("rangedData", "CNSet", function(object) segmentData(object))
172
-##setReplaceMethod("rangedData", c("CNSet", "RangedData"), function(object, value){
173
-##	segmentData(object) <- value
174
-##})
175
-##
176
-##setMethod("segmentData", "CNSet", function(object) object@segmentData)
177
-##setReplaceMethod("segmentData", c("CNSet", "RangedData"), function(object, value){
178
-##	object@segmentData <- value
179
-##	object
180
-##})
181
-##
182
-##setMethod("emissionPr", "CNSet", function(object) object@emissionPr)
183
-##setReplaceMethod("emissionPr", c("CNSet", "array"), function(object, value){
184
-##	object@emissionPr <- value
185
-##	object
186
-##})
187
-##setMethod("start", "CNSet", function(x, ...) start(segmentData(x), ...))
188
-##setMethod("end", "CNSet", function(x, ...) end(segmentData(x), ...))
189
-##setMethod("width", "CNSet", function(x) width(segmentData(x)))
190 179
new file mode 100644
... ...
@@ -0,0 +1,62 @@
1
+\name{genotype}
2
+\alias{genotype}
3
+\title{
4
+	Preprocessing and genotyping of Affymetrix arrays.
5
+}
6
+\description{
7
+	Preprocessing and genotyping of Affymetrix arrays.	
8
+}
9
+\usage{
10
+genotype(filenames, cdfName, mixtureSampleSize = 10^5, fitMixture = TRUE, eps = 0.1, verbose = TRUE, seed = 1, sns, copynumber = FALSE, probs = rep(1/3, 3), DF = 6, SNRMin = 5, recallMin = 10, recallRegMin = 1000, gender = NULL, returnParams = TRUE, badSNP = 0.7)
11
+}
12
+\arguments{
13
+  \item{filenames}{ complete path to CEL files}
14
+  \item{cdfName}{  annotation package  (see also \code{validCdfNames})}
15
+  \item{mixtureSampleSize}{}
16
+  \item{fitMixture}{}
17
+  \item{eps}{}
18
+  \item{verbose}{  Logical.  Whether to print descriptive messages during processing.}
19
+  \item{seed}{  Integer. Useful for reproducibility}
20
+  \item{sns}{The sample identifiers.  If missing, the default sample names are \code{basename(filenames)}}
21
+  \item{copynumber}{ Whether to quantile normalize the nonpolymorphic probes.  If TRUE, the quantile normalized intensities for nonpolymorphic markers are included in the 'A' matrix.}
22
+  \item{probs}{}
23
+  \item{DF}{}
24
+  \item{SNRMin}{}
25
+  \item{recallMin}{ }
26
+  \item{recallRegMin}{}
27
+  \item{gender}{  integer (  male = 1, female =2 ) or missing.  If missing, the gender is predicted.}
28
+  \item{returnParams}{}
29
+  \item{badSNP}{}
30
+}
31
+\details{
32
+}
33
+\value{	A \code{SnpSuperSet} instance.}
34
+\references{
35
+
36
+  Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
37
+  normalization, and genotype calls of high-density oligonucleotide SNP
38
+  array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec
39
+  22. PMID: 17189563.
40
+
41
+  Carvalho BS, Louis TA, Irizarry RA. 
42
+  Quantifying uncertainty in genotype calls.
43
+  Bioinformatics. 2010 Jan 15;26(2):242-9.
44
+
45
+}
46
+\author{R. Scharpf}
47
+\note{}
48
+
49
+\seealso{
50
+	\code{\link{snprma}}, \code{\link{crlmm}}, \code{\link{validCdfNames}}
51
+}
52
+\examples{
53
+if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){
54
+  path <- system.file("celFiles", package="hapmapsnp5")
55
+  ## the filenames with full path...
56
+  ## very useful when genotyping samples not in the working directory
57
+  cels <- list.celfiles(path, full.names=TRUE)
58
+  (crlmmOutput <- genotype(cels))
59
+}
60
+}
61
+\keyword{ classif }
62
+