Browse code

bug fixes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@42271 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 09/10/2009 12:47:42
Showing 10 changed files

... ...
@@ -282,3 +282,11 @@ is decoded and scanned
282 282
 * changed copyNumber() method so that CA + NA = NA, CB + NA = NA (previously had CA+NA=CA, but this can result in a lot of zeros, depending on the genotype)
283 283
 * new method: addFeatureAnnotation
284 284
 * support for snp5.0
285
+
286
+2009-10-04 R. Scharpf - committed version 1.3.22
287
+
288
+* changed default path argument for readIdatFiles to empty quotes
289
+* fixed bug in update method for character strings
290
+* updated addFeatureAnnotation so that CHR arg not required
291
+* fixed bug in nonpolymorphic function -- function checks whether any regions are pseudoautosomal 
292
+* fixed bug in list2locusset function (no longer assigns genomewidesnp6 as the default annotation)
... ...
@@ -1,14 +1,14 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.21
4
+Version: 1.3.22
5 5
 Date: 2009-09-29
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0.
9 9
 License: Artistic-2.0
10 10
 Depends: methods, Biobase (>= 2.5.5)
11
-Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, oligoClasses, ellipse, methods
11
+Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, oligoClasses, ellipse, methods, SNPchip
12 12
 Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, VanillaICE (>= 1.7.8), GGdata
13 13
 Collate: AllClasses.R
14 14
 	 AllGenerics.R
... ...
@@ -31,6 +31,7 @@ importFrom(graphics, abline, axis, layout, legend, mtext, par, plot,
31 31
            polygon, rect, segments, text, points, boxplot)
32 32
 
33 33
 importFrom(stats, update)
34
+importFrom(SNPchip, chromosome2integer)
34 35
 
35 36
 importFrom(grDevices, grey)
36 37
 
... ...
@@ -141,7 +141,8 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){
141 141
 		A <- res@assayData[["A"]]
142 142
 		B <- res@assayData[["B"]]
143 143
 	}
144
-	XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median)/2
144
+	tmp <- which(rowSums(is.na(A)) > 0)
145
+	XMedian <- apply(log2(A[XIndex,, drop=FALSE])+log2(B[XIndex,, drop=FALSE]), 2, median, na.rm=TRUE)/2
145 146
 	SNR <- res$SNR
146 147
 	if(sum(SNR>SNRMin)==1){
147 148
 		gender <- which.min(c(abs(XMedian-8.9), abs(XMedian-9.5)))
... ...
@@ -267,7 +268,7 @@ crlmmWrapper <- function(filenames,
267 268
 			load.it <- FALSE
268 269
 		}
269 270
 	}
270
-	if(!isValidCdfName(cdfName, platform=platform))
271
+	if(!isValidCdfName(cdfName))
271 272
 		stop(cdfName, " is not a valid entry.  See crlmm:::validCdfNames(platform)")
272 273
 	if(platform == "affymetrix"){
273 274
 		if(!file.exists(crlmmFile) | !load.it){
... ...
@@ -329,6 +330,8 @@ crlmmWrapper <- function(filenames,
329 330
 	crlmmResult <- harmonizeSnpSet(crlmmResult, ABset, cdfName)
330 331
 	stopifnot(all.equal(dimnames(crlmmResult), dimnames(ABset)))
331 332
 	crlmmSetList <- as(list(ABset, crlmmResult), "CrlmmSetList")
333
+	fd <- addFeatureAnnotation(crlmmSetList)
334
+	featureData(crlmmSetList[[1]]) <- fd
332 335
 	if(splitByChr){
333 336
 		message("Saving by chromosome")
334 337
 		splitByChromosome(crlmmSetList, cdfName=cdfName, outdir=dirname(crlmmFile))
... ...
@@ -340,46 +343,79 @@ crlmmWrapper <- function(filenames,
340 343
 	return()
341 344
 }
342 345
 
343
-validCdfNames <- function(platform){
344
-	if(!missing(platform)){
345
-		if(!platform %in% c("illumina", "affymetrix"))
346
-			stop("only illumina and affymetrix platforms are supported.")
347
-		if(platform=="illumina"){
348
-			chipList = c("human1mv1c",             # 1M
349
-			"human370v1c",            # 370CNV
350
-			"human650v3a",            # 650Y
351
-			"human610quadv1b",        # 610 quad
352
-			"human660quadv1a",        # 660 quad
353
-			"human370quadv3c",        # 370CNV quad
354
-			"human550v3b",            # 550K
355
-			"human1mduov3b")          # 1M Duo
356
-		}
357
-		if(platform=="affymetrix"){
358
-			chipList=c("genomewidesnp6", "genomewidesnp5")
359
-		}
360
-	} else{
361
-		chipList <- list()
362
-		chipList$affymetrix <- c("genomewidesnp6","genomewidesnp5")
363
-		chipList$illumina <- c("human370v1c",
364
-				       "human370quadv3c",
365
-				       "human550v3b",
366
-				       "human650v3a",
367
-				       "human610quadv1b",
368
-				       "human660quadv1a",
369
-				       "human1mduov3b")
370
-	}
371
-	return(chipList)
346
+validCdfNames <- function(){
347
+	c("genomewidesnp6",
348
+	  "genomewidesnp5",
349
+	  "human370v1c",
350
+	  "human370quadv3c",
351
+	  "human550v3b",
352
+	  "human650v3a",
353
+	  "human610quadv1b",
354
+	  "human660quadv1a",
355
+	  "human1mduov3b")
372 356
 }
373 357
 
374
-isValidCdfName <- function(cdfName, platform){
375
-	chipList <- validCdfNames(platform)
376
-	if(!(cdfName %in% chipList)){
358
+##validCdfNames <- function(platform){
359
+##	if(!missing(platform)){
360
+##		if(!platform %in% c("illumina", "affymetrix"))
361
+##			stop("only illumina and affymetrix platforms are supported.")
362
+##		if(platform=="illumina"){
363
+##			chipList = c("human1mv1c",             # 1M
364
+##			"human370v1c",            # 370CNV
365
+##			"human650v3a",            # 650Y
366
+##			"human610quadv1b",        # 610 quad
367
+##			"human660quadv1a",        # 660 quad
368
+##			"human370quadv3c",        # 370CNV quad
369
+##			"human550v3b",            # 550K
370
+##			"human1mduov3b")          # 1M Duo
371
+##		}
372
+##		if(platform=="affymetrix"){
373
+##			chipList=c("genomewidesnp6", "genomewidesnp5")
374
+##		}
375
+##	} else{
376
+##		chipList <- list()
377
+##		chipList$affymetrix <- c("genomewidesnp6","genomewidesnp5")
378
+##		chipList$illumina <- c("human370v1c",
379
+##				       "human370quadv3c",
380
+##				       "human550v3b",
381
+##				       "human650v3a",
382
+##				       "human610quadv1b",
383
+##				       "human660quadv1a",
384
+##				       "human1mduov3b")
385
+##	}
386
+##	return(chipList)
387
+##}
388
+isValidCdfName <- function(cdfName){
389
+	chipList <- validCdfNames()
390
+	result <- cdfName %in% chipList	
391
+	if(!(result)){
377 392
 		warning("cdfName must be one of the following: ",
378 393
 			chipList)
379 394
 	}
380
-	result <- cdfName %in% chipList
381 395
 	return(result)
382 396
 }
397
+
398
+whichPlatform <- function(cdfName){
399
+	index <- grep("genomewidesnp", cdfName)
400
+	if(length(index) > 0){
401
+		platform <- "affymetrix"
402
+	} else{
403
+		index <- grep("human", cdfName)
404
+		platform <- "illumina"
405
+	}
406
+	return(platform)
407
+}
408
+
409
+
410
+##isValidCdfName <- function(cdfName, platform){
411
+##	chipList <- validCdfNames(platform)
412
+##	if(!(cdfName %in% chipList)){
413
+##		warning("cdfName must be one of the following: ",
414
+##			chipList)
415
+##	}
416
+##	result <- cdfName %in% chipList
417
+##	return(result)
418
+##}
383 419
 	
384 420
 	
385 421
 	
... ...
@@ -558,19 +594,19 @@ computeCopynumber <- function(object,
558 594
 			      bias.adj=FALSE,
559 595
 			      batch,
560 596
 			      SNRmin=5,
561
-			      cdfName,
562
-			      platform=c("affymetrix", "illumina")[1], ...){
597
+			      cdfName, ...){
563 598
 	if(class(object) != "CrlmmSetList") stop("object must be of class ClrmmSetList")
564 599
 	if(missing(cdfName))
565 600
 		cdfName <- annotation(object)
566
-	if(!isValidCdfName(cdfName, platform=platform)) stop(cdfName, " not supported.")	
601
+	if(!isValidCdfName(cdfName)) stop(cdfName, " not supported.")
602
+	platform <- whichPlatform(cdfName)
567 603
 	if(ncol(object) < 10)
568 604
 		stop("Must have at least 10 samples in each batch to estimate model parameters....preferably closer to 90 samples per batch")
569 605
 	##require(oligoClasses)
570 606
 	if(missing(CHR)) stop("Must specify CHR")
571 607
 	if(CHR == 24) stop("Nothing available yet for chromosome Y")
572 608
 	if(missing(batch)) {
573
-		message("'batch' missing.  Assuming all samples were processed together in the same batch.")
609
+		message("'batch' missing.  Assuming all samples in the CrlmmSetList object were processed together in the same batch.")
574 610
 		batch <- rep("A", ncol(object))
575 611
 	}
576 612
 	if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples")
... ...
@@ -751,7 +787,7 @@ updateNuPhi <- function(crlmmSetList, cnSet){
751 787
 ##			   ...)	
752 788
 }
753 789
 
754
-list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
790
+list2locusSet <- function(envir, ABset, NPset, CHR, cdfName){
755 791
 	if(missing(CHR)) stop("Must specify chromosome")
756 792
 	pkgname <- paste(cdfName, "Crlmm", sep="")	
757 793
 	path <- system.file("extdata", package=pkgname)
... ...
@@ -879,7 +915,7 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){
879 915
 		      assayData=assayData,
880 916
 		      featureData=fD,
881 917
 		      phenoData=phenodata,
882
-		      annotation="genomewidesnp6")
918
+		      annotation=cdfName)
883 919
 	cnset <- thresholdCopyNumberSet(cnset)
884 920
 	return(cnset)
885 921
 }
... ...
@@ -1087,6 +1123,8 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk
1087 1123
 		##chrX:1-2,709,520 and chrX:154584237-154913754, respectively
1088 1124
 		##par:pseudo-autosomal regions
1089 1125
 		pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754)
1126
+		##in case some of the cnProbes are not annotated
1127
+		pseudoAR[is.na(pseudoAR)] <- FALSE
1090 1128
 		gender <- envir[["gender"]]
1091 1129
 		mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE)
1092 1130
 		mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE)
... ...
@@ -1099,8 +1137,10 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk
1099 1137
 		phi1 <- 2^(Yhat1)
1100 1138
 		phi2 <- 2^(Yhat2)
1101 1139
 		nu1 <- 2^(mus[, 1]) - phi1
1102
-		nu2 <- 2^(mus[, 2]) - 2*phi2		
1103
-		nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR]
1140
+		nu2 <- 2^(mus[, 2]) - 2*phi2
1141
+		if(any(pseudoAR)){
1142
+			nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR]
1143
+		}
1104 1144
 		CT1 <- 1/phi1*(NP[, gender=="male"]-nu1)
1105 1145
 		CT2 <- 1/phi2*(NP[, gender=="female"]-nu2)
1106 1146
 		CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1))
... ...
@@ -1932,10 +1972,22 @@ thresholdModelParams <- function(object, MIN=2^3){
1932 1972
 setMethod("update", "character", function(object, ...){
1933 1973
 	crlmmFile <- object
1934 1974
 	for(i in seq(along=crlmmFile)){
1935
-		cat("Processing ", clrmmFile[i], "...\n")
1975
+		cat("Processing ", crlmmFile[i], "...\n")
1936 1976
 		load(crlmmFile[i])
1937 1977
 		crlmmSetList <- get("crlmmSetList")
1938
-		crlmmSetList <- update(crlmmSetList, ...)
1978
+		if(length(crlmmSetList) == 3) next()  ##copy number object already present. 
1979
+		if(!"chromosome" %in% fvarLabels(crlmmSetList[[1]])){
1980
+			featureData(crlmmSetList[[1]]) <- addFeatureAnnotation(crlmmSetList)
1981
+		} 
1982
+		CHR <- unique(chromosome(crlmmSetList[[1]]))		
1983
+		if(CHR==24){
1984
+			message("skipping chromosome 24")
1985
+			next()
1986
+		}
1987
+		cat("----------------------------------------------------------------------------\n")
1988
+		cat("-        Estimating copy number for chromosome", CHR, "\n")
1989
+		cat("----------------------------------------------------------------------------\n")		
1990
+		crlmmSetList <- update(crlmmSetList, CHR=CHR, ...)
1939 1991
 		save(crlmmSetList, file=crlmmFile[i])
1940 1992
 		rm(crlmmSetList); gc();
1941 1993
 	}
... ...
@@ -6,7 +6,7 @@
6 6
 readIdatFiles <- function(sampleSheet=NULL,
7 7
 			  arrayNames=NULL,
8 8
 			  ids=NULL,
9
-			  path=".",
9
+			  path="",
10 10
 			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
11 11
 			  highDensity=FALSE,
12 12
 			  sep="_",
... ...
@@ -50,12 +50,19 @@ readIdatFiles <- function(sampleSheet=NULL,
50 50
 	       stop("Cannot find .idat files")
51 51
        if(length(grnfiles)!=length(redfiles))
52 52
 	       stop("Cannot find matching .idat files")
53
-       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
54
-	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
55
-		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
53
+       if(path != ""){
54
+	       grnidats = file.path(path, grnfiles)
55
+	       redidats = file.path(path, redfiles)
56
+       }  else {
57
+	       grnidats <- grnfiles
58
+	       redidats <- redfiles
56 59
        }
57
-       grnidats = file.path(path, grnfiles)
58
-       redidats = file.path(path, redfiles)
60
+       if(!all(file.exists(grnfiles))) stop("Missing some of the *Grn.idat files")
61
+       if(!all(file.exists(redfiles))) stop("Missing some of the *Red.idat files")       
62
+##       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
63
+##	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
64
+##		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
65
+##       }
59 66
        headerInfo = list(nProbes = rep(NA, narrays),
60 67
                          Barcode = rep(NA, narrays),
61 68
                          ChipType = rep(NA, narrays),
... ...
@@ -1,5 +1,4 @@
1 1
 setValidity("CopyNumberSet", function(object) {
2
-	##msg <- validMsg(NULL, Biobase:::isValidVersion(object, "CopyNumberSet"))
3 2
 	msg <- validMsg(NULL, assayDataValidMembers(assayData(object), c("CA", "CB")))
4 3
 	if (is.null(msg)) TRUE else msg
5 4
 })
... ...
@@ -49,8 +49,8 @@ setMethod(".harmonizeDimnames", "CrlmmSetList", function(object){
49 49
 
50 50
 setMethod("A", "CrlmmSetList", function(object) A(object[[1]]))
51 51
 
52
-setMethod("addFeatureAnnotation", "CrlmmSetList", function(object, CHR){
53
-	if(missing(CHR)) stop("Must specificy chromosome")
52
+setMethod("addFeatureAnnotation", "CrlmmSetList", function(object, ...){
53
+	##if(missing(CHR)) stop("Must specificy chromosome")
54 54
 	cdfName <- annotation(object)
55 55
 	pkgname <- paste(cdfName, "Crlmm", sep="")	
56 56
 	path <- system.file("extdata", package=pkgname)
... ...
@@ -66,16 +66,26 @@ setMethod("addFeatureAnnotation", "CrlmmSetList", function(object, CHR){
66 66
 	names(position.snp) <- snps
67 67
 	position.np <- cnProbes[match(nps, rownames(cnProbes)), "position"]
68 68
 	names(position.np) <- nps
69
+
70
+	J <- grep("chr", colnames(snpProbes))
71
+	chr.snp <- snpProbes[match(snps, rownames(snpProbes)), J]
72
+	chr.np <- cnProbes[match(nps, rownames(cnProbes)), J]	
69 73
 	
70 74
 	position <- c(position.snp, position.np)
71
-	position <- position[match(featureNames(object), names(position))]
75
+	chrom <- c(chr.snp, chr.np)
76
+	ix <- match(featureNames(object), names(position))
77
+	position <- position[ix]
78
+	chrom <- chrom[ix]
79
+	##require(SNPchip)
80
+	chrom <- chromosome2integer(chrom)
81
+
72 82
 	stopifnot(identical(names(position), featureNames(object)))
73 83
 	if(sum(duplicated(names(position))) > 0){
74 84
 		warning("Removing rows with NA identifiers...")
75 85
 		##RS: fix this
76 86
 		I <- which(!is.na(names(position)))
77 87
 	}  else I <- seq(along=names(position))
78
-	fd <- data.frame(cbind(CHR,
88
+	fd <- data.frame(cbind(chrom[I],
79 89
 			       position[I]))
80 90
 	colnames(fd) <- c("chromosome", "position")
81 91
 	rownames(fd) <- featureNames(object)
... ...
@@ -27,8 +27,10 @@ options(prompt="R> ")
27 27
 @ 
28 28
 
29 29
 <<>>=
30
-library(Biobase)
31 30
 library(crlmm)
31
+@ 
32
+
33
+<<echo=FALSE, eval=TRUE>>=
32 34
 setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k")
33 35
 @ 
34 36
 
... ...
@@ -55,14 +57,21 @@ crlmmWrapper(sampleSheet=samplesheet5,
55 57
 	     platform="illumina")
56 58
 @ 
57 59
 
58
-Note: the code below is run for testing purposes only. None of these
59
-functions are exported, so would not routinely be run directly by the
60
-user.  A typical analysis would involve runnning crlmmWrapper() followed
61
-by update() to obtain copy number.
60
+Samples with low signal to noise ratios tend to have a lot of variation
61
+in the point estimates of copy number.  One may want to exclude these
62
+samples.
62 63
 
63 64
 
64 65
 <<SNR>>=
65
-hist(crlmmOut$SNR) ##approx. 5-fold higher than what we see in Affy!
66
+hist(crlmmOut$SNR)
67
+@ 
68
+
69
+Run update on the CrlmmSetList object to obtain copy number. Estimate
70
+copy number for chromosome 1.
71
+
72
+<<>>=
73
+CHR <- 1
74
+filename <- paste(platedir, "/CrlmmSetList_", CHR, ".rda", sep="")
66 75
 @ 
67 76
 
68 77
 \end{document}
... ...
@@ -9,7 +9,7 @@
9 9
 }
10 10
 \usage{
11 11
 computeCopynumber(object, CHR, bias.adj=FALSE, batch, SNRmin=5, cdfName,
12
-platform=c("affymetrix", "illumina")[1], ...)
12
+...)
13 13
 }
14 14
 \arguments{
15 15
   \item{object}{object of class \code{CrlmmSetList}.}
... ...
@@ -21,7 +21,6 @@ platform=c("affymetrix", "illumina")[1], ...)
21 21
   \item{SNRmin}{The minimum value for the SNR -- we suggest 5. Samples
22 22
     with SNR below SNRmin are dropped.}
23 23
   \item{cdfName}{Annotation package }
24
-  \item{platform}{Character string--must be eitheraffymetrix or illumina.}  
25 24
   \item{\dots}{arguments to \code{.computeCopynumber}.}
26 25
 }
27 26
 
... ...
@@ -8,7 +8,7 @@
8 8
   .idat files of Infinium II genotyping BeadChips}
9 9
 
10 10
 \usage{
11
-readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
11
+readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path="",
12 12
               arrayInfoColNames=list(barcode="SentrixBarcode_A",
13 13
                                      position="SentrixPosition_A"),
14 14
               highDensity=FALSE, sep="_",