Browse code

update computeCopynumber, added .R files for new classes/methods

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@40371 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 29/06/2009 12:29:11
Showing 13 changed files

... ...
@@ -174,6 +174,9 @@ is decoded and scanned
174 174
 
175 175
 * several changes to the copynumber vignette
176 176
 
177
+2009-06-29 R Scharpf - committed version 1.3.6
177 178
 
179
+* updated computeCopynumber
180
+* export A, B methods for ABset class
178 181
 
179 182
 
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.5
4
+Version: 1.3.6
5 5
 Date: 2009-06-17
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -45,9 +45,12 @@ importFrom(mvtnorm, dmvnorm)
45 45
 
46 46
 importFrom(ellipse, ellipse)
47 47
 
48
+S3method(ellipse,CopyNumberSet)
49
+
48 50
 exportClasses(ABset, CopyNumberSet, CrlmmSetList)
49 51
 ##S3method(ellipse, CopyNumberSet)
50
-exportMethods(calls,
52
+exportMethods(A, B,
53
+	      calls,
51 54
 	      CA,
52 55
 	      "CA<-",
53 56
 	      CB,
54 57
new file mode 100644
... ...
@@ -0,0 +1,18 @@
1
+setGeneric("A", function(object) standardGeneric("A"))
2
+setGeneric("B", function(object) standardGeneric("B"))
3
+##setGeneric("calls", function(x) standardGeneric("calls"))
4
+
5
+setGeneric("confs", function(object) standardGeneric("confs"))
6
+setGeneric("CA", function(object) standardGeneric("CA"))
7
+setGeneric("CB", function(object) standardGeneric("CB"))
8
+setGeneric("CA<-", function(object, value) standardGeneric("CA<-"))
9
+setGeneric("CB<-", function(object, value) standardGeneric("CB<-"))
10
+##setGeneric("chromosome", function(object) standardGeneric("chromosome"))
11
+setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
12
+setGeneric("cnNames", function(object) standardGeneric("cnNames"))
13
+##setGeneric("copyNumber", function(object) standardGeneric("copyNumber"))
14
+##setGeneric("position", function(object) standardGeneric("position"))
15
+setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
16
+setGeneric("snpNames", function(object) standardGeneric("snpNames"))
17
+setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
18
+
... ...
@@ -456,8 +456,7 @@ computeCopynumber <- function(object,
456 456
 			   bias.adj=FALSE,
457 457
 			   SNRmin=SNRmin,
458 458
 			   ...)
459
-	message("Organizing results...")	
460
-	locusSet <- list2locusSet(envir, ABset=ABset, NPset=NPset, CHR=CHR, cdfName=cdfName)
459
+
461 460
 	if(bias.adj){
462 461
 		message("Running bias adjustment...")
463 462
 		.computeCopynumber(chrom=CHR,
... ...
@@ -472,10 +471,9 @@ computeCopynumber <- function(object,
472 471
 				   bias.adj=TRUE,
473 472
 				   SNRmin=SNRmin,
474 473
 				   ...)
475
-		message("Organizing results...")			
476
-		locusSet <- list2locusSet(envir, ABset=ABset, NPset=NPset, CHR=CHR, cdfName=cdfName)
477
-		##.GlobalEnv[["locusSetAdj"]] <- locusSetAdj
478 474
 	}
475
+	message("Organizing results...")			
476
+	locusSet <- list2locusSet(envir, ABset=ABset, NPset=NPset, CHR=CHR, cdfName=cdfName)
479 477
 	return(locusSet)
480 478
 }
481 479
 
482 480
new file mode 100644
... ...
@@ -0,0 +1,11 @@
1
+setValidity("ABset", function(object) {
2
+	##msg <- validMsg(NULL, Biobase:::isValidVersion(object, "CopyNumberSet"))
3
+	msg <- validMsg(NULL, assayDataValidMembers(assayData(object), c("A", "B")))
4
+	if (is.null(msg)) TRUE else msg
5
+})
6
+setMethod("A", "ABset", function(object) assayData(object)[["A"]])
7
+setMethod("B", "ABset", function(object) assayData(object)[["B"]])
8
+
9
+
10
+
11
+
0 12
new file mode 100644
... ...
@@ -0,0 +1,65 @@
1
+setValidity("CopyNumberSet", function(object) {
2
+	##msg <- validMsg(NULL, Biobase:::isValidVersion(object, "CopyNumberSet"))
3
+	msg <- validMsg(NULL, assayDataValidMembers(assayData(object), c("CA", "CB")))
4
+	if (is.null(msg)) TRUE else msg
5
+})
6
+setMethod("CA", "CopyNumberSet", function(object) assayData(object)[["CA"]])
7
+setMethod("CB", "CopyNumberSet", function(object) assayData(object)[["CB"]])
8
+setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
9
+		 function(object, value) assayDataElementReplace(object, "CB", value))
10
+setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"),
11
+		 function(object, value) assayDataElementReplace(object, "CA", value))
12
+
13
+setMethod("chromosome", "CopyNumberSet", function(object) fData(object)$chromosome)
14
+setMethod("position", "CopyNumberSet", function(object) fData(object)$position)
15
+
16
+setMethod("copyNumber", "CopyNumberSet", function(object){
17
+	##ensure that 2 + NA = 2 by replacing NA's with zero
18
+	CA <- CA(object)
19
+	CB <- CB(object)
20
+	nas <- is.na(CA) & is.na(CB)
21
+	CA[is.na(CA)] <- 0
22
+	CB[is.na(CB)] <- 0
23
+	CN <- CA/100 + CB/100
24
+	##if both CA and CB are NA, report NA
25
+	CN[nas] <- NA
26
+	CN
27
+})
28
+
29
+##setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
30
+ellipse.CopyNumberSet <- function(x, copynumber, ...){
31
+	##fittedOrder <- unique(sapply(basename(celFiles), function(x) strsplit(x, "_")[[1]][2]))
32
+	##index <- match(plates, fittedOrder)
33
+	if(nrow(x) > 1) stop("only 1 snp at a time")
34
+	batch <- unique(x$batch)
35
+	if(length(batch) > 1) stop("batch variable not unique")
36
+	nuA <- as.numeric(fData(x)[, match(paste("nuA", batch, sep="_"), fvarLabels(x))])
37
+	nuB <- as.numeric(fData(x)[, match(paste("nuB", batch, sep="_"), fvarLabels(x))])	
38
+	phiA <- as.numeric(fData(x)[, match(paste("phiA", batch, sep="_"), fvarLabels(x))])
39
+	phiB <- as.numeric(fData(x)[, match(paste("phiB", batch, sep="_"), fvarLabels(x))])	
40
+	tau2A <- as.numeric(fData(x)[, match(paste("tau2A", batch, sep="_"), fvarLabels(x))])
41
+	tau2B <- as.numeric(fData(x)[, match(paste("tau2B", batch, sep="_"), fvarLabels(x))])
42
+	sig2A <- as.numeric(fData(x)[, match(paste("sig2A", batch, sep="_"), fvarLabels(x))])
43
+	sig2B <- as.numeric(fData(x)[, match(paste("sig2B", batch, sep="_"), fvarLabels(x))])
44
+	corrA.BB <- as.numeric(fData(x)[, match(paste("corrA.BB", batch, sep="_"), fvarLabels(x))])
45
+	corrB.AA <- as.numeric(fData(x)[, match(paste("corrB.AA", batch, sep="_"), fvarLabels(x))])
46
+	corr <- as.numeric(fData(x)[, match(paste("corr", batch, sep="_"), fvarLabels(x))])
47
+	for(CN in copynumber){
48
+		for(CA in 0:CN){
49
+			CB <- CN-CA
50
+			A.scale <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0))
51
+			B.scale <- sqrt(tau2B*(CB==0) + sig2B*(CB > 0))
52
+			scale <- c(A.scale, B.scale)
53
+			if(CA == 0 & CB > 0) rho <- corrA.BB
54
+			if(CA > 0 & CB == 0) rho <- corrB.AA
55
+			if(CA > 0 & CB > 0) rho <- corr
56
+			lines(ellipse(x=rho, centre=c(log2(nuA+CA*phiA),
57
+					     log2(nuB+CB*phiB)),
58
+				      scale=scale), ...)
59
+		}
60
+	}
61
+}
62
+
63
+
64
+
65
+
0 66
new file mode 100644
... ...
@@ -0,0 +1,90 @@
1
+setMethod("[", "CrlmmSetList", function(x, i, j, ..., drop = FALSE){
2
+            if (missing(drop)) drop <- FALSE
3
+            if (missing(i) && missing(j))
4
+              {
5
+                 if (length(list(...))!=0)
6
+                   stop("specify genes or samples to subset; use '",
7
+                        substitute(x), "$", names(list(...))[[1]],
8
+                        "' to access phenoData variables")
9
+                 return(x)
10
+               }
11
+            if (!missing(j)){
12
+              f1 <- function(x, j){
13
+                x <- x[, j]
14
+              }
15
+              x <- lapply(x, f1, j)
16
+            }
17
+            if(!missing(i)){
18
+              f2 <- function(x, i){
19
+                x <- x[i, ]
20
+              }
21
+              x <- lapply(x, f2, i)
22
+            }
23
+	as(x, "CrlmmSetList")	
24
+})
25
+
26
+setMethod("A", "CrlmmSetList", function(object) A(object[[1]]))
27
+setMethod("B", "CrlmmSetList", function(object) B(object[[1]]))
28
+setMethod("calls", "CrlmmSetList", function(object) calls(object[[2]]))
29
+setMethod("cnIndex", "CrlmmSetList", function(object) match(cnNames(object[[1]]), featureNames(object)))
30
+
31
+setMethod("combine", signature=signature(x="CrlmmSetList", y="CrlmmSetList"),
32
+          function(x, y, ...){
33
+		  x.abset <- x[[1]]
34
+		  y.abset <- y[[1]]
35
+
36
+		  x.snpset <- x[[2]]
37
+		  y.snpset <- y[[2]]
38
+
39
+		  abset <- combine(x.abset, y.abset)
40
+
41
+		  ##we have hijacked the featureData slot to store parameters.  Biobase will not allow combining our 'feature' data.
42
+		  warning("removing featureData")
43
+		  ##fd1 <- featureData(x.snpset)
44
+		  ##fd2 <- featureData(y.snpset)
45
+		  featureData(x.snpset) <- annotatedDataFrameFrom(calls(x.snpset), byrow=TRUE)
46
+		  featureData(y.snpset) <- annotatedDataFrameFrom(calls(y.snpset), byrow=TRUE)
47
+		  snpset <- combine(x.snpset, y.snpset)
48
+		  merged <- list(abset, snpset)
49
+		  merged <- as(merged, "CrlmmSetList")
50
+		  merged
51
+	  })
52
+		  
53
+
54
+
55
+setMethod("featureNames", "CrlmmSetList", function(object) featureNames(object[[1]]))
56
+setMethod("plot", signature(x="CrlmmSetList"),
57
+	  function(x, y, ...){
58
+		  A <- log2(A(x))
59
+		  B <- log2(B(x))
60
+		  plot(A, B, ...)
61
+	  })
62
+setMethod("points", signature(x="CrlmmSetList"),
63
+	  function(x, y, ...){
64
+		  A <- log2(A(x))
65
+		  B <- log2(B(x))
66
+		  points(A, B, ...)
67
+	  })
68
+setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]]))
69
+setMethod("scanDates", "CrlmmSetList", function(object) scanDates(object[[1]]))
70
+setMethod("show", "CrlmmSetList", function(object){
71
+	show(object[[1]])
72
+	show(object[[2]])
73
+})
74
+setMethod("snpIndex", "CrlmmSetList", function(object) match(snpNames(object[[1]]), featureNames(object)))
75
+setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir){
76
+	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))	
77
+	load(file.path(path, "snpProbes.rda"))
78
+	load(file.path(path, "cnProbes.rda"))				
79
+	for(CHR in 1:24){
80
+		cat("Chromosome ", CHR, "\n")
81
+		snps <- rownames(snpProbes)[snpProbes[, "chrom"] == CHR]
82
+		cnps <- rownames(cnProbes)[cnProbes[, "chrom"] == CHR]
83
+		index <- c(match(snps, featureNames(object)),
84
+			   match(cnps, featureNames(object)))
85
+		crlmmResults <- object[index, ]
86
+		save(crlmmResults, file=file.path(outdir, paste("crlmmResults_", CHR, ".rda", sep="")))
87
+	}
88
+})
89
+
90
+
0 91
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+setMethod("cnIndex", "eSet", function(object) match(cnNames(object), featureNames(object)))
2
+setMethod("cnNames", "eSet", function(object) featureNames(object)[grep("CN_", featureNames(object))])
3
+##setMethod("combine", signature=signature(x="eSet", y="eSet"),
4
+##	  function(x, y, ...){
5
+##		  ##Check that both x and y are valid objects
6
+##		  if(!validObject(x)) stop("x is not a valid object")
7
+##		  if(!validObject(y)) stop("y is not a valid object")
8
+##
9
+##		  if(annotation(x) != annotation(y)) stop("must have same value for annotation slot")
10
+##		  if(class(x) != class(y)) stop("objects must have the same class")
11
+##		  if(storageMode(assayData(x)) != storageMode(assayData(y))){
12
+##			  stop("objects must have same storage mode for assayData")
13
+##		  }
14
+##		  fd <- combine(featureData(x), featureData(y))
15
+##		  pd <- combine(phenoData(x), phenoData(y))            
16
+##		  ad.x <- as.list(assayData(x))
17
+##		  ad.y <- as.list(assayData(y))
18
+##		  ad.xy <- mapply(rbind, ad.x, ad.y, SIMPLIFY=FALSE)
19
+##		  id.x <- match(rownames(ad.xy[[1]]), featureNames(fd))
20
+##		  ee <- combine(experimentData(x), experimentData(y))
21
+##		  assayData(x) <- ad.xy
22
+##		  storageMode(assayData(x)) <- storageMode(assayData(y))            
23
+##		  experimentData(x) <- ee
24
+##		  featureData(x) <- fd
25
+##		  phenoData(x) <- pd
26
+##		  x
27
+##          })
28
+setMethod("snpIndex", "eSet", function(object) match(snpNames(object), featureNames(object)))
29
+setMethod("snpNames", "eSet", function(object) featureNames(object)[grep("SNP_", featureNames(object))])
30
+
31
+
... ...
@@ -19,16 +19,21 @@ B Carvalho
19 19
 
20 20
 - crlmm should return results in CHP files
21 21
 
22
-#####################################
23
-### FOR CNRMA
24
-#####################################
25
-- Bias adjustment for X chromosome
22
+  ***********************************************************
23
+  *                                                         *       
24
+  *                FOR CNRMA                                *
25
+  *                                                         *
26
+  ***********************************************************
27
+
28
+  o Bias adjustment for X chromosome
29
+
30
+  o Adjust nu for altered copy number (crosshyb)
26 31
 
27
-- adjust nu for altered copy number (crosshyb)
32
+  o Should store parameters in something besides the featureData slot
28 33
 
29
-- should store parameters in something besides the featureData slot
34
+    - Perhaps add a matrix slot for parameters and a slot for batch.
30 35
 
31
-        - Perhaps add a slot for parameters and a slot for batch.
36
+    - Need accessors
32 37
 
33 38
 
34 39
 
... ...
@@ -146,24 +146,13 @@ save(example.crlmmResults, file="~/madman/Rpacks/crlmm/inst/scripts/example.crlm
146 146
 The above algorithm for estimating copy number is predicated on the
147 147
 assumption that most samples within a batch have copy number 2 at any
148 148
 given locus.  For common copy number variants, this assumption may not
149
-hold.  An additional iteration using a bias correction can improve the
150
-estimates.  Set the \Robject{bias.adj} argument to \Robject{TRUE}.
151
-
152
-<<biasAdjustment, eval=FALSE, echo=FALSE>>=
153
-library(oligoClasses);library(genefilter)
154
-source("~/madman/Rpacks/crlmm/R/AllClasses.R")
155
-source("~/madman/Rpacks/crlmm/R/AllGenerics.R")
156
-source("~/madman/Rpacks/crlmm/R/cnrma-functions.R")
157
-source("~/madman/Rpacks/crlmm/R/methods-eSet.R")
158
-source("~/madman/Rpacks/crlmm/R/methods-ABset.R")
159
-source("~/madman/Rpacks/crlmm/R/methods-CrlmmSetList.R")
160
-source("~/madman/Rpacks/crlmm/R/methods-CopyNumberSet.R")
161
-source("~/madman/Rpacks/crlmm/R/utils.R")
162
-.crlmmPkgEnv <- new.env()
163
-envir2 <- envir
164
-envir <- getCopyNumberEnvironment(crlmmSetList, cnSet)
165
-updateNuPhi(crlmmSetList, cnSet)
166
-##TODO
149
+hold.  An additional iteration using a bias correction provides
150
+additional robustness to this assumption.  Set the \Robject{bias.adj}
151
+argument to \Robject{TRUE}:
152
+
153
+<<biasAdjustment, eval=FALSE>>=
154
+cnset <- computeCopynumber(crlmmResults, SNRmin=5, batch=batch, CHR=CHR, 
155
+			   cdfName="genomewidesnp6", bias.adj=TRUE)
167 156
 @ 
168 157
 
169 158
 \section{Suggested plots}
... ...
@@ -352,8 +341,8 @@ dev.off()
352 341
 @ 
353 342
 
354 343
 \section{Session information}
355
-<<>>=
356
-sessionInfo()
344
+<<sessionInfo, results=tex>>=
345
+toLatex(sessionInfo())
357 346
 @ 
358 347
 
359 348
 
360 349
Binary files a/inst/scripts/copynumber.pdf and b/inst/scripts/copynumber.pdf differ
... ...
@@ -46,7 +46,9 @@ if(!exists("res.rda")){
46 46
 		load(file.path(outdir, "crlmmOut.rda"))		
47 47
 	}
48 48
 }
49
-@ 
49
+@
50
+
51
+TODO: The code below needs to be udpated for the Illumina platform.
50 52
 
51 53
 <<SNR>>=
52 54
 hist(crlmmOut$SNR) ##approx. 5-fold higher than what we see in Affy!