Browse code

computeCopynumber returns CrlmmSetList object

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@40618 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 14/07/2009 17:22:19
Showing 13 changed files

... ...
@@ -199,3 +199,24 @@ is decoded and scanned
199 199
 
200 200
 * fixed bugs in biasAdj and biasAdjNP
201 201
 
202
+2009-07-09 R Scharpf - committed version 1.3.10
203
+
204
+* changed defaults for crlmmWrapper: save.it=FALSE
205
+
206
+* splitByChr method looks for 'chr' or 'chromosome' in colnames 
207
+
208
+* sample-specific standard deviations in the .getEmission function
209
+
210
+* add check for duplicated positions in creating locusset object
211
+
212
+* computeCopynumber returns an object of class CrlmmSetList
213
+  
214
+  - dimnames for the elements in the list are the same
215
+
216
+  - elements in the list are ordered by chromosome and physical
217
+    position
218
+
219
+  - copy number parameters in the featureData of the CopyNumberSet
220
+    element are thresholded
221
+
222
+
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.9
4
+Version: 1.3.10
5 5
 Date: 2009-06-17
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -8,12 +8,12 @@ importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet,
8 8
 importClassesFrom(oligoClasses, SnpLevelSet)
9 9
 
10 10
 importMethodsFrom(Biobase, annotation, "annotation<-", annotatedDataFrameFrom,
11
-		  assayData, "assayData<-", combine,
11
+		  assayData, "assayData<-", combine, dims,
12 12
 		  experimentData, "experimentData<-",
13 13
                   fData, featureData, "featureData<-", featureNames,
14 14
 		  fvarMetadata, fvarLabels,
15 15
                   pData, phenoData, "phenoData<-", pubMedIds, rowMedians, sampleNames, scanDates,
16
-                  "scanDates<-", storageMode, "storageMode<-", updateObject)
16
+                  "scanDates<-", storageMode, "storageMode<-", updateObject, varLabels)
17 17
 
18 18
 
19 19
 ##importMethodsFrom(oligoClasses, chromosome, copyNumber, position)
... ...
@@ -24,7 +24,7 @@ importFrom(oligoClasses, chromosome, copyNumber, position, calls)
24 24
 ##importFrom(methods, "@<-", callNextMethod, new, validObject)
25 25
 
26 26
 importFrom(Biobase, assayDataElement, assayDataElementNames,
27
-           assayDataElementReplace, assayDataNew, classVersion)
27
+           assayDataElementReplace, assayDataNew, classVersion, validMsg)
28 28
 
29 29
 importFrom(graphics, abline, axis, layout, legend, mtext, par, plot,
30 30
            polygon, rect, segments, text, points)
... ...
@@ -46,10 +46,11 @@ importFrom(mvtnorm, dmvnorm)
46 46
 importFrom(ellipse, ellipse)
47 47
 
48 48
 ##S3method(ellipse,CopyNumberSet)
49
-
49
+##S3method(boxplot,CrlmmSetList)
50 50
 exportClasses(ABset, CopyNumberSet, CrlmmSetList)
51 51
 ##S3method(ellipse, CopyNumberSet)
52
-exportMethods(A, B,
52
+exportMethods("[", ##"[[",
53
+	      "$", A, B,
53 54
 	      calls,
54 55
 	      CA,
55 56
 	      "CA<-",
... ...
@@ -62,7 +63,8 @@ exportMethods(A, B,
62 63
 	      show,
63 64
 	      snpIndex)
64 65
 
65
-export(celDates,
66
+export(batch,
67
+       celDates,
66 68
        calls,
67 69
        chromosome,
68 70
        cnrma,
... ...
@@ -72,7 +74,7 @@ export(celDates,
72 74
        crlmm,
73 75
        crlmmIllumina,
74 76
        crlmmWrapper,
75
-       ellipse,
77
+##       ellipse,
76 78
 ##       computeEmission,
77 79
        list.celfiles,
78 80
        position,
... ...
@@ -1,9 +1,10 @@
1 1
 setGeneric("A", function(object) standardGeneric("A"))
2 2
 setGeneric("B", function(object) standardGeneric("B"))
3
+setGeneric("batch", function(object) standardGeneric("batch"))
3 4
 ##setGeneric("calls", function(x) standardGeneric("calls"))
4 5
 setGeneric("confs", function(object) standardGeneric("confs"))
5
-setGeneric("CA", function(object) standardGeneric("CA"))
6
-setGeneric("CB", function(object) standardGeneric("CB"))
6
+setGeneric("CA", function(object, ...) standardGeneric("CA"))
7
+setGeneric("CB", function(object, ...) standardGeneric("CB"))
7 8
 setGeneric("CA<-", function(object, value) standardGeneric("CA<-"))
8 9
 setGeneric("CB<-", function(object, value) standardGeneric("CB<-"))
9 10
 ##setGeneric("chromosome", function(object) standardGeneric("chromosome"))
... ...
@@ -11,6 +12,7 @@ setGeneric("cnIndex", function(object) standardGeneric("cnIndex"))
11 12
 setGeneric("cnNames", function(object) standardGeneric("cnNames"))
12 13
 ##setGeneric("copyNumber", function(object) standardGeneric("copyNumber"))
13 14
 ##setGeneric("position", function(object) standardGeneric("position"))
15
+setGeneric(".harmonizeDimnames", function(object) standardGeneric(".harmonizeDimnames"))
14 16
 setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
15 17
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
16 18
 setGeneric("splitByChromosome", function(object, ...) standardGeneric("splitByChromosome"))
... ...
@@ -206,6 +206,8 @@ harmonizeSnpSet <- function(crlmmResult, ABset){
206 206
 }
207 207
 
208 208
 harmonizeDimnamesTo <- function(object1, object2){
209
+	#object2 should be a subset of object 1
210
+	object2 <- object2[featureNames(object2) %in% featureNames(object1), ]
209 211
 	object1 <- object1[match(featureNames(object2), featureNames(object1)), ]
210 212
 	object1 <- object1[, match(sampleNames(object2), sampleNames(object1))]
211 213
 	stopifnot(all.equal(featureNames(object1), featureNames(object2)))
... ...
@@ -251,7 +253,8 @@ crlmmIlluminaWrapper <- function(sampleSheet, outdir="./", cdfName,
251 253
 	
252 254
 	
253 255
 	
254
-crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.it,
256
+crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6",
257
+			 save.it=FALSE,
255 258
 			 splitByChr=TRUE, ...){
256 259
 	##no visible binding for res
257 260
 	if(!file.exists(file.path(outdir, "crlmmResult.rda"))){
... ...
@@ -261,9 +264,6 @@ crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.
261 264
 		message("Loading crlmmResult...")
262 265
 		load(file.path(outdir, "crlmmResult.rda"))
263 266
 	}
264
-
265
-	##- Make crlmmResult the same dimension as ABset
266
-	## -Create a list object that where rows and columns can be subset using '[' methods
267 267
 	if(!file.exists(file.path(outdir, "cnrmaResult.rda"))){
268 268
 		message("Quantile normalizing the copy number probes")		
269 269
 		cnrmaResult <- cnrma(filenames=filenames, cdfName=cdfName)
... ...
@@ -272,23 +272,25 @@ crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.
272 272
 		message("Loading cnrmaResult...")		
273 273
 		load(file.path(outdir, "cnrmaResult.rda"))
274 274
 	}
275
-##	loader("intensities.rda", pkgname=pkgname, envir=.crlmmPkgEnv)
276
-##	res <- get("res", envir=.crlmmPkgEnv)
277 275
 	load(file.path(outdir, "intensities.rda"))
278 276
 	ABset <- combineIntensities(res, cnrmaResult, cdfName)
279 277
 	scanDates(ABset) <- as.character(celDates(filenames))	
280 278
 	crlmmResult <- harmonizeSnpSet(crlmmResult, ABset)
281 279
 	stopifnot(all.equal(dimnames(crlmmResult), dimnames(ABset)))
282
-	crlmmList <- list(ABset,
283
-			  crlmmResult)
284
-	crlmmList <- as(crlmmList, "CrlmmSetList")
280
+	crlmmResults <- list(ABset,
281
+			     crlmmResult)
282
+	crlmmResults <- as(crlmmResults, "CrlmmSetList")
285 283
 	
286 284
 	if(splitByChr){
287 285
 		message("Saving by chromosome")
288
-		splitByChromosome(crlmmList, cdfName=cdfName, outdir=outdir)
286
+		splitByChromosome(crlmmResults, cdfName=cdfName, outdir=outdir)
289 287
 	} else{
290 288
 		message("Saving crlmmList object to ", outdir)
291
-		save(crlmmList, file=file.path(outdir, "crlmmList.rda"))
289
+		save(crlmmResults, file=file.path(outdir, "crlmmResults.rda"))
290
+	}
291
+	if(!save.it){
292
+		message("Cleaning up")
293
+		unlink(file.path(outdir, "intensities.rda"))
292 294
 	}
293 295
 	return()
294 296
 }
... ...
@@ -509,7 +511,18 @@ computeCopynumber <- function(object,
509 511
 	}
510 512
 	message("Organizing results...")			
511 513
 	locusSet <- list2locusSet(envir, ABset=ABset, NPset=NPset, CHR=CHR, cdfName=cdfName)
512
-	return(locusSet)
514
+	if(anyDuplicated(position(locusSet))){
515
+		message("duplicated physical positions removed from CopyNumberSet object")
516
+		locusSet <- locusSet[!duplicated(position(locusSet)), ]
517
+	}
518
+	message("Thresholding model parameters")
519
+	locusSet <- thresholdModelParams(locusSet)
520
+	object[[3]] <- locusSet
521
+	message("harmonizing dimnames of the elements in CrlmmSetList...")
522
+	object <- .harmonizeDimnames(object)
523
+	message("Reordering features by chromosome and physical position...")
524
+	object <- object[order(chromosome(object), position(object)), ]	
525
+	return(object)
513 526
 }
514 527
 
515 528
 cnIllumina <- function(object,
... ...
@@ -1841,21 +1854,30 @@ getParams <- function(object, batch){
1841 1854
 	return(params)
1842 1855
 }
1843 1856
 
1844
-computeEmission <- function(crlmmResults, cnset, copyNumberStates=0:5, MIN=2^3){
1857
+computeEmission <- function(crlmmResults, copyNumberStates=0:5, MIN=2^3,
1858
+			    EMIT.THR,
1859
+			    scaleSds=TRUE){
1845 1860
 	##threshold small nu's and phis
1846
-	cnset <- thresholdModelParams(cnset, MIN=MIN)
1861
+	cnset <- thresholdModelParams(crlmmResults[[3]], MIN=MIN)
1847 1862
 	index <- order(chromosome(cnset), position(cnset))
1863
+	
1848 1864
 	if(any(diff(index) > 1)) stop("must be ordered by chromosome and physical position")
1849 1865
 	emissionProbs <- array(NA, dim=c(nrow(cnset), ncol(cnset), length(copyNumberStates)))
1850 1866
 	dimnames(emissionProbs) <- list(featureNames(crlmmResults),
1851 1867
 					sampleNames(crlmmResults),
1852 1868
 					paste("copy.number_", copyNumberStates, sep=""))	
1853
-	batch <- cnset$batch
1854
-	for(i in seq(along=unique(batch))){
1869
+	b <- batch(cnset)
1870
+	for(i in seq(along=unique(b))){
1855 1871
 		if(i == 1) cat("Computing emission probabilities \n")
1856
-		message("batch ", unique(batch)[i], "...")
1857
-		emissionProbs[, batch == unique(batch)[i], ] <- .getEmission(crlmmResults, cnset, batch=unique(batch)[i],
1858
-				copyNumberStates=copyNumberStates)
1872
+		message("batch ", unique(b)[i], "...")
1873
+		emissionProbs[, b == unique(b)[i], ] <- .getEmission(crlmmResults, cnset, batch=unique(b)[i],
1874
+				copyNumberStates=copyNumberStates,
1875
+				scaleSds=scaleSds)
1876
+	}
1877
+	if(missing(EMIT.THR)){
1878
+		EMIT.THR <- quantile(emissionProbs, probs=0.25, na.rm=TRUE)
1879
+		message("Thresholding emission probabilities at a small negative value (", round(EMIT.THR, 1), ") to reduce influence of outliers.")
1880
+		emissionProbs[emissionProbs < EMIT.THR] <- EMIT.THR
1859 1881
 	}
1860 1882
 	emissionProbs
1861 1883
 }
... ...
@@ -1877,11 +1899,35 @@ thresholdModelParams <- function(object, MIN=2^3){
1877 1899
 	object
1878 1900
 }
1879 1901
 
1880
-.getEmission <- function(crlmmResults, cnset, batch, copyNumberStates){
1902
+.getEmission <- function(crlmmResults, cnset, batch, copyNumberStates, scaleSds=TRUE){
1881 1903
 	if(length(batch) > 1) stop("batch variable not unique")
1882 1904
 	crlmmResults <- crlmmResults[, cnset$batch==batch]
1883
-	cnset <- cnset[, cnset$batch == batch]	
1884
-	emissionProbs <- array(NA, dim=c(nrow(crlmmResults[[1]]), ncol(crlmmResults[[1]]), length(copyNumberStates)))
1905
+	cnset <- cnset[, cnset$batch == batch]
1906
+
1907
+##	a <- A(crlmmResults)
1908
+##	b <- B(crlmmResults)	
1909
+##	sds.a <- apply(log2(a), 2, sd, na.rm=TRUE)
1910
+##	sds.b <- apply(log2(b), 2, sd, na.rm=TRUE)
1911
+	if(scaleSds){
1912
+		a <- CA(crlmmResults)
1913
+		b <- CB(crlmmResults)
1914
+		sds.a <- apply(a, 2, sd, na.rm=TRUE)
1915
+		sds.b <- apply(b, 2, sd, na.rm=TRUE)	
1916
+	
1917
+		sds.a <- log2(sds.a/median(sds.a))
1918
+		sds.b <- log2(sds.b/median(sds.b))
1919
+		
1920
+		sds.a <- matrix(sds.a, nrow(cnset), ncol(cnset), byrow=TRUE)
1921
+		sds.b <- matrix(sds.b, nrow(cnset), ncol(cnset), byrow=TRUE)
1922
+
1923
+	} else sds.a <- sds.b <- matrix(0, nrow(cnset), ncol(cnset))
1924
+##	ca <- CA(cnset)
1925
+##	sds.ca <- apply(ca, 2, sd, na.rm=T)
1926
+##	sds.ca <- sds.ca/median(sds.ca)
1927
+##	sds.scale <- sds/median(sds)  #scale snp-specific variance by measure of the relative sample noise
1928
+	
1929
+	emissionProbs <- array(NA, dim=c(nrow(crlmmResults[[1]]),
1930
+				   ncol(crlmmResults[[1]]), length(copyNumberStates)))
1885 1931
 	snpset <- cnset[snpIndex(cnset), ]
1886 1932
 	params <- getParams(snpset, batch=batch)
1887 1933
 	##attach(params)
... ...
@@ -1911,7 +1957,12 @@ thresholdModelParams <- function(object, MIN=2^3){
1911 1957
 			if(CA > 0 & CB > 0) r <- corr
1912 1958
 			if(CA == 0 & CB == 0) r <- 0
1913 1959
 			muA <- log2(nuA+CA*phiA)
1914
-			muB <- log2(nuB+CB*phiB)		
1960
+			muB <- log2(nuB+CB*phiB)
1961
+
1962
+			sigmaA <- matrix(sigmaA, nrow=length(sigmaA), ncol=ncol(cnset), byrow=FALSE)
1963
+			sigmaB <- matrix(sigmaB, nrow=length(sigmaB), ncol=ncol(cnset), byrow=FALSE)
1964
+			sigmaA <- sigmaA+sds.a[snpIndex(crlmmResults), ]
1965
+			sigmaB <- sigmaB+sds.b[snpIndex(crlmmResults), ]			
1915 1966
 
1916 1967
 			##might want to allow the variance to be sample-specific
1917 1968
 			##TODO:
... ...
@@ -1954,9 +2005,10 @@ thresholdModelParams <- function(object, MIN=2^3){
1954 2005
 		mus <- as.numeric(matrix(log2(nuA + CT*phiA), nrow(cnset), ncol(cnset)))
1955 2006
 		##Again, should make sds sample-specific
1956 2007
 		sds.matrix <- matrix(sqrt(sig2A), nrow(cnset), ncol(cnset))
1957
-		sds <- as.numeric(matrix(sqrt(sig2A), nrow(cnset), ncol(cnset)))
1958
-		
1959
-		tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(cnset), ncol(cnset))
2008
+
2009
+		sds.matrix <- sds.matrix + sds.a[cnIndex(crlmmResults), ]
2010
+		sds <- as.numeric(sds.matrix)
2011
+		suppressWarnings(tmp <- matrix(dnorm(a, mean=mus, sd=sds), nrow(cnset), ncol(cnset)))
1960 2012
 		emissionProbs[cnIndex(crlmmResults), , k] <- log(tmp)
1961 2013
 	}
1962 2014
 	emissionProbs
... ...
@@ -3,8 +3,9 @@ setValidity("CopyNumberSet", function(object) {
3 3
 	msg <- validMsg(NULL, assayDataValidMembers(assayData(object), c("CA", "CB")))
4 4
 	if (is.null(msg)) TRUE else msg
5 5
 })
6
-setMethod("CA", "CopyNumberSet", function(object) assayData(object)[["CA"]]/100)
7
-setMethod("CB", "CopyNumberSet", function(object) assayData(object)[["CB"]]/100)
6
+##may want to allow thresholding here (... arg)
7
+setMethod("CA", "CopyNumberSet", function(object, ...) assayData(object)[["CA"]]/100)
8
+setMethod("CB", "CopyNumberSet", function(object, ...) assayData(object)[["CB"]]/100)
8 9
 setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
9 10
 		 function(object, value) assayDataElementReplace(object, "CB", value))
10 11
 setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"),
... ...
@@ -12,6 +13,14 @@ setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"),
12 13
 
13 14
 setMethod("chromosome", "CopyNumberSet", function(object) fData(object)$chromosome)
14 15
 
16
+setMethod("batch", "CopyNumberSet", function(object){
17
+	if("batch" %in% varLabels(object)){
18
+		result <- object$batch
19
+	} else {
20
+		stop("batch not in varLabels of CopyNumberSet")
21
+	}
22
+	return(result)
23
+})
15 24
 
16 25
 setMethod("copyNumber", "CopyNumberSet", function(object){
17 26
 	##ensure that 2 + NA = 2 by replacing NA's with zero
... ...
@@ -29,8 +38,8 @@ setMethod("copyNumber", "CopyNumberSet", function(object){
29 38
 setMethod("position", "CopyNumberSet", function(object) fData(object)$position)
30 39
 
31 40
 ##setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
32
-##ellipse.CopyNumberSet <- function(x, copynumber, ...){
33
-setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
41
+ellipse.CopyNumberSet <- function(x, copynumber, ...){
42
+##setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
34 43
 	##fittedOrder <- unique(sapply(basename(celFiles), function(x) strsplit(x, "_")[[1]][2]))
35 44
 	##index <- match(plates, fittedOrder)
36 45
 	if(nrow(x) > 1) stop("only 1 snp at a time")
... ...
@@ -61,7 +70,7 @@ setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){
61 70
 				      scale=scale), ...)
62 71
 		}
63 72
 	}
64
-})
73
+}
65 74
 
66 75
 
67 76
 
... ...
@@ -23,11 +23,28 @@ setMethod("[", "CrlmmSetList", function(x, i, j, ..., drop = FALSE){
23 23
 	as(x, "CrlmmSetList")	
24 24
 })
25 25
 
26
+##setReplaceMethod("[[", "CrlmmSetList", function(x, i, j, ..., value) {
27
+##	browser()
28
+##	##otherwise infinite recursion
29
+##	x <- as(x, "list")
30
+##	x[[i]] <- value
31
+##	x <- as(x, "CrlmmSetList")
32
+##	x <- .harmonizeDimnames(x)
33
+##	stopifnot(identical(featureNames(x[[i]]), featureNames(x[[1]])))
34
+##	return(x)
35
+##})
36
+
26 37
 setMethod("A", "CrlmmSetList", function(object) A(object[[1]]))
27 38
 setMethod("B", "CrlmmSetList", function(object) B(object[[1]]))
39
+
40
+setMethod("CA", "CrlmmSetList", function(object, ...) CA(object[[3]], ...))
41
+setMethod("CB", "CrlmmSetList", function(object, ...) CB(object[[3]], ...))
42
+
28 43
 setMethod("calls", "CrlmmSetList", function(object) calls(object[[2]]))
29 44
 setMethod("cnIndex", "CrlmmSetList", function(object) match(cnNames(object[[1]]), featureNames(object)))
30 45
 
46
+setMethod("copyNumber", "CrlmmSetList", function(object) copyNumber(object[[3]]))
47
+
31 48
 setMethod("combine", signature=signature(x="CrlmmSetList", y="CrlmmSetList"),
32 49
           function(x, y, ...){
33 50
 		  x.abset <- x[[1]]
... ...
@@ -52,8 +69,12 @@ setMethod("combine", signature=signature(x="CrlmmSetList", y="CrlmmSetList"),
52 69
 		  
53 70
 
54 71
 
72
+##setMethod("fData", "CrlmmSetList", function(object) featureNames(object[[1]]))
73
+
55 74
 setMethod("featureNames", "CrlmmSetList", function(object) featureNames(object[[1]]))
56 75
 
76
+setMethod("ncol", signature(x="CrlmmSetList"), function(x) ncol(x[[1]]))
77
+setMethod("nrow", signature(x="CrlmmSetList"), function(x) nrow(x[[1]]))
57 78
 setMethod("plot", signature(x="CrlmmSetList"),
58 79
 	  function(x, y, ...){
59 80
 		  A <- log2(A(x))
... ...
@@ -71,18 +92,51 @@ setMethod("points", signature(x="CrlmmSetList"),
71 92
 setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]]))
72 93
 setMethod("scanDates", "CrlmmSetList", function(object) scanDates(object[[1]]))
73 94
 setMethod("show", "CrlmmSetList", function(object){
74
-	show(object[[1]])
75
-	show(object[[2]])
95
+	for(i in seq(along=object)) show(object[[i]])
96
+})
97
+
98
+setMethod("chromosome", "CrlmmSetList", function(object) chromosome(object[[3]]))
99
+setMethod("position", "CrlmmSetList", function(object) position(object[[3]]))
100
+
101
+
102
+setMethod(".harmonizeDimnames", "CrlmmSetList", function(object){
103
+	i <- length(object)
104
+	while(i > 1){
105
+		object[[i-1]] <- harmonizeDimnamesTo(object[[i-1]], object[[i]])
106
+		i <- i-1
107
+	}
108
+	object
76 109
 })
110
+setMethod("dims", "CrlmmSetList", function(object) sapply(object, dim))
111
+setMethod("batch", "CrlmmSetList", function(object) batch(object[[3]]))
112
+setMethod("$", "CrlmmSetList", function(x, name) {
113
+	##if(!(name %in% .parameterNames()[output(x) != 0])){
114
+	if(length(x) != 3){
115
+		stop("'$' operature reserved for accessing parameter names in CopyNumberSet object.  CrlmmSetList must be of length 3")
116
+	}
117
+	j <- grep(name, fvarLabels(x[[3]]))	
118
+	if(length(j) < 1)
119
+		stop(name, " not in fvarLabels of CopyNumberSet object")
120
+	if(length(j) > 1){
121
+		warning("Multiple instances of ", name, " in fvarLabels.  Using the first instance")
122
+		j <- j[1]
123
+	}
124
+	param <- fData(x[[3]])[, j]
125
+	param
126
+})
127
+
128
+	
77 129
 setMethod("snpIndex", "CrlmmSetList", function(object) match(snpNames(object[[1]]), featureNames(object)))
78 130
 setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir){
79 131
 	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))	
80 132
 	load(file.path(path, "snpProbes.rda"))
81
-	load(file.path(path, "cnProbes.rda"))				
133
+	load(file.path(path, "cnProbes.rda"))
134
+	k <- grep("chr", colnames(snpProbes))
135
+	if(length(k) < 1) stop("chr or chromosome not in colnames(snpProbes)")
82 136
 	for(CHR in 1:24){
83 137
 		cat("Chromosome ", CHR, "\n")
84
-		snps <- rownames(snpProbes)[snpProbes$chr == CHR]
85
-		cnps <- rownames(cnProbes)[cnProbes$chr == CHR]
138
+		snps <- rownames(snpProbes)[snpProbes[, k] == CHR]
139
+		cnps <- rownames(cnProbes)[cnProbes[, k] == CHR]
86 140
 		index <- c(match(snps, featureNames(object)),
87 141
 			   match(cnps, featureNames(object)))
88 142
 		crlmmResults <- object[index, ]
... ...
@@ -35,5 +35,27 @@ B Carvalho
35 35
 
36 36
     - Need accessors
37 37
 
38
+    - how to store parameters for chromosome X (should be nu_A_male,
39
+      nu_A_female, etc.)?
40
+
41
+  o Provide a SNP- and sample-specific estimate of the variance for
42
+    computing emission probabilities.  (Currently, only SNP-specific)
43
+
44
+    (DONE -- version 1.3.10
45
+
46
+  o Allow a copy-neutral ROH state by not having an equal prior on the
47
+    ellipses.  For instance,
48
+
49
+    normal state:  1/4 AA, 1/2 AB, 1/4 BB
50
+    
51
+    copy-neutral ROH:  (1-epsilon)/2 AA,  epsilon AB,  (1-epsilon)/2
52
+    BB
53
+
54
+  o add caConfidence and cbConfidence slots (but what about the correlation)
55
+
56
+
57
+
58
+
59
+
38 60
 
39 61
 
... ...
@@ -47,6 +47,7 @@ scanDates(example.cnset)[1:5]
47 47
 <<requiredPackages>>=
48 48
 library(crlmm)
49 49
 library(genomewidesnp6Crlmm)
50
+library(Biobase)
50 51
 @ 
51 52
 
52 53
 Specify the complete path for the CEL files and a directory in which to
... ...
@@ -111,9 +112,9 @@ batch. Here we make a table of date versus ancestry:
111 112
 <<specifyBatch>>=
112 113
 sns <- sampleNames(crlmmResults)
113 114
 sns[1]
114
-batch <- substr(basename(sns), 13, 13)
115
-table(batch)
116
-table(format(as.POSIXlt(scanDates(crlmmResults)), "%d %b %Y"), batch)
115
+plate <- substr(basename(sns), 13, 13)
116
+table(plate)
117
+table(format(as.POSIXlt(scanDates(crlmmResults)), "%d %b %Y"), plate)
117 118
 @ 
118 119
 
119 120
 As all of these samples were run on the first week of March, we would
... ...
@@ -124,22 +125,35 @@ chemistry plate as an argument for \Robject{batch} in the
124 125
 \Robject{computeCopynumber} function.
125 126
 
126 127
 <<computeCopynumber>>=
127
-if(!exists("cnset")){
128
-	if(file.exists(file.path(outdir, paste("cnset_", CHR, ".rda", sep="")))) load(file.path(outdir, paste("cnset_", CHR, ".rda", sep="")))	
129
-	else {
130
-		cnset <- computeCopynumber(crlmmResults, SNRmin=5, batch=batch, CHR=CHR, cdfName="genomewidesnp6")
131
-		save(cnset, file=file.path(outdir, paste("cnset_", CHR, ".rda", sep="")))
132
-	}
128
+if(!exists("crlmmResults2")){
129
+	crlmmResults2 <- crlmmResults
130
+	crlmmResults <- computeCopynumber(crlmmResults, SNRmin=5, batch=plate, CHR=CHR, cdfName="genomewidesnp6")
131
+	scanDates(crlmmResults[[3]]) <- scanDates(crlmmResults[[3]])
133 132
 }
134
-scanDates(cnset) <- scanDates(crlmmResults)
135
-cnset <- cnset[order(chromosome(cnset), position(cnset)), ]
136
-crlmmResults <- crlmm:::harmonizeDimnamesTo(crlmmResults, cnset)
137 133
 @ 
138 134
 
135
+**Note: the number of samples in the \Robject{CrlmmSetList} object after
136
+copy number estimation may be fewer than the number of samples in the
137
+\Robject{CrlmmSetList} object after preprocessing/genotyping.  This
138
+occurs when 1 or more samples have a signal-to-noise ratio less than
139
+value passed to \Robject{SNRmin}.  By default, intermediate forms of the
140
+data are stored in one object to ensure that each element in the
141
+\Robject{CrlmmSetList} have the same ordering of probes and samples.
142
+The object returned by \Robject{computeCopynumber} is ordered by
143
+chromosome and physical position (useful for downstream methods that
144
+smooth the copy number as a function of the physical position).  **
145
+
146
+<<dimnamesMayDiffer>>=
147
+identical(featureNames(crlmmResults), featureNames(crlmmResults2)) ##not identical
148
+identical(sampleNames(crlmmResults), sampleNames(crlmmResults2))
149
+gc()
150
+@ 
151
+
152
+
139 153
 <<example.cnset, eval=FALSE, echo=FALSE>>=
140 154
 example.crlmmResults <- crlmmResults[1:1000, 1:100]
141
-example.cnset <- cnset[1:1000, 1:100]
142
-save(example.cnset, file="~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda")
155
+##example.cnset <- cnset[1:1000, 1:100]
156
+##save(example.cnset, file="~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda")
143 157
 save(example.crlmmResults, file="~/madman/Rpacks/crlmm/inst/scripts/example.crlmmResults.rda")
144 158
 @ 
145 159
 
... ...
@@ -151,8 +165,8 @@ additional robustness to this assumption.  Set the \Robject{bias.adj}
151 165
 argument to \Robject{TRUE}:
152 166
 
153 167
 <<biasAdjustment, eval=FALSE>>=
154
-cnset <- computeCopynumber(crlmmResults, SNRmin=5, batch=batch, CHR=CHR, 
155
-			   cdfName="genomewidesnp6", bias.adj=TRUE)
168
+crlmmResults[[3]] <- computeCopynumber(crlmmResults, SNRmin=5, batch=plate, CHR=CHR, 
169
+				       cdfName="genomewidesnp6", bias.adj=TRUE)
156 170
 @ 
157 171
 
158 172
 \section{Suggested plots}
... ...
@@ -165,12 +179,12 @@ integer.
165 179
 
166 180
 <<oneSample, fig=TRUE, width=8, height=4, include=FALSE, eval=FALSE>>=
167 181
 par(las=1)
168
-plot(position(cnset), copyNumber(cnset)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), 
182
+plot(position(crlmmResults), copyNumber(crlmmResults)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), 
169 183
      ylab="copy number", xlab="physical position (Mb)",
170
-     main=paste(sampleNames(cnset)[1], ", CHR:", unique(chromosome(cnset))))
171
-points(position(cnset)[cnIndex(cnset)], copyNumber(cnset)[cnIndex(cnset), 1],
184
+     main=paste(sampleNames(crlmmResults)[1], ", CHR:", unique(chromosome(crlmmResults))))
185
+points(position(crlmmResults)[cnIndex(crlmmResults)], copyNumber(crlmmResults)[cnIndex(crlmmResults), 1],
172 186
        pch=".", cex=2, col="lightblue")
173
-axis(1, at=pretty(range(position(cnset))), labels=pretty(range(position(cnset)))/1e6)
187
+axis(1, at=pretty(range(position(crlmmResults))), labels=pretty(range(position(crlmmResults)))/1e6)
174 188
 @ 
175 189
 
176 190
 <<idiogram, eval=FALSE, echo=FALSE>>=
... ...
@@ -216,7 +230,7 @@ for(i in indices[[j]]){
216 230
 <<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>=
217 231
 require(RColorBrewer)
218 232
 greens <- brewer.pal(9, "Greens")
219
-J <- split(1:ncol(cnset), cnset$batch)
233
+J <- split(1:ncol(crlmmResults), batch(crlmmResults))
220 234
 colors <- c("red", "blue", "green3")
221 235
 cex <- 0.6
222 236
 colors <- c("blue", greens[8], "red")
... ...
@@ -242,15 +256,15 @@ for(j in seq(along=indices)[1:10]){
242 256
 		     xlim=xlim, ylim=ylim,
243 257
 		     type="n")
244 258
 		if(plotpoints){
245
-			for(b in seq(along=unique(cnset$batch))){
259
+			for(b in seq(along=unique(batch(crlmmResults)))){
246 260
 				points(crlmmResults[i, J[[b]]], 
247 261
 				       pch=pch, 
248 262
 				       col=colors[b], bg=colors[b], cex=cex,
249 263
 				       xlim=xlim, ylim=ylim)
250 264
 			}
251 265
 		}
252
-		for(b in seq(along=unique(cnset$batch))){
253
-			ellipse(cnset[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd)
266
+		for(b in seq(along=unique(batch(crlmmResults)))){
267
+			ellipse(crlmmResults[i, J[[b]]], copynumber=2, col=colors[b], lwd=lwd)
254 268
 		}
255 269
 		##legend("bottomright", bty="n", legend=featureNames(crlmmResults)[i])
256 270
 		if(k == 1) {
... ...
@@ -264,10 +278,7 @@ for(j in seq(along=indices)[1:10]){
264 278
 dev.off()
265 279
 @
266 280
 
267
-<<problems, echo=FALSE, eval=FALSE>>=
268
-##8300117, 8469401, 8706404, 2296864, 8659838, 1936806, 4198761, 8584395
269 281
 
270
-@ 
271 282
 
272 283
 \begin{figure}
273 284
   \includegraphics[width=0.9\textwidth]{copynumber-predictionRegion}
... ...
@@ -285,14 +296,13 @@ with the Birdseye HMM, the same transition probabilities are specified.
285 296
 <<segment>>=
286 297
 library(VanillaICE)
287 298
 copyNumberStates <- 0:5
288
-require(Biobase)
289
-cnset <- crlmm:::thresholdModelParams(cnset)
299
+##crlmmResults <- crlmm:::thresholdModelParams(crlmmResults)
290 300
 if(!exists("fit")){
291 301
 	if(file.exists(file.path(outdir, paste("fit_", CHR, ".rda", sep="")))) load(file.path(outdir, paste("fit_", CHR, ".rda", sep="")))
292 302
 	else {
293
-		emission.cn <- computeEmission(crlmmResults, cnset, copyNumberStates)
294
-		tau <- transitionProbability(chromosome=chromosome(cnset),
295
-					     position=position(cnset),
303
+		emission.cn <- crlmm:::computeEmission(crlmmResults, copyNumberStates)
304
+		tau <- transitionProbability(chromosome=chromosome(crlmmResults),
305
+					     position=position(crlmmResults),
296 306
 					     TAUP=1e8)
297 307
 		initialP <- rep(1/length(copyNumberStates), length(copyNumberStates))
298 308
 		fit <- viterbi(initialStateProbs=log(initialP),
... ...
@@ -304,8 +314,7 @@ if(!exists("fit")){
304 314
 			       altered2normal=0.5,
305 315
 			       altered2altered=0.0025)
306 316
 		brks <- breaks(x=fit, states=copyNumberStates, position=tau[, "position"],
307
-			       chromosome=tau[, "chromosome"],
308
-			       sampleNames=sampleNames(cnset))
317
+			       chromosome=tau[, "chromosome"])
309 318
 	}
310 319
 }
311 320
 @ 
... ...
@@ -314,31 +323,27 @@ if(!exists("fit")){
314 323
 dir.create("figures")
315 324
 par(las=1, mfrow=c(1,1), ask=FALSE)
316 325
 bmp("figures/chr22plots%02d.png", width=800, height=500)
317
-for(j in 1:ncol(cnset)){
326
+for(j in 1:ncol(crlmmResults)){
318 327
 	cat(j, " ")
319 328
 	##A region that we would miss if we only looked at the polymorphic probes...is it real?
320
-	##index <- which(position(cnset) > 21.2*1e6 & position(cnset) < 21.6*1e6 & mns < 1)
329
+	##index <- which(position(crlmmResults) > 21.2*1e6 & position(crlmmResults) < 21.6*1e6 & mns < 1)
321 330
 	##fit <- fit[index,]
322 331
 	index <- 1:nrow(fit)
323
-	plot(0:1, type="n", xlim=range(position(cnset)[index]),
332
+	plot(0:1, type="n", xlim=range(position(crlmmResults)[index]),
324 333
 	     xaxt="n", ylim=c(-0.5, 6), 
325 334
 	     ylab="copy number", xlab="physical position (Mb)",
326
-	     main=paste(sampleNames(cnset)[j], ", CHR:", unique(chromosome(cnset))), 
335
+	     main=paste(sampleNames(crlmmResults)[j], ", CHR:", unique(chromosome(crlmmResults))), 
327 336
 	     lwd=2,
328 337
 	     col="blue")
329
-	points(position(cnset)[index], copyNumber(cnset)[index, j], pch=".", cex=2, col="grey60")
330
-	lines(position(cnset)[index], fit[index, j]-1, type="s", lwd=2, col="blue")
338
+	points(position(crlmmResults)[index], copyNumber(crlmmResults)[index, j], pch=".", cex=2, col="grey60")
339
+	lines(position(crlmmResults)[index], fit[index, j]-1, type="s", lwd=2, col="blue")
331 340
 	abline(h=0:4, col="grey60")
332
-	axis(1, at=pretty(range(position(cnset)[index])), labels=pretty(range(position(cnset)[index]))/1e6)
341
+	axis(1, at=pretty(range(position(crlmmResults)[index])), labels=pretty(range(position(crlmmResults)[index]))/1e6)
333 342
 }
334 343
 dev.off()
335 344
 @ 
336 345
 
337
-<<fixProblems, eval=FALSE>>=
338
-## NA18522, NA18978 (missed deletion), NA18944 (noisy), NA19098 and NA19130 (missed amplification), 
339
-## NA19138 (looks like deletion with contamination), NA19143 (missed amplification)
340
-## NA19204 (looks like homozygous deletion)
341
-@ 
346
+
342 347
 
343 348
 \section{Session information}
344 349
 <<sessionInfo, results=tex>>=
... ...
@@ -346,4 +351,6 @@ toLatex(sessionInfo())
346 351
 @ 
347 352
 
348 353
 
354
+
355
+
349 356
 \end{document}
... ...
@@ -2,6 +2,7 @@
2 2
 \Rdversion{1.1}
3 3
 \docType{class}
4 4
 \alias{CopyNumberSet-class}
5
+\alias{batch,CopyNumberSet-method}
5 6
 \alias{CA}
6 7
 \alias{CA<-}
7 8
 \alias{CA,CopyNumberSet-method}
... ...
@@ -40,14 +41,17 @@ Class \code{"\linkS4class{Versioned}"}, by class "eSet", distance 3.
40 41
 \section{Methods}{
41 42
   \describe{
42 43
 
43
-    \item{CA}{\code{signature(object = "CopyNumberSet")}: Extracts the
44
+    \item{batch}{\code{signature(object="CopyNumberSet")}: Extracts the
45
+    batch information used to estimate copy number.}
46
+
47
+    \item{CA}{\code{signature(object = "CopyNumberSet", ...)}: Extracts the
44 48
       copy number for allele 'A' at polymorphic loci. At nonpolymorphic
45 49
       loci, the total copy number is returned.}
46 50
 
47 51
     \item{CA<-}{\code{signature(object = "CopyNumberSet",
48 52
     value="matrix")}: Replaces CA matrix with supplied value.}    
49 53
 
50
-    \item{CB}{\code{signature(object = "CopyNumberSet")}:
54
+    \item{CB}{\code{signature(object = "CopyNumberSet", ...)}:
51 55
       Extracts copy number for allele 'B' at polymorphic loci. NAs are
52 56
       returned for nonpolymorphic loci.}    
53 57
 
... ...
@@ -3,13 +3,22 @@
3 3
 \docType{class}
4 4
 \alias{CrlmmSetList-class}
5 5
 \alias{[,CrlmmSetList-method}
6
+\alias{$,CrlmmSetList-method}
6 7
 \alias{A,CrlmmSetList-method}
7 8
 \alias{B,CrlmmSetList-method}
9
+\alias{batch,CrlmmSetList-method}
10
+\alias{CA,CrlmmSetList-method}
11
+\alias{CB,CrlmmSetList-method}
8 12
 \alias{calls,CrlmmSetList-method}
13
+\alias{chromosome,CrlmmSetList-method}
9 14
 \alias{cnIndex,CrlmmSetList-method}
10 15
 \alias{combine,CrlmmSetList,CrlmmSetList-method}
16
+\alias{copyNumber,CrlmmSetList-method}
17
+\alias{nrow,CrlmmSetList-method}
18
+\alias{ncol,CrlmmSetList-method}
11 19
 \alias{plot,CrlmmSetList,ANY-method}
12 20
 \alias{points,CrlmmSetList-method}
21
+\alias{position,CrlmmSetList-method}
13 22
 \alias{sampleNames,CrlmmSetList-method}
14 23
 \alias{scanDates,CrlmmSetList-method}
15 24
 \alias{show,CrlmmSetList-method}
... ...
@@ -59,17 +68,35 @@
59 68
 }
60 69
 \section{Methods}{
61 70
   \describe{
62
-    \item{"["}{\code{signature(x = "CrlmmSetList")}: subsets both the }
71
+    \item{"["}{\code{signature(x = "CrlmmSetList")}: subsets both the
72
+  features and samples of each element in th elist}
73
+
74
+    \item{"$"}{\code{signature(x = "CrlmmSetList")}: used to access
75
+    element from the \code{featureData} of the \code{CopyNumberSet}
76
+    element.  In particular, useful for extract the SNP- and
77
+    batch-specific parameters used to estimate copy number that are
78
+    currently stored in the \code{featureData} of \code{CopyNumberSet}
79
+    objects. }
63 80
   
64 81
     \item{"A"}{\code{signature(object = "CrlmmSetList")}: extracts the
65 82
       quantile normalized intensities for the A allele in the
66 83
       \code{ABset} element.}
67
-    
68 84
   
69 85
     \item{"B"}{\code{signature(object = "CrlmmSetList")}: extracts the
70 86
       quantile normalized intensities for the B allele in the
71 87
       \code{ABset} element.}
72 88
 
89
+    \item{"batch"}{\code{signature(object = "CrlmmSetList")}: extracts the
90
+      batch information used to estimate copy number.}    
91
+
92
+    \item{"CA"}{\code{signature(object = "CrlmmSetList")}: extracts the
93
+      copy number for allele A at polymorphic loci. For nonpolymorphic
94
+      probes, CA returns the total copy number.}
95
+  
96
+    \item{"CB"}{\code{signature(object = "CrlmmSetList")}: extracts the
97
+      copy number for allele B at polymorphic loci. For nonpolymorphic
98
+      probes, CB returns 'NA'.}    
99
+
73 100
     \item{"calls"}{\code{signature(object = "CrlmmSetList")}: extracts
74 101
       the genotype calls from the \code{SnpSet} element.}
75 102
 
... ...
@@ -78,7 +105,13 @@
78 105
 
79 106
     \item{"combine"}{\code{signature(x = "CrlmmSetList", y = "CrlmmSetList")}: combines
80 107
       objects of the class.  }
81
-
108
+    
109
+    \item{"nrow"}{\code{signature(x = "CrlmmSetList")}: Number of rows
110
+    (features) of each element in the list.  }
111
+    
112
+    \item{"ncol"}{\code{signature(x = "CrlmmSetList")}: Number of columns
113
+    (samples) of each element in the list.  }    
114
+	
82 115
     \item{"plot"}{\code{signature(x = "CrlmmSetList")}: For A versus B
83 116
       scatterplots.  }
84 117
 
85 118
new file mode 100644
... ...
@@ -0,0 +1,25 @@
1
+\name{batch}
2
+\Rdversion{1.1}
3
+\alias{batch}
4
+\title{
5
+  Stores information on the batch (e.g., chemistry plate) used to
6
+  process samples.
7
+}
8
+\description{
9
+  Parameters for estimating copy number are locus- and batch-specific.
10
+}
11
+\usage{
12
+batch(object)
13
+}
14
+\arguments{
15
+  \item{object}{An object of class \code{CrlmmSetList} or \code{CopyNumberSet}.
16
+  }
17
+}
18
+\value{
19
+  Typically, a character string or factor.
20
+}
21
+\author{
22
+  Rob Scharpf
23
+}
24
+\keyword{manip}
25
+
... ...
@@ -10,7 +10,7 @@
10 10
   confidence scores are assigned using the crlmm algorithm.
11 11
 }
12 12
 \usage{
13
-crlmmWrapper(filenames, outdir = "./", cdfName = "genomewidesnp6", save.it, splitByChr=TRUE, ...)
13
+crlmmWrapper(filenames, outdir = "./", cdfName = "genomewidesnp6", save.it=FALSE, splitByChr=TRUE, ...)
14 14
 }
15 15
 \arguments{
16 16
   \item{filenames}{