Browse code

Added method totalCopyNumber to compute the total copy number for CNSet class, regardless of whether assay data elements are ff objects.

This method should eventually supplant copyNumber. Add text regarding
how to obtain total copy number in the illumina_copynumber vignette

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/branches/RELEASE_2_6/madman/Rpacks/crlmm@48444 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 27/07/2010 03:01:30
Showing5 changed files

... ...
@@ -56,7 +56,7 @@ importFrom(ellipse, ellipse)
56 56
 importFrom(ff, ffdf)
57 57
 
58 58
 exportClasses(CNSetLM)
59
-exportMethods(open, "[", show, lM, copyNumber)
59
+exportMethods(open, "[", show, lM, copyNumber, totalCopyNumber)
60 60
 export(crlmm, 
61 61
        crlmmCopynumber, 
62 62
        crlmmIllumina, 
... ...
@@ -8,4 +8,5 @@ setGeneric("snpIndex", function(object) standardGeneric("snpIndex"))
8 8
 setGeneric("snpNames", function(object) standardGeneric("snpNames"))
9 9
 setGeneric("lM", function(object) standardGeneric("lM"))
10 10
 setGeneric("lM<-", function(object, value) standardGeneric("lM<-"))
11
+setGeneric("totalCopyNumber", function(object, i, j, ...) standardGeneric("totalCopyNumber"))
11 12
 
... ...
@@ -95,7 +95,7 @@ setMethod("computeCopynumber", "CNSet",
95 95
 
96 96
 setMethod("copyNumber", "CNSet", function(object){
97 97
 	I <- isSnp(object)
98
-	ffIsLoaded <- class(calls(object))[[1]]=="ff"
98
+	ffIsLoaded <- inherits(CA(object), "ff")
99 99
 	CA <- CA(object)
100 100
 	CB <- CB(object)
101 101
 	if(ffIsLoaded){
... ...
@@ -110,6 +110,33 @@ setMethod("copyNumber", "CNSet", function(object){
110 110
 	CN
111 111
 })
112 112
 
113
+setMethod("totalCopyNumber", "CNSet", function(object, i, j){
114
+	if(missing(i) & missing(j)){
115
+		if(inherits(CA(object), "ff")) stop("Must specify i and/or j for ff objects")
116
+	}
117
+	if(missing(i) & !missing(j)){
118
+		snp.index <- which(isSnp(object))	
119
+		cn.total <- CA(cnSet)[, j]	
120
+		cb <- CB(cnSet)[snp.index, j]	
121
+		cn.total[snp.index, ] <- cn.total[snp.index, ] + cb		
122
+	}
123
+	if(!missing(i) & missing(j)){
124
+		snp.index <- intersect(which(isSnp(object)), i)
125
+		cn.total <- CA(cnSet)[i, ]	
126
+		cb <- CB(cnSet)[snp.index, ]	
127
+		cn.total[snp.index, ] <- cn.total[snp.index, ] + cb				
128
+	}
129
+	if(!missing(i) & !missing(j)){
130
+		snp.index <- intersect(which(isSnp(object)), i)		
131
+		cn.total <- CA(cnSet)[i, j]	
132
+		cb <- CB(cnSet)[snp.index, j]	
133
+		cn.total[snp.index, ] <- cn.total[snp.index, ] + cb
134
+	}
135
+	cn.total <- cn.total/100
136
+	dimnames(cn.total) <- NULL
137
+	return(cn.total)
138
+})
139
+
113 140
 
114 141
 setMethod("ellipse", "CNSet", function(x, copynumber, batch, ...){
115 142
 	ellipse.CNSet(x, copynumber, batch, ...)
... ...
@@ -103,7 +103,9 @@ if(!exists("crlmmResult")){
103 103
 	} else {
104 104
 		message("Loading previously saved crlmm results")
105 105
 		load(file.path(outdir, "snpFile.rda"))
106
+		res <- get("res")
106 107
 		load(file.path(outdir, "cnFile.rda"))
108
+		cnAB <- get("cnAB")
107 109
 		load(file.path(outdir, "crlmmResult.rda"))
108 110
 	}
109 111
 }
... ...
@@ -205,6 +207,102 @@ if(!exists("cnSet")){
205 207
 }
206 208
 @ 
207 209
 
210
+\paragraph{Extracting the locus-level copy number estimates.}  At
211
+polymorphic loci, the total copy number is the sum of the number of
212
+copies of the A allele and the number of copies for the B allele.  At
213
+nonpolymorphic loci, the total copy number is the number of copies for
214
+the A allele.  (Note that NAs are recorded for the number of copies of
215
+the B allele at nonpolymorphic loci.)  In the following code, we show
216
+how the allele-specific estimates can be extracted when the assay data
217
+elements are of class matrix.  Users that used \Robject{ff} to reduce
218
+the memory footprint may skip this code chunk.  A simple check for the
219
+class of the \Robject{assayData} elements in the \Robject{cnSet} object is
220
+
221
+<<checkClass>>=
222
+(adClass <- class(CA(cnSet))[[1]])
223
+@ 
224
+
225
+We now compute the total copy number for the polymorphic markers.  The
226
+accessor \Robject{isSnp} is helpful for determining the indices of the
227
+polymorphic markers.  The \Rfunction{crlmmCopynumber} multiples the
228
+allele-specific estimates by 100 and saves as integers to reduce the
229
+file size.  The final step is to put the copy number estimates back on
230
+the original scale.  
231
+
232
+<<copynumberAccessors>>=
233
+if(adClass == "matrix"){
234
+	snp.index <- which(isSnp(cnSet))
235
+	ca <- CA(cnSet)
236
+	cb <- CB(cnSet)[snp.index, ]
237
+	cn.total <- ca
238
+	cn.total[snp.index, ] <- ca[snp.index, ] + cb
239
+	cn.total <- cn.total/100
240
+	dimnames(cn.total) <- NULL
241
+	cn.total[1:5, 1:5]
242
+##	rm(ca, cb); gc()
243
+}
244
+@ 
245
+
246
+We next illustrate how to extract copy number when the assay data
247
+elements inherit from the \Robject{ff} class.  Since the assay data
248
+elements are of class \Robject{matrix}, we convert these assayData
249
+elements to \Robjects{ff} objects (this step can be omitted as the the
250
+assay data elements are presumably already of class
251
+\Robject{ff}).
252
+
253
+<<convertToFF>>=
254
+if(require("ff")){
255
+	ca <- initializeBigMatrix("CA", nr=nrow(cnSet), nc=ncol(cnSet), vmode="integer", initdata=CA(cnSet))
256
+	cb <- initializeBigMatrix("CB", nr=nrow(cnSet), nc=ncol(cnSet), vmode="integer", initdata=CB(cnSet))
257
+	dimnames(cb) <- dimnames(ca) <- list(featureNames(cnSet), sampleNames(cnSet))
258
+	CA(cnSet) <- ca
259
+	CB(cnSet) <- cb
260
+}
261
+@ 
262
+
263
+More care is required when extracting copy number from the
264
+\Robject{ff} objects as these objects can potentially be very large.
265
+In particular, there is no advantage of utilizing \Robject{ff} objects
266
+to reduce RAM if one attempts to bring all of the data stored on disk
267
+to RAM.  Instead, we suggest extracting only subsets of the rows
268
+and/or a columns as needed.  Note that subsetting the \Robject{cnSet}
269
+object itself can be a particularly dangerous operation as all of the
270
+assay data elements are retrieved from disk. For instance, a dataset
271
+comprised of 10,000 samples with 1 million markers would have assay
272
+data elements that are each 1 million rows $\times$ 10,000 columns.
273
+The following command ({\it not evaluated}) would create a
274
+\Robject{CNSetLM} object in which the assay data elements are matrices
275
+with data in RAM and not on disk
276
+
277
+<<donotdo, eval=FALSE>>=
278
+cnSet[,]
279
+@ 
280
+
281
+Here, we give an example whereby total copy number is obtained for all
282
+of the markers on chromosome 1 for the first 20 samples.
283
+
284
+<<copyNumberWithFF>>=
285
+if(inherits(CA(cnSet), "ff")){
286
+	marker.index <- which(chromosome(cnSet) == 1)
287
+	snp.index <- which(isSnp(cnSet) & chromosome(cnSet) == 1)
288
+	sample.index <- 1:20
289
+	cn.total <- CA(cnSet)[marker.index, sample.index]
290
+	cb <- CB(cnSet)[snp.index, sample.index]	
291
+	cn.total[snp.index, ] <- cn.total[snp.index, ] + cb
292
+	cn.total <- cn.total/100
293
+	dimnames(cn.total) <- NULL
294
+	cn.total[1:5, 1:5]	
295
+}
296
+@ 
297
+
298
+The following helper function can facilitate access to the total copy
299
+number.
300
+
301
+<<copyNumberHelper>>=
302
+trace(totalCopyNumber, browser, signature="CNSet")
303
+cn.total2 <- totalCopyNumber(cnSet, i=which(chromosome(cnSet)==1), j=1:20)
304
+@ 
305
+
208 306
 A few simple visualizations may be helpful at this point. The first
209 307
 plot is a histogram of the signal to noise ratio of the sample -- an
210 308
 overall measure of how well the genotype clusters separate.  (This
211 309
new file mode 100644
... ...
@@ -0,0 +1,18 @@
1
+\name{totalCopyNumber-methods}
2
+\docType{methods}
3
+\alias{totalCopyNumber}
4
+\alias{totalCopyNumber-methods}
5
+\alias{totalCopyNumber,CNSet-method}
6
+\title{Method for computing total copy number from either ff}
7
+\description{
8
+ ~~ Methods for function \code{totalCopyNumber} in Package `crlmm' ~~
9
+}
10
+\section{Methods}{
11
+\describe{
12
+
13
+\item{\code{signature(object = "CNSet")}}{
14
+%%  ~~describe this method here~~
15
+}
16
+}}
17
+\keyword{methods}
18
+\keyword{ ~~ other possible keyword(s) ~~ }