Browse code

updated vignettes for illumina and affy

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@42431 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 16/10/2009 12:08:21
Showing 11 changed files

... ...
@@ -290,3 +290,18 @@ is decoded and scanned
290 290
 * updated addFeatureAnnotation so that CHR arg not required
291 291
 * fixed bug in nonpolymorphic function -- function checks whether any regions are pseudoautosomal 
292 292
 * fixed bug in list2locusset function (no longer assigns genomewidesnp6 as the default annotation)
293
+
294
+2009-10-16 R. Scharpf - committed version 1.3.23
295
+
296
+ * 100*CA, 100*CB are stored in the CA, CB assayDataElements.  The
297
+   replacement method automatically converts a matrix on the copy
298
+   number scale to an integer representation by scaling by a factor of
299
+   100. The accessor methods for CA, CB automatically divide by 100 to
300
+   return estimates back on the original copy number scale.
301
+
302
+ * more debugging of readIdatFiles and illumina copy number vignette.
303
+
304
+ * updated illumina_copynumber vignette
305
+
306
+
307
+
... ...
@@ -1,13 +1,13 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.22
4
+Version: 1.3.23
5 5
 Date: 2009-09-29
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
8 8
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0.
9 9
 License: Artistic-2.0
10
-Depends: methods, Biobase (>= 2.5.5)
10
+Depends: methods, Biobase (>= 2.5.5), R (>= 2.10.0)
11 11
 Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, oligoClasses, ellipse, methods, SNPchip
12 12
 Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, VanillaICE (>= 1.7.8), GGdata
13 13
 Collate: AllClasses.R
... ...
@@ -217,15 +217,16 @@ harmonizeDimnamesTo <- function(object1, object2){
217 217
 }
218 218
 
219 219
 crlmmWrapper <- function(filenames,
220
-			 cdfName="genomewidesnp6",
220
+			 cdfName,
221 221
 			 load.it=FALSE,
222 222
 			 save.it=FALSE,
223 223
 			 splitByChr=TRUE,
224 224
 			 crlmmFile,
225 225
 			 intensityFile,
226 226
 			 rgFile,
227
-			 platform=c("affymetrix", "illumina")[1],
228 227
 			 ...){
228
+	if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required.  See crlmm:::validCdfNames()")  
229
+	platform <- whichPlatform(cdfName)
229 230
 	if(!(platform %in% c("affymetrix", "illumina"))){
230 231
 		stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.")
231 232
 	} else {
... ...
@@ -234,7 +235,11 @@ crlmmWrapper <- function(filenames,
234 235
 	if(missing(intensityFile)) stop("must specify 'intensityFile'.")
235 236
 	if(missing(crlmmFile)) stop("must specify 'crlmmFile'.")
236 237
 	if(platform == "illumina"){
237
-		if(missing(rgFile)) stop("must specify 'rgFile'.")
238
+		if(missing(rgFile)){
239
+			##stop("must specify 'rgFile'.")
240
+			rgFile <- file.path(dirname(crlmmFile), "rgFile.rda")
241
+			message("rgFile not specified.  Using ", rgFile)
242
+		}
238 243
 		if(!load.it){
239 244
 			RG <- readIdatFiles(...)
240 245
 			if(save.it) save(RG, file=rgFile)
... ...
@@ -264,7 +269,7 @@ crlmmWrapper <- function(filenames,
264 269
 	}
265 270
 	if(load.it){
266 271
 		if(!file.exists(crlmmFile)){
267
-			message("load.it is TRUE, but 'crlmmFile' does not exist.  Rerunning the genotype calling algorithm") 
272
+			message("load.it is TRUE, but ", crlmmFile, " does not exist.  Rerunning the genotype calling algorithm") 
268 273
 			load.it <- FALSE
269 274
 		}
270 275
 	}
... ...
@@ -603,7 +608,11 @@ computeCopynumber <- function(object,
603 608
 	if(ncol(object) < 10)
604 609
 		stop("Must have at least 10 samples in each batch to estimate model parameters....preferably closer to 90 samples per batch")
605 610
 	##require(oligoClasses)
606
-	if(missing(CHR)) stop("Must specify CHR")
611
+	if(missing(CHR)){
612
+		if(length(unique(chromosome(object)))==1){
613
+			CHR <- unique(chromosome(object))
614
+		} else  stop("Must specify CHR")
615
+	}
607 616
 	if(CHR == 24) stop("Nothing available yet for chromosome Y")
608 617
 	if(missing(batch)) {
609 618
 		message("'batch' missing.  Assuming all samples in the CrlmmSetList object were processed together in the same batch.")
... ...
@@ -803,7 +812,6 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName){
803 812
 	dimnames(CA) <- list(envir[["snps"]], envir[["sns"]])	
804 813
 	CB <- envir[["CB"]]
805 814
 	dimnames(CB) <- dimnames(CA)
806
-
807 815
 	##NP copy number
808 816
 	if(!is.null(NPset)){
809 817
 		CT <- envir[["CT"]]
... ...
@@ -907,15 +915,19 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName){
907 915
 	rownames(fd) <- rownames(CA)[I]
908 916
 	fD <- new("AnnotatedDataFrame",
909 917
 		  data=fd,
910
-		  varMetadata=data.frame(labelDescription=colnames(fd)))	
918
+		  varMetadata=data.frame(labelDescription=colnames(fd)))
919
+	placeholder <- matrix(NA, nrow(CA), ncol(CA))
920
+	dimnames(placeholder) <- dimnames(CA)
911 921
 	assayData <- assayDataNew("lockedEnvironment",
912
-				  CA=CA[I, ],
913
-				  CB=CB[I, ])
922
+				  CA=placeholder[I, ],
923
+				  CB=placeholder[I, ])
914 924
 	cnset <- new("CopyNumberSet",
915 925
 		      assayData=assayData,
916 926
 		      featureData=fD,
917 927
 		      phenoData=phenodata,
918 928
 		      annotation=cdfName)
929
+	CA(cnset) <- CA  ##replacement method converts to integer
930
+	CB(cnset) <- CB  ##replacement method converts to integer
919 931
 	cnset <- thresholdCopyNumberSet(cnset)
920 932
 	return(cnset)
921 933
 }
... ...
@@ -929,10 +941,11 @@ thresholdCopyNumberSet <- function(object){
929 941
 	ca[ca > 5] <- 5
930 942
 	cb[cb < 0.05] <- 0.05
931 943
 	cb[cb > 5] <- 5
932
-	ca <- matrix(as.integer(ca*100), nrow(ca), ncol(ca))
933
-	cb <- matrix(as.integer(cb*100), nrow(cb), ncol(cb))
934
-	rownames(ca) <- rownames(cb) <- featureNames(object)
935
-	colnames(ca) <- colnames(cb) <- sampleNames(object)
944
+##	ca <- matrix(as.integer(ca*100), nrow(ca), ncol(ca))
945
+##	cb <- matrix(as.integer(cb*100), nrow(cb), ncol(cb))
946
+##	rownames(ca) <- rownames(cb) <- featureNames(object)
947
+##	colnames(ca) <- colnames(cb) <- sampleNames(object)
948
+	##replacement method multiplies by 100 and converts to integer
936 949
 	CA(object) <- ca
937 950
 	CB(object) <- cb
938 951
 	return(object)
... ...
@@ -1143,8 +1156,8 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk
1143 1156
 		}
1144 1157
 		CT1 <- 1/phi1*(NP[, gender=="male"]-nu1)
1145 1158
 		CT2 <- 1/phi2*(NP[, gender=="female"]-nu2)
1146
-		CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1))
1147
-		CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2))
1159
+		##CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1))
1160
+		##CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2))
1148 1161
 		CT <- envir[["CT"]]
1149 1162
 		CT[, plate==uplate[p] & gender=="male"] <- CT1
1150 1163
 		CT[, plate==uplate[p] & gender=="female"] <- CT2
... ...
@@ -1163,7 +1176,7 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk
1163 1176
 		normalNP <- envir[["normalNP"]]
1164 1177
 		normalNP <- normalNP[, plate==uplate[p]]
1165 1178
 		mus <- rowMedians(NP * normalNP, na.rm=TRUE)
1166
-		crosshyb <- median(muA) - median(mus)
1179
+		crosshyb <- max(median(muA) - median(mus), 0)
1167 1180
 		X <- cbind(1, log2(mus+crosshyb))
1168 1181
 		##X <- cbind(1, log2(mus))
1169 1182
 		logPhiT <- X %*% betahat
... ...
@@ -1171,7 +1184,8 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk
1171 1184
 		nuT[, p] <- mus - 2*phiT[, p]
1172 1185
 		T <- 1/phiT[, p]*(NP - nuT[, p])
1173 1186
 		CT <- envir[["CT"]]
1174
-		CT[, plate==uplate[p]] <- matrix(as.integer(100*T), nrow(T), ncol(T))
1187
+##		CT[, plate==uplate[p]] <- matrix(as.integer(100*T), nrow(T), ncol(T))
1188
+		CT[, plate==uplate[p]] <- matrix(T, nrow(T), ncol(T))
1175 1189
 
1176 1190
 		##Variance for prediction region
1177 1191
 		##sig2T[, plate==uplate[[p]]] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2
... ...
@@ -1615,11 +1629,11 @@ polymorphic <- function(plateIndex, A, B, envir){
1615 1629
 		tmp <- (B-nuB - phistar*A + phistar*nuA)/phiB
1616 1630
 		copyB <- tmp/(1-phistar*phiAx/phiB)
1617 1631
 		copyA <- (A-nuA-phiAx*copyB)/phiA
1618
-		CB[, plate==uplate[p]] <- matrix(as.integer(100*copyB), nrow(copyB), ncol(copyB))
1619
-		CA[, plate==uplate[p]] <- matrix(as.integer(100*copyA), nrow(copyA), ncol(copyA))
1632
+		CB[, plate==uplate[p]] <- copyB
1633
+		CA[, plate==uplate[p]] <- copyA
1620 1634
 	} else{
1621
-		CA[, plate==uplate[p]] <- matrix(as.integer(100*1/phiA*(A-nuA)), nrow(A), ncol(A))
1622
-		CB[, plate==uplate[p]] <- matrix(as.integer(100*1/phiB*(B-nuB)), nrow(A), ncol(A))
1635
+		CA[, plate==uplate[p]] <- matrix((1/phiA*(A-nuA)), nrow(A), ncol(A))
1636
+		CB[, plate==uplate[p]] <- matrix((1/phiB*(B-nuB)), nrow(B), ncol(B))
1623 1637
 	}
1624 1638
 	assign("CA", CA, envir)
1625 1639
 	assign("CB", CB, envir)
... ...
@@ -1979,7 +1993,8 @@ setMethod("update", "character", function(object, ...){
1979 1993
 		if(!"chromosome" %in% fvarLabels(crlmmSetList[[1]])){
1980 1994
 			featureData(crlmmSetList[[1]]) <- addFeatureAnnotation(crlmmSetList)
1981 1995
 		} 
1982
-		CHR <- unique(chromosome(crlmmSetList[[1]]))		
1996
+		CHR <- unique(chromosome(crlmmSetList[[1]]))
1997
+		if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.")		
1983 1998
 		if(CHR==24){
1984 1999
 			message("skipping chromosome 24")
1985 2000
 			next()
... ...
@@ -21,7 +21,7 @@ readIdatFiles <- function(sampleSheet=NULL,
21 21
 	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
22 22
        }	
23 23
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
24
-	       if(!is.null(arrayNames)){
24
+	       if(is.null(arrayNames)){
25 25
 		       ##arrayNames=NULL
26 26
 		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
27 27
 			       barcode = sampleSheet[,arrayInfoColNames$barcode]
... ...
@@ -41,7 +41,7 @@ readIdatFiles <- function(sampleSheet=NULL,
41 41
 		       }
42 42
 	       }
43 43
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
44
-	       sampleNames(pd) <- arrayNames
44
+	       sampleNames(pd) <- basename(arrayNames)
45 45
        }
46 46
        narrays = length(arrayNames)
47 47
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
... ...
@@ -6,10 +6,19 @@ setValidity("CopyNumberSet", function(object) {
6 6
 setMethod("CA", "CopyNumberSet", function(object, ...) assayData(object)[["CA"]]/100)
7 7
 setMethod("CB", "CopyNumberSet", function(object, ...) assayData(object)[["CB"]]/100)
8 8
 
9
-setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"),
10
-		 function(object, value) assayDataElementReplace(object, "CA", value*100))
11
-setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"),
12
-		 function(object, value) assayDataElementReplace(object, "CB", value*100))
9
+setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"), function(object, value){
10
+	dns <- dimnames(value)
11
+	value <- matrix(as.integer(value*100), nrow(value), ncol(value))
12
+	dimnames(value) <- dns
13
+	assayDataElementReplace(object, "CA", value)
14
+})
15
+
16
+setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"), function(object, value){
17
+	dns <- dimnames(value)	
18
+	value <- matrix(as.integer(value*100), nrow(value), ncol(value))
19
+	dimnames(value) <- dns	
20
+	assayDataElementReplace(object, "CB", value)
21
+})
13 22
 
14 23
 
15 24
 
... ...
@@ -73,6 +73,12 @@ setMethod("addFeatureAnnotation", "CrlmmSetList", function(object, ...){
73 73
 	
74 74
 	position <- c(position.snp, position.np)
75 75
 	chrom <- c(chr.snp, chr.np)
76
+
77
+	##We may not have annotation for all of the snps
78
+	if(!all(featureNames(object) %in% names(position))){
79
+		message("Dropping loci for which physical position  is not available.")
80
+		object <- object[featureNames(object) %in% names(position), ]
81
+	}
76 82
 	ix <- match(featureNames(object), names(position))
77 83
 	position <- position[ix]
78 84
 	chrom <- chrom[ix]
... ...
@@ -200,8 +206,19 @@ setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir)
200 206
 		save(crlmmSetList, file=file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep="")))
201 207
 	}
202 208
 })
209
+
203 210
 setMethod("update", "CrlmmSetList", function(object, ...){
204
-	computeCopynumber(object, ...)
211
+	if(length(crlmmSetList) == 3){
212
+		message("copy number object already present. Nothing to do.")
213
+		return()
214
+	}
215
+	CHR <- unique(chromosome(crlmmSetList[[1]]))
216
+	if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.")
217
+	if(CHR == 24){
218
+		message("A solution for chromosome 24 is not yet available.")
219
+		return()
220
+	}	
221
+	computeCopynumber(object, CHR=CHR, ...)
205 222
 })
206 223
 
207 224
 setReplaceMethod("CA", signature(object="CrlmmSetList", value="matrix"),
... ...
@@ -47,6 +47,11 @@ library(crlmm)
47 47
 crlmm:::validCdfNames()
48 48
 @ 
49 49
 
50
+<<Rsge, eval=TRUE, echo=FALSE>>=
51
+##If multiple cluster nodes are available
52
+library(Rsge)
53
+@ 
54
+
50 55
 \paragraph{Preprocess and genotype.}
51 56
 
52 57
 Specify the coordinates of Affymetrix cel files and where to put
... ...
@@ -56,11 +61,19 @@ number estimation.
56 61
 <<celfiles>>=
57 62
 myPath <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m"
58 63
 celFiles <- list.celfiles(myPath, full.names=TRUE, pattern=".CEL")
59
-#outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
60
-outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/tmp"
61
-dir.create(outdir)
64
+outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy"
65
+#outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/tmp"
66
+if(!file.exists(outdir)) dir.create(outdir)
62 67
 @ 
63 68
 
69
+Split the list of files by plate.  For this HapMap data, the different
70
+ancestries were run on separate plates.
71
+
72
+<<plate>>=
73
+plate <- substr(basename(celFiles), 13, 13)
74
+table(plate)
75
+celFiles <- split(celFiles, plate)
76
+@ 
64 77
 
65 78
 
66 79
 \noindent The next code chunk quantile normalizes the samples to a
... ...
@@ -68,21 +81,76 @@ target reference distribution and uses the crlmm algorithm to genotype.
68 81
 See \cite{Carvalho2007a} for details regarding the crlmm genotyping
69 82
 algorithm.
70 83
 
71
-<<preprocessAndGenotype, eval=FALSE>>=
72
-crlmmWrapper(celFiles,##[1:15], 
73
-	     cdfName="genomewidesnp6",
74
-	     save.it=TRUE, 
75
-	     load.it=FALSE,
76
-	     intensityFile=file.path(outdir, "normalizedIntensities.rda"),
77
-	     crlmmFile=file.path(outdir, "snpsetObject.rda"),
78
-	     platform="affymetrix")
79
-@ 
80
-
81
-As a result of the above wrapper, the following R objects are now created:
84
+<<preprocessAndGenotype, eval=TRUE, echo=TRUE>>=
85
+for(i in seq(along=celFiles)){
86
+	platedir <- file.path(outdir, names(celFiles)[i])
87
+	if(!file.exists(platedir)) dir.create(platedir)
88
+	crlmmWrapper(celFiles[[i]],##[1:15], 
89
+		     cdfName="genomewidesnp6",
90
+		     save.it=FALSE, 
91
+		     load.it=TRUE,
92
+		     intensityFile=file.path(platedir, "normalizedIntensities.rda"),
93
+		     crlmmFile=file.path(platedir, "snpsetObject.rda"))
94
+}
95
+q("no")
96
+@ 
97
+
98
+<<preprocessAndGenotype, eval=FALSE, echo=FALSE>>=
99
+for(i in seq(along=celFiles)){
100
+	platedir <- file.path(outdir, names(celFiles)[i])
101
+	if(!file.exists(platedir)) dir.create(platedir)
102
+	sge.submit(crlmmWrapper,
103
+		   filenames=celFiles[[i]],##[1:15], 
104
+		   cdfName="genomewidesnp6",
105
+		   save.it=FALSE, 
106
+		   load.it=TRUE,
107
+		   intensityFile=file.path(platedir, "normalizedIntensities.rda"),
108
+		   crlmmFile=file.path(platedir, "snpsetObject.rda"),
109
+		   global.savelist=c("platedir"),
110
+		   packages="crlmm")
111
+}
112
+##platedirs <- sapply(names(celFiles), function(x) file.path(outdir, x))
113
+##intensityFiles <- file.path(platedirs, "normalizedIntensities.rda")
114
+####crlmmFiles <- file.path(platedirs, "snpsetObject.rda")
115
+####myF <- function(celFiles, intensityFile, crlmmFile, ...){
116
+####	crlmmWrapper(celFiles, intensityFile=intensityFile,
117
+####		     crlmmFile=crlmmFile,
118
+##sge.apply(celFiles, 
119
+##	  crlmmWrapper,
120
+##	  cdfName="genomewidesnp6",
121
+##	  save.it=FALSE, 
122
+##	  load.it=TRUE,
123
+##	  intensityFile=,
124
+##	  crlmmFile=file.path(platedir, "snpsetObject.rda"))
125
+q("no")
126
+@ 
127
+
128
+As a result of the above wrapper, the following R objects are now
129
+created for each plate:
130
+
131
+<<crlmmSetListObjects>>=
132
+fns <- list.files(platedir, pattern="crlmmSetList", full.name=TRUE)
133
+@ 
134
+
135
+To estimate copy number, simply run the update function on this vector
136
+of filenames.  If multiple nodes are available for computation, one
137
+could easily split this job.
138
+
139
+<<copynumber, echo=FALSE, eval=TRUE>>=
140
+for(i in seq(along=celFiles)){
141
+	fns <- list.files(file.path(outdir, names(celFiles[i])), 
142
+			  pattern="crlmmSetList", full.names=TRUE)
143
+	update(fns)
144
+}
145
+@
82 146
 
83
-<<>>=
84
-list.files(outdir)[grep("crlmmSetList", list.files(outdir))]
85
-@ 
147
+<<copynumber.sge, echo=FALSE, eval=TRUE>>=
148
+for(i in seq(along=celFiles)){
149
+	fns <- list.files(file.path(outdir, names(celFiles[i])), 
150
+			  pattern="crlmmSetList", full.names=TRUE)
151
+	sge.parLapply(X=fns, FUN=update, packages="crlmm", cluster=FALSE)
152
+}
153
+@
86 154
 
87 155
 \paragraph{Locus- and allele-specific estimates of copy number.}
88 156
 
... ...
@@ -92,9 +160,7 @@ Load the object for chromosome 22 and compute copy number:
92 160
 CHR <- 22
93 161
 if(!exists("crlmmSetList")) load(file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep="")))
94 162
 show(crlmmSetList)
95
-if(length(crlmmSetList)==2){
96
-	crlmmSetList <- update(crlmmSetList, CHR=CHR)
97
-}
163
+crlmmSetList <- update(crlmmSetList)
98 164
 show(crlmmSetList)
99 165
 @ 
100 166
 
... ...
@@ -501,10 +567,10 @@ for(i in indices[[j]]){
501 567
     intensities.  Each panel displays a single SNP.}
502 568
 \end{figure}
503 569
 
504
-TODO: Plot the prediction regions for total copy number 2 and 3 for the
505
-first plate. Plotting symbols are the genotype calls (1=AA, 2=AB, 3=BB);
506
-light grey points are from other plates. One could also add the
507
-prediction regions for 0-4 copies, but it gets crowded.
570
+% TODO: Plot the prediction regions for total copy number 2 and 3 for
571
+% the first plate. Plotting symbols are the genotype calls (1=AA, 2=AB,
572
+% 3=BB); light grey points are from other plates. One could also add the
573
+% prediction regions for 0-4 copies, but it gets crowded.
508 574
 
509 575
 
510 576
 <<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE, echo=FALSE>>=
... ...
@@ -572,8 +638,10 @@ the allele-specific copy number.  SNP-specific parameters are estimated
572 638
 only from samples with a suitable confidence score for the diallelic
573 639
 genotype calls.  The default confidence threshold is 0.99, but can be
574 640
 adjusted by passing the argument \Robject{CONF.THR} to the
575
-\Robject{update} method.  TODO: illustrate this approach with boxplots
576
-of the A and B intensities stratified by genotype.
641
+\Robject{update} method.  
642
+
643
+% TODO: illustrate this approach with boxplots of the A and B
644
+% intensities stratified by genotype.
577 645
 
578 646
 <<linearModel, fig=TRUE,include=FALSE, eval=FALSE, echo=FALSE>>=
579 647
 cols <- brewer.pal(7, "Accent")[c(1, 4, 7)]
... ...
@@ -28,50 +28,70 @@ options(prompt="R> ")
28 28
 
29 29
 <<>>=
30 30
 library(crlmm)
31
+crlmm:::validCdfNames()
32
+outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/illumina/HumanCNV370-Duo"
33
+datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k"
34
+cdfName <- "human370v1c"
31 35
 @ 
32 36
 
33
-<<echo=FALSE, eval=TRUE>>=
34
-setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k")
35
-@ 
36
-
37
-<<readIdat>>=
38
-samplesheet5 = read.csv("HumanHap370Duo_Sample_Map.csv", header=TRUE, as.is=TRUE)[-c(28:46,61:75,78:79),]
39
-if(!exists("RG")){
40
-	RG <- readIdatFiles(samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE)
41
-}
37
+<<samplesToProcess>>=
38
+samplesheet = read.csv(file.path(datadir, "HumanHap370Duo_Sample_Map.csv"), header=TRUE, as.is=TRUE)
39
+## Excluding a few of the problem samples
40
+samplesheet <- samplesheet[-c(28:46,61:75,78:79), ]
41
+arrayNames <- file.path(datadir, unique(samplesheet[, "SentrixPosition"]))
42
+##Check that files exist
43
+grnfiles = all(file.exists(paste(arrayNames, ".Grn.dat", sep="")))
44
+redfiles = all(file.exists(paste(arrayNames, ".Red.dat", sep="")))
42 45
 @ 
43 46
 
44 47
 Alternatively, arguments to the readIdatFiles can be passed through the
45 48
 {\tt ...} argument of the \R{} function \Robject{crlmmWrapper}.
46 49
 
47 50
 <<wrapper>>=
48
-crlmmWrapper(sampleSheet=samplesheet5,
51
+crlmmWrapper(sampleSheet=samplesheet,
52
+	     arrayNames=arrayNames,
49 53
 	     arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), 
50 54
 	     saveDate=TRUE,
51 55
 	     cdfName=cdfName,
52 56
 	     load.it=FALSE,
53
-	     save.it=TRUE,
54
-	     intensityFile=file.path(platedir, "normalizedIntensities.rda"),
55
-	     crlmmFile=file.path(platedir, "snpsetObject.rda"),
56
-	     rgFile=file.path(platedir, "rgFile.rda"),
57
-	     platform="illumina")
57
+	     save.it=FALSE,
58
+	     intensityFile=file.path(outdir, "normalizedIntensities.rda"),
59
+	     crlmmFile=file.path(outdir, "snpsetObject.rda"),
60
+	     rgFile=file.path(outdir, "rgFile.rda"))
58 61
 @ 
59 62
 
60
-Samples with low signal to noise ratios tend to have a lot of variation
61
-in the point estimates of copy number.  One may want to exclude these
62
-samples.
63
+This creates a \Robject{crlmmSetList} object in the \Robject{outdir}
64
+directory.  The first element of this object contains the
65
+quantile-normalized A and B intensities.  The second element in the list
66
+contains the crlmm genotype calls.  
63 67
 
64 68
 
65 69
 <<SNR>>=
66
-hist(crlmmOut$SNR)
70
+CHR <- 1
71
+filename <- paste(outdir, "/crlmmSetList_", CHR, ".rda", sep="")
72
+load(filename)
73
+hist(crlmmSetList[[1]]$SNR)
67 74
 @ 
68 75
 
69
-Run update on the CrlmmSetList object to obtain copy number. Estimate
76
+Run update on the CrlmmSetList object to obtain copy number estimates. Estimate
70 77
 copy number for chromosome 1.
71 78
 
72
-<<>>=
73
-CHR <- 1
74
-filename <- paste(platedir, "/CrlmmSetList_", CHR, ".rda", sep="")
79
+<<estimateCn>>=
80
+update(filename)
81
+##Or equivalently,
82
+##crlmmSetList <- computeCopynumber(crlmmSetList)
83
+##save(crlmmSetList, file=file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep="")))
84
+@ 
85
+
86
+Samples with low signal to noise ratios tend to have a lot of variation
87
+in the point estimates of copy number.  One may want to exclude these
88
+samples, or smooth after filtering outliers.  Here we load the
89
+crlmmSetList object.  See the copynumber.Rnw vignette for example plots.
90
+
91
+<<crlmmSetList>>=
92
+##reload the updated object
93
+load(filename)
94
+cn <- copyNumber(crlmmSetList)
75 95
 @ 
76 96
 
77 97
 \end{document}
78 98
new file mode 100644
79 99
Binary files /dev/null and b/inst/scripts/illumina_copynumber.pdf differ
... ...
@@ -115,7 +115,7 @@
115 115
 
116 116
     \item{"CA"}{\code{signature(object = "CrlmmSetList")}: extracts the
117 117
       copy number for allele A at polymorphic loci. For nonpolymorphic
118
-      probes, CA returns the total copy number.}
118
+      probes, CA returns the total copy number. }
119 119
   
120 120
     \item{"CB"}{\code{signature(object = "CrlmmSetList")}: extracts the
121 121
       copy number for allele B at polymorphic loci. For nonpolymorphic
... ...
@@ -9,9 +9,8 @@
9 9
   confidence scores are assigned using the crlmm algorithm.
10 10
 }
11 11
 \usage{
12
-crlmmWrapper(filenames, cdfName = "genomewidesnp6", load.it=FALSE,
13
-save.it=FALSE, splitByChr=TRUE, crlmmFile, intensityFile, rgFile,
14
-platform=c("affymetrix", "illumina")[1], ...)
12
+crlmmWrapper(filenames, cdfName, load.it=FALSE,
13
+save.it=FALSE, splitByChr=TRUE, crlmmFile, intensityFile, rgFile, ...)
15 14
 }
16 15
 \arguments{
17 16
   \item{filenames}{
... ...
@@ -19,6 +18,7 @@ platform=c("affymetrix", "illumina")[1], ...)
19 18
   }
20 19
   \item{cdfName}{
21 20
     Annotation package (currently, only 'genomewidesnp6' is supported).
21
+    See \code{crlmm:::validCdfNames()}.
22 22
   }
23 23
   \item{load.it}{ Logical.  If TRUE, the wrapper function will try to
24 24
     load the 'intensityFile' and 'crlmmFile'. When FALSE, the quantile
... ...
@@ -41,8 +41,6 @@ platform=c("affymetrix", "illumina")[1], ...)
41 41
   \item{rgFile}{Character string.  Name of file (including the
42 42
     complete path) containing the RG intensities (only applicable for
43 43
     illumina platform).  }
44
-  \item{platform}{Character string.  Currently only \code{affymetrix}
45
-    and \code{illumina} are supported.}
46 44
   \item{\dots}{
47 45
     additional arguments to function \code{readIdatFiles} (illumina
48 46
     platform only)