git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@42431 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -290,3 +290,18 @@ is decoded and scanned |
290 | 290 |
* updated addFeatureAnnotation so that CHR arg not required |
291 | 291 |
* fixed bug in nonpolymorphic function -- function checks whether any regions are pseudoautosomal |
292 | 292 |
* fixed bug in list2locusset function (no longer assigns genomewidesnp6 as the default annotation) |
293 |
+ |
|
294 |
+2009-10-16 R. Scharpf - committed version 1.3.23 |
|
295 |
+ |
|
296 |
+ * 100*CA, 100*CB are stored in the CA, CB assayDataElements. The |
|
297 |
+ replacement method automatically converts a matrix on the copy |
|
298 |
+ number scale to an integer representation by scaling by a factor of |
|
299 |
+ 100. The accessor methods for CA, CB automatically divide by 100 to |
|
300 |
+ return estimates back on the original copy number scale. |
|
301 |
+ |
|
302 |
+ * more debugging of readIdatFiles and illumina copy number vignette. |
|
303 |
+ |
|
304 |
+ * updated illumina_copynumber vignette |
|
305 |
+ |
|
306 |
+ |
|
307 |
+ |
... | ... |
@@ -1,13 +1,13 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.3.22 |
|
4 |
+Version: 1.3.23 |
|
5 | 5 |
Date: 2009-09-29 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
8 | 8 |
Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 6.0. |
9 | 9 |
License: Artistic-2.0 |
10 |
-Depends: methods, Biobase (>= 2.5.5) |
|
10 |
+Depends: methods, Biobase (>= 2.5.5), R (>= 2.10.0) |
|
11 | 11 |
Imports: affyio, preprocessCore, utils, stats, genefilter, splines, mvtnorm, oligoClasses, ellipse, methods, SNPchip |
12 | 12 |
Suggests: hapmapsnp5, hapmapsnp6, genomewidesnp5Crlmm (>= 1.0.2),genomewidesnp6Crlmm (>= 1.0.2), snpMatrix, VanillaICE (>= 1.7.8), GGdata |
13 | 13 |
Collate: AllClasses.R |
... | ... |
@@ -217,15 +217,16 @@ harmonizeDimnamesTo <- function(object1, object2){ |
217 | 217 |
} |
218 | 218 |
|
219 | 219 |
crlmmWrapper <- function(filenames, |
220 |
- cdfName="genomewidesnp6", |
|
220 |
+ cdfName, |
|
221 | 221 |
load.it=FALSE, |
222 | 222 |
save.it=FALSE, |
223 | 223 |
splitByChr=TRUE, |
224 | 224 |
crlmmFile, |
225 | 225 |
intensityFile, |
226 | 226 |
rgFile, |
227 |
- platform=c("affymetrix", "illumina")[1], |
|
228 | 227 |
...){ |
228 |
+ if(missing(cdfName)) stop("cdfName is missing -- a valid cdfName is required. See crlmm:::validCdfNames()") |
|
229 |
+ platform <- whichPlatform(cdfName) |
|
229 | 230 |
if(!(platform %in% c("affymetrix", "illumina"))){ |
230 | 231 |
stop("Only 'affymetrix' and 'illumina' platforms are supported at this time.") |
231 | 232 |
} else { |
... | ... |
@@ -234,7 +235,11 @@ crlmmWrapper <- function(filenames, |
234 | 235 |
if(missing(intensityFile)) stop("must specify 'intensityFile'.") |
235 | 236 |
if(missing(crlmmFile)) stop("must specify 'crlmmFile'.") |
236 | 237 |
if(platform == "illumina"){ |
237 |
- if(missing(rgFile)) stop("must specify 'rgFile'.") |
|
238 |
+ if(missing(rgFile)){ |
|
239 |
+ ##stop("must specify 'rgFile'.") |
|
240 |
+ rgFile <- file.path(dirname(crlmmFile), "rgFile.rda") |
|
241 |
+ message("rgFile not specified. Using ", rgFile) |
|
242 |
+ } |
|
238 | 243 |
if(!load.it){ |
239 | 244 |
RG <- readIdatFiles(...) |
240 | 245 |
if(save.it) save(RG, file=rgFile) |
... | ... |
@@ -264,7 +269,7 @@ crlmmWrapper <- function(filenames, |
264 | 269 |
} |
265 | 270 |
if(load.it){ |
266 | 271 |
if(!file.exists(crlmmFile)){ |
267 |
- message("load.it is TRUE, but 'crlmmFile' does not exist. Rerunning the genotype calling algorithm") |
|
272 |
+ message("load.it is TRUE, but ", crlmmFile, " does not exist. Rerunning the genotype calling algorithm") |
|
268 | 273 |
load.it <- FALSE |
269 | 274 |
} |
270 | 275 |
} |
... | ... |
@@ -603,7 +608,11 @@ computeCopynumber <- function(object, |
603 | 608 |
if(ncol(object) < 10) |
604 | 609 |
stop("Must have at least 10 samples in each batch to estimate model parameters....preferably closer to 90 samples per batch") |
605 | 610 |
##require(oligoClasses) |
606 |
- if(missing(CHR)) stop("Must specify CHR") |
|
611 |
+ if(missing(CHR)){ |
|
612 |
+ if(length(unique(chromosome(object)))==1){ |
|
613 |
+ CHR <- unique(chromosome(object)) |
|
614 |
+ } else stop("Must specify CHR") |
|
615 |
+ } |
|
607 | 616 |
if(CHR == 24) stop("Nothing available yet for chromosome Y") |
608 | 617 |
if(missing(batch)) { |
609 | 618 |
message("'batch' missing. Assuming all samples in the CrlmmSetList object were processed together in the same batch.") |
... | ... |
@@ -803,7 +812,6 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName){ |
803 | 812 |
dimnames(CA) <- list(envir[["snps"]], envir[["sns"]]) |
804 | 813 |
CB <- envir[["CB"]] |
805 | 814 |
dimnames(CB) <- dimnames(CA) |
806 |
- |
|
807 | 815 |
##NP copy number |
808 | 816 |
if(!is.null(NPset)){ |
809 | 817 |
CT <- envir[["CT"]] |
... | ... |
@@ -907,15 +915,19 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName){ |
907 | 915 |
rownames(fd) <- rownames(CA)[I] |
908 | 916 |
fD <- new("AnnotatedDataFrame", |
909 | 917 |
data=fd, |
910 |
- varMetadata=data.frame(labelDescription=colnames(fd))) |
|
918 |
+ varMetadata=data.frame(labelDescription=colnames(fd))) |
|
919 |
+ placeholder <- matrix(NA, nrow(CA), ncol(CA)) |
|
920 |
+ dimnames(placeholder) <- dimnames(CA) |
|
911 | 921 |
assayData <- assayDataNew("lockedEnvironment", |
912 |
- CA=CA[I, ], |
|
913 |
- CB=CB[I, ]) |
|
922 |
+ CA=placeholder[I, ], |
|
923 |
+ CB=placeholder[I, ]) |
|
914 | 924 |
cnset <- new("CopyNumberSet", |
915 | 925 |
assayData=assayData, |
916 | 926 |
featureData=fD, |
917 | 927 |
phenoData=phenodata, |
918 | 928 |
annotation=cdfName) |
929 |
+ CA(cnset) <- CA ##replacement method converts to integer |
|
930 |
+ CB(cnset) <- CB ##replacement method converts to integer |
|
919 | 931 |
cnset <- thresholdCopyNumberSet(cnset) |
920 | 932 |
return(cnset) |
921 | 933 |
} |
... | ... |
@@ -929,10 +941,11 @@ thresholdCopyNumberSet <- function(object){ |
929 | 941 |
ca[ca > 5] <- 5 |
930 | 942 |
cb[cb < 0.05] <- 0.05 |
931 | 943 |
cb[cb > 5] <- 5 |
932 |
- ca <- matrix(as.integer(ca*100), nrow(ca), ncol(ca)) |
|
933 |
- cb <- matrix(as.integer(cb*100), nrow(cb), ncol(cb)) |
|
934 |
- rownames(ca) <- rownames(cb) <- featureNames(object) |
|
935 |
- colnames(ca) <- colnames(cb) <- sampleNames(object) |
|
944 |
+## ca <- matrix(as.integer(ca*100), nrow(ca), ncol(ca)) |
|
945 |
+## cb <- matrix(as.integer(cb*100), nrow(cb), ncol(cb)) |
|
946 |
+## rownames(ca) <- rownames(cb) <- featureNames(object) |
|
947 |
+## colnames(ca) <- colnames(cb) <- sampleNames(object) |
|
948 |
+ ##replacement method multiplies by 100 and converts to integer |
|
936 | 949 |
CA(object) <- ca |
937 | 950 |
CB(object) <- cb |
938 | 951 |
return(object) |
... | ... |
@@ -1143,8 +1156,8 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk |
1143 | 1156 |
} |
1144 | 1157 |
CT1 <- 1/phi1*(NP[, gender=="male"]-nu1) |
1145 | 1158 |
CT2 <- 1/phi2*(NP[, gender=="female"]-nu2) |
1146 |
- CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1)) |
|
1147 |
- CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2)) |
|
1159 |
+ ##CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1)) |
|
1160 |
+ ##CT2 <- matrix(as.integer(100*CT2), nrow(CT2), ncol(CT2)) |
|
1148 | 1161 |
CT <- envir[["CT"]] |
1149 | 1162 |
CT[, plate==uplate[p] & gender=="male"] <- CT1 |
1150 | 1163 |
CT[, plate==uplate[p] & gender=="female"] <- CT2 |
... | ... |
@@ -1163,7 +1176,7 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk |
1163 | 1176 |
normalNP <- envir[["normalNP"]] |
1164 | 1177 |
normalNP <- normalNP[, plate==uplate[p]] |
1165 | 1178 |
mus <- rowMedians(NP * normalNP, na.rm=TRUE) |
1166 |
- crosshyb <- median(muA) - median(mus) |
|
1179 |
+ crosshyb <- max(median(muA) - median(mus), 0) |
|
1167 | 1180 |
X <- cbind(1, log2(mus+crosshyb)) |
1168 | 1181 |
##X <- cbind(1, log2(mus)) |
1169 | 1182 |
logPhiT <- X %*% betahat |
... | ... |
@@ -1171,7 +1184,8 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk |
1171 | 1184 |
nuT[, p] <- mus - 2*phiT[, p] |
1172 | 1185 |
T <- 1/phiT[, p]*(NP - nuT[, p]) |
1173 | 1186 |
CT <- envir[["CT"]] |
1174 |
- CT[, plate==uplate[p]] <- matrix(as.integer(100*T), nrow(T), ncol(T)) |
|
1187 |
+## CT[, plate==uplate[p]] <- matrix(as.integer(100*T), nrow(T), ncol(T)) |
|
1188 |
+ CT[, plate==uplate[p]] <- matrix(T, nrow(T), ncol(T)) |
|
1175 | 1189 |
|
1176 | 1190 |
##Variance for prediction region |
1177 | 1191 |
##sig2T[, plate==uplate[[p]]] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
... | ... |
@@ -1615,11 +1629,11 @@ polymorphic <- function(plateIndex, A, B, envir){ |
1615 | 1629 |
tmp <- (B-nuB - phistar*A + phistar*nuA)/phiB |
1616 | 1630 |
copyB <- tmp/(1-phistar*phiAx/phiB) |
1617 | 1631 |
copyA <- (A-nuA-phiAx*copyB)/phiA |
1618 |
- CB[, plate==uplate[p]] <- matrix(as.integer(100*copyB), nrow(copyB), ncol(copyB)) |
|
1619 |
- CA[, plate==uplate[p]] <- matrix(as.integer(100*copyA), nrow(copyA), ncol(copyA)) |
|
1632 |
+ CB[, plate==uplate[p]] <- copyB |
|
1633 |
+ CA[, plate==uplate[p]] <- copyA |
|
1620 | 1634 |
} else{ |
1621 |
- CA[, plate==uplate[p]] <- matrix(as.integer(100*1/phiA*(A-nuA)), nrow(A), ncol(A)) |
|
1622 |
- CB[, plate==uplate[p]] <- matrix(as.integer(100*1/phiB*(B-nuB)), nrow(A), ncol(A)) |
|
1635 |
+ CA[, plate==uplate[p]] <- matrix((1/phiA*(A-nuA)), nrow(A), ncol(A)) |
|
1636 |
+ CB[, plate==uplate[p]] <- matrix((1/phiB*(B-nuB)), nrow(B), ncol(B)) |
|
1623 | 1637 |
} |
1624 | 1638 |
assign("CA", CA, envir) |
1625 | 1639 |
assign("CB", CB, envir) |
... | ... |
@@ -1979,7 +1993,8 @@ setMethod("update", "character", function(object, ...){ |
1979 | 1993 |
if(!"chromosome" %in% fvarLabels(crlmmSetList[[1]])){ |
1980 | 1994 |
featureData(crlmmSetList[[1]]) <- addFeatureAnnotation(crlmmSetList) |
1981 | 1995 |
} |
1982 |
- CHR <- unique(chromosome(crlmmSetList[[1]])) |
|
1996 |
+ CHR <- unique(chromosome(crlmmSetList[[1]])) |
|
1997 |
+ if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.") |
|
1983 | 1998 |
if(CHR==24){ |
1984 | 1999 |
message("skipping chromosome 24") |
1985 | 2000 |
next() |
... | ... |
@@ -21,7 +21,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
21 | 21 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
22 | 22 |
} |
23 | 23 |
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
24 |
- if(!is.null(arrayNames)){ |
|
24 |
+ if(is.null(arrayNames)){ |
|
25 | 25 |
##arrayNames=NULL |
26 | 26 |
if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
27 | 27 |
barcode = sampleSheet[,arrayInfoColNames$barcode] |
... | ... |
@@ -41,7 +41,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
41 | 41 |
} |
42 | 42 |
} |
43 | 43 |
pd = new("AnnotatedDataFrame", data = sampleSheet) |
44 |
- sampleNames(pd) <- arrayNames |
|
44 |
+ sampleNames(pd) <- basename(arrayNames) |
|
45 | 45 |
} |
46 | 46 |
narrays = length(arrayNames) |
47 | 47 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
... | ... |
@@ -6,10 +6,19 @@ setValidity("CopyNumberSet", function(object) { |
6 | 6 |
setMethod("CA", "CopyNumberSet", function(object, ...) assayData(object)[["CA"]]/100) |
7 | 7 |
setMethod("CB", "CopyNumberSet", function(object, ...) assayData(object)[["CB"]]/100) |
8 | 8 |
|
9 |
-setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"), |
|
10 |
- function(object, value) assayDataElementReplace(object, "CA", value*100)) |
|
11 |
-setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"), |
|
12 |
- function(object, value) assayDataElementReplace(object, "CB", value*100)) |
|
9 |
+setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"), function(object, value){ |
|
10 |
+ dns <- dimnames(value) |
|
11 |
+ value <- matrix(as.integer(value*100), nrow(value), ncol(value)) |
|
12 |
+ dimnames(value) <- dns |
|
13 |
+ assayDataElementReplace(object, "CA", value) |
|
14 |
+}) |
|
15 |
+ |
|
16 |
+setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"), function(object, value){ |
|
17 |
+ dns <- dimnames(value) |
|
18 |
+ value <- matrix(as.integer(value*100), nrow(value), ncol(value)) |
|
19 |
+ dimnames(value) <- dns |
|
20 |
+ assayDataElementReplace(object, "CB", value) |
|
21 |
+}) |
|
13 | 22 |
|
14 | 23 |
|
15 | 24 |
|
... | ... |
@@ -73,6 +73,12 @@ setMethod("addFeatureAnnotation", "CrlmmSetList", function(object, ...){ |
73 | 73 |
|
74 | 74 |
position <- c(position.snp, position.np) |
75 | 75 |
chrom <- c(chr.snp, chr.np) |
76 |
+ |
|
77 |
+ ##We may not have annotation for all of the snps |
|
78 |
+ if(!all(featureNames(object) %in% names(position))){ |
|
79 |
+ message("Dropping loci for which physical position is not available.") |
|
80 |
+ object <- object[featureNames(object) %in% names(position), ] |
|
81 |
+ } |
|
76 | 82 |
ix <- match(featureNames(object), names(position)) |
77 | 83 |
position <- position[ix] |
78 | 84 |
chrom <- chrom[ix] |
... | ... |
@@ -200,8 +206,19 @@ setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir) |
200 | 206 |
save(crlmmSetList, file=file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep=""))) |
201 | 207 |
} |
202 | 208 |
}) |
209 |
+ |
|
203 | 210 |
setMethod("update", "CrlmmSetList", function(object, ...){ |
204 |
- computeCopynumber(object, ...) |
|
211 |
+ if(length(crlmmSetList) == 3){ |
|
212 |
+ message("copy number object already present. Nothing to do.") |
|
213 |
+ return() |
|
214 |
+ } |
|
215 |
+ CHR <- unique(chromosome(crlmmSetList[[1]])) |
|
216 |
+ if(length(CHR) > 1) stop("More than one chromosome in the object. This method requires one chromosome at a time.") |
|
217 |
+ if(CHR == 24){ |
|
218 |
+ message("A solution for chromosome 24 is not yet available.") |
|
219 |
+ return() |
|
220 |
+ } |
|
221 |
+ computeCopynumber(object, CHR=CHR, ...) |
|
205 | 222 |
}) |
206 | 223 |
|
207 | 224 |
setReplaceMethod("CA", signature(object="CrlmmSetList", value="matrix"), |
... | ... |
@@ -47,6 +47,11 @@ library(crlmm) |
47 | 47 |
crlmm:::validCdfNames() |
48 | 48 |
@ |
49 | 49 |
|
50 |
+<<Rsge, eval=TRUE, echo=FALSE>>= |
|
51 |
+##If multiple cluster nodes are available |
|
52 |
+library(Rsge) |
|
53 |
+@ |
|
54 |
+ |
|
50 | 55 |
\paragraph{Preprocess and genotype.} |
51 | 56 |
|
52 | 57 |
Specify the coordinates of Affymetrix cel files and where to put |
... | ... |
@@ -56,11 +61,19 @@ number estimation. |
56 | 61 |
<<celfiles>>= |
57 | 62 |
myPath <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m" |
58 | 63 |
celFiles <- list.celfiles(myPath, full.names=TRUE, pattern=".CEL") |
59 |
-#outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy" |
|
60 |
-outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/tmp" |
|
61 |
-dir.create(outdir) |
|
64 |
+outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy" |
|
65 |
+#outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/tmp" |
|
66 |
+if(!file.exists(outdir)) dir.create(outdir) |
|
62 | 67 |
@ |
63 | 68 |
|
69 |
+Split the list of files by plate. For this HapMap data, the different |
|
70 |
+ancestries were run on separate plates. |
|
71 |
+ |
|
72 |
+<<plate>>= |
|
73 |
+plate <- substr(basename(celFiles), 13, 13) |
|
74 |
+table(plate) |
|
75 |
+celFiles <- split(celFiles, plate) |
|
76 |
+@ |
|
64 | 77 |
|
65 | 78 |
|
66 | 79 |
\noindent The next code chunk quantile normalizes the samples to a |
... | ... |
@@ -68,21 +81,76 @@ target reference distribution and uses the crlmm algorithm to genotype. |
68 | 81 |
See \cite{Carvalho2007a} for details regarding the crlmm genotyping |
69 | 82 |
algorithm. |
70 | 83 |
|
71 |
-<<preprocessAndGenotype, eval=FALSE>>= |
|
72 |
-crlmmWrapper(celFiles,##[1:15], |
|
73 |
- cdfName="genomewidesnp6", |
|
74 |
- save.it=TRUE, |
|
75 |
- load.it=FALSE, |
|
76 |
- intensityFile=file.path(outdir, "normalizedIntensities.rda"), |
|
77 |
- crlmmFile=file.path(outdir, "snpsetObject.rda"), |
|
78 |
- platform="affymetrix") |
|
79 |
-@ |
|
80 |
- |
|
81 |
-As a result of the above wrapper, the following R objects are now created: |
|
84 |
+<<preprocessAndGenotype, eval=TRUE, echo=TRUE>>= |
|
85 |
+for(i in seq(along=celFiles)){ |
|
86 |
+ platedir <- file.path(outdir, names(celFiles)[i]) |
|
87 |
+ if(!file.exists(platedir)) dir.create(platedir) |
|
88 |
+ crlmmWrapper(celFiles[[i]],##[1:15], |
|
89 |
+ cdfName="genomewidesnp6", |
|
90 |
+ save.it=FALSE, |
|
91 |
+ load.it=TRUE, |
|
92 |
+ intensityFile=file.path(platedir, "normalizedIntensities.rda"), |
|
93 |
+ crlmmFile=file.path(platedir, "snpsetObject.rda")) |
|
94 |
+} |
|
95 |
+q("no") |
|
96 |
+@ |
|
97 |
+ |
|
98 |
+<<preprocessAndGenotype, eval=FALSE, echo=FALSE>>= |
|
99 |
+for(i in seq(along=celFiles)){ |
|
100 |
+ platedir <- file.path(outdir, names(celFiles)[i]) |
|
101 |
+ if(!file.exists(platedir)) dir.create(platedir) |
|
102 |
+ sge.submit(crlmmWrapper, |
|
103 |
+ filenames=celFiles[[i]],##[1:15], |
|
104 |
+ cdfName="genomewidesnp6", |
|
105 |
+ save.it=FALSE, |
|
106 |
+ load.it=TRUE, |
|
107 |
+ intensityFile=file.path(platedir, "normalizedIntensities.rda"), |
|
108 |
+ crlmmFile=file.path(platedir, "snpsetObject.rda"), |
|
109 |
+ global.savelist=c("platedir"), |
|
110 |
+ packages="crlmm") |
|
111 |
+} |
|
112 |
+##platedirs <- sapply(names(celFiles), function(x) file.path(outdir, x)) |
|
113 |
+##intensityFiles <- file.path(platedirs, "normalizedIntensities.rda") |
|
114 |
+####crlmmFiles <- file.path(platedirs, "snpsetObject.rda") |
|
115 |
+####myF <- function(celFiles, intensityFile, crlmmFile, ...){ |
|
116 |
+#### crlmmWrapper(celFiles, intensityFile=intensityFile, |
|
117 |
+#### crlmmFile=crlmmFile, |
|
118 |
+##sge.apply(celFiles, |
|
119 |
+## crlmmWrapper, |
|
120 |
+## cdfName="genomewidesnp6", |
|
121 |
+## save.it=FALSE, |
|
122 |
+## load.it=TRUE, |
|
123 |
+## intensityFile=, |
|
124 |
+## crlmmFile=file.path(platedir, "snpsetObject.rda")) |
|
125 |
+q("no") |
|
126 |
+@ |
|
127 |
+ |
|
128 |
+As a result of the above wrapper, the following R objects are now |
|
129 |
+created for each plate: |
|
130 |
+ |
|
131 |
+<<crlmmSetListObjects>>= |
|
132 |
+fns <- list.files(platedir, pattern="crlmmSetList", full.name=TRUE) |
|
133 |
+@ |
|
134 |
+ |
|
135 |
+To estimate copy number, simply run the update function on this vector |
|
136 |
+of filenames. If multiple nodes are available for computation, one |
|
137 |
+could easily split this job. |
|
138 |
+ |
|
139 |
+<<copynumber, echo=FALSE, eval=TRUE>>= |
|
140 |
+for(i in seq(along=celFiles)){ |
|
141 |
+ fns <- list.files(file.path(outdir, names(celFiles[i])), |
|
142 |
+ pattern="crlmmSetList", full.names=TRUE) |
|
143 |
+ update(fns) |
|
144 |
+} |
|
145 |
+@ |
|
82 | 146 |
|
83 |
-<<>>= |
|
84 |
-list.files(outdir)[grep("crlmmSetList", list.files(outdir))] |
|
85 |
-@ |
|
147 |
+<<copynumber.sge, echo=FALSE, eval=TRUE>>= |
|
148 |
+for(i in seq(along=celFiles)){ |
|
149 |
+ fns <- list.files(file.path(outdir, names(celFiles[i])), |
|
150 |
+ pattern="crlmmSetList", full.names=TRUE) |
|
151 |
+ sge.parLapply(X=fns, FUN=update, packages="crlmm", cluster=FALSE) |
|
152 |
+} |
|
153 |
+@ |
|
86 | 154 |
|
87 | 155 |
\paragraph{Locus- and allele-specific estimates of copy number.} |
88 | 156 |
|
... | ... |
@@ -92,9 +160,7 @@ Load the object for chromosome 22 and compute copy number: |
92 | 160 |
CHR <- 22 |
93 | 161 |
if(!exists("crlmmSetList")) load(file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep=""))) |
94 | 162 |
show(crlmmSetList) |
95 |
-if(length(crlmmSetList)==2){ |
|
96 |
- crlmmSetList <- update(crlmmSetList, CHR=CHR) |
|
97 |
-} |
|
163 |
+crlmmSetList <- update(crlmmSetList) |
|
98 | 164 |
show(crlmmSetList) |
99 | 165 |
@ |
100 | 166 |
|
... | ... |
@@ -501,10 +567,10 @@ for(i in indices[[j]]){ |
501 | 567 |
intensities. Each panel displays a single SNP.} |
502 | 568 |
\end{figure} |
503 | 569 |
|
504 |
-TODO: Plot the prediction regions for total copy number 2 and 3 for the |
|
505 |
-first plate. Plotting symbols are the genotype calls (1=AA, 2=AB, 3=BB); |
|
506 |
-light grey points are from other plates. One could also add the |
|
507 |
-prediction regions for 0-4 copies, but it gets crowded. |
|
570 |
+% TODO: Plot the prediction regions for total copy number 2 and 3 for |
|
571 |
+% the first plate. Plotting symbols are the genotype calls (1=AA, 2=AB, |
|
572 |
+% 3=BB); light grey points are from other plates. One could also add the |
|
573 |
+% prediction regions for 0-4 copies, but it gets crowded. |
|
508 | 574 |
|
509 | 575 |
|
510 | 576 |
<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE, echo=FALSE>>= |
... | ... |
@@ -572,8 +638,10 @@ the allele-specific copy number. SNP-specific parameters are estimated |
572 | 638 |
only from samples with a suitable confidence score for the diallelic |
573 | 639 |
genotype calls. The default confidence threshold is 0.99, but can be |
574 | 640 |
adjusted by passing the argument \Robject{CONF.THR} to the |
575 |
-\Robject{update} method. TODO: illustrate this approach with boxplots |
|
576 |
-of the A and B intensities stratified by genotype. |
|
641 |
+\Robject{update} method. |
|
642 |
+ |
|
643 |
+% TODO: illustrate this approach with boxplots of the A and B |
|
644 |
+% intensities stratified by genotype. |
|
577 | 645 |
|
578 | 646 |
<<linearModel, fig=TRUE,include=FALSE, eval=FALSE, echo=FALSE>>= |
579 | 647 |
cols <- brewer.pal(7, "Accent")[c(1, 4, 7)] |
... | ... |
@@ -28,50 +28,70 @@ options(prompt="R> ") |
28 | 28 |
|
29 | 29 |
<<>>= |
30 | 30 |
library(crlmm) |
31 |
+crlmm:::validCdfNames() |
|
32 |
+outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/illumina/HumanCNV370-Duo" |
|
33 |
+datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k" |
|
34 |
+cdfName <- "human370v1c" |
|
31 | 35 |
@ |
32 | 36 |
|
33 |
-<<echo=FALSE, eval=TRUE>>= |
|
34 |
-setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k") |
|
35 |
-@ |
|
36 |
- |
|
37 |
-<<readIdat>>= |
|
38 |
-samplesheet5 = read.csv("HumanHap370Duo_Sample_Map.csv", header=TRUE, as.is=TRUE)[-c(28:46,61:75,78:79),] |
|
39 |
-if(!exists("RG")){ |
|
40 |
- RG <- readIdatFiles(samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE) |
|
41 |
-} |
|
37 |
+<<samplesToProcess>>= |
|
38 |
+samplesheet = read.csv(file.path(datadir, "HumanHap370Duo_Sample_Map.csv"), header=TRUE, as.is=TRUE) |
|
39 |
+## Excluding a few of the problem samples |
|
40 |
+samplesheet <- samplesheet[-c(28:46,61:75,78:79), ] |
|
41 |
+arrayNames <- file.path(datadir, unique(samplesheet[, "SentrixPosition"])) |
|
42 |
+##Check that files exist |
|
43 |
+grnfiles = all(file.exists(paste(arrayNames, ".Grn.dat", sep=""))) |
|
44 |
+redfiles = all(file.exists(paste(arrayNames, ".Red.dat", sep=""))) |
|
42 | 45 |
@ |
43 | 46 |
|
44 | 47 |
Alternatively, arguments to the readIdatFiles can be passed through the |
45 | 48 |
{\tt ...} argument of the \R{} function \Robject{crlmmWrapper}. |
46 | 49 |
|
47 | 50 |
<<wrapper>>= |
48 |
-crlmmWrapper(sampleSheet=samplesheet5, |
|
51 |
+crlmmWrapper(sampleSheet=samplesheet, |
|
52 |
+ arrayNames=arrayNames, |
|
49 | 53 |
arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
50 | 54 |
saveDate=TRUE, |
51 | 55 |
cdfName=cdfName, |
52 | 56 |
load.it=FALSE, |
53 |
- save.it=TRUE, |
|
54 |
- intensityFile=file.path(platedir, "normalizedIntensities.rda"), |
|
55 |
- crlmmFile=file.path(platedir, "snpsetObject.rda"), |
|
56 |
- rgFile=file.path(platedir, "rgFile.rda"), |
|
57 |
- platform="illumina") |
|
57 |
+ save.it=FALSE, |
|
58 |
+ intensityFile=file.path(outdir, "normalizedIntensities.rda"), |
|
59 |
+ crlmmFile=file.path(outdir, "snpsetObject.rda"), |
|
60 |
+ rgFile=file.path(outdir, "rgFile.rda")) |
|
58 | 61 |
@ |
59 | 62 |
|
60 |
-Samples with low signal to noise ratios tend to have a lot of variation |
|
61 |
-in the point estimates of copy number. One may want to exclude these |
|
62 |
-samples. |
|
63 |
+This creates a \Robject{crlmmSetList} object in the \Robject{outdir} |
|
64 |
+directory. The first element of this object contains the |
|
65 |
+quantile-normalized A and B intensities. The second element in the list |
|
66 |
+contains the crlmm genotype calls. |
|
63 | 67 |
|
64 | 68 |
|
65 | 69 |
<<SNR>>= |
66 |
-hist(crlmmOut$SNR) |
|
70 |
+CHR <- 1 |
|
71 |
+filename <- paste(outdir, "/crlmmSetList_", CHR, ".rda", sep="") |
|
72 |
+load(filename) |
|
73 |
+hist(crlmmSetList[[1]]$SNR) |
|
67 | 74 |
@ |
68 | 75 |
|
69 |
-Run update on the CrlmmSetList object to obtain copy number. Estimate |
|
76 |
+Run update on the CrlmmSetList object to obtain copy number estimates. Estimate |
|
70 | 77 |
copy number for chromosome 1. |
71 | 78 |
|
72 |
-<<>>= |
|
73 |
-CHR <- 1 |
|
74 |
-filename <- paste(platedir, "/CrlmmSetList_", CHR, ".rda", sep="") |
|
79 |
+<<estimateCn>>= |
|
80 |
+update(filename) |
|
81 |
+##Or equivalently, |
|
82 |
+##crlmmSetList <- computeCopynumber(crlmmSetList) |
|
83 |
+##save(crlmmSetList, file=file.path(outdir, paste("crlmmSetList_", CHR, ".rda", sep=""))) |
|
84 |
+@ |
|
85 |
+ |
|
86 |
+Samples with low signal to noise ratios tend to have a lot of variation |
|
87 |
+in the point estimates of copy number. One may want to exclude these |
|
88 |
+samples, or smooth after filtering outliers. Here we load the |
|
89 |
+crlmmSetList object. See the copynumber.Rnw vignette for example plots. |
|
90 |
+ |
|
91 |
+<<crlmmSetList>>= |
|
92 |
+##reload the updated object |
|
93 |
+load(filename) |
|
94 |
+cn <- copyNumber(crlmmSetList) |
|
75 | 95 |
@ |
76 | 96 |
|
77 | 97 |
\end{document} |
... | ... |
@@ -115,7 +115,7 @@ |
115 | 115 |
|
116 | 116 |
\item{"CA"}{\code{signature(object = "CrlmmSetList")}: extracts the |
117 | 117 |
copy number for allele A at polymorphic loci. For nonpolymorphic |
118 |
- probes, CA returns the total copy number.} |
|
118 |
+ probes, CA returns the total copy number. } |
|
119 | 119 |
|
120 | 120 |
\item{"CB"}{\code{signature(object = "CrlmmSetList")}: extracts the |
121 | 121 |
copy number for allele B at polymorphic loci. For nonpolymorphic |
... | ... |
@@ -9,9 +9,8 @@ |
9 | 9 |
confidence scores are assigned using the crlmm algorithm. |
10 | 10 |
} |
11 | 11 |
\usage{ |
12 |
-crlmmWrapper(filenames, cdfName = "genomewidesnp6", load.it=FALSE, |
|
13 |
-save.it=FALSE, splitByChr=TRUE, crlmmFile, intensityFile, rgFile, |
|
14 |
-platform=c("affymetrix", "illumina")[1], ...) |
|
12 |
+crlmmWrapper(filenames, cdfName, load.it=FALSE, |
|
13 |
+save.it=FALSE, splitByChr=TRUE, crlmmFile, intensityFile, rgFile, ...) |
|
15 | 14 |
} |
16 | 15 |
\arguments{ |
17 | 16 |
\item{filenames}{ |
... | ... |
@@ -19,6 +18,7 @@ platform=c("affymetrix", "illumina")[1], ...) |
19 | 18 |
} |
20 | 19 |
\item{cdfName}{ |
21 | 20 |
Annotation package (currently, only 'genomewidesnp6' is supported). |
21 |
+ See \code{crlmm:::validCdfNames()}. |
|
22 | 22 |
} |
23 | 23 |
\item{load.it}{ Logical. If TRUE, the wrapper function will try to |
24 | 24 |
load the 'intensityFile' and 'crlmmFile'. When FALSE, the quantile |
... | ... |
@@ -41,8 +41,6 @@ platform=c("affymetrix", "illumina")[1], ...) |
41 | 41 |
\item{rgFile}{Character string. Name of file (including the |
42 | 42 |
complete path) containing the RG intensities (only applicable for |
43 | 43 |
illumina platform). } |
44 |
- \item{platform}{Character string. Currently only \code{affymetrix} |
|
45 |
- and \code{illumina} are supported.} |
|
46 | 44 |
\item{\dots}{ |
47 | 45 |
additional arguments to function \code{readIdatFiles} (illumina |
48 | 46 |
platform only) |