* mymac:
add AffyGW.pdf
update vignettes in inst/scripts
Change argument of validCEL to celfiles
Update constructInf to accommodate GenomeDataFrame class for featureData
bump version to 1.13.7
Add doRUnit.R
Add celfile-utils.Rd
Streamlne some of the Rd files
add validCEL function that checks whether all celfiles can be read
getFeatureData returns GenomeAnnotatedDataFrame
Remove imports from methods. Remove pdf of illumina_copynumber.pdf (large file) and copynumber.pdf
getFeatureDAta returns GenomeAnnotatedDataFrame
Remove separate vignette for copy number in inst/scripts. Include copynumber section in both affy and illumina pipelines.
update documentation files for genotype.Illumina, preprocessInf, and genotypeInf (cdfName added as argument. Indicate that 'batch' should be a character string)
pass cdfName to genotypeInf and preprocessInf
add unitTests and cn-functions for 'simple usage'
Combine AffyPreprocess and copynumber. Combine IlluminaPreprocess and copynumber
remove depency on ff to allow installation on my mac
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@62108 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.13.5 |
|
4 |
+Version: 1.13.7 |
|
5 | 5 |
Date: 2010-12-10 |
6 | 6 |
Author: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry |
7 | 7 |
Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -14,20 +14,19 @@ Depends: R (>= 2.13.0), |
14 | 14 |
Imports: affyio (>= 1.19.2), |
15 | 15 |
ellipse, |
16 | 16 |
genefilter (>= 1.33.0), |
17 |
- ff (>= 2.2-1), |
|
18 | 17 |
mvtnorm, |
19 | 18 |
preprocessCore (>= 1.13.4), |
20 | 19 |
splines, |
21 | 20 |
stats, |
22 | 21 |
SNPchip, |
23 | 22 |
utils, |
24 |
- lattice |
|
23 |
+ lattice, |
|
24 |
+ ff |
|
25 | 25 |
Suggests: hapmapsnp6, |
26 | 26 |
genomewidesnp6Crlmm (>= 1.0.4), |
27 | 27 |
snpStats, |
28 | 28 |
ellipse, |
29 |
- RUnit, |
|
30 |
- ff (>= 2.2-1) |
|
29 |
+ RUnit |
|
31 | 30 |
Collate: AllGenerics.R |
32 | 31 |
AllClasses.R |
33 | 32 |
methods-AssayData.R |
... | ... |
@@ -42,9 +41,10 @@ Collate: AllGenerics.R |
42 | 41 |
crlmmGT2.R |
43 | 42 |
crlmm-illumina.R |
44 | 43 |
snprma-functions.R |
44 |
+ cn-functions.R |
|
45 | 45 |
utils.R |
46 | 46 |
zzz.R |
47 | 47 |
test_crlmm_package.R |
48 | 48 |
LazyLoad: yes |
49 | 49 |
biocViews: Microarray, Preprocessing, SNP, Bioinformatics, CopyNumberVariants |
50 |
-Packaged: 2011-04-30 17:10:59 UTC; biocbuild |
|
50 |
+Packaged: 2011-04-30 17:10:59 UTC; biocbuild |
|
51 | 51 |
\ No newline at end of file |
... | ... |
@@ -1,4 +1,5 @@ |
1 | 1 |
useDynLib("crlmm", .registration=TRUE) |
2 |
+ |
|
2 | 3 |
##--------------------------------------------------------------------------- |
3 | 4 |
## Biobase |
4 | 5 |
##--------------------------------------------------------------------------- |
... | ... |
@@ -22,7 +23,7 @@ importFrom(Biobase, assayDataElement, assayDataElementNames, |
22 | 23 |
##--------------------------------------------------------------------------- |
23 | 24 |
## oligoClasses |
24 | 25 |
##--------------------------------------------------------------------------- |
25 |
-importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet) |
|
26 |
+importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet)##, ff_or_matrix) |
|
26 | 27 |
importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
27 | 28 |
"confs<-", cnConfidence, "cnConfidence<-", isSnp, |
28 | 29 |
chromosome, position, A, B, |
... | ... |
@@ -49,9 +50,9 @@ importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd, updat |
49 | 50 |
importFrom(genefilter, rowSds) |
50 | 51 |
importFrom(mvtnorm, dmvnorm) |
51 | 52 |
importFrom(ellipse, ellipse) |
52 |
-importFrom(ff, ffdf, physical.ff, physical.ffdf, ffrowapply) |
|
53 |
+##importFrom(ff, ffdf, physical.ff, physical.ffdf, ffrowapply) |
|
53 | 54 |
|
54 |
-importClassesFrom(oligoClasses, ff_matrix, ffdf) |
|
55 |
+##importClassesFrom(oligoClasses, ff_matrix, ffdf) |
|
55 | 56 |
##exportMethods(lines) |
56 | 57 |
exportMethods(CA, CB) |
57 | 58 |
export(crlmm, |
... | ... |
@@ -70,5 +71,5 @@ export(crlmm, |
70 | 71 |
export(genotypes, totalCopynumber, rawCopynumber, xyplot) |
71 | 72 |
exportMethods(A, B, corr, nuA, nuB, phiA, phiB, predictionRegion, posteriorProbability, tau2, Ns, medians, mads, |
72 | 73 |
xyplot, calculateRBaf) |
73 |
-export(ABpanel, constructInf, preprocessInf, genotypeInf) |
|
74 |
+export(ABpanel, constructInf, preprocessInf, genotypeInf, validCEL, celDates) |
|
74 | 75 |
exportClasses(PredictionRegion) |
5 | 5 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,117 @@ |
1 |
+copynumber <- function(filenames, |
|
2 |
+ batch, |
|
3 |
+ summaries=c("lrr", "baf", "ca", "cb", "gt", "gtconf"), |
|
4 |
+ write=FALSE, |
|
5 |
+ outdir=".", |
|
6 |
+ onefile=FALSE, |
|
7 |
+ rda=TRUE){ |
|
8 |
+ fnamelist <- split(filenames, batch) |
|
9 |
+ batchlist <- split(batch, batchlist) |
|
10 |
+ ## for each element in list. Avoid |
|
11 |
+ foreach(i=fnamelist) %do% simpleusage(filenames=fnameslist[[i]], batch=batchlist[[i]]) |
|
12 |
+} |
|
13 |
+ |
|
14 |
+simpleusage <- function(filenames, batch, ...){ |
|
15 |
+ object <- genotype(fnamelist, batch, ...) |
|
16 |
+ ## run genotype on each element in fnamelist |
|
17 |
+ object <- genotypeSummary(object) |
|
18 |
+ ## different than currently implemented |
|
19 |
+ ## - shrink to saved batch medians, etc. |
|
20 |
+ ## - shinkage across markers (?) |
|
21 |
+ object <- shrinkSummary(object) |
|
22 |
+ results <- array(NA, nrow(object), ncol(object), length(summaries)) |
|
23 |
+ if(c("ca", "cb") %in% summaries){ |
|
24 |
+ ## estimateCnParameters |
|
25 |
+ ##results <- estimatecnParameters(...) |
|
26 |
+ } else results <- NULL |
|
27 |
+ is.baf <- "baf" %in% summaries || "lrr" %in% summaries |
|
28 |
+ if(is.baf){ |
|
29 |
+ results2 <- calculateRtheta(object) ## returns list |
|
30 |
+ ## keep as list, or coerce to array |
|
31 |
+ } |
|
32 |
+ if(write){ |
|
33 |
+ ##write2(, onefile=onefile) |
|
34 |
+ } |
|
35 |
+ return(results) |
|
36 |
+} |
|
37 |
+ |
|
38 |
+ |
|
39 |
+imputeTheta <- function(ia, ib, theta, S=100){ |
|
40 |
+## y <- rbind(y1, y2) |
|
41 |
+ y <- theta |
|
42 |
+ N <- nrow(y); p <- ncol(y) |
|
43 |
+ |
|
44 |
+ mu0 <- apply(y, 2, mean, na.rm=TRUE) |
|
45 |
+ sd0 <- mu0/5 |
|
46 |
+ L0 <- matrix(.1, p, p); diag(L0) <- 1; L0 <- L0*outer(sd0, sd0) |
|
47 |
+ nu0 <- p+2; S0 <- L0 |
|
48 |
+ |
|
49 |
+ ## starting values |
|
50 |
+ Sigma <- S0 |
|
51 |
+ Y.full <- y |
|
52 |
+ O <- 1*(!is.na(y)) |
|
53 |
+ if(all(O[1, ] == 0)){ |
|
54 |
+ return(rep(NA, length(ia))) |
|
55 |
+ } |
|
56 |
+ for(j in seq_len(p)) Y.full[is.na(Y.full[, j]), j] <- mu0[j] |
|
57 |
+ ## Gibbs sampler |
|
58 |
+ ##THETA <- SIGMA <- Y.MISS <- NULL |
|
59 |
+ ## problems: approx. 90 observations for the means in the other batches |
|
60 |
+ ## -- only 3 observations for the mean in the current batch. |
|
61 |
+## THETA <- matrix(NA, iter, p) |
|
62 |
+## SIGMA <- matrix(NA, iter, p^2) |
|
63 |
+ Y.MISS <- matrix(NA, S, sum(O[1,]==0)) |
|
64 |
+ bafs <- matrix(NA, S, length(a)) |
|
65 |
+ for(s in seq_len(S)){ |
|
66 |
+ ## update lambda |
|
67 |
+ ybar <- apply(Y.full, 2, mean) |
|
68 |
+ Ln <- solve(solve(L0) + N*solve(Sigma)) |
|
69 |
+ mun <- Ln%*%(solve(L0)%*%mu0 + N*solve(Sigma)%*%ybar) |
|
70 |
+ lambda <- rmvnorm(1, mun, Ln) |
|
71 |
+ |
|
72 |
+ ##update sigma |
|
73 |
+ Sn <- S0+(t(Y.full)-c(lambda))%*%t(t(Y.full)-c(lambda)) |
|
74 |
+ Sigma <- solve(rwish(nu0+N, solve(Sn))) |
|
75 |
+ |
|
76 |
+ ## update missing data (only care about the first row.) |
|
77 |
+ a <- O[1, ] == 1 |
|
78 |
+ b <- O[1, ] == 0 |
|
79 |
+ iSa <- solve(Sigma[a,a]) |
|
80 |
+ beta.j <- Sigma[b,a]%*%iSa |
|
81 |
+ Sigma.j <- Sigma[b,b]-Sigma[b,a]%*%iSa%*%Sigma[a,b] |
|
82 |
+ lambda.j <- lambda[b]+beta.j%*%(t(Y.full[1,a, drop=FALSE]-lambda[a])) |
|
83 |
+ Y.full[1,b] <- rmvnorm(1, lambda.j, Sigma.j) |
|
84 |
+ ##SIGMA[s, ] <- c(Sigma) |
|
85 |
+ Y.MISS[s, ] <- Y.full[1, O[1, ]==0] |
|
86 |
+ } |
|
87 |
+ na.cols <- which(is.na(y[1, ])) |
|
88 |
+ THETA <- matrix(NA, S, 3) |
|
89 |
+ THETA[, na.cols] <- Y.MISS |
|
90 |
+ if(length(na.cols) < length(ia)) |
|
91 |
+ THETA[, -na.cols] <- y[1, -na.cols] |
|
92 |
+ |
|
93 |
+ obs.theta <- atan2(ib, ia)*2/pi |
|
94 |
+ theta.aa <- matrix(THETA[, 1], S, 3) |
|
95 |
+ theta.ab <- matrix(THETA[, 2], S, 3) |
|
96 |
+ theta.bb <- matrix(THETA[, 3], S, 3) |
|
97 |
+ lessAA <- obs.theta < theta.aa |
|
98 |
+ lessAB <- obs.theta < theta.ab |
|
99 |
+ lessBB <- obs.theta < theta.bb |
|
100 |
+ grAA <- !lessAA ## >= theta.aa |
|
101 |
+ grAB <- !lessAB ## >= theta.ab |
|
102 |
+ grBB <- !lessBB ## >= theta.bb |
|
103 |
+ ##not.na <- !is.na(theta.aa) |
|
104 |
+ I1 <- grAA & lessAB |
|
105 |
+ I2 <- grAB & lessBB |
|
106 |
+ ##mu <- apply(Y.MISS, 2, mean) |
|
107 |
+ ##bf <- matrix(NA, S, length(ia)) |
|
108 |
+ bf <- rep(NA, S*length(ia)) |
|
109 |
+ I1 <- as.logical(I1); I2 <- as.logical(I2) |
|
110 |
+ bf[I1] <- 0.5 * as.numeric(((obs.theta-theta.aa)/(theta.ab-theta.aa)))[I1] |
|
111 |
+ bf[I2] <- as.numeric((.5 * (obs.theta - theta.ab) / (theta.bb - theta.ab)))[I2] + 0.5 |
|
112 |
+ bf[as.logical(lessAA)] <- 0 |
|
113 |
+ bf[as.logical(grBB)] <- 1 |
|
114 |
+ bf <- matrix(bf, S, 3, byrow=TRUE) |
|
115 |
+ pm.bf <- apply(bf, 2, mean) |
|
116 |
+ return(pm.bf) |
|
117 |
+} |
... | ... |
@@ -53,9 +53,9 @@ getFeatureData <- function(cdfName, copynumber=FALSE){ |
53 | 53 |
index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position |
54 | 54 |
M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))] |
55 | 55 |
M[index, "chromosome"] <- chromosome2integer(snpProbes[, grep("chr", colnames(snpProbes))]) |
56 |
- M[index, "isSnp"] <- 1L |
|
57 |
- ##index <- which(is.na(M[, "isSnp"])) |
|
58 | 56 |
##M[index, "isSnp"] <- 1L |
57 |
+ index <- which(is.na(M[, "isSnp"])) |
|
58 |
+ M[index, "isSnp"] <- 1L |
|
59 | 59 |
if(copynumber){ |
60 | 60 |
index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position |
61 | 61 |
M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))] |
... | ... |
@@ -65,9 +65,14 @@ getFeatureData <- function(cdfName, copynumber=FALSE){ |
65 | 65 |
##A few of the snpProbes do not match -- I think it is chromosome Y. |
66 | 66 |
if(any(is.na(M[, "chromosome"]))) |
67 | 67 |
M[is.na(M[, "chromosome"]), "isSnp"] <- 1L |
68 |
- M <- data.frame(M) |
|
69 |
- return(new("AnnotatedDataFrame", data=M)) |
|
68 |
+ ##return(new("AnnotatedDataFrame", data=M)) |
|
69 |
+ new("GenomeAnnotatedDataFrame", |
|
70 |
+ position=as.integer(M[, "position"]), |
|
71 |
+ chromosome=as.integer(M[, "chromosome"]), |
|
72 |
+ isSnp=as.logical(M[, "isSnp"]), |
|
73 |
+ row.names=featurenames) |
|
70 | 74 |
} |
75 |
+ |
|
71 | 76 |
getFeatureData.Affy <- getFeatureData |
72 | 77 |
|
73 | 78 |
construct <- function(filenames, |
... | ... |
@@ -188,12 +193,18 @@ snprmaAffy <- function(cnSet, filenames, |
188 | 193 |
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
189 | 194 |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double") |
190 | 195 |
sampleBatches <- splitIndicesByNode(seq(along=filenames)) |
191 |
- ocLapply(sampleBatches, rsprocessCEL, filenames=filenames, |
|
192 |
- fitMixture=TRUE, A=A(cnSet), B=B(cnSet), C=calls(cnSet), |
|
196 |
+ ocLapply(sampleBatches, |
|
197 |
+ rsprocessCEL, |
|
198 |
+ filenames=filenames, |
|
199 |
+ fitMixture=TRUE, |
|
200 |
+ A=A(cnSet), B=B(cnSet), C=calls(cnSet), |
|
193 | 201 |
D=snpCallProbability(cnSet), |
194 | 202 |
SKW=cnSet$SKW, SNR=cnSet$SNR, |
195 |
- mixtureParams=mixtureParams, eps=eps, seed=seed, |
|
196 |
- mixtureSampleSize=mixtureSampleSize, pkgname=pkgname, |
|
203 |
+ mixtureParams=mixtureParams, |
|
204 |
+ eps=eps, |
|
205 |
+ seed=seed, |
|
206 |
+ mixtureSampleSize=mixtureSampleSize, |
|
207 |
+ pkgname=pkgname, |
|
197 | 208 |
neededPkgs=c("crlmm", pkgname)) |
198 | 209 |
if(verbose) message("Cloning A and B matrices to store genotype calls and confidence scores.") |
199 | 210 |
return(mixtureParams) |
... | ... |
@@ -215,15 +226,14 @@ genotype <- function(filenames, |
215 | 226 |
gender=NULL, |
216 | 227 |
returnParams=TRUE, |
217 | 228 |
badSNP=0.7){ |
218 |
- stopifnot(require("ff")) |
|
229 |
+ stopifnot(isPackageLoaded("ff")) |
|
219 | 230 |
cnSet <- constructAffy(filenames=filenames, |
220 | 231 |
cdfName=cdfName, |
221 | 232 |
batch=batch, verbose=verbose) |
222 | 233 |
if(!(is(A(cnSet), "ff") || is(A(cnSet), "ffdf"))){ |
223 | 234 |
stop("The ff package is required for this function.") |
224 | 235 |
} |
225 |
- |
|
226 |
- mixtureParams <- snprmaAffy(cnSet, filenames=filenames, |
|
236 |
+ cnSet@mixtureParams <- snprmaAffy(cnSet, filenames=filenames, |
|
227 | 237 |
mixtureSampleSize=mixtureSampleSize, |
228 | 238 |
eps=eps, |
229 | 239 |
seed=seed, |
... | ... |
@@ -233,7 +243,7 @@ genotype <- function(filenames, |
233 | 243 |
verbose=verbose) |
234 | 244 |
stopifnot(ok) |
235 | 245 |
ok <- genotypeAffy(cnSet=cnSet, |
236 |
- mixtureParams=mixtureParams, |
|
246 |
+ mixtureParams=cnSet@mixtureParams, |
|
237 | 247 |
SNRMin=SNRMin, |
238 | 248 |
recallMin=recallMin, |
239 | 249 |
recallRegMin=recallRegMin, |
... | ... |
@@ -444,7 +444,7 @@ readBPM <- function(bpmFile){ |
444 | 444 |
|
445 | 445 |
|
446 | 446 |
readGenCallOutput = function(file, path=".", cdfName, |
447 |
- colnames=list("SampleID"="Sample ID", "SNPID"="SNP Name", "XRaw"="X Raw", "YRaw"="Y Raw"), |
|
447 |
+ colnames=list("SampleID"="Sample ID", "SNPID"="SNP Name", "XRaw"="X Raw", "YRaw"="Y Raw"), |
|
448 | 448 |
type=list("SampleID"="character", "SNPID"="character", "XRaw"="integer", "YRaw"="integer"), verbose=FALSE) { |
449 | 449 |
|
450 | 450 |
if(!identical(names(type), names(colnames))) |
... | ... |
@@ -462,9 +462,9 @@ readGenCallOutput = function(file, path=".", cdfName, |
462 | 462 |
sepp=s[2] |
463 | 463 |
a1=unlist(strsplit(tmp[10][1],s[2])) |
464 | 464 |
} |
465 |
- |
|
465 |
+ |
|
466 | 466 |
if(sum(is.na(match(colnames,a1)))!=0) |
467 |
- stop("Cannot find required columns: ", colnames[is.na(match(colnames,a1))], " in ", file, "\nPlease check whether this data was exported.") |
|
467 |
+ stop("Cannot find required columns: ", colnames[is.na(match(colnames,a1))], " in ", file, "\nPlease check whether this data was exported.") |
|
468 | 468 |
|
469 | 469 |
m1=m=match(a1,colnames) |
470 | 470 |
m[is.na(m1)==FALSE] =type[m1[!is.na(m1)]] |
... | ... |
@@ -472,19 +472,19 @@ readGenCallOutput = function(file, path=".", cdfName, |
472 | 472 |
names(m) = names(colnames)[m1] |
473 | 473 |
|
474 | 474 |
fc = file(file.path(path, file), open="r") |
475 |
- |
|
475 |
+ |
|
476 | 476 |
dat = scan(fc, what=m, skip=10,sep=sepp) |
477 | 477 |
close(fc) |
478 |
- |
|
478 |
+ |
|
479 | 479 |
samples = unique(dat$"SampleID") |
480 | 480 |
nsamples = length(samples) |
481 | 481 |
snps = unique(dat$"SNPID") |
482 | 482 |
nsnps = length(snps) |
483 | 483 |
if(verbose) |
484 | 484 |
cat("Check ordering for samples","\n") |
485 |
- |
|
485 |
+ |
|
486 | 486 |
X = Y = zeroes = matrix(0, nsnps, nsamples) |
487 |
- |
|
487 |
+ |
|
488 | 488 |
for(i in 1:length(samples)) { |
489 | 489 |
ind = dat$"SampleID"==samples[i] |
490 | 490 |
if(sum(dat$"SNPID"[ind]==snps)==nsnps) { |
... | ... |
@@ -502,23 +502,23 @@ readGenCallOutput = function(file, path=".", cdfName, |
502 | 502 |
} |
503 | 503 |
gc() |
504 | 504 |
} |
505 |
- |
|
505 |
+ |
|
506 | 506 |
zeroes=(X=="0"|Y=="0") |
507 | 507 |
colnames(X) = colnames(Y) = colnames(zeroes) = samples |
508 | 508 |
rownames(X) = rownames(Y) = snps |
509 |
- |
|
510 |
- if(verbose) |
|
509 |
+ |
|
510 |
+ if(verbose) |
|
511 | 511 |
cat("Creating NChannelSet object\n") |
512 |
- |
|
512 |
+ |
|
513 | 513 |
XY = new("NChannelSet", X = initializeBigMatrix(name = "X", nr = nrow(X), nc = ncol(X), vmode = "integer", initdata=X), |
514 | 514 |
Y = initializeBigMatrix(name = "Y", nr = nrow(X), nc = ncol(X), vmode = "integer", initdata=Y), |
515 | 515 |
zero = initializeBigMatrix(name = "zero", nr = nrow(X), nc = ncol(X), vmode = "integer", initdata=zeroes), |
516 | 516 |
annotation = cdfName, storage.mode = "environment") |
517 | 517 |
sampleNames(XY)=colnames(X) |
518 |
- |
|
518 |
+ |
|
519 | 519 |
if(verbose) |
520 | 520 |
cat("Done\n") |
521 |
- |
|
521 |
+ |
|
522 | 522 |
XY |
523 | 523 |
} |
524 | 524 |
|
... | ... |
@@ -855,7 +855,7 @@ crlmmIllumina = function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
855 | 855 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
856 | 856 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
857 | 857 |
returnParams=FALSE, badSNP=.7) { |
858 |
- |
|
858 |
+ |
|
859 | 859 |
if(missing(cdfName)) { |
860 | 860 |
if(!missing(RG)) |
861 | 861 |
cdfName = annotation(RG) |
... | ... |
@@ -864,7 +864,7 @@ if(missing(cdfName)) { |
864 | 864 |
} |
865 | 865 |
if(!isValidCdfName(cdfName)) |
866 | 866 |
stop("cdfName not valid. see validCdfNames") |
867 |
- |
|
867 |
+ |
|
868 | 868 |
# if (save.it & (missing(snpFile) | missing(cnFile))) |
869 | 869 |
# stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it") |
870 | 870 |
# if (load.it & missing(snpFile)) |
... | ... |
@@ -1213,11 +1213,11 @@ constructInf <- function(sampleSheet=NULL, |
1213 | 1213 |
call=initializeBigMatrix(name="call", nr, nc), |
1214 | 1214 |
callProbability=initializeBigMatrix(name="callPr", nr,nc), |
1215 | 1215 |
annotation=cdfName, |
1216 |
+ featureData=featureData, |
|
1216 | 1217 |
batch=batch) |
1217 | 1218 |
if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) { |
1218 | 1219 |
sampleNames(cnSet) = sampleSheet$Sample_ID |
1219 |
- } |
|
1220 |
- else sampleNames(cnSet) = arrayNames |
|
1220 |
+ } else sampleNames(cnSet) = arrayNames |
|
1221 | 1221 |
if(saveDate){ |
1222 | 1222 |
protocolData = getProtocolData.Illumina(grnidats, sep=sep, fileExt=fileExt$green, verbose=verbose) |
1223 | 1223 |
} else{ |
... | ... |
@@ -1225,7 +1225,7 @@ constructInf <- function(sampleSheet=NULL, |
1225 | 1225 |
} |
1226 | 1226 |
rownames(pData(protocolData)) = sampleNames(cnSet) |
1227 | 1227 |
protocolData(cnSet) = protocolData |
1228 |
- featureData(cnSet) = featureData |
|
1228 |
+ ##featureData(cnSet) = featureData |
|
1229 | 1229 |
featureNames(cnSet) = featureNames(featureData) |
1230 | 1230 |
cnSet$gender <- initializeBigVector("gender", ncol(cnSet), vmode="integer") |
1231 | 1231 |
cnSet$SNR = initializeBigVector("crlmmSNR-", ncol(cnSet), "double") |
... | ... |
@@ -1248,10 +1248,10 @@ preprocessInf <- function(cnSet, |
1248 | 1248 |
stripNorm=TRUE, |
1249 | 1249 |
useTarget=TRUE, |
1250 | 1250 |
mixtureSampleSize=10^5, |
1251 |
- fitMixture=TRUE, |
|
1251 |
+ fitMixture=TRUE, |
|
1252 | 1252 |
eps=0.1, |
1253 | 1253 |
verbose=TRUE, |
1254 |
- seed=1){ |
|
1254 |
+ seed=1, cdfName){ |
|
1255 | 1255 |
stopifnot(all(c("gender", "SNR", "SKW") %in% varLabels(cnSet))) |
1256 | 1256 |
open(A(cnSet)) |
1257 | 1257 |
open(B(cnSet)) |
... | ... |
@@ -1304,7 +1304,8 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3), |
1304 | 1304 |
returnParams=TRUE, |
1305 | 1305 |
badSNP=0.7, |
1306 | 1306 |
gender=NULL, |
1307 |
- DF=6){ |
|
1307 |
+ DF=6, |
|
1308 |
+ cdfName){ |
|
1308 | 1309 |
is.snp = isSnp(cnSet) |
1309 | 1310 |
snp.index = which(is.snp) |
1310 | 1311 |
## narrays = ncol(cnSet) |
... | ... |
@@ -1379,6 +1380,7 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1379 | 1380 |
if(missing(cdfName)) stop("must specify cdfName") |
1380 | 1381 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
1381 | 1382 |
stopifnot(!missing(batch)) |
1383 |
+ if(is(batch, "factor")) batch <- as.character(batch) |
|
1382 | 1384 |
pkgname <- getCrlmmAnnotationName(cdfName) |
1383 | 1385 |
message("Instantiate CNSet container.") |
1384 | 1386 |
cnSet <- constructInf(sampleSheet=sampleSheet, |
... | ... |
@@ -1408,7 +1410,8 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1408 | 1410 |
fitMixture=fitMixture, |
1409 | 1411 |
eps=eps, |
1410 | 1412 |
verbose=verbose, |
1411 |
- seed=seed) |
|
1413 |
+ seed=seed, |
|
1414 |
+ cdfName=cdfName) |
|
1412 | 1415 |
message("Preprocessing complete. Begin genotyping...") |
1413 | 1416 |
genotypeInf(cnSet=cnSet, mixtureParams=mixtureParams, |
1414 | 1417 |
probs=probs, |
... | ... |
@@ -1419,7 +1422,8 @@ genotype.Illumina <- function(sampleSheet=NULL, |
1419 | 1422 |
returnParams=returnParams, |
1420 | 1423 |
badSNP=badSNP, |
1421 | 1424 |
gender=gender, |
1422 |
- DF=DF) |
|
1425 |
+ DF=DF, |
|
1426 |
+ cdfName=cdfName) |
|
1423 | 1427 |
return(cnSet) |
1424 | 1428 |
} |
1425 | 1429 |
|
... | ... |
@@ -216,3 +216,14 @@ setMethod("annotatedDataFrameFrom", "ffdf", Biobase:::annotatedDataFrameFromMatr |
216 | 216 |
getBAF <- function(theta, canonicalTheta) |
217 | 217 |
.Call('normalizeBAF', theta, ct) |
218 | 218 |
|
219 |
+ |
|
220 |
+validCEL <- function(celfiles){ |
|
221 |
+ for(i in seq_along(celfiles)){ |
|
222 |
+ res <- tryCatch(read.celfile(celfiles[i], intensity.means.only=TRUE), error=function(e) NULL) |
|
223 |
+ if(is.null(res)) { |
|
224 |
+ msg <- message("Problem reading ", celfiles[i]) |
|
225 |
+ stop(msg) |
|
226 |
+ } |
|
227 |
+ } |
|
228 |
+ return("Successfully read all cel files") |
|
229 |
+} |
224 | 235 |
deleted file mode 100644 |
... | ... |
@@ -1,11 +0,0 @@ |
1 |
-%\VignetteIndexEntry{Preprocessing and genotyping Affymetrix array for copy number analysis} |
|
2 |
-%\VignetteDepends{} |
|
3 |
-%\VignetteKeywords{crlmm, SNP 6} |
|
4 |
-%\VignettePackage{crlmm} |
|
5 |
- |
|
6 |
-%%% The real .Rnw is under inst/scripts. |
|
7 |
-%%% It cannot be added here because it depends on external |
|
8 |
-%%% data not yet available as a BioC package |
|
9 |
-\documentclass{article} |
|
10 |
-\begin{document} |
|
11 |
-\end{document} |
|
12 | 0 |
\ No newline at end of file |
... | ... |
@@ -1,9 +1,8 @@ |
1 |
-All: copynumber clean |
|
1 |
+All: vignettes clean |
|
2 | 2 |
|
3 | 3 |
## all pdfs must be generated from the first target of the Makefile |
4 |
-copynumber: copynumber.tex |
|
5 |
- cp ../scripts/copynumber.pdf . |
|
6 |
- cp ../scripts/AffymetrixPreprocessCN.pdf . |
|
4 |
+vignettes: AffyGW.tex |
|
5 |
+ cp ../scripts/AffyGW.pdf . |
|
7 | 6 |
cp ../scripts/IlluminaPreprocessCN.pdf . |
8 | 7 |
cp ../scripts/Infrastructure.pdf . |
9 | 8 |
cp ../scripts/crlmmDownstream.pdf . |
14 | 13 |
similarity index 88% |
15 | 14 |
rename from inst/scripts/AffymetrixPreprocessCN.Rnw |
16 | 15 |
rename to inst/scripts/AffyGW.Rnw |
... | ... |
@@ -6,6 +6,7 @@ |
6 | 6 |
\documentclass{article} |
7 | 7 |
\usepackage{graphicx} |
8 | 8 |
\usepackage{natbib} |
9 |
+\usepackage{amsmath} |
|
9 | 10 |
\usepackage{url} |
10 | 11 |
\newcommand{\Rfunction}[1]{{\texttt{#1}}} |
11 | 12 |
\newcommand{\Rmethod}[1]{{\texttt{#1}}} |
... | ... |
@@ -74,8 +75,7 @@ pathToCels <- "/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m" |
74 | 75 |
if(getRversion() < "2.13.0"){ |
75 | 76 |
rpath <- getRversion() |
76 | 77 |
} else rpath <- "trunk" |
77 |
-##outdir <- paste("/amber1/archive/oncbb/rscharpf/crlmm_vignette/", rpath, sep="") |
|
78 |
-outdir <- paste("/amber1/archive/oncbb/rscharpf/crlmm_vignette/", rpath, "/gw6v1.5/", sep="") |
|
78 |
+outdir <- paste("/amber1/archive/oncbb/rscharpf/crlmm_vignette/", rpath, "/gw6/", sep="") |
|
79 | 79 |
dir.create(outdir, recursive=TRUE, showWarnings=FALSE) |
80 | 80 |
@ |
81 | 81 |
|
... | ... |
@@ -87,7 +87,6 @@ the genotyping step will be stored in \verb+outdir+. |
87 | 87 |
ldPath(outdir) |
88 | 88 |
@ |
89 | 89 |
|
90 |
- |
|
91 | 90 |
% only needed if cacheing |
92 | 91 |
<<cachedir, echo=FALSE>>= |
93 | 92 |
setCacheDir(outdir) |
... | ... |
@@ -152,11 +151,7 @@ corrupt cel file. To check if any of the files are corrupt, try |
152 | 151 |
reading the files in one at a time: |
153 | 152 |
|
154 | 153 |
<<checkcorrupt,eval=FALSE>>= |
155 |
-require(affyio) |
|
156 |
-for(i in seq_along(celFiles)) { |
|
157 |
- cat("Reading ", i, "\n") |
|
158 |
- invisible(read.celfile(celFiles[i], intensity.means.only=TRUE)) |
|
159 |
-} |
|
154 |
+validCEL(celFiles) |
|
160 | 155 |
@ |
161 | 156 |
|
162 | 157 |
|
... | ... |
@@ -188,10 +183,12 @@ saveObject <- function(outdir, cnSet) { |
188 | 183 |
(cnset.saved <- saveObject(outdir, cnSet)) |
189 | 184 |
@ |
190 | 185 |
|
191 |
-Users can proceed to the \verb+copynumber+ vignette for copy number |
|
192 |
-analyses. See the \verb+Infrastructure+ vignette for additional |
|
193 |
-details on the \Robject{CNSet} class, including an overview of the |
|
194 |
-available accessors. |
|
186 |
+%Users can proceed to the \verb+copynumber+ vignette for copy number |
|
187 |
+%analyses. See the \verb+Infrastructure+ vignette for additional |
|
188 |
+%details on the \Robject{CNSet} class, including an overview of the |
|
189 |
+%available accessors. |
|
190 |
+ |
|
191 |
+\SweaveInput{copynumber} |
|
195 | 192 |
|
196 | 193 |
|
197 | 194 |
\section{Session information} |
... | ... |
@@ -200,4 +197,20 @@ toLatex(sessionInfo()) |
200 | 197 |
@ |
201 | 198 |
|
202 | 199 |
|
200 |
+\begin{figure}[f] |
|
201 |
+ \begin{center} |
|
202 |
+ \includegraphics[width=0.6\textwidth]{AffyGW-snr.pdf} |
|
203 |
+ \caption{The signal to noise ratio (SNR) for 180 HapMap samples. For |
|
204 |
+ Affymetrix platforms, SNR values below 5 can indicate possible |
|
205 |
+ problems with sample quality. In some circumstances, it may be |
|
206 |
+ more helpful to exclude samples with poor DNA quality.} |
|
207 |
+\end{center} |
|
208 |
+\end{figure} |
|
209 |
+ |
|
210 |
+ |
|
211 |
+\begin{bibliography} |
|
212 |
+ \bibliographystyle{plain} |
|
213 |
+ \bibliography{refs} |
|
214 |
+\end{bibliography} |
|
215 |
+ |
|
203 | 216 |
\end{document} |
... | ... |
@@ -5,6 +5,7 @@ |
5 | 5 |
\documentclass{article} |
6 | 6 |
\usepackage{graphicx} |
7 | 7 |
\usepackage{natbib} |
8 |
+\usepackage{amsmath} |
|
8 | 9 |
\usepackage[margin=1in]{geometry} |
9 | 10 |
\newcommand{\crlmm}{\Rpackage{crlmm}} |
10 | 11 |
\newcommand{\Rfunction}[1]{{\texttt{#1}}} |
... | ... |
@@ -84,15 +85,15 @@ ocProbesets(150e3) |
84 | 85 |
ocSamples(500) |
85 | 86 |
@ |
86 | 87 |
|
87 |
-\section{Initializing a container for storing processed data} |
|
88 |
- |
|
89 |
-This section will initialize a container for storing processed forms |
|
90 |
-of the data, including the normalized intensities for the A and B |
|
91 |
-alleles and the CRLMM genotype calls and confidence scores. In |
|
92 |
-addition, the container will store information on the markers |
|
93 |
-(physical position, chromosome, and a SNP indicator), the batch, and |
|
94 |
-the samples (e.g., gender). To construct this container for Infinium |
|
95 |
-platforms, several steps are required. |
|
88 |
+%\section{Initializing a container for storing processed data} |
|
89 |
+% |
|
90 |
+%This section will initialize a container for storing processed forms |
|
91 |
+%of the data, including the normalized intensities for the A and B |
|
92 |
+%alleles and the CRLMM genotype calls and confidence scores. In |
|
93 |
+%addition, the container will store information on the markers |
|
94 |
+%(physical position, chromosome, and a SNP indicator), the batch, and |
|
95 |
+%the samples (e.g., gender). To construct this container for Infinium |
|
96 |
+%platforms, several steps are required. |
|
96 | 97 |
|
97 | 98 |
We begin by specifying the path containing the raw IDAT files for a |
98 | 99 |
set of samples from the Infinium 370k platform. |
... | ... |
@@ -143,129 +144,140 @@ processed at similar times (e.g., within a few weeks). |
143 | 144 |
batch <- rep("1", nrow(samplesheet)) |
144 | 145 |
@ |
145 | 146 |
|
146 |
-Finally, we initialize an object of class \Robject{CNSet} using the |
|
147 |
-function \Rfunction{constructInf}. |
|
148 |
- |
|
149 |
-<<container,cache=TRUE>>= |
|
150 |
-cnSet <- constructInf(sampleSheet=samplesheet, |
|
151 |
- arrayNames=arrayNames, |
|
152 |
- batch=batch, |
|
153 |
- arrayInfoColNames=arrayInfo, |
|
154 |
- cdfName=cdfName, |
|
155 |
- verbose=TRUE, |
|
156 |
- saveDate=TRUE) |
|
157 |
-@ |
|
158 |
- |
|
159 |
-A concise summary of the object's contents can be viewed with the |
|
160 |
-\Rfunction{print} function. |
|
161 |
- |
|
162 |
-<<cnset>>= |
|
163 |
-print(cnSet) |
|
164 |
-@ |
|
165 |
- |
|
166 |
-Note that the above object does not yet contain any processed data |
|
167 |
-(only \verb+NA+'s). As the elements of the \verb+assayData+ slot are |
|
168 |
-\Robject{ff} objects (not matrices), several \verb+.ff+ files now |
|
169 |
-appear in the \verb+outdir+. The \verb+.ff+ files should not be |
|
170 |
-removed and can be listed using the \R{} function |
|
171 |
-\Rfunction{list.files}. %For the most part, the \emph{appearance} that |
|
147 |
+%Finally, we initialize an object of class \Robject{CNSet} using the |
|
148 |
+%function \Rfunction{constructInf}. |
|
149 |
+% |
|
150 |
+%<<container,cache=TRUE>>= |
|
151 |
+%cnSet <- constructInf(sampleSheet=samplesheet, |
|
152 |
+% arrayNames=arrayNames, |
|
153 |
+% batch=batch, |
|
154 |
+% arrayInfoColNames=arrayInfo, |
|
155 |
+% cdfName=cdfName, |
|
156 |
+% verbose=TRUE, |
|
157 |
+% saveDate=TRUE) |
|
158 |
+%@ |
|
159 |
+% |
|
160 |
+%A concise summary of the object's contents can be viewed with the |
|
161 |
+%\Rfunction{print} function. |
|
162 |
+% |
|
163 |
+%<<cnset>>= |
|
164 |
+%print(cnSet) |
|
165 |
+%@ |
|
166 |
+% |
|
167 |
+%Note that the above object does not yet contain any processed data |
|
168 |
+%(only \verb+NA+'s). As the elements of the \verb+assayData+ slot are |
|
169 |
+%\Robject{ff} objects (not matrices), several \verb+.ff+ files now |
|
170 |
+%appear in the \verb+outdir+. The \verb+.ff+ files should not be |
|
171 |
+%removed and can be listed using the \R{} function |
|
172 |
+%\Rfunction{list.files}. %For the most part, the \emph{appearance} that |
|
172 | 173 |
%the data is stored in memory is preserved. |
173 |
- |
|
174 |
-<<listff>>= |
|
175 |
-sapply(assayData(cnSet), function(x) class(x)[1]) |
|
176 |
-list.files(outdir, pattern=".ff")[1:5] |
|
177 |
-@ |
|
178 |
- |
|
179 |
-\section{Preprocessing} |
|
174 |
+% |
|
175 |
+%<<listff>>= |
|
176 |
+%sapply(assayData(cnSet), function(x) class(x)[1]) |
|
177 |
+%list.files(outdir, pattern=".ff")[1:5] |
|
178 |
+%@ |
|
179 |
+% |
|
180 |
+\section{Preprocessing and genotyping} |
|
180 | 181 |
|
181 | 182 |
The raw intensities from the Infinium IDAT files are read and |
182 | 183 |
normalized using the function \Rfunction{preprocessInf}. The function |
183 | 184 |
\Rfunction{preprocessInf} returns a \Robject{ff} object containing the |
184 | 185 |
parameters for the mixture model used by the CRLMM genotyping |
185 |
-algorithm. |
|
186 |
- |
|
187 |
-<<preprocess,cache=TRUE, results=hide>>= |
|
188 |
-mixtureParams <- preprocessInf(cnSet=cnSet, sampleSheet=samplesheet, arrayNames=arrayNames, arrayInfoColNames=arrayInfo) |
|
189 |
-@ |
|
190 |
- |
|
191 |
-<<showMixtureParams>>= |
|
192 |
-invisible(open(mixtureParams)) |
|
193 |
-str(mixtureParams[]) |
|
194 |
-invisible(close(mixtureParams)) |
|
195 |
-@ |
|
196 |
- |
|
197 |
-Note that the normalized intensities for the A and B alleles are no |
|
198 |
-longer \verb+NA+s and can be inspected using the methods \Rfunction{A} |
|
199 |
-and \Rfunction{B}, respectively. |
|
200 |
- |
|
201 |
-<<intensities>>= |
|
202 |
-invisible(open(A(cnSet))) |
|
203 |
-invisible(open(B(cnSet))) |
|
204 |
-as.matrix(A(cnSet)[1:5, 1:5]) |
|
205 |
-as.matrix(B(cnSet)[1:5, 1:5]) |
|
206 |
-invisible(close(A(cnSet))) |
|
207 |
-invisible(close(B(cnSet))) |
|
208 |
-@ |
|
209 |
- |
|
210 |
-\section{Genotyping} |
|
211 |
- |
|
212 |
-CRLMM genotype calls and confidence scores are estimated using the |
|
213 |
-function \Rfunction{genotypeInf}. |
|
214 |
- |
|
215 |
-<<genotype,cache=TRUE>>= |
|
216 |
-updated <- genotypeInf(cnSet, mixtureParams=mixtureParams) |
|
217 |
-@ |
|
218 |
-\vspace{-0.5em} |
|
219 |
-<<>>= |
|
220 |
-updated |
|
221 |
-@ |
|
222 |
- |
|
223 |
-\textbf{Wrapper:} As an alternative to calling the functions |
|
224 |
-\Rfunction{constructInf}, \Rfunction{preprocessInf} and |
|
225 |
-\Rfunction{genotypeInf} in sequence, a convenience function called |
|
226 |
-\Rfunction{genotype.Illumina} is a wrapper for the above functions and |
|
227 |
-produces identical results. |
|
186 |
+algorithm. Following preprocessing, the \Rfunction{genotypeInf} |
|
187 |
+genotypes the samples. The function \Rfunction{genotype.Illumina} is a |
|
188 |
+wrapper to the above functions and returns an object of class |
|
189 |
+\Rclass{CNSet}. |
|
190 |
+ |
|
191 |
+%<<preprocess,cache=TRUE, results=hide>>= |
|
192 |
+%mixtureParams <- preprocessInf(cnSet=cnSet, sampleSheet=samplesheet, |
|
193 |
+% arrayNames=arrayNames, |
|
194 |
+% arrayInfoColNames=arrayInfo, |
|
195 |
+% cdfName=cdfName) |
|
196 |
+%@ |
|
197 |
+% |
|
198 |
+%<<showMixtureParams>>= |
|
199 |
+%invisible(open(mixtureParams)) |
|
200 |
+%str(mixtureParams[]) |
|
201 |
+%invisible(close(mixtureParams)) |
|
202 |
+%@ |
|
203 |
+% |
|
204 |
+%Note that the normalized intensities for the A and B alleles are no |
|
205 |
+%longer \verb+NA+s and can be inspected using the methods \Rfunction{A} |
|
206 |
+%and \Rfunction{B}, respectively. |
|
207 |
+% |
|
208 |
+%<<intensities>>= |
|
209 |
+%invisible(open(A(cnSet))) |
|
210 |
+%invisible(open(B(cnSet))) |
|
211 |
+%as.matrix(A(cnSet)[1:5, 1:5]) |
|
212 |
+%as.matrix(B(cnSet)[1:5, 1:5]) |
|
213 |
+%invisible(close(A(cnSet))) |
|
214 |
+%invisible(close(B(cnSet))) |
|
215 |
+%@ |
|
216 |
+% |
|
217 |
+%\section{Genotyping} |
|
218 |
+% |
|
219 |
+%CRLMM genotype calls and confidence scores are estimated using the |
|
220 |
+%function \Rfunction{genotypeInf}. |
|
221 |
+% |
|
222 |
+%<<genotype,cache=TRUE>>= |
|
223 |
+%updated <- genotypeInf(cnSet, mixtureParams=mixtureParams, cdfName=cdfName) |
|
224 |
+%@ |
|
225 |
+%\vspace{-0.5em} |
|
226 |
+%<<>>= |
|
227 |
+%updated |
|
228 |
+%@ |
|
229 |
+% |
|
230 |
+%\textbf{Wrapper:} As an alternative to calling the functions |
|
231 |
+%\Rfunction{constructInf}, \Rfunction{preprocessInf} and |
|
232 |
+%\Rfunction{genotypeInf} in sequence, a convenience function called |
|
233 |
+%\Rfunction{genotype.Illumina} is a wrapper for the above functions and |
|
234 |
+%produces identical results. |
|
228 | 235 |
|
229 | 236 |
<<genotype.Illumina,cache=TRUE,results=hide>>= |
230 |
-cnSet2 <- genotype.Illumina(sampleSheet=samplesheet, |
|
237 |
+cnSet <- genotype.Illumina(sampleSheet=samplesheet, |
|
231 | 238 |
arrayNames=arrayNames, |
232 | 239 |
arrayInfoColNames=arrayInfo, |
233 | 240 |
cdfName="human370v1c", |
234 | 241 |
batch=batch) |
235 | 242 |
@ |
236 | 243 |
|
237 |
-<<checkIdenticalCalls>>= |
|
238 |
-invisible(open(calls(cnSet))) |
|
239 |
-invisible(open(calls(cnSet2))) |
|
240 |
-snp.index <- which(isSnp(cnSet)) |
|
241 |
-identical(calls(cnSet)[snp.index, 1:20], calls(cnSet2)[snp.index, 1:20]) |
|
242 |
-invisible(close(calls(cnSet))) |
|
243 |
-invisible(close(calls(cnSet2))) |
|
244 |
-@ |
|
245 | 244 |
|
246 |
-To fully remove the data associated with the \Robject{cnSet2} object, |
|
247 |
-one should use the \Rfunction{delete} function in the \Rpackage{ff} |
|
248 |
-package followed by the \Rfunction{rm} function. The following code |
|
249 |
-is not evaluated is it would change the results of the cached |
|
250 |
-computations in the previous code chunk. |
|
245 |
+Note, to fully remove the data associated with the \Robject{cnSet2} |
|
246 |
+object, one should use the \Rfunction{delete} function in the |
|
247 |
+\Rpackage{ff} package followed by the \Rfunction{rm} function. The |
|
248 |
+following code is not evaluated is it would change the results of the |
|
249 |
+cached computations in the previous code chunk. |
|
251 | 250 |
|
252 | 251 |
<<delete, eval=FALSE>>= |
253 |
-lapply(assayData(cnSet2), delete) |
|
254 |
-lapply(batchStatistics(cnSet2), delete) |
|
255 |
-delete(cnSet2$gender) |
|
256 |
-delete(cnSet2$SNR) |
|
257 |
-delete(cnSet2$SKW) |
|
258 |
-rm(cnSet2) |
|
252 |
+lapply(assayData(cnSet), delete) |
|
253 |
+lapply(batchStatistics(cnSet), delete) |
|
254 |
+delete(cnSet$gender) |
|
255 |
+delete(cnSet$SNR) |
|
256 |
+delete(cnSet$SKW) |
|
257 |
+rm(cnSet) |
|
259 | 258 |
@ |
260 | 259 |
|
261 |
-Users can proceed to the \verb+copynumber+ vignette for copy number |
|
262 |
-analyses. See the \verb+Infrastructure+ vignette for additional |
|
263 |
-details on the \Robject{CNSet} class, including an overview of the |
|
264 |
-available accessors. |
|
260 |
+%\section{Copy number estimation} |
|
261 |
+ |
|
262 |
+\SweaveInput{copynumber} |
|
265 | 263 |
|
266 | 264 |
\section{Session information} |
267 | 265 |
<<sessionInfo, results=tex>>= |
268 | 266 |
toLatex(sessionInfo()) |
269 | 267 |
@ |
270 | 268 |
|
269 |
+\begin{figure}[f] |
|
270 |
+ \begin{center} |
|
271 |
+ \includegraphics[width=0.6\textwidth]{IlluminaPreprocessCN-snr.pdf} |
|
272 |
+ \caption{The signal to noise ratio (SNR) for 180 HapMap samples. For |
|
273 |
+ Affymetrix platforms, SNR values below 5 can indicate possible |
|
274 |
+ problems with sample quality. In some circumstances, it may be |
|
275 |
+ more helpful to exclude samples with poor DNA quality.} |
|
276 |
+\end{center} |
|
277 |
+\end{figure} |
|
278 |
+ |
|
279 |
+ |
|
280 |
+\bibliography{refs} |
|
281 |
+\bibliographystyle{plain} |
|
282 |
+ |
|
271 | 283 |
\end{document} |
... | ... |
@@ -1,35 +1,3 @@ |
1 |
-%\VignetteIndexEntry{crlmm copy number Vignette} |
|
2 |
-%\VignetteDepends{crlmm, genomewidesnp6Crlmm} |
|
3 |
-%\VignetteKeywords{crlmm, SNP 6} |
|
4 |
-%\VignettePackage{crlmm} |
|
5 |
-\documentclass{article} |
|
6 |
-\usepackage{graphicx} |
|
7 |
-\usepackage{natbib} |
|
8 |
-\usepackage{url} |
|
9 |
-\usepackage{amsmath,amssymb} |
|
10 |
-\newcommand{\Rfunction}[1]{{\texttt{#1}}} |
|
11 |
-\newcommand{\Rmethod}[1]{{\texttt{#1}}} |
|
12 |
-\newcommand{\Rcode}[1]{{\texttt{#1}}} |
|
13 |
-\newcommand{\Robject}[1]{{\texttt{#1}}} |
|
14 |
-\newcommand{\Rpackage}[1]{{\textsf{#1}}} |
|
15 |
-\newcommand{\Rclass}[1]{{\textit{#1}}} |
|
16 |
-\newcommand{\oligo}{\Rpackage{oligo }} |
|
17 |
-\newcommand{\R}{\textsf{R}} |
|
18 |
-\newcommand{\crlmm}{\Rpackage{crlmm}} |
|
19 |
-\usepackage[margin=1in]{geometry} |
|
20 |
- |
|
21 |
-\begin{document} |
|
22 |
-\title{Copy number estimation with \Rpackage{crlmm}} |
|
23 |
-\date{\today} |
|
24 |
-\author{Rob Scharpf} |
|
25 |
-\maketitle |
|
26 |
- |
|
27 |
-<<setup, echo=FALSE, results=hide>>= |
|
28 |
-options(continue=" ", width=70) |
|
29 |
-@ |
|
30 |
- |
|
31 |
-\begin{abstract} |
|
32 |
- |
|
33 | 1 |
Copy number routines in the \crlmm{} package are available for |
34 | 2 |
Affymetrix 5.0 and 6.0 platforms, as well as several Illumina |
35 | 3 |
platforms. This vignette assumes that the arrays have already been |
... | ... |
@@ -44,35 +12,6 @@ options(continue=" ", width=70) |
44 | 12 |
\crlmm{} package is available from the author's website: |
45 | 13 |
\url{http://www.biostat.jhsph.edu/~rscharpf/crlmmCompendium/index.html}. |
46 | 14 |
|
47 |
-\end{abstract} |
|
48 |
- |
|
49 |
-\section{Set up} |
|
50 |
- |
|
51 |
-<<libraries,results=hide>>= |
|
52 |
-library(ff) |
|
53 |
-library(crlmm) |
|
54 |
-library(lattice) |
|
55 |
-library(cacheSweave) |
|
56 |
-if(getRversion() < "2.13.0"){ |
|
57 |
- rpath <- getRversion() |
|
58 |
-} else rpath <- "trunk" |
|
59 |
-outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/copynumber_vignette", sep="") |
|
60 |
-@ |
|
61 |
- |
|
62 |
-<<ram>>= |
|
63 |
-ldPath(outdir) |
|
64 |
-setCacheDir(outdir) |
|
65 |
-ocProbesets(150e3) |
|
66 |
-ocSamples(200) |
|
67 |
-@ |
|
68 |
- |
|
69 |
-We begin by loading the \Robject{cnSet} object created by the |
|
70 |
-\verb+AffymetrixPreprocessCN+ vignette. |
|
71 |
- |
|
72 |
-<<loadcnset>>= |
|
73 |
-if(!exists("cnSet")) load(file.path(outdir, "cnSet.rda")) |
|
74 |
-@ |
|
75 |
- |
|
76 | 15 |
|
77 | 16 |
\textbf{Limitations:} While a minimum number of samples is not |
78 | 17 |
required for preprocessing and genotyping, copy number estimation in |
... | ... |
@@ -100,24 +39,16 @@ quality. The following code chunk makes a histogram of the SNR values |
100 | 39 |
for the HapMap samples. |
101 | 40 |
|
102 | 41 |
<<snr,fig=TRUE,include=FALSE,width=6, height=4>>= |
42 |
+library(lattice) |
|
103 | 43 |
invisible(open(cnSet$SNR)) |
104 | 44 |
snr <- cnSet$SNR[] |
105 | 45 |
close(cnSet$SNR) |
106 |
-print(histogram(~snr, panel=function(...){ |
|
107 |
- panel.histogram(...) |
|
108 |
- panel.abline(v=5, col="grey",lty=2) |
|
109 |
-}, |
|
110 |
- breaks=25,xlim=c(4.5,10), xlab="SNR")) |
|
46 |
+print(histogram(~snr, |
|
47 |
+ panel=function(...){ |
|
48 |
+ panel.histogram(...)}, |
|
49 |
+ breaks=25, xlim=range(snr), xlab="SNR")) |
|
111 | 50 |
@ |
112 |
-\begin{figure}[t!] |
|
113 |
- \begin{center} |
|
114 |
- \includegraphics[width=0.6\textwidth]{copynumber-snr.pdf} |
|
115 |
- \caption{The signal to noise ratio (SNR) for 180 HapMap samples. For |
|
116 |
- Affymetrix platforms, SNR values below 5 can indicate possible |
|
117 |
- problems with sample quality. In some circumstances, it may be |
|
118 |
- more helpful to exclude samples with poor DNA quality.} |
|
119 |
-\end{center} |
|
120 |
-\end{figure} |
|
51 |
+ |
|
121 | 52 |
|
122 | 53 |
|
123 | 54 |
\section{Copy number estimation} |
... | ... |
@@ -149,29 +80,17 @@ computed for subsets of the markers to reduce the required RAM. Note |
149 | 80 |
that the value returned by the \Rfunction{crlmmCopynumber} function in |
150 | 81 |
the above example is \verb+TRUE+. The reason the function returns |
151 | 82 |
\verb+TRUE+ in the above example is that the elements of the |
152 |
-\verb+batchStatistics+ slot have the class \Rclass{ff\_matrix}. Rather |
|
153 |
-than keep the statistical summaries in memory, the summaries are |
|
154 |
-written to files on disk using protocols described in the |
|
83 |
+\verb+batchStatistics+ slot have the class \Rclass{ff\_matrix}. |
|
84 |
+Rather than keep the statistical summaries in memory, the summaries |
|
85 |
+are written to files on disk using protocols described in the |
|
155 | 86 |
\Rpackage{ff} package. Hence, while the \Robject{cnSet} object itself |
156 | 87 |
is unchanged as a result of the \Rfunction{crlmmCopynumber} function, |
157 | 88 |
the data on disk is updated accordingly. Users that are interested in |
158 | 89 |
accessing these low-level summaries can refer to the |
159 |
-\verb+Infrastructure+ vignette. Computation of the raw copy number |
|
160 |
-estimates for each allele is described in the following section. |
|
161 |
- |
|
162 |
-For users that are interested in the analysis of a specific chromosome |
|
163 |
-(subset of markers) or a s |
|
164 |
- |
|
165 |
-pointers to files on disk, are stored in the \verb+batchStatistics+ |
|
166 |
-slot of the class \Rclass{CNSet}. Using the default settings for the |
|
167 |
-\Rfunction{crlmmCopynumber} function, only an object of class |
|
168 |
-\Rclass{CNSet} is required. %(See \verb+AffyPreprocessCN+ or |
|
169 |
-%\verb+IlluminaPreprocessCN+ vignettes for details.) |
|
170 |
- |
|
171 |
-Note that depends on whether the elements of the \verb+batchStatistics+ |
|
172 |
-slot are \Robject{ff} objects or ordinary matrices. In this example, |
|
173 |
-the elements of \verb+batchStatistics+ have the class |
|
174 |
-\Rclass{ff\_matrix}. |
|
90 |
+\verb+Infrastructure+ vignette. Note that the data structure depends |
|
91 |
+on whether the elements of the \verb+batchStatistics+ slot are |
|
92 |
+\Robject{ff} objects or ordinary matrices. In this example, the |
|
93 |
+elements of \verb+batchStatistics+ have the class \Rclass{ff\_matrix}. |
|
175 | 94 |
|
176 | 95 |
<<classes>>= |
177 | 96 |
nms <- ls(batchStatistics(cnSet)) |
... | ... |
@@ -186,10 +105,8 @@ described in the \R{} package \Rpackage{ff}. The value returned by |
186 | 105 |
\Rfunction{crlmmCopynumber} is \verb+TRUE+, indicating that the files |
187 | 106 |
on disk have been successfully updated. Note that while the |
188 | 107 |
\Robject{cnSet} object is unchanged, the values on disk are different. |
189 |
- |
|
190 |
- |
|
191 |
-\noindent On the other hand, subsetting the \Robject{cnSet} with the |
|
192 |
-`[' method coerces all of the elements to class \Rclass{matrix}. The |
|
108 |
+On the other hand, subsetting the \Robject{cnSet} with the `[' method |
|
109 |
+coerces all of the elements to class \Rclass{matrix}. The |
|
193 | 110 |
batch-specific summaries are now ordinary matrices stored in RAM. The |
194 | 111 |
object returned by \Robject{crlmmCopynumber} is an object of class |
195 | 112 |
\Rclass{CNSet} with the matrices in the \verb+batchStatistics+ slot |
... | ... |
@@ -219,14 +136,18 @@ class(cnSet3) |
219 | 136 |
rm(cnSet2); gc() |
220 | 137 |
@ |
221 | 138 |
|
222 |
-\subsection{Raw copy number} |
|
139 |
+\subsection{Marker-specific estimates} |
|
140 |
+%\subsection{Raw copy number} |
|
141 |
+ |
|
142 |
+%\paragraph{Log R ratios and B allele frequencies.} |
|
223 | 143 |
|
144 |
+\paragraph{Raw total copy number.} |
|
224 | 145 |
Several functions are available that will compute relatively quickly |
225 | 146 |
the allele-specific, \emph{raw} copy number estimates. At allele $k$, |
226 | 147 |
marker $i$, sample $j$, and batch $p$, the estimate of allele-specific |
227 | 148 |
copy number is computed by subtracting the estimated background from |
228 |
-the normalized intensity and scaling by the slope coefficient. More formally, |
|
229 |
-\newcommand{\A}{A} \newcommand{\B}{B} |
|
149 |
+the normalized intensity and scaling by the slope coefficient. More |
|
150 |
+formally, \newcommand{\A}{A} \newcommand{\B}{B} |
|
230 | 151 |
\begin{eqnarray} |
231 | 152 |
\label{eq:cnK} |
232 | 153 |
{\hat c}_{k,ijp} &=& \mbox{max}\left\{\frac{1}{{\hat |
... | ... |
@@ -246,9 +167,9 @@ at all markers for the first 2 samples, and the total copy number for |
246 | 167 |
chromosome 20 for the first 50 samples. |
247 | 168 |
|
248 | 169 |
<<totalCopynumber>>= |
249 |
-tmp <- totalCopynumber(cnSet, i=1:nrow(cnSet), j=1:2) |
|
170 |
+tmp <- totalCopynumber(cnSet, i=seq_len(nrow(cnSet)), j=1:2) |
|
250 | 171 |
dim(tmp) |
251 |
-tmp2 <- totalCopynumber(cnSet, i=which(chromosome(cnSet) == 20), j=1:50) |
|
172 |
+tmp2 <- totalCopynumber(cnSet, i=which(chromosome(cnSet) == 20), j=seq_len(ncol(cnSet))) |
|
252 | 173 |
dim(tmp2) |
253 | 174 |
@ |
254 | 175 |
|
... | ... |
@@ -258,98 +179,12 @@ chunk computes the allele-specific summaries at all polymorphic loci. |
258 | 179 |
|
259 | 180 |
<<ca>>= |
260 | 181 |
snp.index <- which(isSnp(cnSet) & !is.na(chromosome(cnSet))) |
261 |
-ca <- CA(cnSet, i=snp.index, j=1:5) |
|
262 |
-cb <- CB(cnSet, i=snp.index, j=1:5) |
|
263 |
-@ |
|
264 |
- |
|
265 |
-\noindent Note the equivalence of the following calculations. |
|
266 |
- |
|
267 |
-<<totalCopynumber>>= |
|
268 |
-ct <- ca+cb |
|
269 |
-ct2 <- totalCopynumber(cnSet, i=snp.index, j=1:5) |
|
270 |
-stopifnot(all.equal(ct, ct2)) |
|
271 |
-@ |
|
272 |
- |
|
273 |
-At nonpolymorphic loci, \Rfunction{CA} function returns the total copy |
|
274 |
-number and, by construction, the \Rfunction{CB} function returns 0. |
|
275 |
- |
|
276 |
-<<nonpolymorphicAutosomes>>= |
|
277 |
-marker.index <- which(!isSnp(cnSet)) |
|
278 |
-ct <- CA(cnSet, i=marker.index, j=1:5) |
|
279 |
-## all zeros |
|
280 |
-stopifnot(all(CB(cnSet, i=marker.index, j=1:5) == 0)) |
|
281 |
-ct2 <- totalCopynumber(cnSet, i=marker.index, j=1:5) |
|
282 |
-stopifnot(all.equal(ct, ct2)) |
|
283 |
-@ |
|
284 |
- |
|
285 |
-In the following code chunk, we extract estimates of the total copy |
|
286 |
-number at nonpolymorphic markers on chromosome X. |
|
287 |
- |
|
288 |
-<<npx>>= |
|
289 |
-set.seed(123) |
|
290 |
-npx.index <- which(chromosome(cnSet)==23 & !isSnp(cnSet)) |
|
291 |
-M <- sample(which(cnSet$gender[]==1), 5) |
|
292 |
-F <- sample(which(cnSet$gender[]==2), 5) |
|
293 |
-cn.M <- CA(cnSet, i=npx.index, j=M) |
|
294 |
-cn.F <- CA(cnSet, i=npx.index, j=F) |
|
295 |
-@ |
|
296 |
- |
|
297 |
-\noindent Again, the function \Rfunction{totalCopynumber} is |
|
298 |
-equivalent. |
|
299 |
- |
|
300 |
-<<npx2>>= |
|
301 |
-cnX <- cbind(cn.M, cn.F) |
|
302 |
-cnX2 <- totalCopynumber(cnSet, i=npx.index, j=c(M, F)) |
|
303 |
-stopifnot(all.equal(cnX, cnX2)) |
|
182 |
+ca <- CA(cnSet, i=snp.index, j=seq_len(ncol(cnSet))) |
|
183 |
+cb <- CB(cnSet, i=snp.index, j=seq_len(ncol(cnSet))) |
|
304 | 184 |
@ |
305 | 185 |
|
306 |
-<<nonpolymorphicX,fig=TRUE,include=FALSE, echo=FALSE, eval=FALSE>>= |
|
307 |
-df <- data.frame(cn=as.numeric(cnX), id=factor(rep(sampleNames(cnSet)[c(M,F)], each=nrow(cnX)), levels=sampleNames(cnSet)[c(M,F)], ordered=T)) |
|
308 |
-library(RColorBrewer) |
|
309 |
-cols <- brewer.pal(8, "Accent")[c(1, 5)] |
|
310 |
-print(bwplot(cn~id,df, panel=function(x,y,...){ |
|
311 |
- panel.grid(v=-10,h=0) |
|
312 |
- panel.bwplot(x,y, ...) |
|
313 |
- panel.abline(h=1:2, col="grey70",lwd=2) |
|
314 |
-}, |
|
315 |
- scales=list(x=list(rot=90)), cex=0.5, ylab="total copy number", main="Chr X SNPs", |
|
316 |
- fill=cols[cnSet$gender[c(M,F)]], key=mykey)) |
|
317 |
-@ |
|
318 |
- |
|
319 |
-%\begin{figure}[t!] |
|
320 |
-% \centering |
|
321 |
-% \includegraphics[width=0.5\textwidth]{copynumber-nonpolymorphicX} |
|
322 |
-% \caption{Copy number estimates for nonpolymorphic loci on chromosome |
|
323 |
-% X (5 men, 5 women). We assume that the median copy number across |
|
324 |
-% samples at a given marker on X is 1 for men and 2 for women.} |
|
325 |
-%\end{figure} |
|
326 |
- |
|
327 |
-Polymorphic markers on chromosome X: |
|
328 | 186 |
|
329 |
-<<polymorphicX,fig=TRUE,include=FALSE>>= |
|
330 |
-library(RColorBrewer) |
|
331 |
-cols <- brewer.pal(8, "Accent")[c(1, 5)] |
|
332 |
-X.markers <- which(isSnp(cnSet) & chromosome(cnSet) == 23) |
|
333 |
-cnX <- totalCopynumber(cnSet, i=X.markers, j=c(M,F)) |
|
334 |
-df <- data.frame(cn=as.numeric(cnX), id=factor(rep(sampleNames(cnSet)[c(M,F)], each=length(X.markers)), levels=sampleNames(cnSet)[c(M,F)], ordered=T)) |
|
335 |
-mykey <- simpleKey(c("male", "female"), points=FALSE,col=cols) |
|
336 |
-print(bwplot(cn~id,df, panel=function(x,y,...){ |
|
337 |
- panel.grid(v=-10,h=0) |
|
338 |
- panel.bwplot(x,y, ...) |
|
339 |
- panel.abline(h=1:2, col="grey70",lwd=2) |
|
340 |
-}, |
|
341 |
- scales=list(x=list(rot=90)), cex=0.5, ylab="total copy number", main="Chr X SNPs", |
|
342 |
- fill=cols[cnSet$gender[c(M,F)]], key=mykey)) |
|
343 |
-@ |
|
344 |
-\begin{figure}[t!] |
|
345 |
- \centering |
|
346 |
- \includegraphics[width=0.5\textwidth]{copynumber-polymorphicX} |
|
347 |
- \caption{Copy number estimates for polymorphic markers on chromosome |
|
348 |
- X. crlmm assumes that the median copy number across samples at a |
|
349 |
- given marker on X is 1 for men and 2 for women.} |
|
350 |
-\end{figure} |
|
351 |
- |
|
352 |
-\subsection{A container for raw copy number} |
|
187 |
+\subsection{Container for raw copy number} |
|
353 | 188 |
|
354 | 189 |
A useful container for storing the \crlmm{} genotypes, genotype |
355 | 190 |
confidence scores, and the total copy number at each marker is the |
... | ... |
@@ -362,28 +197,6 @@ instantiated \Rclass{oligoSnpSet} will also be \Rpackage{ff}-dervied |
362 | 197 |
objects (a new \verb+total_cn*.ff+ file will be created in the |
363 | 198 |
\verb+ldPath()+ directory). |
364 | 199 |
|
365 |
-%The advantage of this approach is that the raw copy number estimates |
|
366 |
-%can be computed for all markers and all samples simultaneously without |
|
367 |
-%excessive RAM. The disadvantage is that such a step creates |
|
368 |
-%additional I/O for reading and writing the raw copy number estimates |
|
369 |
-%from disk. For very large data sets (1000+ samples), the time |
|
370 |
-%required for I/O may outweigh the benefits. The alternative is to |
|
371 |
-%perform downstream processing (such as smoothing) on the total copy |
|
372 |
-%number for subsets of markers and/or samples. As indicated |
|
373 |
-%previously, the subset `[' method applied to an object of class |
|
374 |
-%\Robject{CNSet} will return an object of the same class with simple |
|
375 |
-%matrices instead of \Rpackage{ff} objects. Therefore, one can |
|
376 |
-%instantiate a \Rpackage{oligoSnpSet} object from a \Robject{CNSet} |
|
377 |
-%object without the I/O overhead for storing the total copy number |
|
378 |
-%estimates by first subsetting the \Rclass{CNSet} object. One could |
|
379 |
-%then smooth the raw copy number estimates and store a summary copy |
|
380 |
-%number for the interval, thereby substantially reducing the |
|
381 |
-%dimensionality of the copy number estimates. Examples of smoothing |
|
382 |
-%applications such as circular binary segmentation implemented in the |
|
383 |
-%\Rpackage{DNAcopy} package or a hidden Markov model implemented in the |
|
384 |
-%\Rpackage{VanillaICE} package are provided in the \verb+SmoothingCN+ |
|
385 |
-%vignette. |
|
386 |
- |
|
387 | 200 |
<<oligoSnpSet>>= |
388 | 201 |
open(cnSet3) |
389 | 202 |
oligoSet <- as(cnSet3, "oligoSnpSet") |
... | ... |
@@ -398,19 +211,6 @@ the \Rfunction{totalCopynumber} function defined over the same row and |
398 | 211 |
column indices. |
399 | 212 |
|
400 | 213 |
<<testEqual>>= |
401 |
-total.cn3 <- totalCopynumber(cnSet3, i=1:nrow(cnSet3), j=1:ncol(cnSet3)) |
|
214 |
+total.cn3 <- totalCopynumber(cnSet3, i=1:nrow(cnSet3), j=seq_len(ncol(cnSet3))) |
|
402 | 215 |
all.equal(copyNumber(oligoSet), total.cn3) |
403 |
-@ |
|
404 |
- |
|
405 |
- |
|
406 |
-\section{Session information} |
|
407 |
-<<sessionInfo, results=tex>>= |
|
408 |
-toLatex(sessionInfo()) |
|
409 |
-@ |
|
410 |
- |
|
411 |
-%\begin{bibliography} |
|
412 |
- \bibliographystyle{plain} |
|
413 |
- \bibliography{refs} |
|
414 |
-%\end{bibliography} |
|
415 |
- |
|
416 |
-\end{document} |
|
216 |
+@ |
|
417 | 217 |
\ No newline at end of file |
420 | 220 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,17 @@ |
1 |
+TOP=../.. |
|
2 |
+PKG=${shell cd ${TOP};pwd} |
|
3 |
+SUITE=doRUnit.R |
|
4 |
+#R=${R_HOME}/bin/R |
|
5 |
+ |
|
6 |
+all: inst test |
|
7 |
+ |
|
8 |
+inst: # Install package |
|
9 |
+ |
|
10 |
+ cd ${TOP}/..;\ |
|
11 |
+ R CMD INSTALL ${PKG} |
|
12 |
+ |
|
13 |
+test: # Run unit tests |
|
14 |
+ export RCMDCHECK=FALSE;\ |
|
15 |
+ export RUNITFILEPATTERN="$(file)";\ |
|
16 |
+ cd ${TOP}/tests;\ |
|
17 |
+ R --vanilla --slave < ${SUITE} |
|
0 | 18 |
\ No newline at end of file |
0 | 7 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,45 @@ |
1 |
+genotypingTest <- function(){ |
|
2 |
+ require(genomewidesnp6Crlmm) & require(hapmapsnp6) |
|
3 |
+ path <- system.file("celFiles", package="hapmapsnp6") |
|
4 |
+ cels <- list.celfiles(path, full.names=TRUE) |
|
5 |
+ ocProbesets(50e3) |
|
6 |
+ batch <- as.factor(rep("A", length(cels))) |
|
7 |
+ (cnSet <- genotype(cels, cdfName="genomewidesnp6", batch=batch)) |
|
8 |
+ library(ff) |
|
9 |
+ ldPath(tempdir()) |
|
10 |
+ (cnSet2 <- genotype(cels, cdfName="genomewidesnp6", batch=batch)) |
|
11 |
+ checkTrue(all.equal(calls(cnSet), calls(cnSet2)[,])) |
|
12 |
+} |
|
13 |
+ |
|
14 |
+genotypingTestIllumina <- function(){ |
|
15 |
+ setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k/") |
|
16 |
+ library(crlmm) |
|
17 |
+ library(ff) |
|
18 |
+ ldPath(tempdir()) |
|
19 |
+ |
|
20 |
+ samples <- read.csv("samples370k.csv", as.is=TRUE) |
|
21 |
+ RG <- readIdatFiles(sampleSheet=samples, |
|
22 |
+ arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
23 |
+ saveDate=TRUE) |
|
24 |
+ |
|
25 |
+ crlmmResult <- crlmmIllumina(RG=RG, |
|
26 |
+ cdfName="human370v1c", |
|
27 |
+ returnParams=TRUE) |
|
28 |
+ checkTrue(is(calls(crlmmResult)[1:5,1], "integer")) |
|
29 |
+ |
|
30 |
+ crlmmResult2 <- crlmmIlluminaV2(sampleSheet=samples, |
|
31 |
+ arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
32 |
+ saveDate=TRUE, cdfName="human370v1c", returnParams=TRUE) |
|
33 |
+ checkTrue(identical(calls(crlmmResult)[1:5, ]), |
|
34 |
+ identical(calls(crlmmResult2)[1:5, ])) |
|
35 |
+ |
|
36 |
+ crlmmResult3 <- genotype.Illumina(sampleSheet=samples, |
|
37 |
+ arrayInfoColNames=list(barcode=NULL, |
|
38 |
+ position="SentrixPosition"), |
|
39 |
+ saveDate=TRUE, cdfName="human370v1c", |
|
40 |
+ batch = as.factor(rep(1, nrow(samples)))) |
|
41 |
+ |
|
42 |
+ checkTrue(identical(calls(crlmmResult)[1:5, ]), |
|
43 |
+ identical(calls(crlmmResult3)[1:5, ])) |
|
44 |
+ |
|
45 |
+} |
... | ... |
@@ -35,9 +35,7 @@ ABpanel(x, y, predictRegion, copyNumber = 0:4, fill, ..., subscripts) |
35 | 35 |
See \code{xyplot} in the \pkg{lattice} package. |
36 | 36 |
} |
37 | 37 |
} |
38 |
-\details{ |
|
39 |
-%% ~~ If necessary, more details than the description above ~~ |
|
40 |
-} |
|
38 |
+ |
|
41 | 39 |
\value{ |
42 | 40 |
Not applicable |
43 | 41 |
} |
... | ... |
@@ -38,13 +38,10 @@ calculateRBaf(object, batch.name) |
38 | 38 |
\author{Lynn Mireless} |
39 | 39 |
|
40 | 40 |
\examples{ |
41 |
- |
|
42 | 41 |
data(cnSetExample) |
43 | 42 |
baf.lrr <- calculateRBaf(cnSetExample, "SHELF") |
44 | 43 |
hist(baf.lrr[["baf"]], breaks=100) |
45 | 44 |
hist(baf.lrr[["lrr"]], breaks=100) |
46 |
- |
|
47 |
- |
|
48 | 45 |
} |
49 | 46 |
% Add one or more standard keywords, see file 'KEYWORDS' in the |
50 | 47 |
% R documentation directory. |
51 | 48 |
deleted file mode 100644 |
... | ... |
@@ -1,19 +0,0 @@ |
1 |
-\name{celDates} |
|
2 |
-\alias{celDates} |
|
3 |
-\title{Extract dates from the cel file header} |
|
4 |
-\description{ |
|
5 |
- Extract dates from the cel file header. |
|
6 |
-} |
|
7 |
-\usage{ |
|
8 |
-celDates(celfiles) |
|
9 |
-} |
|
10 |
-\arguments{ |
|
11 |
- \item{celfiles}{CEL file names. Must specify the complete path.} |
|
12 |
-} |
|
13 |
-\value{ |
|
14 |
- date-time class POSIXt |
|
15 |
-} |
|
16 |
-\author{R. Scharpf} |
|
17 |
-\seealso{ \cite{\link[affyio]{read.celfile.header}}, \cite{\link{POSIXt}}} |
|
18 |
-\keyword{manip} |
|
19 |
- |
20 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,41 @@ |
1 |
+\name{validCEL} |
|
2 |
+\alias{celDates} |
|
3 |
+\alias{validCEL} |
|
4 |
+\title{ |
|
5 |
+ Reads cel files and return an error if a file is not read |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+ Reads cel files and return an error if a file is not read |
|
9 |
+} |
|
10 |
+\usage{ |
|
11 |
+validCEL(celfiles) |
|
12 |
+celDates(celfiles) |
|
13 |
+} |
|
14 |
+\arguments{ |
|
15 |
+ \item{celfiles}{ |
|
16 |
+ vector of cel file names to read |
|
17 |
+} |
|
18 |
+} |
|
19 |
+\value{ |
|
20 |
+ Returns a message that cel files were successfully read, or an error |
|
21 |
+ if there were problems reading the cel files. |
|
22 |
+} |
|
23 |
+ |
|
24 |
+\author{ |
|
25 |
+R. Scharpf |
|
26 |
+} |
|
27 |
+ |
|
28 |
+\seealso{ \cite{\link[affyio]{read.celfile.header}}, |
|
29 |
+ \cite{\link{POSIXt}}, |
|
30 |
+ \code{\link{read.celfile}} |
|
31 |
+} |
|
32 |
+ |
|
33 |
+\examples{ |
|
34 |
+if(require(hapmapsnp6)){ |
|
35 |
+ path <- system.file("celFiles", package="hapmapsnp6") |
|
36 |
+ cels <- list.celfiles(path, full.names=TRUE) |
|
37 |
+ validCEL(cels) |
|
38 |
+ celDates(cels) |
|
39 |
+} |
|
40 |
+} |
|
41 |
+\keyword{IO} |
|
0 | 42 |
\ No newline at end of file |
... | ... |
@@ -23,85 +23,13 @@ data(cnSetExample2) |
23 | 23 |
The data illustrates the \code{CNSet-class}, with |
24 | 24 |
\code{assayData} containing the quantile-normalized |
25 | 25 |
intensities for the A and B alleles, genotype calls and |
26 |
- confidence scores. New slots that specific to copy number |
|
27 |
- estimation are \code{batch} and \code{batchStatistics}. |
|
26 |
+ confidence scores. |
|
28 | 27 |
|
29 | 28 |
} |
30 | 29 |
\examples{ |
31 |
-\dontshow{ |
|
32 | 30 |
\dontrun{ |
33 |
- |
|
34 |
- ## hapmap phase 3 data |
|
35 |
- data(hapmapSet, package="CnvScripts") |
|
36 |
- marker.index <- which(chromosome(hapmapSet) == 8) |
|
37 |
- marker.index <- marker.index[1:60e3] |
|
38 |
- cnSetExample <- hapmapSet[marker.index, c(1168:1169)] |
|
39 |
- save(cnSetExample, file="~/Software/crlmm/data/cnSetExample.rda") |