git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@40527 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -186,3 +186,12 @@ is decoded and scanned |
186 | 186 |
* added vignette for genoytping illumina data (crlmmIllumina.pdf in inst/doc and crlmmIllumina.Rnw in inst/scripts) |
187 | 187 |
* Changed stop() to warning() when idats are of different type in readIdatFiles() |
188 | 188 |
|
189 |
+ |
|
190 |
+2009-07-06 R Scharpf - committed version 1.3.8 |
|
191 |
+ |
|
192 |
+* initial development of copy number methods for illumina platform |
|
193 |
+ |
|
194 |
+* added Rd files for previously defined classes/methods |
|
195 |
+ |
|
196 |
+* some improvements to chromosome X copy number estimation |
|
197 |
+ |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.3.7 |
|
4 |
+Version: 1.3.8 |
|
5 | 5 |
Date: 2009-06-17 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -45,7 +45,7 @@ importFrom(mvtnorm, dmvnorm) |
45 | 45 |
|
46 | 46 |
importFrom(ellipse, ellipse) |
47 | 47 |
|
48 |
-S3method(ellipse,CopyNumberSet) |
|
48 |
+##S3method(ellipse,CopyNumberSet) |
|
49 | 49 |
|
50 | 50 |
exportClasses(ABset, CopyNumberSet, CrlmmSetList) |
51 | 51 |
##S3method(ellipse, CopyNumberSet) |
... | ... |
@@ -73,7 +73,7 @@ export(celDates, |
73 | 73 |
crlmmIllumina, |
74 | 74 |
crlmmWrapper, |
75 | 75 |
ellipse, |
76 |
- computeEmission, |
|
76 |
+## computeEmission, |
|
77 | 77 |
list.celfiles, |
78 | 78 |
position, |
79 | 79 |
readIdatFiles, |
80 | 80 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,51 @@ |
1 |
+ ************************************************** |
|
2 |
+ * * |
|
3 |
+ * 1.3 SERIES NEWS * |
|
4 |
+ * * |
|
5 |
+ ************************************************** |
|
6 |
+ |
|
7 |
+USER VISIBLE CHANGES |
|
8 |
+ |
|
9 |
+ o 3 new classes created: |
|
10 |
+ |
|
11 |
+ i. 'ABset': container for quantile-normalized A and B |
|
12 |
+ intensities for both SNP and copy number probes. Required |
|
13 |
+ assay data elements are 'A' and 'B'. Extends eSet |
|
14 |
+ directly. |
|
15 |
+ |
|
16 |
+ - For nonpolymorphic probes, the quantile normalized |
|
17 |
+ intensity is stored in the 'A' assay data element. |
|
18 |
+ The corresponding row in the 'B' assay data element |
|
19 |
+ is NA. This is a bit inefficient, but greatly |
|
20 |
+ simplifies downstream analyses. In particular, '[' |
|
21 |
+ works. |
|
22 |
+ |
|
23 |
+ ii. 'CrlmmSetList': container for results from preprocessing |
|
24 |
+ and genotyping. This object is a list. The first |
|
25 |
+ element of the list is an ABset. The second element is a |
|
26 |
+ SnpSet containing genotype calls. The two elements are |
|
27 |
+ required to have identical featureNames and sampleNames. |
|
28 |
+ |
|
29 |
+ - added several methods for subsetting and accessing |
|
30 |
+ elements of this object, including featureNames, |
|
31 |
+ sampleNames, and "[". |
|
32 |
+ |
|
33 |
+ iii. 'CopyNumberSet': contains locus-level estimates of copy |
|
34 |
+ number for SNPs and polymorphic probes. |
|
35 |
+ |
|
36 |
+ - Required assay data elements are 'CA' and 'CB', |
|
37 |
+ corresponding to the absolute copy number for allele |
|
38 |
+ A and B, respectively. |
|
39 |
+ |
|
40 |
+ - For nonpolymorphic probes, the total copy number is |
|
41 |
+ stored in the 'CA' slot and a NA is recorded for the |
|
42 |
+ corresponding row in the CB matrix. |
|
43 |
+ |
|
44 |
+ - Useful methods: 'copyNumber', 'ellipse', 'points' |
|
45 |
+ |
|
46 |
+ o 'crlmmWrapper' function does preprocessing |
|
47 |
+ (quantile-normalization) and genotyping, saving an object of |
|
48 |
+ class CrlmmSetList for each chromosome |
|
49 |
+ |
|
50 |
+ o 'computeCopynumber' now requires an object of class |
|
51 |
+ 'CrlmmSetList' and returns an object of class 'CopyNumberSet'. |
... | ... |
@@ -1,7 +1,6 @@ |
1 | 1 |
setGeneric("A", function(object) standardGeneric("A")) |
2 | 2 |
setGeneric("B", function(object) standardGeneric("B")) |
3 | 3 |
##setGeneric("calls", function(x) standardGeneric("calls")) |
4 |
- |
|
5 | 4 |
setGeneric("confs", function(object) standardGeneric("confs")) |
6 | 5 |
setGeneric("CA", function(object) standardGeneric("CA")) |
7 | 6 |
setGeneric("CB", function(object) standardGeneric("CB")) |
... | ... |
@@ -154,11 +154,16 @@ predictGender <- function(res, cdfName="genomewidesnp6", SNRMin=5){ |
154 | 154 |
combineIntensities <- function(res, cnrmaResult, cdfName){ |
155 | 155 |
rownames(res$B) <- rownames(res$A) <- res$gns |
156 | 156 |
colnames(res$B) <- colnames(res$A) <- res$sns |
157 |
- NP <- cnrmaResult$NP |
|
158 |
- blank <- matrix(NA, nrow(NP), ncol(NP)) |
|
159 |
- dimnames(blank) <- dimnames(NP) |
|
160 |
- A <- rbind(res$A, NP) |
|
161 |
- B <- rbind(res$B, blank) |
|
157 |
+ if(!is.null(cnrmaResult)){ |
|
158 |
+ NP <- cnrmaResult$NP |
|
159 |
+ blank <- matrix(NA, nrow(NP), ncol(NP)) |
|
160 |
+ dimnames(blank) <- dimnames(NP) |
|
161 |
+ A <- rbind(res$A, NP) |
|
162 |
+ B <- rbind(res$B, blank) |
|
163 |
+ } else { |
|
164 |
+ A <- res$A |
|
165 |
+ B <- res$B |
|
166 |
+ } |
|
162 | 167 |
aD <- assayDataNew("lockedEnvironment", |
163 | 168 |
A=A, |
164 | 169 |
B=B) |
... | ... |
@@ -166,7 +171,7 @@ combineIntensities <- function(res, cnrmaResult, cdfName){ |
166 | 171 |
assayData=aD, |
167 | 172 |
featureData=annotatedDataFrameFrom(A, byrow=TRUE), |
168 | 173 |
phenoData=annotatedDataFrameFrom(A, byrow=FALSE), |
169 |
- annotation="genomewidesnp6") |
|
174 |
+ annotation=cdfName) |
|
170 | 175 |
ABset$SNR <- res$SNR |
171 | 176 |
ABset$gender <- predictGender(res=res, cdfName=cdfName) |
172 | 177 |
return(ABset) |
... | ... |
@@ -207,8 +212,48 @@ harmonizeDimnamesTo <- function(object1, object2){ |
207 | 212 |
stopifnot(all.equal(sampleNames(object1), sampleNames(object2))) |
208 | 213 |
return(object1) |
209 | 214 |
} |
215 |
+ |
|
216 |
+crlmmIlluminaWrapper <- function(sampleSheet, outdir="./", cdfName, |
|
217 |
+ save.intermediate=FALSE, |
|
218 |
+ splitByChr=TRUE,...){ |
|
219 |
+ if(file.exists(file.path(outdir, "RG.rda"))) load(file.path(outdir, "RG.rda")) |
|
220 |
+ else { |
|
221 |
+ RG <- readIdatFiles(sampleSheet=samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE, |
|
222 |
+ path=path) |
|
223 |
+ J <- match(c("1_A", "3_A", "5_A", "7_A"), sampleNames(RG)) |
|
224 |
+ RG <- RG[, -J] |
|
225 |
+ if(save.intermediate) save(RG, file=file.path(outdir, "RG.rda")) ##935M for 91 samples...better not to save this |
|
226 |
+ } |
|
227 |
+ if(!file.exists(file.path(outdir, "res.rda"))){ |
|
228 |
+ crlmmOut <- crlmmIllumina(RG=RG, cdfName=cdfName, sns=pData(RG)$ID, returnParams=TRUE, save.it=TRUE, intensityFile=file.path(outdir, "res.rda")) |
|
229 |
+ if(save.intermediate) save(crlmmOut, file=file.path(outdir, "crlmmOut.rda")) |
|
230 |
+ } else{ |
|
231 |
+ message("Loading...") |
|
232 |
+ load(file.path(outdir, "res.rda")) |
|
233 |
+ load(file.path(outdir, "crlmmOut.rda")) |
|
234 |
+ } |
|
235 |
+ ABset <- combineIntensities(res, NULL, cdfName=cdfName) |
|
236 |
+ scanDates(ABset) <- as.character(pData(RG)$ScanDate) |
|
237 |
+ crlmmResult <- harmonizeSnpSet(crlmmOut, ABset) |
|
238 |
+ stopifnot(all.equal(dimnames(crlmmOut), dimnames(ABset))) |
|
239 |
+ crlmmList <- list(ABset, |
|
240 |
+ crlmmResult) |
|
241 |
+ crlmmList <- as(crlmmList, "CrlmmSetList") |
|
242 |
+ if(splitByChr){ |
|
243 |
+ message("Saving by chromosome") |
|
244 |
+ splitByChromosome(crlmmList, cdfName=cdfName, outdir=outdir) |
|
245 |
+ } else{ |
|
246 |
+ message("Saving crlmmList object to ", outdir) |
|
247 |
+ save(crlmmList, file=file.path(outdir, "crlmmList.rda")) |
|
248 |
+ } |
|
249 |
+ message("CrlmmSetList objects saved to ", outdir) |
|
250 |
+} |
|
251 |
+ |
|
210 | 252 |
|
211 |
-crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.it, splitByChr=TRUE, ...){ |
|
253 |
+ |
|
254 |
+crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save.it, |
|
255 |
+ splitByChr=TRUE, ...){ |
|
256 |
+ ##no visible binding for res |
|
212 | 257 |
if(!file.exists(file.path(outdir, "crlmmResult.rda"))){ |
213 | 258 |
crlmmResult <- crlmm(filenames=filenames, cdfName=cdfName, save.it=TRUE, ...) |
214 | 259 |
if(save.it) save(crlmmResult, file=file.path(outdir, "crlmmResult.rda")) |
... | ... |
@@ -248,6 +293,8 @@ crlmmWrapper <- function(filenames, outdir="./", cdfName="genomewidesnp6", save. |
248 | 293 |
return() |
249 | 294 |
} |
250 | 295 |
|
296 |
+ |
|
297 |
+ |
|
251 | 298 |
cnrma <- function(filenames, cdfName="genomewidesnp6", sns, seed=1, verbose=FALSE){ |
252 | 299 |
pkgname <- getCrlmmAnnotationName(cdfName) |
253 | 300 |
require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
... | ... |
@@ -307,29 +354,20 @@ goodSnps <- function(phi.thr, envir, fewAA=20, fewBB=20){ |
307 | 354 |
return(flags) |
308 | 355 |
} |
309 | 356 |
|
357 |
+##Needs to allow for NULL NP |
|
310 | 358 |
instantiateObjects <- function(calls, conf, NP, plate, envir, |
311 | 359 |
chrom, |
312 | 360 |
A, B, |
313 | 361 |
gender, SNRmin=5, SNR, |
314 | 362 |
pkgname, |
315 | 363 |
locusSet){ |
316 |
-## pkgname <- paste(pkgname, "Crlmm", sep="") |
|
317 | 364 |
envir[["chrom"]] <- chrom |
318 |
-## CHR_INDEX <- paste(chrom, "index", sep="") |
|
319 |
-## fname <- paste(CHR_INDEX, ".rda", sep="") |
|
320 |
-## loader(fname, .crlmmPkgEnv, pkgname) |
|
321 |
-## index <- get("index", envir=.crlmmPkgEnv) |
|
322 |
-#### data(list=CHR_INDEX, package="genomewidesnp6Crlmm") |
|
323 |
-## A <- A[index[[1]], SNR > SNRmin] |
|
324 |
-## B <- B[index[[1]], SNR > SNRmin] |
|
325 |
-## calls <- calls[index[[1]], SNR > SNRmin] |
|
326 |
-## conf <- conf[index[[1]], SNR > SNRmin] |
|
327 |
-## NP <- NP[index[[2]], SNR > SNRmin] |
|
328 | 365 |
A <- A[, SNR > SNRmin] |
329 | 366 |
B <- B[, SNR > SNRmin] |
330 | 367 |
calls <- calls[, SNR > SNRmin] |
331 | 368 |
conf <- conf[, SNR > SNRmin] |
332 |
- NP <- NP[, SNR > SNRmin] |
|
369 |
+ if(!is.null(NP)) |
|
370 |
+ NP <- NP[, SNR > SNRmin] |
|
333 | 371 |
plate <- plate[SNR > SNRmin] |
334 | 372 |
uplate <- unique(plate) |
335 | 373 |
SNR <- SNR[SNR > SNRmin] |
... | ... |
@@ -342,9 +380,12 @@ instantiateObjects <- function(calls, conf, NP, plate, envir, |
342 | 380 |
envir[["calls"]] <- calls |
343 | 381 |
envir[["conf"]] <- conf |
344 | 382 |
snps <- rownames(calls) |
345 |
- cnvs <- rownames(NP) |
|
383 |
+ if(!is.null(NP)){ |
|
384 |
+ cnvs <- rownames(NP) |
|
385 |
+ } else cnvs <- NULL |
|
346 | 386 |
sns <- basename(colnames(calls)) |
347 |
- stopifnot(identical(colnames(calls), colnames(NP))) |
|
387 |
+ if(!is.null(NP)) |
|
388 |
+ stopifnot(identical(colnames(calls), colnames(NP))) |
|
348 | 389 |
envir[["sns"]] <- sns |
349 | 390 |
envir[["snps"]] <- snps |
350 | 391 |
envir[["cnvs"]] <- cnvs |
... | ... |
@@ -354,7 +395,6 @@ instantiateObjects <- function(calls, conf, NP, plate, envir, |
354 | 395 |
message("Estimating gender") |
355 | 396 |
XMedian <- apply(log2(A[, , drop=FALSE]) + log2(B[, , drop=FALSE]), 2, median)/2 |
356 | 397 |
gender <- kmeans(XMedian, c(min(XMedian[SNR>SNRmin]), max(XMedian[SNR>SNRmin])))[["cluster"]] |
357 |
- ##gender <- getGender(res) |
|
358 | 398 |
gender[gender==2] <- "female" |
359 | 399 |
gender[gender=="1"] <- "male" |
360 | 400 |
envir[["gender"]] <- gender |
... | ... |
@@ -373,41 +413,34 @@ instantiateObjects <- function(calls, conf, NP, plate, envir, |
373 | 413 |
envir[["Ns"]] <- envir[["muB"]] <- envir[["muA"]] <- Ns |
374 | 414 |
envir[["vB"]] <- envir[["vA"]] <- Ns |
375 | 415 |
|
376 |
- CT.sds <- CT <- matrix(NA, nrow(NP), length(sns)) |
|
377 |
- ##NP.CT <- matrix(NA, nrow(NP), ncol(NP)) |
|
378 |
- ##NP.sds <- matrix(NA, nrow(NP), ncol(NP)) |
|
416 |
+ if(!is.null(NP)){ |
|
417 |
+ CT.sds <- CT <- matrix(NA, nrow(NP), length(sns)) |
|
418 |
+ nuT <- matrix(NA, nrow(NP), length(uplate)) |
|
419 |
+ phiT <- nuT |
|
420 |
+ normalNP <- matrix(TRUE, nrow(NP), ncol(NP)) |
|
421 |
+ sig2T <- matrix(NA, nrow(NP), length(uplate)) |
|
422 |
+ } else{ |
|
423 |
+ sig2T <- normalNP <- nuT <- phiT <- CT.sds <- CT <- NULL |
|
424 |
+ } |
|
379 | 425 |
envir[["CT"]] <- CT |
380 | 426 |
envir[["CT.sds"]] <- CT.sds |
381 |
- |
|
382 |
- ##assign("NP.CT", NP.CT, envir) |
|
383 |
- ##assign("NP.sds", NP.sds, envir) |
|
384 |
- nuT <- matrix(NA, nrow(NP), length(uplate)) |
|
385 |
- phiT <- nuT |
|
386 | 427 |
envir[["nuT"]] <- nuT |
387 | 428 |
envir[["phiT"]] <- phiT |
388 |
- ##assign("nus", nus, envir=envir) |
|
389 |
- ##assign("phis", nus, envir=envir) |
|
390 |
- |
|
391 | 429 |
plates.completed <- rep(FALSE, length(uplate)) |
392 | 430 |
envir[["plates.completed"]] <- plates.completed |
393 |
- |
|
394 | 431 |
steps <- rep(FALSE, 4) |
395 | 432 |
names(steps) <- c("suffStats", "coef", "snp-cn", "np-cn") |
396 | 433 |
envir[["steps"]] <- steps |
397 |
- |
|
398 | 434 |
snpflags <- matrix(FALSE, length(snps), length(uplate)) |
399 | 435 |
npflags <- matrix(FALSE, length(cnvs), length(uplate)) |
400 |
- ##assign("snpflags", snpflags, envir=envir) |
|
401 |
- ##assign("npflags", npflags, envir=envir) |
|
402 | 436 |
envir[["snpflags"]] <- snpflags |
403 | 437 |
envir[["npflags"]] <- npflags |
404 |
- |
|
405 | 438 |
tau2A <- matrix(NA, nrow(calls), length(uplate)) |
406 | 439 |
envir[["tau2A"]] <- tau2A |
407 | 440 |
envir[["tau2B"]] <- tau2A |
408 | 441 |
envir[["sig2A"]] <- tau2A |
409 | 442 |
envir[["sig2B"]] <- tau2A |
410 |
- sig2T <- matrix(NA, nrow(NP), length(uplate)) |
|
443 |
+ |
|
411 | 444 |
envir[["sig2T"]] <- sig2T |
412 | 445 |
envir[["corr"]] <- tau2A |
413 | 446 |
envir[["corrA.BB"]] <- tau2A |
... | ... |
@@ -417,7 +450,6 @@ instantiateObjects <- function(calls, conf, NP, plate, envir, |
417 | 450 |
envir[["nuB.se"]] <- envir[["nuA.se"]] <- tau2A |
418 | 451 |
envir[["phiB.se"]] <- envir[["phiA.se"]] <- tau2A |
419 | 452 |
normal <- matrix(TRUE, nrow(A), ncol(A)) |
420 |
- normalNP <- matrix(TRUE, nrow(NP), ncol(NP)) |
|
421 | 453 |
envir[["normal"]] <- normal |
422 | 454 |
envir[["normalNP"]] <- normalNP |
423 | 455 |
} |
... | ... |
@@ -430,6 +462,7 @@ computeCopynumber <- function(object, |
430 | 462 |
cdfName="genomewidesnp6", ...){ |
431 | 463 |
##require(oligoClasses) |
432 | 464 |
if(missing(CHR)) stop("Must specify CHR") |
465 |
+ if(CHR == 24) stop("Nothing available yet for chromosome Y") |
|
433 | 466 |
if(missing(batch)) stop("Must specify batch") |
434 | 467 |
if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples") |
435 | 468 |
##the AB intensities |
... | ... |
@@ -455,6 +488,7 @@ computeCopynumber <- function(object, |
455 | 488 |
SNR=ABset$SNR, |
456 | 489 |
bias.adj=FALSE, |
457 | 490 |
SNRmin=SNRmin, |
491 |
+ cdfName=cdfName, |
|
458 | 492 |
...) |
459 | 493 |
|
460 | 494 |
if(bias.adj){ |
... | ... |
@@ -470,6 +504,7 @@ computeCopynumber <- function(object, |
470 | 504 |
SNR=ABset$SNR, |
471 | 505 |
bias.adj=TRUE, |
472 | 506 |
SNRmin=SNRmin, |
507 |
+ cdfName=cdfName, |
|
473 | 508 |
...) |
474 | 509 |
} |
475 | 510 |
message("Organizing results...") |
... | ... |
@@ -477,6 +512,55 @@ computeCopynumber <- function(object, |
477 | 512 |
return(locusSet) |
478 | 513 |
} |
479 | 514 |
|
515 |
+cnIllumina <- function(object, |
|
516 |
+ CHR, |
|
517 |
+ bias.adj=FALSE, |
|
518 |
+ batch, |
|
519 |
+ SNRmin=5, |
|
520 |
+ cdfName="genomewidesnp6", ...){ |
|
521 |
+ if(missing(CHR)) stop("Must specify CHR") |
|
522 |
+ if(missing(batch)) stop("Must specify batch") |
|
523 |
+ if(length(batch) != ncol(object[[1]])) stop("Batch must be the same length as the number of samples") |
|
524 |
+ ABset <- object[[1]] |
|
525 |
+ snpset <- object[[2]] |
|
526 |
+ envir <- new.env() |
|
527 |
+ NP <- NULL |
|
528 |
+ message("Fitting model for copy number estimation...") |
|
529 |
+ .computeCopynumber(chrom=CHR, |
|
530 |
+ A=A(ABset), |
|
531 |
+ B=B(ABset), |
|
532 |
+ calls=calls(snpset), |
|
533 |
+ conf=confs(snpset), |
|
534 |
+ NP=NULL, |
|
535 |
+ plate=batch, |
|
536 |
+ envir=envir, |
|
537 |
+ SNR=ABset$SNR, |
|
538 |
+ bias.adj=FALSE, |
|
539 |
+ SNRmin=SNRmin, |
|
540 |
+ cdfName=cdfName, |
|
541 |
+ ...) |
|
542 |
+ |
|
543 |
+ if(bias.adj){ |
|
544 |
+ message("Running bias adjustment...") |
|
545 |
+ .computeCopynumber(chrom=CHR, |
|
546 |
+ A=A(ABset), |
|
547 |
+ B=B(ABset), |
|
548 |
+ calls=calls(snpset), |
|
549 |
+ conf=confs(snpset), |
|
550 |
+ NP=NULL, |
|
551 |
+ plate=batch, |
|
552 |
+ envir=envir, |
|
553 |
+ SNR=ABset$SNR, |
|
554 |
+ bias.adj=TRUE, |
|
555 |
+ SNRmin=SNRmin, |
|
556 |
+ cdfName=cdfName, |
|
557 |
+ ...) |
|
558 |
+ } |
|
559 |
+ message("Organizing results...") |
|
560 |
+ locusSet <- list2locusSet(envir, ABset=ABset, NPset=NULL, CHR=CHR, cdfName=cdfName) |
|
561 |
+ return(locusSet) |
|
562 |
+} |
|
563 |
+ |
|
480 | 564 |
##getCopyNumberEnvironment <- function(crlmmSetList, cnSet){ |
481 | 565 |
## envir <- new.env() |
482 | 566 |
## envir[["A"]] <- A(crlmmSetList)[snpIndex(crlmmSetList), ] |
... | ... |
@@ -558,7 +642,6 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){ |
558 | 642 |
##require(oligoClasses) || stop("oligoClasses package not available") |
559 | 643 |
if(length(ls(envir)) == 0) stop("environment empty") |
560 | 644 |
batch <- envir[["plate"]] |
561 |
- |
|
562 | 645 |
##SNP copy number |
563 | 646 |
CA <- envir[["CA"]] |
564 | 647 |
dimnames(CA) <- list(envir[["snps"]], envir[["sns"]]) |
... | ... |
@@ -566,20 +649,21 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){ |
566 | 649 |
dimnames(CB) <- dimnames(CA) |
567 | 650 |
|
568 | 651 |
##NP copy number |
569 |
- CT <- envir[["CT"]] |
|
570 |
- rownames(CT) <- envir[["cnvs"]] |
|
571 |
- colnames(CT) <- envir[["sns"]] |
|
572 |
- sig2T <- envir[["sig2T"]] |
|
573 |
- rownames(sig2T) <- rownames(CT) |
|
574 |
- nuT <- envir[["nuT"]] |
|
575 |
- colnames(nuT) <- paste("nuT", unique(batch), sep="_") |
|
576 |
- ##nuA <- rbind(nuA, nuT) |
|
577 |
- ##nuB <- rbind(nuA, blank) |
|
578 |
- phiT <- envir[["phiT"]] |
|
579 |
- colnames(phiT) <- paste("phiT", unique(batch), sep="_") |
|
580 |
- naMatrix <- matrix(NA, nrow(CT), ncol(CT)) |
|
581 |
- dimnames(naMatrix) <- dimnames(CT) |
|
582 |
- |
|
652 |
+ if(!is.null(NPset)){ |
|
653 |
+ CT <- envir[["CT"]] |
|
654 |
+ rownames(CT) <- envir[["cnvs"]] |
|
655 |
+ colnames(CT) <- envir[["sns"]] |
|
656 |
+ sig2T <- envir[["sig2T"]] |
|
657 |
+ rownames(sig2T) <- rownames(CT) |
|
658 |
+ nuT <- envir[["nuT"]] |
|
659 |
+ colnames(nuT) <- paste("nuT", unique(batch), sep="_") |
|
660 |
+ phiT <- envir[["phiT"]] |
|
661 |
+ colnames(phiT) <- paste("phiT", unique(batch), sep="_") |
|
662 |
+ naMatrix <- matrix(NA, nrow(CT), ncol(CT)) |
|
663 |
+ dimnames(naMatrix) <- dimnames(CT) |
|
664 |
+ } else{ |
|
665 |
+ sig2T <- nuT <- phiT <- naMatrix <- CT <- NULL |
|
666 |
+ } |
|
583 | 667 |
CA <- rbind(CA, CT) |
584 | 668 |
CB <- rbind(CB, naMatrix) |
585 | 669 |
|
... | ... |
@@ -609,8 +693,12 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){ |
609 | 693 |
|
610 | 694 |
|
611 | 695 |
##Combine SNP and NP parameters |
612 |
- naMatrixParams <- matrix(NA, nrow(CT), length(unique(batch))) |
|
613 |
- dimnames(naMatrixParams) <- list(rownames(CT), unique(batch)) |
|
696 |
+ if(!is.null(NPset)){ |
|
697 |
+ naMatrixParams <- matrix(NA, nrow(CT), length(unique(batch))) |
|
698 |
+ dimnames(naMatrixParams) <- list(rownames(CT), unique(batch)) |
|
699 |
+ } else{ |
|
700 |
+ naMatrixParams <- NULL |
|
701 |
+ } |
|
614 | 702 |
tau2A <- rbind(tau2A, naMatrixParams) |
615 | 703 |
tau2B <- rbind(tau2B, naMatrixParams) |
616 | 704 |
sig2A <- rbind(sig2A, sig2T) |
... | ... |
@@ -625,7 +713,6 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){ |
625 | 713 |
rownames(tau2A) <- rownames(tau2B) <- rownames(sig2A) <- rownames(sig2B) <- rownames(CA) |
626 | 714 |
rownames(corr) <- rownames(corrA.BB) <- rownames(corrB.AA) <- rownames(CA) |
627 | 715 |
rownames(nuA) <- rownames(phiA) <- rownames(nuB) <- rownames(phiB) <- rownames(CA) |
628 |
- |
|
629 | 716 |
##phenodata |
630 | 717 |
phenodata <- phenoData(ABset) |
631 | 718 |
phenodata <- phenodata[match(envir[["sns"]], sampleNames(phenodata)), ] |
... | ... |
@@ -633,32 +720,41 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){ |
633 | 720 |
|
634 | 721 |
##Feature Data |
635 | 722 |
position.snp <- snpProbes[match(envir[["snps"]], rownames(snpProbes)), "position"] |
636 |
- position.np <- cnProbes[match(envir[["cnvs"]], rownames(cnProbes)), "position"] |
|
723 |
+ names(position.snp) <- envir[["snps"]] |
|
724 |
+ if(!is.null(NPset)){ |
|
725 |
+ position.np <- cnProbes[match(envir[["cnvs"]], rownames(cnProbes)), "position"] |
|
726 |
+ names(position.np) <- envir[["cnvs"]] |
|
727 |
+ } else position.np <- NULL |
|
637 | 728 |
position <- c(position.snp, position.np) |
638 | 729 |
if(!(identical(names(position), rownames(CA)))){ |
639 | 730 |
position <- position[match(rownames(CA), names(position))] |
640 | 731 |
} |
732 |
+ if(sum(duplicated(names(position))) > 0){ |
|
733 |
+ warning("Removing rows with NA identifiers...") |
|
734 |
+ ##RS: fix this |
|
735 |
+ I <- which(!is.na(names(position))) |
|
736 |
+ } else I <- seq(along=names(position)) |
|
641 | 737 |
fd <- data.frame(cbind(CHR, |
642 |
- position, |
|
643 |
- tau2A, |
|
644 |
- tau2B, |
|
645 |
- sig2A, |
|
646 |
- sig2B, |
|
647 |
- nuA, |
|
648 |
- nuB, |
|
649 |
- phiA, |
|
650 |
- phiB, |
|
651 |
- corr, |
|
652 |
- corrA.BB, |
|
653 |
- corrB.AA)) |
|
738 |
+ position[I], |
|
739 |
+ tau2A[I,, drop=FALSE], |
|
740 |
+ tau2B[I,, drop=FALSE], |
|
741 |
+ sig2A[I,, drop=FALSE], |
|
742 |
+ sig2B[I,, drop=FALSE], |
|
743 |
+ nuA[I,, drop=FALSE], |
|
744 |
+ nuB[I,, drop=FALSE], |
|
745 |
+ phiA[I,, drop=FALSE], |
|
746 |
+ phiB[I,, drop=FALSE], |
|
747 |
+ corr[I,, drop=FALSE], |
|
748 |
+ corrA.BB[I,, drop=FALSE], |
|
749 |
+ corrB.AA[I,, drop=FALSE])) |
|
654 | 750 |
colnames(fd)[1:2] <- c("chromosome", "position") |
655 |
- rownames(fd) <- rownames(CA) |
|
751 |
+ rownames(fd) <- rownames(CA)[I] |
|
656 | 752 |
fD <- new("AnnotatedDataFrame", |
657 | 753 |
data=fd, |
658 | 754 |
varMetadata=data.frame(labelDescription=colnames(fd))) |
659 | 755 |
assayData <- assayDataNew("lockedEnvironment", |
660 |
- CA=CA, |
|
661 |
- CB=CB) |
|
756 |
+ CA=CA[I, ], |
|
757 |
+ CB=CB[I, ]) |
|
662 | 758 |
cnset <- new("CopyNumberSet", |
663 | 759 |
assayData=assayData, |
664 | 760 |
featureData=fD, |
... | ... |
@@ -671,10 +767,14 @@ list2locusSet <- function(envir, ABset, NPset, CHR, cdfName="genomewidesnp6"){ |
671 | 767 |
thresholdCopyNumberSet <- function(object){ |
672 | 768 |
ca <- CA(object) |
673 | 769 |
cb <- CB(object) |
674 |
- ca[ca < 5] <- 5 |
|
675 |
- ca[ca > 500] <- 500 |
|
676 |
- cb[cb < 5] <- 5 |
|
677 |
- cb[cb > 500] <- 500 |
|
770 |
+ ca[ca < 0.05] <- 0.05 |
|
771 |
+ ca[ca > 5] <- 5 |
|
772 |
+ cb[cb < 0.05] <- 0.05 |
|
773 |
+ cb[cb > 5] <- 5 |
|
774 |
+ ca <- matrix(as.integer(ca*100), nrow(ca), ncol(ca)) |
|
775 |
+ cb <- matrix(as.integer(cb*100), nrow(cb), ncol(cb)) |
|
776 |
+ rownames(ca) <- rownames(cb) <- featureNames(object) |
|
777 |
+ colnames(ca) <- colnames(cb) <- sampleNames(object) |
|
678 | 778 |
CA(object) <- ca |
679 | 779 |
CB(object) <- cb |
680 | 780 |
return(object) |
... | ... |
@@ -682,31 +782,31 @@ thresholdCopyNumberSet <- function(object){ |
682 | 782 |
|
683 | 783 |
|
684 | 784 |
.computeCopynumber <- function(chrom, |
685 |
- A, |
|
686 |
- B, |
|
687 |
- calls, |
|
688 |
- conf, |
|
689 |
- NP, |
|
690 |
- plate, |
|
691 |
- MIN.OBS=5, |
|
692 |
- envir, |
|
693 |
- P, |
|
694 |
- DF.PRIOR=50, |
|
695 |
- CONF.THR=0.99, |
|
696 |
- bias.adj=FALSE, |
|
697 |
- priorProb, |
|
698 |
- gender=NULL, |
|
699 |
- SNR, |
|
700 |
- SNRmin=5, seed=123, |
|
701 |
- cdfName="genomewidesnp6", |
|
702 |
- verbose=TRUE, ...){ |
|
785 |
+ A, |
|
786 |
+ B, |
|
787 |
+ calls, |
|
788 |
+ conf, |
|
789 |
+ NP, |
|
790 |
+ plate, |
|
791 |
+ MIN.OBS=5, |
|
792 |
+ envir, |
|
793 |
+ P, |
|
794 |
+ DF.PRIOR=50, |
|
795 |
+ CONF.THR=0.99, |
|
796 |
+ bias.adj=FALSE, |
|
797 |
+ priorProb, |
|
798 |
+ gender=NULL, |
|
799 |
+ SNR, |
|
800 |
+ SNRmin=5, seed=123, |
|
801 |
+ cdfName, |
|
802 |
+ verbose=TRUE, ...){ |
|
803 |
+ if(missing(cdfName)) stop("cdfName must be provided") |
|
703 | 804 |
require(paste(cdfName, "Crlmm", sep=""), character.only=TRUE) || stop(paste("cdf ", cdfName, "Crlmm", " not available.", sep="")) |
704 | 805 |
if(!missing(plate)){ |
705 | 806 |
if(length(plate) != ncol(A)) |
706 | 807 |
stop("plate must the same length as the number of columns of A") |
707 | 808 |
} |
708 | 809 |
set.seed(seed) |
709 |
- ##if(missing(chrom)) stop("must specify chromosome") |
|
710 | 810 |
if(length(ls(envir)) == 0) { |
711 | 811 |
instantiateObjects(calls=calls, |
712 | 812 |
conf=conf, |
... | ... |
@@ -741,13 +841,16 @@ thresholdCopyNumberSet <- function(object){ |
741 | 841 |
cat(".") |
742 | 842 |
if(sum(plate == uplate[p]) < 10) next() |
743 | 843 |
J <- plate==uplate[p] |
844 |
+ if(!is.null(NP)){ |
|
845 |
+ npMatrix <- NP[, J] |
|
846 |
+ } else npMatrix <- NULL |
|
744 | 847 |
oneBatch(plateIndex=p, |
745 | 848 |
A=A[, J], |
746 | 849 |
B=B[, J], |
747 | 850 |
calls=calls[, J], |
748 | 851 |
conf=conf[, J], |
749 | 852 |
gender=NULL, |
750 |
- NP[, J], |
|
853 |
+ npMatrix, |
|
751 | 854 |
plate[J], |
752 | 855 |
MIN.OBS=1, |
753 | 856 |
envir=envir, |
... | ... |
@@ -782,12 +885,14 @@ thresholdCopyNumberSet <- function(object){ |
782 | 885 |
envir[["steps"]] <- steps |
783 | 886 |
} |
784 | 887 |
if(!steps[4]){ |
785 |
- message("\nCopy number for nonpolymorphic probes...") |
|
786 |
- for(p in P){ |
|
787 |
- cat(".") |
|
788 |
- nonpolymorphic(plateIndex=p, |
|
789 |
- NP=NP[, plate==uplate[p]], |
|
790 |
- envir=envir) |
|
888 |
+ if(!is.null(NP)){ |
|
889 |
+ message("\nCopy number for nonpolymorphic probes...") |
|
890 |
+ for(p in P){ |
|
891 |
+ cat(".") |
|
892 |
+ nonpolymorphic(plateIndex=p, |
|
893 |
+ NP=NP[, plate==uplate[p]], |
|
894 |
+ envir=envir) |
|
895 |
+ } |
|
791 | 896 |
} |
792 | 897 |
steps[4] <- TRUE |
793 | 898 |
envir[["step"]] <- steps |
... | ... |
@@ -841,44 +946,38 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk |
841 | 946 |
} else { |
842 | 947 |
ix <- 1:nrow(X) |
843 | 948 |
} |
844 |
- ##X <- X[ix, ] |
|
845 |
- ##Y <- Y[ix] |
|
846 | 949 |
betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix])) |
847 |
-## Yhat <- X%*%betahat |
|
848 |
-## phihat <- 2^Yhat |
|
849 |
-## nuhat <- 2^X[, 2] - 2*phihat |
|
850 |
-## nuAB <- c(nuA[!flagsA], nuB[!flagsB]) |
|
851 |
-## plot(log2(nuhat), log2(nuAB), pch=".") |
|
852 |
-## plot(log2(nuhat)-log2(nuAB), pch=".") |
|
853 |
-## hist(log2(nuAB)) |
|
854 |
- ##plot(Y-Yhat, pch=".") |
|
855 |
- ##plot(Y, Yhat, pch=".") |
|
856 |
- ##calculate R2 |
|
857 | 950 |
if(CHR == 23){ |
951 |
+ normalNP <- envir[["normalNP"]] |
|
952 |
+ normalNP <- normalNP[, plate==uplate[p]] |
|
953 |
+ nuT <- envir[["nuT"]] |
|
954 |
+ phiT <- envir[["phiT"]] |
|
955 |
+ |
|
858 | 956 |
cnvs <- envir[["cnvs"]] |
859 | 957 |
loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
860 | 958 |
cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
861 |
-## data(cnProbes, package="genomewidesnp6Crlmm") |
|
862 | 959 |
cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ] |
863 |
- ##par:pseudo-autosomal regions |
|
864 |
- par <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754) |
|
865 |
- gender <- envir[["gender"]] |
|
866 |
- mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE) |
|
867 |
- mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE) |
|
868 |
- mus <- log(cbind(mu1, mu2)) |
|
869 |
- X <- cbind(1, mus[, 1]) |
|
960 |
+ |
|
870 | 961 |
##For build Hg18 |
871 | 962 |
##http://genome.ucsc.edu/cgi-bin/hgGateway |
872 | 963 |
##pseudo-autosomal regions on X |
873 | 964 |
##chrX:1-2,709,520 and chrX:154584237-154913754, respectively |
874 |
- Yhat1 <- as.numeric(X %*% betahat) |
|
875 |
- X <- cbind(1, mus[, 2]) |
|
876 |
- Yhat2 <- as.numeric(X %*% betahat) |
|
877 |
- phi1 <- exp(Yhat1) |
|
878 |
- phi2 <- exp(Yhat2) |
|
879 |
- nu1 <- exp(mus[, 1]) - phi1 |
|
880 |
- nu1[par] <- exp(mus[par, 1]) - 2*phi1[par] |
|
881 |
- nu2 <- exp(mus[, 2]) - 2*phi2 |
|
965 |
+ ##par:pseudo-autosomal regions |
|
966 |
+ pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754) |
|
967 |
+ gender <- envir[["gender"]] |
|
968 |
+ mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE) |
|
969 |
+ mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE) |
|
970 |
+ mus <- log2(cbind(mu1, mu2)) |
|
971 |
+ X.men <- cbind(1, mus[, 1]) |
|
972 |
+ X.fem <- cbind(1, mus[, 2]) |
|
973 |
+ |
|
974 |
+ Yhat1 <- as.numeric(X.men %*% betahat) |
|
975 |
+ Yhat2 <- as.numeric(X.fem %*% betahat) |
|
976 |
+ phi1 <- 2^(Yhat1) |
|
977 |
+ phi2 <- 2^(Yhat2) |
|
978 |
+ nu1 <- 2^(mus[, 1]) - phi1 |
|
979 |
+ nu2 <- 2^(mus[, 2]) - 2*phi2 |
|
980 |
+ nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR] |
|
882 | 981 |
CT1 <- 1/phi1*(NP[, gender=="male"]-nu1) |
883 | 982 |
CT2 <- 1/phi2*(NP[, gender=="female"]-nu2) |
884 | 983 |
CT1 <- matrix(as.integer(100*CT1), nrow(CT1), ncol(CT1)) |
... | ... |
@@ -887,6 +986,16 @@ nonpolymorphic <- function(plateIndex, NP, envir, CONF.THR=0.99, DF.PRIOR=50, pk |
887 | 986 |
CT[, plate==uplate[p] & gender=="male"] <- CT1 |
888 | 987 |
CT[, plate==uplate[p] & gender=="female"] <- CT2 |
889 | 988 |
envir[["CT"]] <- CT |
989 |
+ |
|
990 |
+ ##only using females to compute the variance |
|
991 |
+ normalNP[, gender=="male"] <- NA |
|
992 |
+ sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
993 |
+ nuT[, p] <- nu2 |
|
994 |
+ phiT[, p] <- phi2 |
|
995 |
+ envir[["sig2T"]] <- sig2T |
|
996 |
+ envir[["CT"]] <- CT |
|
997 |
+ envir[["phiT"]] <- nuT |
|
998 |
+ envir[["nuT"]] <- phiT |
|
890 | 999 |
} else { |
891 | 1000 |
normalNP <- envir[["normalNP"]] |
892 | 1001 |
normalNP <- normalNP[, plate==uplate[p]] |
... | ... |
@@ -934,7 +1043,6 @@ withinGenotypeMoments <- function(p, A, B, calls, conf, CONF.THR, DF.PRIOR, envi |
934 | 1043 |
IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE) |
935 | 1044 |
IX <- IX == "female" |
936 | 1045 |
} else IX <- matrix(TRUE, nrow(G), ncol(G)) |
937 |
- |
|
938 | 1046 |
index <- GT.B <- GT.A <- vector("list", 3) |
939 | 1047 |
names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
940 | 1048 |
##-------------------------------------------------- |
... | ... |
@@ -1024,7 +1132,8 @@ oneBatch <- function(plateIndex, |
1024 | 1132 |
} |
1025 | 1133 |
message("running bias adjustment") |
1026 | 1134 |
##adjustment for nonpolymorphic probes |
1027 |
- biasAdjNP(plateIndex=p, envir=envir, priorProb=priorProb) |
|
1135 |
+ if(!is.null(NP)) |
|
1136 |
+ biasAdjNP(plateIndex=p, envir=envir, priorProb=priorProb) |
|
1028 | 1137 |
##adjustment for SNPs |
1029 | 1138 |
biasAdj(plateIndex=p, envir=envir, priorProb=priorProb) |
1030 | 1139 |
message("Recomputing location and scale parameters") |
... | ... |
@@ -1356,6 +1465,7 @@ polymorphic <- function(plateIndex, A, B, envir){ |
1356 | 1465 |
|
1357 | 1466 |
|
1358 | 1467 |
biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){ |
1468 |
+ gender <- envir[["gender"]] |
|
1359 | 1469 |
CHR <- envir[["chrom"]] |
1360 | 1470 |
if(CHR == 23){ |
1361 | 1471 |
phiAx <- envir[["phiAx"]] |
... | ... |
@@ -1429,18 +1539,13 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){ |
1429 | 1539 |
posteriorProb[, , 3] <- norm |
1430 | 1540 |
posteriorProb[, , 4] <- amp |
1431 | 1541 |
mostLikelyState <- apply(posteriorProb, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
1542 |
+ if(CHR == 23){ |
|
1543 |
+ ##so state index 3 is the most likely state for men and women |
|
1544 |
+ mostLikelyState[, gender=="male"] <- mostLikelyState[, gender=="male"] + 1 |
|
1545 |
+ } |
|
1432 | 1546 |
##Adjust for SNPs that have less than 80% of the samples in an altered state |
1433 | 1547 |
##flag the remainder? |
1434 |
- if(CHR != 23){ |
|
1435 |
- proportionSamplesAltered <- rowMeans(mostLikelyState != 3) |
|
1436 |
- }else{ |
|
1437 |
- ##should also consider pseud |
|
1438 |
-## browser() |
|
1439 |
- gender <- envir[["gender"]] |
|
1440 |
-## tmp3 <- tmp2 |
|
1441 |
-## tmp3[, gender=="male"] <- tmp2[, gender=="male"] != 2 |
|
1442 |
-## tmp3[, gender=="female"] <- tmp2[, gender=="female"] != 3 |
|
1443 |
- } |
|
1548 |
+ proportionSamplesAltered <- rowMeans(mostLikelyState != 3) |
|
1444 | 1549 |
##Those near 1 have NaNs for nu and phi. this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome |
1445 | 1550 |
##ii <- proportionSamplesAltered < PROP |
1446 | 1551 |
##ii <- proportionSamplesAltered > 0.05 |
... | ... |
@@ -1448,46 +1553,45 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){ |
1448 | 1553 |
ii <- proportionSamplesAltered < 0.8 & proportionSamplesAltered > 0.01 |
1449 | 1554 |
##only exclude observations from one tail, depending on |
1450 | 1555 |
##whether more are up or down |
1451 |
- if(CHR != 23){ |
|
1452 |
- moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) ##3 is normal |
|
1453 |
- NORM <- matrix(FALSE, nrow(A), ncol(A)) |
|
1454 |
- NORM[proportionSamplesAltered > 0.8, ] <- FALSE |
|
1455 |
- ##big and greater than 1 if copy number 3 is more likely |
|
1456 |
- ratioUp <- posteriorProb[, , 4]/posteriorProb[, , 3] |
|
1457 |
- ##large values will have small ranks |
|
1458 |
- ##rankUP <- t(apply(ratio, 1, rank)) |
|
1459 |
- ## drop small ranks up to the maximum number, unless the ratio is greater than 1 |
|
1460 |
- ##NORM[ii & moreup, ] <- !(rankUP <= maxNumberToDrop) | (ratio < 1) |
|
1461 |
- NORM[ii & moreup, ] <- ratioUp[moreup & ii] < 1 ##normal more likely |
|
1462 |
- ##big and greater than 1 if copy number 1 is more likely than 2 |
|
1463 |
- ratioDown <- posteriorProb[, , 2]/posteriorProb[, , 3] |
|
1464 |
- ##big values have small ranks |
|
1465 |
- ##rankDown <- t(apply(ratio, 1, rank)) |
|
1466 |
- ##NORM[ii & !moreup, ] <- !(rankDown <= maxNumberToDrop) | (ratio < 1) |
|
1467 |
- NORM[ii & !moreup, ] <- ratioDown[!moreup & ii] < 1 ##normal more likely |
|
1468 |
-## NORM[ii & !moreup, ] <- up |
|
1469 |
- ##Define NORM so that we can iterate this step |
|
1470 |
- ##NA's in the previous iteration (normal) will be propogated |
|
1471 |
- normal <- NORM*normal |
|
1472 |
- } else{ |
|
1473 |
- fem <- mostLikelyState[, gender=="female"] |
|
1474 |
- mal <- mostLikelyState[, gender=="male"] |
|
1475 |
- moreupF <- rowSums(fem > 3) > rowSums(fem < 3) |
|
1476 |
- moreupM <- rowSums(mal > 2) > rowSums(mal < 2) |
|
1477 |
- notUpF <- fem[ii & moreupF, ] <= 3 |
|
1478 |
- notUpM <- fem[ii & moreupM, ] <= 2 |
|
1479 |
- notDownF <- fem[ii & !moreupF, ] >= 3 |
|
1480 |
- notDownM <- mal[ii & !moreupM, ] >= 2 |
|
1481 |
- normalF <- matrix(TRUE, nrow(fem), ncol(fem)) |
|
1482 |
- normalF[ii & moreupF, ] <- notUpF |
|
1483 |
- normalF[ii & !moreupF, ] <- notDownF |
|
1484 |
- normalM <- matrix(TRUE, nrow(mal), ncol(mal)) |
|
1485 |
- normalM[ii & moreupM, ] <- notUpM |
|
1486 |
- normalM[ii & !moreupM, ] <- notDownM |
|
1487 |
- normal <- matrix(TRUE, nrow(A), ncol(A)) |
|
1488 |
- normal[, gender=="female"] <- normalF |
|
1489 |
- normal[, gender=="male"] <- normalM |
|
1490 |
- } |
|
1556 |
+ moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) ##3 is normal |
|
1557 |
+ NORM <- matrix(FALSE, nrow(A), ncol(A)) |
|
1558 |
+ NORM[proportionSamplesAltered > 0.8, ] <- FALSE |
|
1559 |
+ ##big and greater than 1 if copy number 3 is more likely |
|
1560 |
+ ratioUp <- posteriorProb[, , 4]/posteriorProb[, , 3] |
|
1561 |
+ ##large values will have small ranks |
|
1562 |
+ ##rankUP <- t(apply(ratio, 1, rank)) |
|
1563 |
+ ## drop small ranks up to the maximum number, unless the ratio is greater than 1 |
|
1564 |
+ ##NORM[ii & moreup, ] <- !(rankUP <= maxNumberToDrop) | (ratio < 1) |
|
1565 |
+ NORM[ii & moreup, ] <- ratioUp[moreup & ii] < 1 ##normal more likely |
|
1566 |
+ ##big and greater than 1 if copy number 1 is more likely than 2 |
|
1567 |
+ ratioDown <- posteriorProb[, , 2]/posteriorProb[, , 3] |
|
1568 |
+ ##big values have small ranks |
|
1569 |
+ ##rankDown <- t(apply(ratio, 1, rank)) |
|
1570 |
+ ##NORM[ii & !moreup, ] <- !(rankDown <= maxNumberToDrop) | (ratio < 1) |
|
1571 |
+ NORM[ii & !moreup, ] <- ratioDown[!moreup & ii] < 1 ##normal more likely |
|
1572 |
+ ## NORM[ii & !moreup, ] <- up |
|
1573 |
+ ##Define NORM so that we can iterate this step |
|
1574 |
+ ##NA's in the previous iteration (normal) will be propogated |
|
1575 |
+ normal <- NORM*normal |
|
1576 |
+## } else{ |
|
1577 |
+## fem <- mostLikelyState[, gender=="female"] |
|
1578 |
+## mal <- mostLikelyState[, gender=="male"] |
|
1579 |
+## moreupF <- rowSums(fem > 3) > rowSums(fem < 3) |
|
1580 |
+## moreupM <- rowSums(mal > 2) > rowSums(mal < 2) |
|
1581 |
+## notUpF <- fem[ii & moreupF, ] <= 3 |
|
1582 |
+## notUpM <- fem[ii & moreupM, ] <= 2 |
|
1583 |
+## notDownF <- fem[ii & !moreupF, ] >= 3 |
|
1584 |
+## notDownM <- mal[ii & !moreupM, ] >= 2 |
|
1585 |
+## normalF <- matrix(TRUE, nrow(fem), ncol(fem)) |
|
1586 |
+## normalF[ii & moreupF, ] <- notUpF |
|
1587 |
+## normalF[ii & !moreupF, ] <- notDownF |
|
1588 |
+## normalM <- matrix(TRUE, nrow(mal), ncol(mal)) |
|
1589 |
+## normalM[ii & moreupM, ] <- notUpM |
|
1590 |
+## normalM[ii & !moreupM, ] <- notDownM |
|
1591 |
+## normal <- matrix(TRUE, nrow(A), ncol(A)) |
|
1592 |
+## normal[, gender=="female"] <- normalF |
|
1593 |
+## normal[, gender=="male"] <- normalM |
|
1594 |
+## } |
|
1491 | 1595 |
flagAltered <- which(proportionSamplesAltered > 0.5) |
1492 | 1596 |
envir[["flagAltered"]] <- flagAltered |
1493 | 1597 |
normal[normal == FALSE] <- NA |
... | ... |
@@ -1692,6 +1796,7 @@ biasAdjNP <- function(plateIndex, envir, priorProb){ |
1692 | 1796 |
plate <- envir[["plate"]] |
1693 | 1797 |
uplate <- envir[["uplate"]] |
1694 | 1798 |
sig2T <- envir[["sig2T"]] |
1799 |
+ gender <- envir[["gender"]] |
|
1695 | 1800 |
normalNP <- normalNP[, plate==uplate[p]] |
1696 | 1801 |
NP <- NP[, plate==uplate[p]] |
1697 | 1802 |
sig2T <- sig2T[, p] |
... | ... |
@@ -1716,32 +1821,23 @@ biasAdjNP <- function(plateIndex, envir, priorProb){ |
1716 | 1821 |
counter <- counter+1 |
1717 | 1822 |
} |
1718 | 1823 |
mostLikelyState <- apply(emit, c(1, 2), function(x) order(x, decreasing=TRUE)[1]) |
1719 |
- ##Adjust for SNPs that have less than 80% of the samples in an altered state |
|
1720 |
- ##flag the remainder? |
|
1721 |
- if(CHR != 23){ |
|
1722 |
- tmp3 <- mostLikelyState != 3 |
|
1723 |
- }else{ |
|
1724 |
- browser() |
|
1725 |
- ##should also consider pseudoautosomal |
|
1726 |
- gender <- envir[["gender"]] |
|
1727 |
- tmp3 <- mostLikelyState |
|
1728 |
- tmp3[, gender=="male"] <- mostLikelyState[, gender=="male"] != 2 |
|
1729 |
- tmp3[, gender=="female"] <- mostLikelyState[, gender=="female"] != 3 |
|
1824 |
+ if(CHR == 23){ |
|
1825 |
+ ## the state index for male on chromosome 23 is 2 |
|
1826 |
+ ## add 1 so that the state index is 3 for 'normal' state |
|
1827 |
+ mostLikelyState[, gender=="male"] <- mostLikelyState[, gender=="male"] + 1 |
|
1730 | 1828 |
} |
1829 |
+ tmp3 <- mostLikelyState != 3 |
|
1731 | 1830 |
##Those near 1 have NaNs for nu and phi. this occurs by NaNs in the muA[,, "A"] or muA[, , "B"] for X chromosome |
1732 | 1831 |
proportionSamplesAltered <- rowMeans(tmp3)##prop normal |
1733 | 1832 |
ii <- proportionSamplesAltered < 0.75 |
1734 |
- ##only exclude observations from one tail, depending on |
|
1735 |
- ##whether more are up or down |
|
1736 |
- if(CHR != 23){ |
|
1737 |
- moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) |
|
1738 |
- notUp <- mostLikelyState[ii & moreup, ] <= 3 |
|
1739 |
- notDown <- mostLikelyState[ii & !moreup, ] >= 3 |
|
1740 |
- NORM <- matrix(TRUE, nrow(NP), ncol(NP)) |
|
1741 |
- NORM[ii & moreup, ] <- notUp |
|
1742 |
- NORM[ii & !moreup, ] <- notDown |
|
1743 |
- normalNP <- normalNP*NORM |
|
1744 |
- } |
|
1833 |
+ moreup <- rowSums(mostLikelyState > 3) > rowSums(mostLikelyState < 3) |
|
1834 |
+ notUp <- mostLikelyState[ii & moreup, ] <= 3 |
|
1835 |
+ notDown <- mostLikelyState[ii & !moreup, ] >= 3 |
|
1836 |
+ NORM <- matrix(TRUE, nrow(NP), ncol(NP)) |
|
1837 |
+ NORM[ii & moreup, ] <- notUp |
|
1838 |
+ NORM[ii & !moreup, ] <- notDown |
|
1839 |
+ normalNP <- normalNP*NORM |
|
1840 |
+ |
|
1745 | 1841 |
flagAltered <- which(proportionSamplesAltered > 0.5) |
1746 | 1842 |
envir[["flagAlteredNP"]] <- flagAltered |
1747 | 1843 |
normalNP[normalNP == FALSE] <- NA |
... | ... |
@@ -3,47 +3,47 @@ |
3 | 3 |
# or to use the optional 'Path' column in sampleSheet |
4 | 4 |
# - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet |
5 | 5 |
|
6 |
-readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
|
7 |
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
8 |
- highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) { |
|
6 |
+readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
|
7 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
8 |
+ highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) { |
|
9 | 9 |
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
10 |
- arrayNames=NULL |
|
11 |
- if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
12 |
- barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
13 |
- arrayNames=barcode |
|
14 |
- } |
|
15 |
- if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
16 |
- position = sampleSheet[,arrayInfoColNames$position] |
|
17 |
- if(is.null(arrayNames)) |
|
18 |
- arrayNames=position |
|
19 |
- else |
|
20 |
- arrayNames = paste(arrayNames, position, sep=sep) |
|
21 |
- if(highDensity) { |
|
22 |
- hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
23 |
- for(i in names(hdExt)) |
|
24 |
- arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
25 |
- } |
|
26 |
- } |
|
27 |
- pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
10 |
+ arrayNames=NULL |
|
11 |
+ if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
12 |
+ barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
13 |
+ arrayNames=barcode |
|
14 |
+ } |
|
15 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
16 |
+ position = sampleSheet[,arrayInfoColNames$position] |
|
17 |
+ if(is.null(arrayNames)) |
|
18 |
+ arrayNames=position |
|
19 |
+ else |
|
20 |
+ arrayNames = paste(arrayNames, position, sep=sep) |
|
21 |
+ if(highDensity) { |
|
22 |
+ hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
23 |
+ for(i in names(hdExt)) |
|
24 |
+ arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
25 |
+ } |
|
26 |
+ } |
|
27 |
+ pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
28 | 28 |
} |
29 | 29 |
if(is.null(arrayNames)) { |
30 |
- arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
31 |
- if(!is.null(sampleSheet)) { |
|
32 |
- sampleSheet=NULL |
|
33 |
- cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
34 |
- } |
|
35 |
- pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
30 |
+ arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
31 |
+ if(!is.null(sampleSheet)) { |
|
32 |
+ sampleSheet=NULL |
|
33 |
+ cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
34 |
+ } |
|
35 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
36 | 36 |
} |
37 | 37 |
narrays = length(arrayNames) |
38 | 38 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
39 | 39 |
redfiles = paste(arrayNames, fileExt$red, sep=sep) |
40 | 40 |
if(length(grnfiles)==0 || length(redfiles)==0) |
41 |
- stop("Cannot find .idat files") |
|
41 |
+ stop("Cannot find .idat files") |
|
42 | 42 |
if(length(grnfiles)!=length(redfiles)) |
43 |
- stop("Cannot find matching .idat files") |
|
43 |
+ stop("Cannot find matching .idat files") |
|
44 | 44 |
if(!all(c(redfiles,grnfiles) %in% dir(path=path))) |
45 |
- stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n", |
|
46 |
- paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" ")) |
|
45 |
+ stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n", |
|
46 |
+ paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" ")) |
|
47 | 47 |
grnidats = file.path(path, grnfiles) |
48 | 48 |
redidats = file.path(path, redfiles) |
49 | 49 |
|
... | ... |
@@ -58,97 +58,95 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
58 | 58 |
|
59 | 59 |
# read in the data |
60 | 60 |
for(i in seq(along=arrayNames)) { |
61 |
- cat("reading", arrayNames[i], "\t") |
|
62 |
- idsG = idsR = G = R = NULL |
|
63 |
- |
|
64 |
- cat(paste(sep, fileExt$green, sep=""), "\t") |
|
65 |
- G = readIDAT(grnidats[i]) |
|
66 |
- idsG = rownames(G$Quants) |
|
67 |
- |
|
68 |
- headerInfo$nProbes[i] = G$nSNPsRead |
|
69 |
- headerInfo$Barcode[i] = G$Barcode |
|
70 |
- headerInfo$ChipType[i] = G$ChipType |
|
71 |
- headerInfo$Manifest[i] = G$Unknown$MostlyNull |
|
72 |
- headerInfo$Position[i] = G$Unknowns$MostlyA |
|
61 |
+ cat("reading", arrayNames[i], "\t") |
|
62 |
+ idsG = idsR = G = R = NULL |
|
63 |
+ cat(paste(sep, fileExt$green, sep=""), "\t") |
|
64 |
+ G = readIDAT(grnidats[i]) |
|
65 |
+ idsG = rownames(G$Quants) |
|
66 |
+ headerInfo$nProbes[i] = G$nSNPsRead |
|
67 |
+ headerInfo$Barcode[i] = G$Barcode |
|
68 |
+ headerInfo$ChipType[i] = G$ChipType |
|
69 |
+ headerInfo$Manifest[i] = G$Unknown$MostlyNull |
|
70 |
+ headerInfo$Position[i] = G$Unknowns$MostlyA |
|
73 | 71 |
|
74 |
- if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) { |
|
75 |
- # || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest |
|
76 |
- # but differed by a few SNPs for some reason - most of the chip was the same though |
|
77 |
-# stop("Chips are not of all of the same type - please check your data") |
|
78 |
- warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i])) |
|
79 |
- next() |
|
80 |
- } |
|
81 |
- |
|
82 |
- dates$decode[i] = G$RunInfo[1, 1] |
|
83 |
- dates$scan[i] = G$RunInfo[2, 1] |
|
84 |
- |
|
85 |
- if(i==1) { |
|
86 |
- if(is.null(ids) && !is.null(G)) |
|
87 |
- ids = idsG |
|
88 |
- else |
|
89 |
- stop("Could not find probe IDs") |
|
90 |
- nprobes = length(ids) |
|
91 |
- narrays = length(arrayNames) |
|
72 |
+ if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) { |
|
73 |
+ ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest |
|
74 |
+ ## but differed by a few SNPs for some reason - most of the chip was the same though |
|
75 |
+ ## stop("Chips are not of all of the same type - please check your data") |
|
76 |
+ warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i])) |
|
77 |
+ next() |
|
78 |
+ } |
|
79 |
+ |
|
80 |
+ dates$decode[i] = G$RunInfo[1, 1] |
|
81 |
+ dates$scan[i] = G$RunInfo[2, 1] |
|
82 |
+ |
|
83 |
+ if(i==1) { |
|
84 |
+ if(is.null(ids) && !is.null(G)) |
|
85 |
+ ids = idsG |
|
86 |
+ else |
|
87 |
+ stop("Could not find probe IDs") |
|
88 |
+ nprobes = length(ids) |
|
89 |
+ narrays = length(arrayNames) |
|
92 | 90 |
|
93 |
- tmpmat = matrix(NA, nprobes, narrays) |
|
94 |
- rownames(tmpmat) = ids |
|
95 |
- if(!is.null(sampleSheet)) |
|
96 |
- colnames(tmpmat) = sampleSheet$Sample_ID |
|
97 |
- else |
|
98 |
- colnames(tmpmat) = arrayNames |
|
99 |
- |
|
100 |
- RG = new("NChannelSet", |
|
101 |
- R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat, |
|
102 |
- Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1], |
|
103 |
- phenoData=pd, storage.mode="environment") |
|
104 |
- rm(tmpmat) |
|
105 |
- gc() |
|
106 |
- } |
|
91 |
+ tmpmat = matrix(NA, nprobes, narrays) |
|
92 |
+ rownames(tmpmat) = ids |
|
93 |
+ if(!is.null(sampleSheet)) |
|
94 |
+ colnames(tmpmat) = sampleSheet$Sample_ID |
|
95 |
+ else |
|
96 |
+ colnames(tmpmat) = arrayNames |
|
97 |
+ |
|
98 |
+ RG = new("NChannelSet", |
|
99 |
+ R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat, |
|
100 |
+ Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1], |
|
101 |
+ phenoData=pd, storage.mode="environment") |
|
102 |
+ rm(tmpmat) |
|
103 |
+ gc() |
|
104 |
+ } |
|
107 | 105 |
|
108 |
- if(length(ids)==length(idsG)) { |
|
109 |
- if(sum(ids==idsG)==nprobes) { |
|
110 |
- RG@assayData$G[,i] = G$Quants[, "Mean"] |
|
111 |
- RG@assayData$Gnb[,i] = G$Quants[, "NBeads"] |
|
112 |
- RG@assayData$Gse[,i] = G$Quants[, "SD"] |
|
113 |
- } |
|
114 |
- } |
|
115 |
- else { |
|
116 |
- indG = match(ids, idsG) |
|
117 |
- RG@assayData$G[,i] = G$Quants[indG, "Mean"] |
|
118 |
- RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"] |
|
119 |
- RG@assayData$Gse[,i] = G$Quants[indG, "SD"] |
|
120 |
- } |
|
121 |
- rm(G) |
|
122 |
- gc() |
|
106 |
+ if(length(ids)==length(idsG)) { |
|
107 |
+ if(sum(ids==idsG)==nprobes) { |
|
108 |
+ RG@assayData$G[,i] = G$Quants[, "Mean"] |
|
109 |
+ RG@assayData$Gnb[,i] = G$Quants[, "NBeads"] |
|
110 |
+ RG@assayData$Gse[,i] = G$Quants[, "SD"] |
|
111 |
+ } |
|
112 |
+ } |
|
113 |
+ else { |
|
114 |
+ indG = match(ids, idsG) |
|
115 |
+ RG@assayData$G[,i] = G$Quants[indG, "Mean"] |
|
116 |
+ RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"] |
|
117 |
+ RG@assayData$Gse[,i] = G$Quants[indG, "SD"] |
|
118 |
+ } |
|
119 |
+ rm(G) |
|
120 |
+ gc() |
|
123 | 121 |
|
124 |
- cat(paste(sep, fileExt$red, sep=""), "\n") |
|
125 |
- R = readIDAT(redidats[i]) |
|
126 |
- idsR = rownames(R$Quants) |
|
122 |
+ cat(paste(sep, fileExt$red, sep=""), "\n") |
|
123 |
+ R = readIDAT(redidats[i]) |
|
124 |
+ idsR = rownames(R$Quants) |
|
127 | 125 |
|
128 |
- if(length(ids)==length(idsG)) { |
|
129 |
- if(sum(ids==idsR)==nprobes) { |
|
130 |
- RG@assayData$R[,i] = R$Quants[ ,"Mean"] |
|
131 |
- RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"] |
|
132 |
- RG@assayData$Rse[,i] = R$Quants[ ,"SD"] |
|
133 |
- } |
|
134 |
- } |
|
135 |
- else { |
|
136 |
- indR = match(ids, idsR) |
|
137 |
- RG@assayData$R[,i] = R$Quants[indR, "Mean"] |
|
138 |
- RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"] |
|
139 |
- RG@assayData$Rse[,i] = R$Quants[indR, "SD"] |
|
140 |
- } |
|
141 |
- rm(R) |
|
142 |
- gc() |
|
126 |
+ if(length(ids)==length(idsG)) { |
|
127 |
+ if(sum(ids==idsR)==nprobes) { |
|
128 |
+ RG@assayData$R[,i] = R$Quants[ ,"Mean"] |
|
129 |
+ RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"] |
|
130 |
+ RG@assayData$Rse[,i] = R$Quants[ ,"SD"] |
|
131 |
+ } |
|
132 |
+ } |
|
133 |
+ else { |
|
134 |
+ indR = match(ids, idsR) |
|
135 |
+ RG@assayData$R[,i] = R$Quants[indR, "Mean"] |
|
136 |
+ RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"] |
|
137 |
+ RG@assayData$Rse[,i] = R$Quants[indR, "SD"] |
|
138 |
+ } |
|
139 |
+ rm(R) |
|
140 |
+ gc() |
|
143 | 141 |
} |
144 | 142 |
if(saveDate) |
145 |
- RG@scanDates = dates$scan |
|
143 |
+ RG@scanDates = dates$scan |
|
146 | 144 |
storageMode(RG) = "lockedEnvironment" |
147 | 145 |
RG |
148 | 146 |
} |
149 | 147 |
|
150 | 148 |
|
151 |
-# the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008 |
|
149 |
+## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008 |
|
152 | 150 |
readIDAT <- function(idatFile){ |
153 | 151 |
|
154 | 152 |
fileSize <- file.info(idatFile)$size |
... | ... |
@@ -692,7 +690,6 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
692 | 690 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
693 | 691 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
694 | 692 |
returnParams=FALSE, badSNP=.7) { |
695 |
- |
|
696 | 693 |
if ((load.it | save.it) & missing(intensityFile)) |
697 | 694 |
stop("'intensityFile' is missing, and you chose either load.it or save.it") |
698 | 695 |
if (!missing(intensityFile)) |
... | ... |
@@ -3,15 +3,15 @@ setValidity("CopyNumberSet", function(object) { |
3 | 3 |
msg <- validMsg(NULL, assayDataValidMembers(assayData(object), c("CA", "CB"))) |
4 | 4 |
if (is.null(msg)) TRUE else msg |
5 | 5 |
}) |
6 |
-setMethod("CA", "CopyNumberSet", function(object) assayData(object)[["CA"]]) |
|
7 |
-setMethod("CB", "CopyNumberSet", function(object) assayData(object)[["CB"]]) |
|
6 |
+setMethod("CA", "CopyNumberSet", function(object) assayData(object)[["CA"]]/100) |
|
7 |
+setMethod("CB", "CopyNumberSet", function(object) assayData(object)[["CB"]]/100) |
|
8 | 8 |
setReplaceMethod("CB", signature(object="CopyNumberSet", value="matrix"), |
9 | 9 |
function(object, value) assayDataElementReplace(object, "CB", value)) |
10 | 10 |
setReplaceMethod("CA", signature(object="CopyNumberSet", value="matrix"), |
11 | 11 |
function(object, value) assayDataElementReplace(object, "CA", value)) |
12 | 12 |
|
13 | 13 |
setMethod("chromosome", "CopyNumberSet", function(object) fData(object)$chromosome) |
14 |
-setMethod("position", "CopyNumberSet", function(object) fData(object)$position) |
|
14 |
+ |
|
15 | 15 |
|
16 | 16 |
setMethod("copyNumber", "CopyNumberSet", function(object){ |
17 | 17 |
##ensure that 2 + NA = 2 by replacing NA's with zero |
... | ... |
@@ -20,14 +20,17 @@ setMethod("copyNumber", "CopyNumberSet", function(object){ |
20 | 20 |
nas <- is.na(CA) & is.na(CB) |
21 | 21 |
CA[is.na(CA)] <- 0 |
22 | 22 |
CB[is.na(CB)] <- 0 |
23 |
- CN <- CA/100 + CB/100 |
|
23 |
+ CN <- CA + CB |
|
24 | 24 |
##if both CA and CB are NA, report NA |
25 | 25 |
CN[nas] <- NA |
26 | 26 |
CN |
27 | 27 |
}) |
28 | 28 |
|
29 |
+setMethod("position", "CopyNumberSet", function(object) fData(object)$position) |
|
30 |
+ |
|
29 | 31 |
##setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){ |
30 |
-ellipse.CopyNumberSet <- function(x, copynumber, ...){ |
|
32 |
+##ellipse.CopyNumberSet <- function(x, copynumber, ...){ |
|
33 |
+setMethod("ellipse", "CopyNumberSet", function(x, copynumber, ...){ |
|
31 | 34 |
##fittedOrder <- unique(sapply(basename(celFiles), function(x) strsplit(x, "_")[[1]][2])) |
32 | 35 |
##index <- match(plates, fittedOrder) |
33 | 36 |
if(nrow(x) > 1) stop("only 1 snp at a time") |
... | ... |
@@ -58,7 +61,7 @@ ellipse.CopyNumberSet <- function(x, copynumber, ...){ |
58 | 61 |
scale=scale), ...) |
59 | 62 |
} |
60 | 63 |
} |
61 |
-} |
|
64 |
+}) |
|
62 | 65 |
|
63 | 66 |
|
64 | 67 |
|
... | ... |
@@ -39,7 +39,7 @@ setMethod("combine", signature=signature(x="CrlmmSetList", y="CrlmmSetList"), |
39 | 39 |
abset <- combine(x.abset, y.abset) |
40 | 40 |
|
41 | 41 |
##we have hijacked the featureData slot to store parameters. Biobase will not allow combining our 'feature' data. |
42 |
- warning("removing featureData") |
|
42 |
+ warning("the featureData is not easily combined... removing the featureData") |
|
43 | 43 |
##fd1 <- featureData(x.snpset) |
44 | 44 |
##fd2 <- featureData(y.snpset) |
45 | 45 |
featureData(x.snpset) <- annotatedDataFrameFrom(calls(x.snpset), byrow=TRUE) |
... | ... |
@@ -53,18 +53,21 @@ setMethod("combine", signature=signature(x="CrlmmSetList", y="CrlmmSetList"), |
53 | 53 |
|
54 | 54 |
|
55 | 55 |
setMethod("featureNames", "CrlmmSetList", function(object) featureNames(object[[1]])) |
56 |
+ |
|
56 | 57 |
setMethod("plot", signature(x="CrlmmSetList"), |
57 | 58 |
function(x, y, ...){ |
58 | 59 |
A <- log2(A(x)) |
59 | 60 |
B <- log2(B(x)) |
60 | 61 |
plot(A, B, ...) |
61 | 62 |
}) |
63 |
+ |
|
62 | 64 |
setMethod("points", signature(x="CrlmmSetList"), |
63 | 65 |
function(x, y, ...){ |
64 | 66 |
A <- log2(A(x)) |
65 | 67 |
B <- log2(B(x)) |
66 | 68 |
points(A, B, ...) |
67 | 69 |
}) |
70 |
+ |
|
68 | 71 |
setMethod("sampleNames", "CrlmmSetList", function(object) sampleNames(object[[1]])) |
69 | 72 |
setMethod("scanDates", "CrlmmSetList", function(object) scanDates(object[[1]])) |
70 | 73 |
setMethod("show", "CrlmmSetList", function(object){ |
... | ... |
@@ -78,8 +81,8 @@ setMethod("splitByChromosome", "CrlmmSetList", function(object, cdfName, outdir) |
78 | 81 |
load(file.path(path, "cnProbes.rda")) |
79 | 82 |
for(CHR in 1:24){ |
80 | 83 |
cat("Chromosome ", CHR, "\n") |
81 |
- snps <- rownames(snpProbes)[snpProbes[, "chrom"] == CHR] |
|
82 |
- cnps <- rownames(cnProbes)[cnProbes[, "chrom"] == CHR] |
|
84 |
+ snps <- rownames(snpProbes)[snpProbes$chr == CHR] |
|
85 |
+ cnps <- rownames(cnProbes)[cnProbes$chr == CHR] |
|
83 | 86 |
index <- c(match(snps, featureNames(object)), |
84 | 87 |
match(cnps, featureNames(object))) |
85 | 88 |
crlmmResults <- object[index, ] |
... | ... |
@@ -142,7 +142,7 @@ loader <- function(theFile, envir, pkgname){ |
142 | 142 |
theFile <- file.path(system.file(package=pkgname), |
143 | 143 |
"extdata", theFile) |
144 | 144 |
if (!file.exists(theFile)) |
145 |
- stop("File", theFile, "does not exist in", pkgname) |
|
145 |
+ stop("File ", theFile, " does not exist in ", pkgname) |
|
146 | 146 |
load(theFile, envir=envir) |
147 | 147 |
} |
148 | 148 |
|
... | ... |
@@ -39,7 +39,6 @@ vignette. |
39 | 39 |
<<test, eval=FALSE, echo=FALSE>>= |
40 | 40 |
library(crlmm) |
41 | 41 |
load("~/madman/Rpacks/crlmm/inst/scripts/example.cnset.rda") |
42 |
- |
|
43 | 42 |
chromosome(example.cnset)[1:5] |
44 | 43 |
position(example.cnset)[1:5] |
45 | 44 |
scanDates(example.cnset)[1:5] |
... | ... |
@@ -53,6 +52,7 @@ library(genomewidesnp6Crlmm) |
53 | 52 |
Specify the complete path for the CEL files and a directory in which to |
54 | 53 |
store intermediate files: |
55 | 54 |
|
55 |
+ |
|
56 | 56 |
<<celfiles>>= |
57 | 57 |
celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full.names=TRUE, pattern=".CEL") |
58 | 58 |
outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/1m/affy" |
... | ... |
@@ -299,7 +299,7 @@ if(!exists("fit")){ |
299 | 299 |
emission=emission.cn, |
300 | 300 |
tau=tau[, "transitionPr"], |
301 | 301 |
arm=tau[, "arm"], |
302 |
- normalIndex=2, |
|
302 |
+ normalIndex=3, |
|
303 | 303 |
normal2altered=0.005, |
304 | 304 |
altered2normal=0.5, |
305 | 305 |
altered2altered=0.0025) |
306 | 306 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,45 @@ |
1 |
+\name{ABset-class} |
|
2 |
+\Rdversion{1.1} |
|
3 |
+\docType{class} |
|
4 |
+\alias{ABset-class} |
|
5 |
+\alias{A} |
|
6 |
+\alias{A,ABset-method} |
|
7 |
+\alias{B} |
|
8 |
+\alias{B,ABset-method} |
|
9 |
+\title{Class "ABset"} |
|
10 |
+\description{Containter for quantile-normalized intensities} |
|
11 |
+\section{Objects from the Class}{ |
|
12 |
+Objects can be created by calls of the form \code{new("ABset", assayData, phenoData, featureData, experimentData, annotation, scanDates, ...)}. |
|
13 |
+} |
|
14 |
+\section{Slots}{ |
|
15 |
+ \describe{ |
|
16 |
+ \item{\code{assayData}:}{Object of class \code{"AssayData"} } |
|
17 |
+ \item{\code{phenoData}:}{Object of class \code{"AnnotatedDataFrame"} } |
|
18 |
+ \item{\code{featureData}:}{Object of class \code{"AnnotatedDataFrame"} } |
|
19 |
+ \item{\code{experimentData}:}{Object of class \code{"MIAME"} } |
|
20 |
+ \item{\code{annotation}:}{Object of class \code{"character"} } |
|
21 |
+ \item{\code{scanDates}:}{Object of class \code{"character"} } |
|
22 |
+ \item{\code{.__classVersion__}:}{Object of class \code{"Versions"} } |
|
23 |
+ } |
|
24 |
+} |
|
25 |
+\section{Extends}{ |
|
26 |
+Class \code{"\linkS4class{eSet}"}, directly. |
|
27 |
+Class \code{"\linkS4class{VersionedBiobase}"}, by class "eSet", distance 2. |
|
28 |
+Class \code{"\linkS4class{Versioned}"}, by class "eSet", distance 3. |
|
29 |
+} |
|
30 |
+\section{Methods}{ |
|
31 |
+ \describe{ |
|
32 |
+ \item{A}{\code{signature(object="ABset")}: accessor for the |
|
33 |
+ quantile-normalized intensities of allele A for polymorphic probes |
|
34 |
+ and the quantile normalized intensities for the copy number probes.} |
|
35 |
+ \item{B}{\code{signature(object="ABset")}: accessor for the |
|
36 |
+ quantile-normalized intensities of allele B for polymorphic probes.} |
|
37 |
+ \item{plot}{\code{signature(x = "ABset", y = "CopyNumberSet")}: |
|
38 |
+... }} |
|
39 |
+ |
|
40 |
+} |
|
41 |
+\author{R. Scharpf} |
|
42 |
+\examples{ |
|
43 |
+showClass("ABset") |
|
44 |
+} |
|
45 |
+\keyword{classes} |
0 | 46 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,85 @@ |
1 |
+\name{CopyNumberSet-class} |
|
2 |
+\Rdversion{1.1} |
|
3 |
+\docType{class} |
|
4 |
+\alias{CopyNumberSet-class} |
|
5 |
+\alias{CA} |
|
6 |
+\alias{CA<-} |
|
7 |
+\alias{CA,CopyNumberSet-method} |
|
8 |
+\alias{CA<-,CopyNumberSet,matrix-method} |
|
9 |
+\alias{CB} |
|
10 |
+\alias{CB<-} |
|
11 |
+\alias{CB,CopyNumberSet-method} |
|
12 |
+\alias{CB<-,CopyNumberSet,matrix-method} |
|
13 |
+\alias{chromosome,CopyNumberSet-method} |
|
14 |
+\alias{copyNumber,CopyNumberSet-method} |
|
15 |
+\alias{ellipse,CopyNumberSet-method} |
|
16 |
+\alias{plot,ABset,CopyNumberSet-method} |
|
17 |
+\alias{position,CopyNumberSet-method} |
|
18 |
+\title{Class "CopyNumberSet"} |
|
19 |
+\description{Container for allele-specific estimates of copy number and |
|
20 |
+ the corresponding uncertainty} |
|
21 |
+\section{Objects from the Class}{ |
|
22 |
+Objects can be created by calls of the form \code{new("CopyNumberSet", assayData, phenoData, featureData, experimentData, annotation, scanDates, ...)}. |
|
23 |
+} |
|
24 |
+\section{Slots}{ |
|
25 |
+ \describe{ |
|
26 |
+ \item{\code{assayData}:}{Object of class \code{"AssayData"}} |
|
27 |
+ \item{\code{phenoData}:}{Object of class \code{"AnnotatedDataFrame"}} |
|
28 |
+ \item{\code{featureData}:}{Object of class \code{"AnnotatedDataFrame"} } |
|
29 |
+ \item{\code{experimentData}:}{Object of class \code{"MIAME"} } |
|
30 |
+ \item{\code{annotation}:}{Object of class \code{"character"} } |
|
31 |
+ \item{\code{scanDates}:}{Object of class \code{"character"} } |
|
32 |
+ \item{\code{.__classVersion__}:}{Object of class \code{"Versions"} } |
|
33 |
+ } |
|
34 |
+} |
|
35 |
+\section{Extends}{ |
|
36 |
+Class \code{"\linkS4class{eSet}"}, directly. |
|
37 |
+Class \code{"\linkS4class{VersionedBiobase}"}, by class "eSet", distance 2. |
|
38 |
+Class \code{"\linkS4class{Versioned}"}, by class "eSet", distance 3. |
|
39 |
+} |
|
40 |
+\section{Methods}{ |
|
41 |
+ \describe{ |
|
42 |
+ |
|
43 |
+ \item{CA}{\code{signature(object = "CopyNumberSet")}: Extracts the |
|
44 |
+ copy number for allele 'A' at polymorphic loci. At nonpolymorphic |
|
45 |
+ loci, the total copy number is returned.} |
|
46 |
+ |
|
47 |
+ \item{CA<-}{\code{signature(object = "CopyNumberSet", |
|
48 |
+ value="matrix")}: Replaces CA matrix with supplied value.} |
|
49 |
+ |
|
50 |
+ \item{CB}{\code{signature(object = "CopyNumberSet")}: |
|
51 |
+ Extracts copy number for allele 'B' at polymorphic loci. NAs are |
|
52 |
+ returned for nonpolymorphic loci.} |
|
53 |
+ |
|
54 |
+ \item{CB<-}{\code{signature(object = "CopyNumberSet", |
|
55 |
+ value="matrix")}: Replaces CB matrix with supplied value .} |
|
56 |
+ |
|
57 |
+ |
|
58 |
+ \item{chromosome}{\code{signature(object = "CopyNumberSet")}: |
|
59 |
+ Extract chromosome.} |
|
60 |
+ |
|
61 |
+ \item{copyNumber}{\code{signature(object = "CopyNumberSet")}: |
|
62 |
+ Returns CA + CB.} |
|
63 |
+ |
|
64 |
+ \item{ellipse}{\code{signature(x = "CopyNumberSet", ...)}: |
|
65 |
+ Extracts parameters from \code{featureData} slot and draws |
|
66 |
+ prediction regions for the supplied copy number.} |
|
67 |
+ |
|
68 |
+ \item{plot}{\code{signature(x = "ABset", y = "CopyNumberSet")}: |
|
69 |
+ Physical position} |
|
70 |
+ \item{position}{\code{signature(object = "CopyNumberSet")}: physical position} |
|
71 |
+ } |
|
72 |
+} |
|
73 |
+ |
|
74 |
+\author{R. Scharpf} |
|
75 |
+\examples{ |
|
76 |
+showClass("CopyNumberSet") |
|
77 |
+ |
|
78 |
+\dontrun{ |
|
79 |
+##returns the copy number for allele A at polymorphic loci |
|
80 |
+CA(object[snpIndex(object), ]) |
|
81 |
+##returns the total copy number at nonpolymorphic loci |
|
82 |
+CA(object[cnIndex(object), ]) |
|
83 |
+} |
|
84 |
+} |
|
85 |
+\keyword{classes} |
0 | 86 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,118 @@ |
1 |
+\name{CrlmmSetList-class} |
|
2 |
+\Rdversion{1.1} |
|
3 |
+\docType{class} |
|
4 |
+\alias{CrlmmSetList-class} |
|
5 |
+\alias{[,CrlmmSetList-method} |
|
6 |
+\alias{A,CrlmmSetList-method} |
|
7 |
+\alias{B,CrlmmSetList-method} |
|
8 |
+\alias{calls,CrlmmSetList-method} |
|
9 |
+\alias{cnIndex,CrlmmSetList-method} |
|
10 |
+\alias{combine,CrlmmSetList,CrlmmSetList-method} |
|
11 |
+\alias{plot,CrlmmSetList,ANY-method} |
|
12 |
+\alias{points,CrlmmSetList-method} |
|
13 |
+\alias{sampleNames,CrlmmSetList-method} |
|
14 |
+\alias{scanDates,CrlmmSetList-method} |
|
15 |
+\alias{show,CrlmmSetList-method} |
|
16 |
+\alias{snpIndex,CrlmmSetList-method} |
|
17 |
+ |
|
18 |
+ |
|
19 |
+\title{Class "CrlmmSetList"} |
|
20 |
+\description{Container for quantile normalized intensities and genotype calls.} |
|
21 |
+\section{Objects from the Class}{ |
|
22 |
+ Objects from the class are created by calls to the \code{crlmmWrapper} function. |
|
23 |
+} |
|
24 |
+\section{Slots}{ |
|
25 |
+ \describe{ |
|
26 |
+ \item{\code{.Data}:}{Object of class \code{"list"} ~~ } |
|
27 |
+ } |
|
28 |
+% \item{\code{ABset}:}{Object of class \code{"ABset"}. |
|
29 |
+% Contains quantile normalized A and B intensities. Quantile |
|
30 |
+% normalized intensities for nonpolymorphic probes are stored |
|
31 |
+% in the A slot.} |
|
32 |
+% \item{\code{crlmmResult}:}{Object of class |
|
33 |
+% \code{"SnpSet"}. Contains genotype calls and the |
|
34 |
+% corresponding confidence estimates.} |
|
35 |
+} |
|
36 |
+ |
|
37 |
+\section{Details}{ |
|
38 |
+ |
|
39 |
+ Instances of \code{CrlmmSetList} are a list with two elements: |
|
40 |
+ |
|
41 |
+ 1. an object of class \code{ABset} |
|
42 |
+ |
|
43 |
+ 2. an object of class \code{SnpSet} |
|
44 |
+ |
|
45 |
+ The \code{featureNames} and \code{sampleNames} of both objects are |
|
46 |
+ required to be identical. |
|
47 |
+ |
|
48 |
+ Quantile-normalized intensities for the copy number probes are stored |
|
49 |
+ in the assay data element A of the ABset object. Corresponding rows |
|
50 |
+ for the B allele are recorded as NAs. Genotype calls for copy number |
|
51 |
+ probes in the SnpSet object are recorded as NAs. |
|
52 |
+ |
|
53 |
+} |
|
54 |
+ |
|
55 |
+\section{Extends}{ |
|
56 |
+ Class \code{"\linkS4class{list}"}, from data part. |
|
57 |
+ Class \code{"\linkS4class{vector}"}, by class "list", distance 2. |
|
58 |
+ Class \code{"\linkS4class{AssayData}"}, by class "list", distance 2. |
|
59 |
+} |
|
60 |
+\section{Methods}{ |
|
61 |
+ \describe{ |
|
62 |
+ \item{"["}{\code{signature(x = "CrlmmSetList")}: subsets both the } |
|
63 |
+ |
|
64 |
+ \item{"A"}{\code{signature(object = "CrlmmSetList")}: extracts the |
|
65 |
+ quantile normalized intensities for the A allele in the |
|
66 |
+ \code{ABset} element.} |
|
67 |
+ |
|
68 |
+ |
|
69 |
+ \item{"B"}{\code{signature(object = "CrlmmSetList")}: extracts the |
|
70 |
+ quantile normalized intensities for the B allele in the |
|
71 |
+ \code{ABset} element.} |
|
72 |
+ |
|
73 |
+ \item{"calls"}{\code{signature(object = "CrlmmSetList")}: extracts |
|
74 |
+ the genotype calls from the \code{SnpSet} element.} |
|
75 |
+ |
|
76 |
+ \item{"cnIndex"}{\code{signature(object = "CrlmmSetList")}: returns |
|
77 |
+ the row indices of the copy number probes.} |
|
78 |
+ |
|
79 |
+ \item{"combine"}{\code{signature(x = "CrlmmSetList", y = "CrlmmSetList")}: combines |
|
80 |
+ objects of the class. } |
|
81 |
+ |
|
82 |
+ \item{"plot"}{\code{signature(x = "CrlmmSetList")}: For A versus B |
|
83 |
+ scatterplots. } |
|
84 |
+ |
|
85 |
+ \item{"points"}{\code{signature(x = "CrlmmSetList")}: For A versus B |
|
86 |
+ scatterplots. } |
|
87 |
+ |
|
88 |
+ \item{"sampleNames"}{\code{signature(object = "CrlmmSetList")}: |
|
89 |
+ Accessor for the column identifiers of the assay data elements. } |
|
90 |
+ |
|
91 |
+ \item{"show"}{\code{signature(object = "CrlmmSetList")}: |
|
92 |
+ Shows the \code{ABset} and \code{SnpSet} elements of the list. } |
|
93 |
+ |
|
94 |
+ |
|
95 |
+ \item{"snpIndex"}{\code{signature(object = "CrlmmSetList")}: returns |
|
96 |
+ the row indices of the polymorphic probes.} |
|
97 |
+ |
|
98 |
+ |
|
99 |
+ } |
|
100 |
+} |
|
101 |
+ |
|
102 |
+ |
|
103 |
+\section{Warnings}{ |
|
104 |
+ |
|
105 |
+ \code{combine} will produce a warning as the \code{featureData} are |
|
106 |
+ not easily combined and therefore removed. This method needs |
|
107 |
+ improvement. |
|
108 |
+ |
|
109 |
+} |
|
110 |
+\author{R. Scharpf} |
|
111 |
+\seealso{ |
|
112 |
+ See also \code{"\link{crlmmWrapper}"}, |
|
113 |
+ \code{"\linkS4class{ABset}"} |
|
114 |
+} |
|
115 |
+\examples{ |
|
116 |
+showClass("CrlmmSetList") |
|
117 |
+} |
|
118 |
+\keyword{classes} |
0 | 119 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,58 @@ |
1 |
+\name{crlmmWrapper} |
|
2 |
+\Rdversion{1.1} |
|
3 |
+\alias{crlmmWrapper} |
|
4 |
+\title{ Function to quantile normalize the cell files and genotype the |
|
5 |
+ polymorphic loci. |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+ Quantile normalizes the cel files to a target reference distribution |
|
9 |
+ (both polymorphic and nonpolymorphic loci). Genotype calls and |
|
10 |
+ confidence scores are assigned using the crlmm algorithm. |
|
11 |
+} |
|
12 |
+\usage{ |
|
13 |
+crlmmWrapper(filenames, outdir = "./", cdfName = "genomewidesnp6", save.it, splitByChr=TRUE, ...) |
|
14 |
+} |
|
15 |
+\arguments{ |
|
16 |
+ \item{filenames}{ |
|
17 |
+ Complete path to cel files |
|
18 |
+ } |
|
19 |
+ \item{outdir}{ |
|
20 |
+ Where to store results. |
|
21 |
+ } |
|
22 |
+ \item{cdfName}{ |
|
23 |
+ Annotation package. |
|
24 |
+ } |
|
25 |
+ \item{save.it}{ |
|
26 |
+ Whether to save intermediate results. Sometimes this can be helpful. |
|
27 |
+ } |
|
28 |
+ \item{splitByChr}{ |
|
29 |
+ Logical. Whether to split results by chromosome (default is TRUE) |
|
30 |
+ } |
|
31 |
+ \item{\dots}{ |
|
32 |
+ additional arguments to function \code{crlmm} |
|
33 |
+ } |
|
34 |
+} |
|
35 |
+\value{ |
|
36 |
+ An object of class \code{CrlmmSetList} |
|
37 |
+} |
|
38 |
+ |
|
39 |
+\author{ |
|
40 |
+ R. Scharpf |
|
41 |
+} |
|
42 |
+\seealso{ |
|
43 |
+ \code{\linkS4class{CrlmmSetList}} |
|
44 |
+ \code{\link{crlmm}} |
|
45 |
+} |
|
46 |
+\examples{ |
|
47 |
+\dontrun{ |
|
48 |
+library(crlmm) |
|
49 |
+library(genomewidesnp6Crlmm) |
|
50 |
+celFiles <- list.files("PATH_TO_CELS", full.name=TRUE) |
|
51 |
+crlmmList <- crlmmWrapper(celFiles, outdir="./", save.it=TRUE, intensityFile="intensities.rda") |
|
52 |
+} |
|
53 |
+} |
|
54 |
+% Add one or more standard keywords, see file 'KEYWORDS' in the |
|
55 |
+% R documentation directory. |
|
56 |
+\keyword{robust} |
|
57 |
+\keyword{methods} |
|
58 |
+\keyword{list} |
0 | 59 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,48 @@ |
1 |
+\name{methods-eSet} |
|
2 |
+\alias{cnIndex} |
|
3 |
+\alias{cnIndex,eSet-method} |
|
4 |
+\alias{snpIndex} |
|
5 |
+\alias{snpIndex,eSet-method} |
|
6 |
+ |
|
7 |
+ |
|
8 |
+\title{Methods for eSet derivatives} |
|
9 |
+ |
|
10 |
+\description{ |
|
11 |
+ |
|
12 |
+ \code{snpIndex} return the row indices of polymorphic loci. |
|
13 |
+ |
|
14 |
+ \code{cnIndex} return the row indices of copy number probes. |
|
15 |
+ |
|
16 |
+} |
|
17 |
+ |
|
18 |
+\usage{ |
|
19 |
+ |
|
20 |
+ cnIndex(object) |
|
21 |
+ |
|
22 |
+ snpIndex(object) |
|
23 |
+ |
|
24 |
+} |
|
25 |
+ |
|
26 |
+\arguments{ |
|
27 |
+ |
|
28 |
+ \item{object}{Any object extending eSet (ABset, CopyNumberSet, |
|
29 |
+ CrlmmSetList, SnpSet} |
|
30 |
+ |
|
31 |
+} |
|
32 |
+ |
|
33 |
+\value{ |
|
34 |
+ |
|
35 |
+ Vector of indices. |
|
36 |
+ |
|
37 |
+} |
|
38 |
+ |
|
39 |
+\seealso{ |
|
40 |
+ |
|
41 |
+ \code{"\linkS4class{ABset}"} |
|
42 |
+ \code{"\linkS4class{CopyNumberSet}"} |
|
43 |
+ \code{"\linkS4class{CrlmmSetList}"} |
|
44 |
+ |
|
45 |
+} |
|
46 |
+ |
|
47 |
+ |
|
48 |
+\keyword{manip} |
|
0 | 49 |
\ No newline at end of file |