... | ... |
@@ -42,7 +42,7 @@ BioC data packages |
42 | 42 |
|
43 | 43 |
2009-03-25 Rob Scharpf - committed version 1.0.60 |
44 | 44 |
* simplified some of the preliminary steps for the computeCopynumber function |
45 |
-** crlmm output subset within the body of the function |
|
45 |
+* crlmm output subset within the body of the function |
|
46 | 46 |
* modified vignette -- store results in an oligoSnpSet object (for now) |
47 | 47 |
* added function to extract the date from the celfile headers (celDates) |
48 | 48 |
|
... | ... |
@@ -78,7 +78,6 @@ BioC data packages |
78 | 78 |
* added functions to read in idat files, pre-process and genotype |
79 | 79 |
data from Illumina SNP arrays (functions named readIdatFiles() |
80 | 80 |
and crlmmIllumina() in crlmm-illumina.R) |
81 |
- |
|
82 | 81 |
* added man pages for these functions |
83 | 82 |
|
84 | 83 |
2009-04-01 Matt Ritchie - committed version 1.0.67 |
... | ... |
@@ -91,8 +90,18 @@ and crlmmIllumina() in crlmm-illumina.R) |
91 | 90 |
|
92 | 91 |
2009-04-03 B Carvalho - committed version 1.0.70 |
93 | 92 |
|
94 |
-** Updated TODO and DESCRIPTION |
|
93 |
+* Updated TODO and DESCRIPTION |
|
95 | 94 |
|
96 | 95 |
2009-04-04 R.Scharpf - committed version 1.0.71 |
97 | 96 |
|
98 | 97 |
* bug in oneBatch function for chromosome X. added additional checks for missing values |
98 |
+ |
|
99 |
+2009-04-06 Matt Ritchie - committed version 1.0.72 |
|
100 |
+ |
|
101 |
+* new argument in readIdatFiles() 'saveDate', which saves the date and time each array |
|
102 |
+is decoded and scanned |
|
103 |
+* results from crlmmIllumina() now saved as 'SnpSet' just like crlmm() |
|
104 |
+* reorganised Illumina preprocessing functions slightly to |
|
105 |
+ - allow copy number intensities to be saved along with snp intensities |
|
106 |
+ - make use of new indexing objects in the chip specific crlmm data packages |
|
107 |
+ - fix a few bugs |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays. |
4 |
-Version: 1.0.71 |
|
4 |
+Version: 1.0.72 |
|
5 | 5 |
Date: 2008-12-30 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
|
6 | 6 |
readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
7 | 7 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
8 |
- highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat")) { |
|
8 |
+ highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) { |
|
9 | 9 |
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
10 | 10 |
arrayNames=NULL |
11 | 11 |
if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
... | ... |
@@ -33,6 +33,7 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
33 | 33 |
cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
34 | 34 |
} |
35 | 35 |
} |
36 |
+ narrays = length(arrayNames) |
|
36 | 37 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
37 | 38 |
redfiles = paste(arrayNames, fileExt$red, sep=sep) |
38 | 39 |
if(length(grnfiles)==0 || length(redfiles)==0) |
... | ... |
@@ -45,11 +46,17 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
45 | 46 |
grnidats = file.path(path, grnfiles) |
46 | 47 |
redidats = file.path(path, redfiles) |
47 | 48 |
|
48 |
- headerInfo = list(nProbes = rep(NA, length(arrayNames)), |
|
49 |
- Barcode = rep(NA, length(arrayNames)), |
|
50 |
- ChipType = rep(NA, length(arrayNames)), |
|
51 |
- Manifest = rep(NA, length(arrayNames)), # not sure about this one - sometimes blank |
|
52 |
- Position = rep(NA, length(arrayNames))) # this may also vary a bit |
|
49 |
+ headerInfo = list(nProbes = rep(NA, narrays), |
|
50 |
+ Barcode = rep(NA, narrays), |
|
51 |
+ ChipType = rep(NA, narrays), |
|
52 |
+ Manifest = rep(NA, narrays), # not sure about this one - sometimes blank |
|
53 |
+ Position = rep(NA, narrays)) # this may also vary a bit |
|
54 |
+ |
|
55 |
+ dates = list(decode=rep(NA, narrays), |
|
56 |
+ scan=rep(NA, narrays), |
|
57 |
+ register=rep(NA, narrays), |
|
58 |
+ extract=rep(NA, narrays)) |
|
59 |
+ |
|
53 | 60 |
# read in the data |
54 | 61 |
for(i in seq(along=arrayNames)) { |
55 | 62 |
cat("reading", arrayNames[i], "\t") |
... | ... |
@@ -80,6 +87,12 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
80 | 87 |
# || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest |
81 | 88 |
# but differed by a few SNPs for some reason - most of the chip was the same though |
82 | 89 |
stop("Chips are not of all of the same type - please check your data") |
90 |
+ |
|
91 |
+ dates$decode[i] = G$RunInfo[1, 1] |
|
92 |
+ dates$scan[i] = G$RunInfo[2, 1] |
|
93 |
+ dates$register[i] = G$RunInfo[3, 1] |
|
94 |
+ dates$extract[i] = G$RunInfo[4, 1] |
|
95 |
+ |
|
83 | 96 |
if(i==1) { |
84 | 97 |
if(is.null(ids) && !is.null(G)) |
85 | 98 |
ids = idsG |
... | ... |
@@ -122,14 +135,22 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
122 | 135 |
Rstderr[,i] = R$Quants[indR, "SD"] |
123 | 136 |
} |
124 | 137 |
} |
125 |
- if(is.null(sampleSheet)) |
|
126 |
- pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
127 |
- else |
|
128 |
- pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
138 |
+ if(is.null(sampleSheet)) { |
|
139 |
+ if(saveDate) |
|
140 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames, DecodeDate=dates$decode, ScanDate=dates$scan)) |
|
141 |
+ else |
|
142 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
143 |
+ } |
|
144 |
+ else { |
|
145 |
+ if(saveDate) |
|
146 |
+ pd = new("AnnotatedDataFrame", data = cbind(sampleSheet, DecodeDate=dates$decode, ScanDate=dates$scan)) |
|
147 |
+ else |
|
148 |
+ pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
149 |
+ } |
|
129 | 150 |
RG = new("NChannelSet", |
130 | 151 |
R=Rintens, G=Gintens, Rnb=Rnobeads, Gnb=Gnobeads, |
131 | 152 |
Rse=Rstderr, Gse=Gstderr, annotation=headerInfo$Manifest[1], |
132 |
- phenoData = pd) |
|
153 |
+ phenoData=pd) |
|
133 | 154 |
} |
134 | 155 |
|
135 | 156 |
|
... | ... |
@@ -390,7 +411,7 @@ readBPM <- function(bpmFile){ |
390 | 411 |
} |
391 | 412 |
|
392 | 413 |
|
393 |
-RGtoXY = function(RG, chipType, remove=TRUE, verbose=TRUE) { |
|
414 |
+RGtoXY = function(RG, chipType, verbose=TRUE) { |
|
394 | 415 |
chipList = c("human1mv1c", # 1M |
395 | 416 |
"human370v1c", # 370CNV |
396 | 417 |
"human650v3a", # 650Y |
... | ... |
@@ -413,32 +434,36 @@ RGtoXY = function(RG, chipType, remove=TRUE, verbose=TRUE) { |
413 | 434 |
rm(suggCall, msg) |
414 | 435 |
} |
415 | 436 |
if(verbose) message("Loading chip annotation information.") |
416 |
- loader("annotation.rda", .crlmmPkgEnv, pkgname) |
|
437 |
+ loader("address.rda", .crlmmPkgEnv, pkgname) |
|
417 | 438 |
# data(annotation, package=pkgname, envir=.crlmmPkgEnv) |
418 |
- annot <- getVarInEnv("annot") |
|
439 |
+ aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest |
|
440 |
+ bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest |
|
441 |
+ ids <- names(aids) |
|
442 |
+ snpbase <- getVarInEnv("base") |
|
419 | 443 |
|
420 |
- if(remove) |
|
421 |
- annot = annot[annot[,"ToCall"]==1,] |
|
422 |
- nsnps = nrow(annot) |
|
444 |
+ nsnps = length(aids) |
|
423 | 445 |
narrays = ncol(RG) |
424 |
- aidcol = match("AddressA_ID", colnames(annot)) |
|
425 |
- if(is.na(aidcol)) |
|
426 |
- aidcol = match("Address", colnames(annot)) |
|
427 |
- bidcol = match("AddressB_ID", colnames(annot)) |
|
428 |
- if(is.na(bidcol)) |
|
429 |
- bidcol = match("Address2", colnames(annot)) |
|
430 |
- aids = annot[, aidcol] |
|
431 |
- bids = annot[, bidcol] |
|
432 |
- snpids = annot[,"Name"] |
|
433 |
- snpbase = annot[,"SNP"] |
|
434 |
- rrgg = !is.na(bids) & bids!=0 |
|
446 |
+ |
|
447 |
+# aidcol = match("AddressA_ID", colnames(annot)) |
|
448 |
+# if(is.na(aidcol)) |
|
449 |
+# aidcol = match("Address", colnames(annot)) |
|
450 |
+# bidcol = match("AddressB_ID", colnames(annot)) |
|
451 |
+# if(is.na(bidcol)) |
|
452 |
+# bidcol = match("Address2", colnames(annot)) |
|
453 |
+# aids = annot[, aidcol] |
|
454 |
+# bids = annot[, bidcol] |
|
455 |
+# snpids = annot[,"Name"] |
|
456 |
+# snpbase = annot[,"SNP"] |
|
457 |
+ infI = !is.na(bids) & bids!=0 |
|
435 | 458 |
aord = match(aids, featureNames(RG)) # NAs are possible here |
436 |
- bord = match(bids[!rrgg], featureNames(RG)) # and here |
|
459 |
+ bord = match(bids, featureNames(RG)) # and here |
|
437 | 460 |
# argrg = aids[rrgg] |
438 | 461 |
# brgrg = bids[rrgg] |
439 | 462 |
X = Y = Xnb = Ynb = Xse = Yse = zero = matrix(0, nsnps, narrays) |
440 |
- rownames(X) = rownames(Y) = rownames(Xnb) = rownames(Ynb) = rownames(Xse) = rownames(Yse) = snpids |
|
463 |
+ rownames(X) = rownames(Y) = rownames(Xnb) = rownames(Ynb) = rownames(Xse) = rownames(Yse) = ids |
|
441 | 464 |
colnames(X) = colnames(Y) = colnames(Xnb) = colnames(Ynb) = colnames(Xse) = colnames(Yse) = sampleNames(RG)$G |
465 |
+ |
|
466 |
+ # First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
|
442 | 467 |
X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red |
443 | 468 |
Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green |
444 | 469 |
Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],] |
... | ... |
@@ -446,23 +471,38 @@ RGtoXY = function(RG, chipType, remove=TRUE, verbose=TRUE) { |
446 | 471 |
Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],] |
447 | 472 |
Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],] |
448 | 473 |
|
449 |
- # Important addition needed to get the correct intensities for the transitional Infinium I SNPs |
|
450 |
- # these SNPs are intergorated by 2 separate bead types (the second bead type is given in the 'AddressB_ID' |
|
451 |
- # (or 'Address2') column, and will have bid != 0 | !is.na(bid) |
|
452 |
- # for these it could be that the X signal is the R/G from the A bead and the Y signal the R/G from the B snp |
|
453 |
- # at present these are all zeroed out, so will presumably be called as hets |
|
454 |
- # this is not a problem for the training data, since we get the X and Y intensities directly from BeadStudio output |
|
474 |
+ ## Warning - not 100% sure that the code below is correct - could be more complicated than this |
|
475 |
+ |
|
476 |
+ # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe |
|
477 |
+# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]") |
|
478 |
+ |
|
479 |
+# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
|
480 |
+# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
|
481 |
+# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],] |
|
482 |
+# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],] |
|
483 |
+# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],] |
|
484 |
+# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],] |
|
455 | 485 |
|
456 |
- X[!is.na(bord),] = 0 |
|
457 |
- Y[!is.na(bord),] = 0 |
|
458 |
- Xnb[!is.na(bord),] = 0 |
|
459 |
- Ynb[!is.na(bord),] = 0 |
|
460 |
- Xse[!is.na(bord),] = 0 |
|
461 |
- Yse[!is.na(bord),] = 0 |
|
486 |
+ # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
|
487 |
+# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
|
488 |
+ |
|
489 |
+# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red |
|
490 |
+# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green |
|
491 |
+# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],] |
|
492 |
+# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],] |
|
493 |
+# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],] |
|
494 |
+# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],] |
|
495 |
+ |
|
496 |
+ # For now zero out Infinium I probes |
|
497 |
+ X[infI,] = 0 |
|
498 |
+ Y[infI,] = 0 |
|
499 |
+ Xnb[infI,] = 0 |
|
500 |
+ Ynb[infI,] = 0 |
|
501 |
+ Xse[infI,] = 0 |
|
502 |
+ Yse[infI,] = 0 |
|
462 | 503 |
|
463 | 504 |
zero[X==0 | Y==0] = 1 |
464 | 505 |
|
465 |
- rm(annot) |
|
466 | 506 |
gc() |
467 | 507 |
|
468 | 508 |
XY = new("NChannelSet", |
... | ... |
@@ -533,12 +573,12 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
533 | 573 |
|
534 | 574 |
|
535 | 575 |
preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps=0.1, verbose=TRUE, seed=1, |
536 |
- cdfName, sns, stripNorm=TRUE, useTarget=TRUE) { |
|
576 |
+ cdfName, sns, stripNorm=TRUE, useTarget=TRUE, save.it=FALSE, intensityFile) { |
|
537 | 577 |
if(stripNorm) |
538 | 578 |
XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose) |
539 |
- |
|
579 |
+ |
|
540 | 580 |
## MR: the code below is mostly straight from snprma.R |
541 |
- if (missing(sns)) sns <- sampleNames(XY) |
|
581 |
+ if (missing(sns)) sns <- sampleNames(XY)$X |
|
542 | 582 |
if(missing(cdfName)) |
543 | 583 |
cdfName <- annotation(XY) |
544 | 584 |
## stuffDir <- changeToCrlmmAnnotationName(cdfName) |
... | ... |
@@ -552,7 +592,9 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps= |
552 | 592 |
} |
553 | 593 |
if(verbose) message("Loading snp annotation and mixture model parameters.") |
554 | 594 |
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
555 |
- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
595 |
+ loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
596 |
+ loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
597 |
+ loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
556 | 598 |
# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv) |
557 | 599 |
autosomeIndex <- getVarInEnv("autosomeIndex") |
558 | 600 |
# pnsa <- getVarInEnv("pnsa") |
... | ... |
@@ -564,9 +606,18 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps= |
564 | 606 |
SMEDIAN <- getVarInEnv("SMEDIAN") |
565 | 607 |
theKnots <- getVarInEnv("theKnots") |
566 | 608 |
gns <- getVarInEnv("gns") |
567 |
- |
|
568 |
- nprobes <- nrow(XY) |
|
569 |
- narrays <- ncol(XY) |
|
609 |
+ |
|
610 |
+ # separate out copy number probes |
|
611 |
+ npIndex = getVarInEnv("npProbesFid") |
|
612 |
+ nprobes = length(npIndex) |
|
613 |
+ narrays = ncol(XY) |
|
614 |
+ A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays) |
|
615 |
+ B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays) |
|
616 |
+ cnAB = list(A=A, B=B, sns=sns, gns=names(npIndex), cdfName=cdfName) |
|
617 |
+ |
|
618 |
+ # next process snp probes |
|
619 |
+ snpIndex = getVarInEnv("snpProbesFid") |
|
620 |
+ nprobes <- length(snpIndex) |
|
570 | 621 |
|
571 | 622 |
##We will read each cel file, summarize, and run EM one by one |
572 | 623 |
##We will save parameters of EM to use later |
... | ... |
@@ -585,14 +636,14 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps= |
585 | 636 |
##S will hold (A+B)/2 and M will hold A-B |
586 | 637 |
##NOTE: We actually dont need to save S. Only for pics etc... |
587 | 638 |
##f is the correction. we save to avoid recomputing |
588 |
- A <- matrix(as.integer(exprs(channel(XY, "X"))), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames)) |
|
589 |
- B <- matrix(as.integer(exprs(channel(XY, "Y"))), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames)) |
|
639 |
+ A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames)) |
|
640 |
+ B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames)) |
|
590 | 641 |
|
591 | 642 |
# new lines below - useful to keep track of zeroed out SNPs |
592 |
- zero <- matrix(as.integer(exprs(channel(XY, "zero"))), nprobes, narrays) |
|
643 |
+ zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays) |
|
593 | 644 |
|
594 |
- colnames(A) <- colnames(B) <- colnames(zero) <- sampleNames(XY)$X |
|
595 |
- rownames(A) <- rownames(B) <- rownames(zero) <- featureNames(XY) |
|
645 |
+ colnames(A) <- colnames(B) <- colnames(zero) <- sns |
|
646 |
+ rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY) |
|
596 | 647 |
|
597 | 648 |
if(verbose){ |
598 | 649 |
message("Calibrating ", narrays, " arrays.") |
... | ... |
@@ -613,8 +664,8 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps= |
613 | 664 |
SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
614 | 665 |
} |
615 | 666 |
if(verbose) { |
616 |
- if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
617 |
- else cat(".") |
|
667 |
+ if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
668 |
+ else cat(".") |
|
618 | 669 |
} |
619 | 670 |
} |
620 | 671 |
if (verbose) { |
... | ... |
@@ -623,11 +674,20 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, fitMixture=TRUE, eps= |
623 | 674 |
} |
624 | 675 |
if (!fitMixture) SNR <- mixtureParams <- NA |
625 | 676 |
## gns comes from preprocStuff.rda |
626 |
- list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
|
677 |
+ res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
|
678 |
+ |
|
679 |
+ if(save.it & !missing(intensityFile)) { |
|
680 |
+ t0 <- proc.time() |
|
681 |
+ save(cnAB, res, file=intensityFile) |
|
682 |
+ t0 <- proc.time()-t0 |
|
683 |
+ if(verbose) message("Used ", round(t0[3],1), " seconds to save ", intensityFile, ".") |
|
684 |
+ } |
|
685 |
+ return(res) |
|
627 | 686 |
} |
628 | 687 |
|
629 | 688 |
|
630 |
-## MR: Could add arguments to allow this function to read in idat data as well, although this add a further 7 arguments, which might over-complicate things |
|
689 |
+## MR: Could add arguments to allow this function to read in idat data as well, |
|
690 |
+## although this would add a further 7 arguments, which might over-complicate things |
|
631 | 691 |
crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
632 | 692 |
row.names=TRUE, col.names=TRUE, |
633 | 693 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
... | ... |
@@ -635,15 +695,9 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
635 | 695 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
636 | 696 |
cdfName, sns, recallMin=10, recallRegMin=1000, |
637 | 697 |
returnParams=FALSE, badSNP=.7) { |
638 |
- if(!missing(RG)) { |
|
639 |
- if(missing(XY)) |
|
640 |
- XY = RGtoXY(RG, chipType=cdfName) |
|
641 |
- else |
|
642 |
- stop("Both RG and XY specified - please use one or the other") |
|
643 |
- } |
|
698 |
+ |
|
644 | 699 |
if ((load.it | save.it) & missing(intensityFile)) |
645 | 700 |
stop("'intensityFile' is missing, and you chose either load.it or save.it") |
646 |
- if (missing(sns)) sns <- sampleNames(XY) #basename(filenames) |
|
647 | 701 |
if (!missing(intensityFile)) |
648 | 702 |
if (load.it & !file.exists(intensityFile)){ |
649 | 703 |
load.it <- FALSE |
... | ... |
@@ -651,23 +705,23 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
651 | 705 |
message("Not loading it, but running SNPRMA from scratch.") |
652 | 706 |
} |
653 | 707 |
if (!load.it){ |
654 |
-# res <- snprma(filenames, fitMixture=TRUE, |
|
655 |
-# mixtureSampleSize=mixtureSampleSize, verbose=verbose, |
|
656 |
-# eps=eps, cdfName=cdfName, sns=sns) |
|
657 |
- res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
658 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) |
|
659 |
- if(save.it){ |
|
660 |
- t0 <- proc.time() |
|
661 |
- save(res, file=intensityFile) |
|
662 |
- t0 <- proc.time()-t0 |
|
663 |
- if(verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".") |
|
708 |
+ if(!missing(RG)) { |
|
709 |
+ if(missing(XY)) |
|
710 |
+ XY = RGtoXY(RG, chipType=cdfName) |
|
711 |
+ else |
|
712 |
+ stop("Both RG and XY specified - please use one or the other") |
|
664 | 713 |
} |
714 |
+ if (missing(sns)) sns <- sampleNames(XY)$X |
|
715 |
+ |
|
716 |
+ res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
717 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, |
|
718 |
+ save.it=save.it, intensityFile=intensityFile) |
|
665 | 719 |
}else{ |
666 | 720 |
if(verbose) message("Loading ", intensityFile, ".") |
667 | 721 |
obj <- load(intensityFile) |
668 | 722 |
if(verbose) message("Done.") |
669 |
- if(obj != "res") |
|
670 |
- stop("Object in ", intensityFile, " seems to be invalid.") |
|
723 |
+ if(!any(obj == "res")) |
|
724 |
+ stop("Object in ", intensityFile, " seems to be invalid.") |
|
671 | 725 |
} |
672 | 726 |
if(row.names) row.names=res$gns else row.names=NULL |
673 | 727 |
if(col.names) col.names=res$sns else col.names=NULL |
... | ... |
@@ -682,5 +736,5 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
682 | 736 |
|
683 | 737 |
res2[["SNR"]] <- res[["SNR"]] |
684 | 738 |
res2[["SKW"]] <- res[["SKW"]] |
685 |
- return(res2) # return(list2SnpSet(res2, returnParams=returnParams)) |
|
739 |
+ return(list2SnpSet(res2, returnParams=returnParams)) # return(res2) |
|
686 | 740 |
} |
... | ... |
@@ -96,10 +96,10 @@ list2SnpSet <- function(x, returnParams=FALSE){ |
96 | 96 |
"SNP Quality Score", |
97 | 97 |
"Center AA", "Center AB", "Center BB", |
98 | 98 |
"Scale AA", "Scale AB", "Scale BB", |
99 |
- "N AA", "N AB", "N BB", |
|
100 |
- "Shift in parameters AA", |
|
101 |
- "Shift in parameters AB", |
|
102 |
- "Shift in parameters BB"), |
|
99 |
+ "N AA", "N AB", "N BB"), |
|
100 |
+# "Shift in parameters AA", |
|
101 |
+# "Shift in parameters AB", |
|
102 |
+# "Shift in parameters BB"), |
|
103 | 103 |
row.names=c( |
104 | 104 |
"SNPQC", |
105 | 105 |
"cAA", "cAB", "cBB", |
... | ... |
@@ -50,16 +50,22 @@ crlmmIllumina(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
50 | 50 |
\item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)} |
51 | 51 |
} |
52 | 52 |
\value{ |
53 |
+ A \code{SnpSet} object which contains |
|
53 | 54 |
\item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)} |
54 |
- \item{confs}{Confidence scores 'round(-1000*log2(1-p))'} |
|
55 |
+ \item{callProbability}{confidence scores 'round(-1000*log2(1-p))'} |
|
56 |
+ in the \code{assayData} slot and |
|
55 | 57 |
\item{SNPQC}{SNP Quality Scores} |
56 |
- \item{batchQC}{Batch Quality Score} |
|
57 |
- \item{params}{Recalibrated parameters} |
|
58 |
+ \item{batchQC}{Batch Quality Scores} |
|
59 |
+ along with center and scale parameters when \code{returnParams=TRUE} |
|
60 |
+ in the \code{featureData} slot. |
|
58 | 61 |
} |
59 | 62 |
|
60 | 63 |
\details{ |
61 | 64 |
Note: The user should specify either the \code{RG} or \code{XY} |
62 |
- intensities, not both. |
|
65 |
+ intensities, not both. Alternatively if \code{crlmmIllumina} has been |
|
66 |
+ run already with \code{save.it=TRUE}, the preprocessed data can be |
|
67 |
+ loaded from file by specifying \code{load.it=TRUE} and |
|
68 |
+ \code{intensityFile} (\code{RG} or \code{XY} are not needed in this case). |
|
63 | 69 |
} |
64 | 70 |
|
65 | 71 |
\references{ |
... | ... |
@@ -12,7 +12,8 @@ readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
12 | 12 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", |
13 | 13 |
position="SentrixPosition_A"), |
14 | 14 |
highDensity=FALSE, sep="_", |
15 |
- fileExt=list(green="Grn.idat", red="Red.idat")) |
|
15 |
+ fileExt=list(green="Grn.idat", red="Red.idat"), |
|
16 |
+ saveDate=FALSE) |
|
16 | 17 |
} |
17 | 18 |
|
18 | 19 |
\arguments{ |
... | ... |
@@ -40,6 +41,8 @@ readIdatFiles(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
40 | 41 |
names.} |
41 | 42 |
\item{fileExt}{list containing elements 'Green' and 'Red' which |
42 | 43 |
specify the .idat file extension for the Cy3 and Cy5 channels.} |
44 |
+ \item{saveDate}{logical. Should the dates from each .idat be saved |
|
45 |
+ with sample information?} |
|
43 | 46 |
} |
44 | 47 |
|
45 | 48 |
\details{ |