git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38665 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -143,3 +143,7 @@ is decoded and scanned |
143 | 143 |
* Fixed downstream vignette to account for the SnpSet object being returned by crlmm |
144 | 144 |
|
145 | 145 |
* Fixed some minor status messages |
146 |
+ |
|
147 |
+2009-04-09 R Scharpf - committed version 1.0.81 |
|
148 |
+ |
|
149 |
+* added a skeleton of a copy number vignette for the illumina platform (illumina_copynumber.Rnw) to inst/scripts |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.0.80 |
|
4 |
+Version: 1.0.81 |
|
5 | 5 |
Date: 2008-12-30 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -188,7 +188,8 @@ goodSnps <- function(phi.thr, envir, fewAA=20, fewBB=20){ |
188 | 188 |
|
189 | 189 |
instantiateObjects <- function(calls, conf, NP, plate, envir, chrom, A, B, |
190 | 190 |
gender, SNRmin=5, SNR, |
191 |
- pkgname){ |
|
191 |
+ pkgname, |
|
192 |
+ locusSet){ |
|
192 | 193 |
pkgname <- paste(pkgname, "Crlmm", sep="") |
193 | 194 |
envir[["chrom"]] <- chrom |
194 | 195 |
CHR_INDEX <- paste(chrom, "index", sep="") |
... | ... |
@@ -615,7 +616,7 @@ oneBatch <- function(plateIndex, |
615 | 616 |
CHR <- envir[["chrom"]] |
616 | 617 |
if(bias.adj){ |
617 | 618 |
nuA <- envir[["nuA"]] |
618 |
- if(all(is.na(nuA))) { |
|
619 |
+ if(all(is.na(nuA))){ |
|
619 | 620 |
message("Background and signal coefficients have not yet been estimated -- can not do bias correction yet") |
620 | 621 |
stop("Must run computeCopynumber a second time with bias.adj=TRUE to do the adjustment") |
621 | 622 |
} |
... | ... |
@@ -1021,7 +1022,7 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){ |
1021 | 1022 |
hemDel <- hemDel/total |
1022 | 1023 |
norm <- norm/total |
1023 | 1024 |
amp <- amp/total |
1024 |
- envir[["posteriorProb"]] <- list(hemDel=hemDel, norm=norm, amp=amp) |
|
1025 |
+ envir[["posteriorProb"]] <- list(hemDel=hemDel, norm=norm, amp=amp) |
|
1025 | 1026 |
tmp <- array(NA, dim=c(nrow(A), ncol(A), 4)) |
1026 | 1027 |
tmp[, , 1] <- homDel |
1027 | 1028 |
tmp[, , 2] <- hemDel |
... | ... |
@@ -1047,18 +1048,21 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){ |
1047 | 1048 |
##whether more are up or down |
1048 | 1049 |
if(CHR != 23){ |
1049 | 1050 |
moreup <- rowSums(tmp2 > 3) > rowSums(tmp2 < 3) ##3 is normal |
1050 |
- ##notUp <- tmp2[ii & moreup, ] <= 3 |
|
1051 |
- ##notDown <- tmp2[ii & !moreup, ] >= 3 |
|
1052 |
- |
|
1051 |
+ down <- tmp2[ii & moreup, ] <= 3 |
|
1052 |
+ up <- tmp2[ii & !moreup, ] >= 3 |
|
1053 | 1053 |
##What if we just keep X% of the ones with the highest |
1054 | 1054 |
##posterior probability for normal |
1055 |
- prNorm <- tmp[, , 3] |
|
1056 |
- rankNorm <- t(apply(prNorm, 1, order, decreasing=TRUE)) |
|
1057 |
- percentage <- round(0.75*ncol(prNorm),0) |
|
1058 |
- NORM <- rankNorm <= percentage |
|
1059 |
-## NORM <- matrix(NA, nrow(A), ncol(A)) |
|
1060 |
-## NORM[ii & moreup, ] <- notUp |
|
1061 |
-## NORM[ii & !moreup, ] <- notDown |
|
1055 |
+## prNorm <- tmp[, , 3] |
|
1056 |
+## rankNorm <- t(apply(prNorm, 1, order, decreasing=TRUE)) |
|
1057 |
+ |
|
1058 |
+ rankUP <- t(apply(tmp[moreup & ii, , 4]/tmp[moreup & ii, , 3], 1, order, decreasing=TRUE)) |
|
1059 |
+ rankDown <- t(apply(tmp[!moreup & ii, , 2]/tmp[!moreup & ii, , 3], 1, order, decreasing=TRUE)) |
|
1060 |
+ maxNumberToDrop <- round(0.25*ncol(tmp2),0) |
|
1061 |
+## NORM <- rankNorm <= percentage |
|
1062 |
+ NORM <- matrix(TRUE, nrow(A), ncol(A)) |
|
1063 |
+ NORM[ii & moreup, ] <- rankUP >= maxNumberToDrop |
|
1064 |
+ NORM[ii & !moreup, ] <- rankDown >= maxNumberToDrop |
|
1065 |
+## NORM[ii & !moreup, ] <- up |
|
1062 | 1066 |
##Define NORM so that we can iterate this step |
1063 | 1067 |
##NA's in the previous iteration (normal) will be propogated |
1064 | 1068 |
normal <- NORM*normal |
1065 | 1069 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,80 @@ |
1 |
+<<>>= |
|
2 |
+# start R from /home/bst/other/mritchie/R/R-300309/bin/R |
|
3 |
+# as this has current crlmm and region package installed |
|
4 |
+# if you want to ue your own R, the current region package is at |
|
5 |
+# /thumper/ctsa/snpmicroarray/illumina/crlmm/370k/human370v1cCrlmm/ |
|
6 |
+library(Biobase) |
|
7 |
+library(crlmm) |
|
8 |
+setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k") |
|
9 |
+@ |
|
10 |
+ |
|
11 |
+<<readIdat>>= |
|
12 |
+samplesheet5 = read.csv("HumanHap370Duo_Sample_Map.csv", header=TRUE, as.is=TRUE)[-c(28:46,61:75,78:79),] |
|
13 |
+if(!exists("RG")){ |
|
14 |
+ ## remove samples which I don't have .idats for |
|
15 |
+ ## subset further to allow for quicker testing |
|
16 |
+ ##samplesheet5 = samplesheet5[1:5,] |
|
17 |
+ ## read in the .idats |
|
18 |
+ RG <- readIdatFiles(samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE) |
|
19 |
+} |
|
20 |
+@ |
|
21 |
+ |
|
22 |
+Get dates for each array - scan date is likely to be most meaningful |
|
23 |
+ |
|
24 |
+<<dates>>= |
|
25 |
+pd <- pData(RG) |
|
26 |
+scandatetime = strptime(as.character(pd$ScanDate), "%m/%d/%Y %H:%M:%S %p") |
|
27 |
+decodedatetime = strptime(as.character(pd$DecodeDate), "%m/%d/%Y %H:%M:%S %p") |
|
28 |
+table(format(scandatetime, "%d %b %Y")) |
|
29 |
+table(format(scandatetime, "%b")) |
|
30 |
+@ |
|
31 |
+ |
|
32 |
+Note: the code below is run for testing purposes only. None of these |
|
33 |
+functions are exported, so would not routinely be run directly by the |
|
34 |
+user. A typical analysis would involve runnning readIdatFiles() |
|
35 |
+followed by crlmmIllumina() |
|
36 |
+ |
|
37 |
+<<crlmm>>= |
|
38 |
+if(!exists("res.rda")){ |
|
39 |
+ outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/illumina/HumanCNV370-Duo" |
|
40 |
+ if(!file.exists(file.path(outdir, "res.rda"))){ |
|
41 |
+ crlmmOut <- crlmmIllumina(RG=RG, cdfName="human370v1c", sns=pData(RG)$ID, returnParams=TRUE, save.it=TRUE, intensityFile=file.path(outdir, "res.rda")) |
|
42 |
+ save(crlmmOut, file=file.path(outdir, "crlmmOut.rda")) |
|
43 |
+ } else{ |
|
44 |
+ message("Loading...") |
|
45 |
+ load(file.path(outdir, "res.rda")) |
|
46 |
+ load(file.path(outdir, "crlmmOut.rda")) |
|
47 |
+ } |
|
48 |
+} |
|
49 |
+@ |
|
50 |
+ |
|
51 |
+<<SNR>>= |
|
52 |
+hist(crlmmOut$SNR) ##approx. 5-fold higher than what we see in Affy! |
|
53 |
+@ |
|
54 |
+ |
|
55 |
+<<>>= |
|
56 |
+##cnAB was assigned to the global environment |
|
57 |
+NP = (cnAB$A+cnAB$B)/2 # average normalized A and B intensities |
|
58 |
+colnames(NP) <- colnames(crlmm:::calls(crlmmOut)) |
|
59 |
+cnenv = new.env() |
|
60 |
+chr22 <- computeCopynumber(chrom=22, A=res[["A"]], B=res[["B"]], |
|
61 |
+ calls=crlmm:::calls(crlmmOut), |
|
62 |
+ conf=confs(crlmmOut), |
|
63 |
+ plate=rep(1, ncol(NP)), |
|
64 |
+ NP=NP, |
|
65 |
+ cdfName="human370v1c", |
|
66 |
+ SNR=res[["SNR"]], |
|
67 |
+ envir=cnenv) |
|
68 |
+@ |
|
69 |
+ |
|
70 |
+<<>>= |
|
71 |
+copyA=cnenv[["CA"]] |
|
72 |
+copyB=cnenv[["CB"]] |
|
73 |
+copyT=(copyA + copyB)/100 |
|
74 |
+copyT[copyT > 6] <- 6 |
|
75 |
+copyT[copyT < 0] <- 0 |
|
76 |
+plot(copyT[, 1], pch=".") |
|
77 |
+@ |
|
78 |
+ |
|
79 |
+ |
|
80 |
+ |