git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@41017 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.3.16 |
|
4 |
+Version: 1.3.17 |
|
5 | 5 |
Date: 2009-07-22 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU> |
... | ... |
@@ -7,21 +7,23 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
7 | 7 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
8 | 8 |
highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) { |
9 | 9 |
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
10 |
- arrayNames=NULL |
|
11 |
- if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
12 |
- barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
13 |
- arrayNames=barcode |
|
14 |
- } |
|
15 |
- if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
16 |
- position = sampleSheet[,arrayInfoColNames$position] |
|
17 |
- if(is.null(arrayNames)) |
|
18 |
- arrayNames=position |
|
19 |
- else |
|
20 |
- arrayNames = paste(arrayNames, position, sep=sep) |
|
21 |
- if(highDensity) { |
|
22 |
- hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
23 |
- for(i in names(hdExt)) |
|
24 |
- arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
10 |
+ if(!is.null(arrayNames)){ |
|
11 |
+ ##arrayNames=NULL |
|
12 |
+ if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
13 |
+ barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
14 |
+ arrayNames=barcode |
|
15 |
+ } |
|
16 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
17 |
+ position = sampleSheet[,arrayInfoColNames$position] |
|
18 |
+ if(is.null(arrayNames)) |
|
19 |
+ arrayNames=position |
|
20 |
+ else |
|
21 |
+ arrayNames = paste(arrayNames, position, sep=sep) |
|
22 |
+ if(highDensity) { |
|
23 |
+ hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
24 |
+ for(i in names(hdExt)) |
|
25 |
+ arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
26 |
+ } |
|
25 | 27 |
} |
26 | 28 |
} |
27 | 29 |
pd = new("AnnotatedDataFrame", data = sampleSheet) |
... | ... |
@@ -46,17 +48,14 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
46 | 48 |
paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" ")) |
47 | 49 |
grnidats = file.path(path, grnfiles) |
48 | 50 |
redidats = file.path(path, redfiles) |
49 |
- |
|
50 | 51 |
headerInfo = list(nProbes = rep(NA, narrays), |
51 | 52 |
Barcode = rep(NA, narrays), |
52 | 53 |
ChipType = rep(NA, narrays), |
53 | 54 |
Manifest = rep(NA, narrays), # not sure about this one - sometimes blank |
54 | 55 |
Position = rep(NA, narrays)) # this may also vary a bit |
55 |
- |
|
56 | 56 |
dates = list(decode=rep(NA, narrays), |
57 | 57 |
scan=rep(NA, narrays)) |
58 |
- |
|
59 |
- # read in the data |
|
58 |
+ ## read in the data |
|
60 | 59 |
for(i in seq(along=arrayNames)) { |
61 | 60 |
cat("reading", arrayNames[i], "\t") |
62 | 61 |
idsG = idsR = G = R = NULL |
... | ... |
@@ -68,7 +67,6 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
68 | 67 |
headerInfo$ChipType[i] = G$ChipType |
69 | 68 |
headerInfo$Manifest[i] = G$Unknown$MostlyNull |
70 | 69 |
headerInfo$Position[i] = G$Unknowns$MostlyA |
71 |
- |
|
72 | 70 |
if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) { |
73 | 71 |
## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest |
74 | 72 |
## but differed by a few SNPs for some reason - most of the chip was the same though |
... | ... |
@@ -76,29 +74,23 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
76 | 74 |
warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i])) |
77 | 75 |
next() |
78 | 76 |
} |
79 |
- |
|
80 | 77 |
dates$decode[i] = G$RunInfo[1, 1] |
81 | 78 |
dates$scan[i] = G$RunInfo[2, 1] |
82 |
- |
|
83 | 79 |
if(i==1) { |
84 |
- if(is.null(ids) && !is.null(G)) |
|
80 |
+ if(is.null(ids) && !is.null(G)){ |
|
85 | 81 |
ids = idsG |
86 |
- else |
|
87 |
- stop("Could not find probe IDs") |
|
82 |
+ }else stop("Could not find probe IDs") |
|
88 | 83 |
nprobes = length(ids) |
89 | 84 |
narrays = length(arrayNames) |
90 |
- |
|
91 | 85 |
tmpmat = matrix(NA, nprobes, narrays) |
92 | 86 |
rownames(tmpmat) = ids |
93 |
- if(!is.null(sampleSheet)) |
|
87 |
+ if(!is.null(sampleSheet)){ |
|
94 | 88 |
colnames(tmpmat) = sampleSheet$Sample_ID |
95 |
- else |
|
96 |
- colnames(tmpmat) = arrayNames |
|
97 |
- |
|
98 |
- RG = new("NChannelSet", |
|
99 |
- R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat, |
|
100 |
- Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1], |
|
101 |
- phenoData=pd, storage.mode="environment") |
|
89 |
+ }else colnames(tmpmat) = arrayNames |
|
90 |
+ RG <- new("NChannelSet", |
|
91 |
+ R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat, |
|
92 |
+ Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1], |
|
93 |
+ phenoData=pd, storage.mode="environment") |
|
102 | 94 |
rm(tmpmat) |
103 | 95 |
gc() |
104 | 96 |
} |
... | ... |
@@ -109,8 +101,7 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
109 | 101 |
RG@assayData$Gnb[,i] = G$Quants[, "NBeads"] |
110 | 102 |
RG@assayData$Gse[,i] = G$Quants[, "SD"] |
111 | 103 |
} |
112 |
- } |
|
113 |
- else { |
|
104 |
+ } else { |
|
114 | 105 |
indG = match(ids, idsG) |
115 | 106 |
RG@assayData$G[,i] = G$Quants[indG, "Mean"] |
116 | 107 |
RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"] |
... | ... |
@@ -129,8 +120,7 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".", |
129 | 120 |
RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"] |
130 | 121 |
RG@assayData$Rse[,i] = R$Quants[ ,"SD"] |
131 | 122 |
} |
132 |
- } |
|
133 |
- else { |
|
123 |
+ } else { |
|
134 | 124 |
indR = match(ids, idsR) |
135 | 125 |
RG@assayData$R[,i] = R$Quants[indR, "Mean"] |
136 | 126 |
RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"] |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
\newcommand{\R}{\textsf{R}} |
15 | 15 |
|
16 | 16 |
\begin{document} |
17 |
-\title{Trisomy analysis} |
|
17 |
+\title{Copy number estimation} |
|
18 | 18 |
\date{May, 2009} |
19 | 19 |
\author{Rob Scharpf} |
20 | 20 |
\maketitle |
... | ... |
@@ -171,7 +171,7 @@ Plot physical position versus copy number for the first sample. Recall |
171 | 171 |
that the copy number estimates were multiplied by 100 and stored as an |
172 | 172 |
integer. |
173 | 173 |
|
174 |
-<<oneSample, fig=TRUE, width=8, height=4, include=FALSE, eval=FALSE>>= |
|
174 |
+<<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>= |
|
175 | 175 |
par(las=1) |
176 | 176 |
plot(position(crlmmResults), copyNumber(crlmmResults)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), |
177 | 177 |
ylab="copy number", xlab="physical position (Mb)", |
... | ... |
@@ -199,7 +199,7 @@ plate. Plotting symbols are the genotype calls (1=AA, 2=AB, 3=BB); light |
199 | 199 |
grey points are from other plates. One could also add the prediction |
200 | 200 |
regions for 0-4 copies, but it gets crowded. |
201 | 201 |
|
202 |
-<<genotypeCalls, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>= |
|
202 |
+<<genotypeCalls, fig=TRUE, width=8, height=8, include=FALSE>>= |
|
203 | 203 |
xlim <- ylim <- c(6.5,13) |
204 | 204 |
pch <- 21 |
205 | 205 |
colors <- c("red", "blue", "green3") |
... | ... |
@@ -221,7 +221,7 @@ for(i in indices[[j]]){ |
221 | 221 |
} |
222 | 222 |
@ |
223 | 223 |
|
224 |
-<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>= |
|
224 |
+<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE>>= |
|
225 | 225 |
require(RColorBrewer) |
226 | 226 |
greens <- brewer.pal(9, "Greens") |
227 | 227 |
J <- split(1:ncol(crlmmResults), batch(crlmmResults)) |