git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58739 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays. |
4 |
-Version: 1.11.46 |
|
4 |
+Version: 1.11.47 |
|
5 | 5 |
Date: 2010-12-10 |
6 | 6 |
Author: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry |
7 | 7 |
Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au> |
... | ... |
@@ -151,6 +151,7 @@ readIdatFiles = function(sampleSheet=NULL, |
151 | 151 |
|
152 | 152 |
|
153 | 153 |
## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008 |
154 |
+## edits provided by Kasper Daniel Hansen, 4/10/2011 |
|
154 | 155 |
readIDAT <- function(idatFile){ |
155 | 156 |
fileSize <- file.info(idatFile)$size |
156 | 157 |
|
... | ... |
@@ -160,14 +161,12 @@ readIDAT <- function(idatFile){ |
160 | 161 |
|
161 | 162 |
} |
162 | 163 |
|
163 |
- versionNumber <- readBin(tempCon, "integer", n=1, size=8, |
|
164 |
- endian="little") |
|
164 |
+ versionNumber <- readBin(tempCon, "integer", n=1, size=8, endian="little") |
|
165 | 165 |
|
166 | 166 |
if(versionNumber<3) |
167 | 167 |
stop("Older style IDAT files not supported: consider updating your scanner settings") |
168 | 168 |
|
169 |
- nFields <- readBin(tempCon, "integer", n=1, size=4, |
|
170 |
- endian="little") |
|
169 |
+ nFields <- readBin(tempCon, "integer", n=1, size=4, endian="little") |
|
171 | 170 |
|
172 | 171 |
fields <- matrix(0,nFields,3); |
173 | 172 |
colnames(fields) <- c("Field Code", "Byte Offset", "Bytes") |
... | ... |
@@ -178,26 +177,26 @@ readIDAT <- function(idatFile){ |
178 | 177 |
readBin(tempCon, "integer", n=1, size=8, endian="little") |
179 | 178 |
} |
180 | 179 |
|
181 |
- knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400, |
|
182 |
- 401, 402, 403, 404, 405, 406, 407, 408, 409) |
|
183 |
- names(knownCodes) <- |
|
184 |
- c("nSNPsRead", # 1000 |
|
185 |
- "IlluminaID", # 102 |
|
186 |
- "SD", # 103 |
|
187 |
- "Mean", # 104 |
|
188 |
- "NBeads", # 107 |
|
189 |
- "MidBlock", # 200 |
|
190 |
- "RunInfo", # 300 |
|
191 |
- "RedGreen", # 400 |
|
192 |
- "MostlyNull", # 401 |
|
193 |
- "Barcode", # 402 |
|
194 |
- "ChipType", # 403 |
|
195 |
- "MostlyA", # 404 |
|
196 |
- "Unknown.1", # 405 |
|
197 |
- "Unknown.2", # 406 |
|
198 |
- "Unknown.3", # 407 |
|
199 |
- "Unknown.4", # 408 |
|
200 |
- "Unknown.5" # 409 |
|
180 |
+ knownCodes <- |
|
181 |
+ c("nSNPsRead" = 1000, |
|
182 |
+ "IlluminaID" = 102, |
|
183 |
+ "SD" = 103, |
|
184 |
+ "Mean" = 104, |
|
185 |
+ "NBeads" = 107, |
|
186 |
+ "MidBlock" = 200, |
|
187 |
+ "RunInfo" = 300, |
|
188 |
+ "RedGreen" = 400, |
|
189 |
+ "MostlyNull" = 401, |
|
190 |
+ "Barcode" = 402, |
|
191 |
+ "ChipType" = 403, |
|
192 |
+ "MostlyA" = 404, |
|
193 |
+ "Unknown.1" = 405, |
|
194 |
+ "Unknown.2" = 406, |
|
195 |
+ "Unknown.3" = 407, |
|
196 |
+ "Unknown.4" = 408, |
|
197 |
+ "Unknown.5" = 409, |
|
198 |
+ "Unknown.6" = 410, |
|
199 |
+ "Unknown.7" = 510 |
|
201 | 200 |
) |
202 | 201 |
|
203 | 202 |
nNewFields <- 1 |
... | ... |
@@ -212,123 +211,158 @@ readIDAT <- function(idatFile){ |
212 | 211 |
} |
213 | 212 |
} |
214 | 213 |
|
215 |
- seek(tempCon, fields["nSNPsRead", "Byte Offset"]) |
|
216 |
- nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, |
|
217 |
- endian="little") |
|
218 |
- |
|
219 |
- seek(tempCon, fields["IlluminaID", "Byte Offset"]) |
|
220 |
- IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, |
|
221 |
- endian="little") |
|
222 |
- |
|
223 |
- seek(tempCon, fields["SD", "Byte Offset"]) |
|
224 |
- SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
225 |
- endian="little", signed=FALSE) |
|
226 |
- |
|
227 |
- seek(tempCon, fields["Mean", "Byte Offset"]) |
|
228 |
- Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
229 |
- endian="little", signed=FALSE) |
|
214 |
+ fields <- fields[order(fields[, "Byte Offset"]),] |
|
230 | 215 |
|
231 |
- seek(tempCon, fields["NBeads", "Byte Offset"]) |
|
232 |
- NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE) |
|
233 |
- |
|
234 |
- seek(tempCon, fields["MidBlock", "Byte Offset"]) |
|
235 |
- nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, |
|
236 |
- endian="little") |
|
237 |
- MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, |
|
238 |
- endian="little") |
|
239 |
- |
|
240 |
- seek(tempCon, fields["RunInfo", "Byte Offset"]) |
|
241 |
- nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, |
|
242 |
- endian="little") |
|
243 |
- RunInfo <- matrix(NA, nRunInfoBlocks, 5) |
|
244 |
- colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", |
|
245 |
- "BlockCode", "CodeVersion") |
|
246 |
- for(i1 in 1:2) { #nRunInfoBlocks){ ## MR edit |
|
247 |
- for(i2 in 1:5){ |
|
248 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
249 |
- RunInfo[i1,i2] <- readChar(tempCon, nChars) |
|
250 |
- } |
|
216 |
+ seek(tempCon, fields["nSNPsRead", "Byte Offset"]) |
|
217 |
+ nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, endian="little") |
|
218 |
+ |
|
219 |
+ readBlock <- function(nam) { |
|
220 |
+ switch(nam, |
|
221 |
+ "IlluminaID" = { |
|
222 |
+ seek(tempCon, fields["IlluminaID", "Byte Offset"]) |
|
223 |
+ IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, endian="little") |
|
224 |
+ IlluminaID |
|
225 |
+ }, |
|
226 |
+ "SD" = { |
|
227 |
+ seek(tempCon, fields["SD", "Byte Offset"]) |
|
228 |
+ SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, endian="little", signed=FALSE) |
|
229 |
+ SD |
|
230 |
+ }, |
|
231 |
+ "Mean" = { |
|
232 |
+ seek(tempCon, fields["Mean", "Byte Offset"]) |
|
233 |
+ Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, endian="little", signed=FALSE) |
|
234 |
+ Mean |
|
235 |
+ }, |
|
236 |
+ "NBeads" = { |
|
237 |
+ seek(tempCon, fields["NBeads", "Byte Offset"]) |
|
238 |
+ NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE) |
|
239 |
+ NBeads |
|
240 |
+ }, |
|
241 |
+ "MidBlock" = { |
|
242 |
+ seek(tempCon, fields["MidBlock", "Byte Offset"]) |
|
243 |
+ nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, endian="little") |
|
244 |
+ MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, |
|
245 |
+ endian="little") |
|
246 |
+ MidBlock |
|
247 |
+ }, |
|
248 |
+ "RunInfo" = { |
|
249 |
+ seek(tempCon, fields["RunInfo", "Byte Offset"]) |
|
250 |
+ nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, endian="little") |
|
251 |
+ RunInfo <- matrix(NA, nRunInfoBlocks, 5) |
|
252 |
+ colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", |
|
253 |
+ "BlockCode", "CodeVersion") |
|
254 |
+ for(i1 in 1:2) { #nRunInfoBlocks){ ## MR edit |
|
255 |
+ for(i2 in 1:5){ |
|
256 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
257 |
+ RunInfo[i1,i2] <- readChar(tempCon, nChars) |
|
258 |
+ } |
|
259 |
+ } |
|
260 |
+ RunInfo |
|
261 |
+ }, |
|
262 |
+ "RedGreen" = { |
|
263 |
+ seek(tempCon, fields["RedGreen", "Byte Offset"]) |
|
264 |
+ RedGreen <- readBin(tempCon, "numeric", n=1, size=4, |
|
265 |
+ endian="little") |
|
266 |
+ RedGreen |
|
267 |
+ }, |
|
268 |
+ "MostlyNull" = { |
|
269 |
+ seek(tempCon, fields["MostlyNull", "Byte Offset"]) |
|
270 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
271 |
+ MostlyNull <- readChar(tempCon, nChars) |
|
272 |
+ MostlyNull |
|
273 |
+ }, |
|
274 |
+ "Barcode" = { |
|
275 |
+ seek(tempCon, fields["Barcode", "Byte Offset"]) |
|
276 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
277 |
+ Barcode <- readChar(tempCon, nChars) |
|
278 |
+ Barcode |
|
279 |
+ }, |
|
280 |
+ "ChipType" = { |
|
281 |
+ seek(tempCon, fields["ChipType", "Byte Offset"]) |
|
282 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
283 |
+ ChipType <- readChar(tempCon, nChars) |
|
284 |
+ ChipType |
|
285 |
+ }, |
|
286 |
+ "MostlyA" = { |
|
287 |
+ seek(tempCon, fields["MostlyA", "Byte Offset"]) |
|
288 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
289 |
+ MostlyA <- readChar(tempCon, nChars) |
|
290 |
+ }, |
|
291 |
+ "Unknown.1" = { |
|
292 |
+ seek(tempCon, fields["Unknown.1", "Byte Offset"]) |
|
293 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
294 |
+ Unknown.1 <- readChar(tempCon, nChars) |
|
295 |
+ Unknown.1 |
|
296 |
+ }, |
|
297 |
+ "Unknown.2" = { |
|
298 |
+ seek(tempCon, fields["Unknown.2", "Byte Offset"]) |
|
299 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
300 |
+ Unknown.2 <- readChar(tempCon, nChars) |
|
301 |
+ Unknown.2 |
|
302 |
+ }, |
|
303 |
+ "Unknown.3" = { |
|
304 |
+ seek(tempCon, fields["Unknown.3", "Byte Offset"]) |
|
305 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
306 |
+ Unknown.3 <- readChar(tempCon, nChars) |
|
307 |
+ Unknown.3 |
|
308 |
+ }, |
|
309 |
+ "Unknown.4" = { |
|
310 |
+ seek(tempCon, fields["Unknown.4", "Byte Offset"]) |
|
311 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
312 |
+ Unknown.4 <- readChar(tempCon, nChars) |
|
313 |
+ Unknown.4 |
|
314 |
+ }, |
|
315 |
+ "Unknown.5" = { |
|
316 |
+ seek(tempCon, fields["Unknown.5", "Byte Offset"]) |
|
317 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
318 |
+ Unknown.5 <- readChar(tempCon, nChars) |
|
319 |
+ Unknown.5 |
|
320 |
+ }, |
|
321 |
+ "Unknown.6" = { |
|
322 |
+ seek(tempCon, fields["Unknown.6", "Byte Offset"]) |
|
323 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
324 |
+ Unknown.6 <- readChar(tempCon, nChars) |
|
325 |
+ Unknown.6 |
|
326 |
+ }, |
|
327 |
+ "Unknown.7" = { |
|
328 |
+ seek(tempCon, fields["Unknown.7", "Byte Offset"]) |
|
329 |
+ nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
330 |
+ Unknown.7 <- readChar(tempCon, nChars) |
|
331 |
+ Unknown.7 |
|
332 |
+ }) |
|
251 | 333 |
} |
252 | 334 |
|
253 |
- seek(tempCon, fields["RedGreen", "Byte Offset"]) |
|
254 |
- RedGreen <- readBin(tempCon, "numeric", n=1, size=4, |
|
255 |
- endian="little") |
|
256 |
- #RedGreen <- readBin(tempCon, "integer", n=4, size=1, |
|
257 |
- # endian="little") |
|
258 |
- |
|
259 |
- seek(tempCon, fields["MostlyNull", "Byte Offset"]) |
|
260 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
261 |
- MostlyNull <- readChar(tempCon, nChars) |
|
262 |
- |
|
263 |
- seek(tempCon, fields["Barcode", "Byte Offset"]) |
|
264 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
265 |
- Barcode <- readChar(tempCon, nChars) |
|
266 |
- |
|
267 |
- seek(tempCon, fields["ChipType", "Byte Offset"]) |
|
268 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
269 |
- ChipType <- readChar(tempCon, nChars) |
|
270 |
- |
|
271 |
- seek(tempCon, fields["MostlyA", "Byte Offset"]) |
|
272 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
273 |
- MostlyA <- readChar(tempCon, nChars) |
|
274 |
- |
|
275 |
- seek(tempCon, fields["Unknown.1", "Byte Offset"]) |
|
276 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
277 |
- Unknown.1 <- readChar(tempCon, nChars) |
|
278 |
- |
|
279 |
- seek(tempCon, fields["Unknown.2", "Byte Offset"]) |
|
280 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
281 |
- Unknown.2 <- readChar(tempCon, nChars) |
|
282 |
- |
|
283 |
- seek(tempCon, fields["Unknown.3", "Byte Offset"]) |
|
284 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
285 |
- Unknown.3 <- readChar(tempCon, nChars) |
|
286 |
- |
|
287 |
- seek(tempCon, fields["Unknown.4", "Byte Offset"]) |
|
288 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
289 |
- Unknown.4 <- readChar(tempCon, nChars) |
|
290 |
- |
|
291 |
- seek(tempCon, fields["Unknown.5", "Byte Offset"]) |
|
292 |
- nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
|
293 |
- Unknown.5 <- readChar(tempCon, nChars) |
|
335 |
+ readFields <- setdiff(rownames(fields), "nSNPsRead") |
|
336 |
+ names(readFields) <- readFields |
|
337 |
+ |
|
338 |
+ allFields <- lapply(readFields, readBlock) |
|
294 | 339 |
|
295 | 340 |
close(tempCon) |
296 | 341 |
|
297 |
- Unknowns <- |
|
298 |
- list(MostlyNull=MostlyNull, |
|
299 |
- MostlyA=MostlyA, |
|
300 |
- Unknown.1=Unknown.1, |
|
301 |
- Unknown.2=Unknown.2, |
|
302 |
- Unknown.3=Unknown.3, |
|
303 |
- Unknown.4=Unknown.4, |
|
304 |
- Unknown.5=Unknown.5) |
|
342 |
+ UnknownNames <- c("MostlyNull", "MostlyA", "Unknown.1", |
|
343 |
+ "Unknown.2", "Unknown.3", "Unknown.4", |
|
344 |
+ "Unknown.5", "Unknown.6", "Unknown.7") |
|
345 |
+ Unknowns <- allFields[intersect(names(allFields), UnknownNames)] |
|
305 | 346 |
|
306 |
- Quants <- cbind(Mean, SD, NBeads) |
|
347 |
+ Quants <- cbind(allFields$Mean, allFields$SD, allFields$NBeads) |
|
307 | 348 |
colnames(Quants) <- c("Mean", "SD", "NBeads") |
308 |
- rownames(Quants) <- as.character(IlluminaID) |
|
349 |
+ rownames(Quants) <- as.character(allFields$IlluminaID) |
|
309 | 350 |
|
351 |
+ InfoNames <- c("MidBlock", "RunInfo", "RedGreen", "Barcode", "ChipType") |
|
352 |
+ Info <- allFields[intersect(names(allFields), InfoNames)] |
|
353 |
+ |
|
310 | 354 |
idatValues <- |
311 | 355 |
list(fileSize=fileSize, |
312 | 356 |
versionNumber=versionNumber, |
313 | 357 |
nFields=nFields, |
314 | 358 |
fields=fields, |
315 | 359 |
nSNPsRead=nSNPsRead, |
316 |
- #IlluminaID=IlluminaID, |
|
317 |
- #SD=SD, |
|
318 |
- #Mean=Mean, |
|
319 |
- #NBeads=NBeads, |
|
320 |
- Quants=Quants, |
|
321 |
- MidBlock=MidBlock, |
|
322 |
- RunInfo=RunInfo, |
|
323 |
- RedGreen=RedGreen, |
|
324 |
- Barcode=Barcode, |
|
325 |
- ChipType=ChipType, |
|
326 |
- Unknowns=Unknowns) |
|
327 |
- |
|
360 |
+ Quants=Quants) |
|
361 |
+ idatValues <- c(idatValues, Info, list(Unknowns = Unknowns)) |
|
328 | 362 |
idatValues |
329 |
- |
|
330 | 363 |
} |
331 | 364 |
|
365 |
+ |
|
332 | 366 |
readBPM <- function(bpmFile){ |
333 | 367 |
|
334 | 368 |
## Reads and parses Illumina BPM files |