Browse code

Update to crlmm-illumina: by Matt Ritchie - brought from GitHub

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@58739 bc3139a8-67e5-0310-9ffc-ced21a209358

Benilton Carvalho authored on 04/10/2011 11:02:32
Showing 2 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.11.46
4
+Version: 1.11.47
5 5
 Date: 2010-12-10
6 6
 Author: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.edu.au>, Ingo Ruczinski <iruczins@jhsph.edu>, Rafael A Irizarry
7 7
 Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -151,6 +151,7 @@ readIdatFiles = function(sampleSheet=NULL,
151 151
 
152 152
 
153 153
 ## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
154
+## edits provided by Kasper Daniel Hansen, 4/10/2011
154 155
 readIDAT <- function(idatFile){
155 156
   fileSize <- file.info(idatFile)$size
156 157
 
... ...
@@ -160,14 +161,12 @@ readIDAT <- function(idatFile){
160 161
 
161 162
   }
162 163
 
163
-  versionNumber <- readBin(tempCon, "integer", n=1, size=8,
164
-                           endian="little")
164
+  versionNumber <- readBin(tempCon, "integer", n=1, size=8, endian="little")
165 165
 
166 166
   if(versionNumber<3)
167 167
 	  stop("Older style IDAT files not supported:  consider updating your scanner settings")
168 168
 
169
-  nFields <- readBin(tempCon, "integer", n=1, size=4,
170
-                     endian="little")
169
+  nFields <- readBin(tempCon, "integer", n=1, size=4, endian="little")
171 170
 
172 171
   fields <- matrix(0,nFields,3);
173 172
   colnames(fields) <- c("Field Code", "Byte Offset", "Bytes")
... ...
@@ -178,26 +177,26 @@ readIDAT <- function(idatFile){
178 177
       readBin(tempCon, "integer", n=1, size=8, endian="little")
179 178
   }
180 179
 
181
-  knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400,
182
-                  401, 402, 403, 404, 405, 406, 407, 408, 409)
183
-  names(knownCodes) <-
184
-    c("nSNPsRead",  # 1000
185
-      "IlluminaID", #  102
186
-      "SD",         #  103
187
-      "Mean",       #  104
188
-      "NBeads",     #  107
189
-      "MidBlock",   #  200
190
-      "RunInfo",    #  300
191
-      "RedGreen",   #  400
192
-      "MostlyNull", #  401
193
-      "Barcode",    #  402
194
-      "ChipType",   #  403
195
-      "MostlyA",    #  404
196
-      "Unknown.1",  #  405
197
-      "Unknown.2",  #  406
198
-      "Unknown.3",  #  407
199
-      "Unknown.4",  #  408
200
-      "Unknown.5"   #  409
180
+  knownCodes <-
181
+    c("nSNPsRead"  = 1000,
182
+      "IlluminaID" =  102,
183
+      "SD"         =  103,
184
+      "Mean"       =  104,
185
+      "NBeads"     =  107,
186
+      "MidBlock"   =  200,
187
+      "RunInfo"    =  300,
188
+      "RedGreen"   =  400,
189
+      "MostlyNull" =  401,
190
+      "Barcode"    =  402,
191
+      "ChipType"   =  403,
192
+      "MostlyA"    =  404,
193
+      "Unknown.1"  =  405,
194
+      "Unknown.2"  =  406,
195
+      "Unknown.3"  =  407,
196
+      "Unknown.4"  =  408,
197
+      "Unknown.5"  =  409,
198
+      "Unknown.6"  =  410,
199
+      "Unknown.7"  =  510
201 200
       )
202 201
 
203 202
   nNewFields <- 1
... ...
@@ -212,123 +211,158 @@ readIDAT <- function(idatFile){
212 211
     }
213 212
   }
214 213
 
215
-  seek(tempCon, fields["nSNPsRead", "Byte Offset"])
216
-  nSNPsRead <- readBin(tempCon, "integer", n=1, size=4,
217
-                       endian="little")
218
-
219
-  seek(tempCon, fields["IlluminaID", "Byte Offset"])
220
-  IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4,
221
-                       endian="little")
222
-
223
-  seek(tempCon, fields["SD", "Byte Offset"])
224
-  SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
225
-                endian="little", signed=FALSE)
226
-
227
-  seek(tempCon, fields["Mean", "Byte Offset"])
228
-  Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
229
-                  endian="little", signed=FALSE)
214
+  fields <- fields[order(fields[, "Byte Offset"]),]
230 215
 
231
-  seek(tempCon, fields["NBeads", "Byte Offset"])
232
-  NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE)
233
-
234
-  seek(tempCon, fields["MidBlock", "Byte Offset"])
235
-  nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4,
236
-                              endian="little")
237
-  MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4,
238
-                      endian="little")
239
-
240
-  seek(tempCon, fields["RunInfo", "Byte Offset"])
241
-  nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4,
242
-                            endian="little")
243
-  RunInfo <- matrix(NA, nRunInfoBlocks, 5)
244
-  colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars",
245
-                         "BlockCode", "CodeVersion")
246
-  for(i1 in 1:2) { #nRunInfoBlocks){  ## MR edit
247
-    for(i2 in 1:5){
248
-      nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
249
-      RunInfo[i1,i2] <- readChar(tempCon, nChars)
250
-    }
216
+  seek(tempCon, fields["nSNPsRead", "Byte Offset"])
217
+  nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, endian="little")
218
+
219
+  readBlock <- function(nam) {
220
+      switch(nam,
221
+             "IlluminaID" = {
222
+                 seek(tempCon, fields["IlluminaID", "Byte Offset"])
223
+                 IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, endian="little")
224
+                 IlluminaID
225
+             },
226
+             "SD" = {
227
+                 seek(tempCon, fields["SD", "Byte Offset"])
228
+                 SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, endian="little", signed=FALSE)
229
+                 SD
230
+             },
231
+             "Mean" = {
232
+                 seek(tempCon, fields["Mean", "Byte Offset"])
233
+                 Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, endian="little", signed=FALSE)
234
+                 Mean
235
+             },
236
+             "NBeads" = {
237
+                 seek(tempCon, fields["NBeads", "Byte Offset"])
238
+                 NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE)
239
+                 NBeads
240
+             },
241
+             "MidBlock" = {
242
+                 seek(tempCon, fields["MidBlock", "Byte Offset"])
243
+                 nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, endian="little")
244
+                 MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4,
245
+                                     endian="little")
246
+                 MidBlock
247
+             },
248
+             "RunInfo" = {
249
+                 seek(tempCon, fields["RunInfo", "Byte Offset"])
250
+                 nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, endian="little")
251
+                 RunInfo <- matrix(NA, nRunInfoBlocks, 5)
252
+                 colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars",
253
+                                        "BlockCode", "CodeVersion")
254
+                 for(i1 in 1:2) { #nRunInfoBlocks){  ## MR edit
255
+                     for(i2 in 1:5){
256
+                         nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
257
+                         RunInfo[i1,i2] <- readChar(tempCon, nChars)
258
+                     }
259
+                 }
260
+                 RunInfo
261
+             },
262
+             "RedGreen" = {
263
+                 seek(tempCon, fields["RedGreen", "Byte Offset"])
264
+                 RedGreen <- readBin(tempCon, "numeric", n=1, size=4,
265
+                                     endian="little")
266
+                 RedGreen
267
+             },
268
+             "MostlyNull" = {
269
+                 seek(tempCon, fields["MostlyNull", "Byte Offset"])
270
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
271
+                 MostlyNull <- readChar(tempCon, nChars)
272
+                 MostlyNull
273
+             },
274
+             "Barcode" = {                 
275
+                 seek(tempCon, fields["Barcode", "Byte Offset"])
276
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
277
+                 Barcode <- readChar(tempCon, nChars)
278
+                 Barcode
279
+             },
280
+             "ChipType" = {
281
+                 seek(tempCon, fields["ChipType", "Byte Offset"])
282
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
283
+                 ChipType <- readChar(tempCon, nChars)
284
+                 ChipType
285
+             },
286
+             "MostlyA" = {
287
+                 seek(tempCon, fields["MostlyA", "Byte Offset"])
288
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
289
+                 MostlyA <- readChar(tempCon, nChars)
290
+             },
291
+             "Unknown.1" = {
292
+                 seek(tempCon, fields["Unknown.1", "Byte Offset"])
293
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
294
+                 Unknown.1 <- readChar(tempCon, nChars)
295
+                 Unknown.1
296
+             },
297
+             "Unknown.2" = {
298
+                 seek(tempCon, fields["Unknown.2", "Byte Offset"])
299
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
300
+                 Unknown.2 <- readChar(tempCon, nChars)
301
+                 Unknown.2
302
+             },
303
+             "Unknown.3" = {
304
+                 seek(tempCon, fields["Unknown.3", "Byte Offset"])
305
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
306
+                 Unknown.3 <- readChar(tempCon, nChars)
307
+                 Unknown.3
308
+             },
309
+             "Unknown.4" = {
310
+                 seek(tempCon, fields["Unknown.4", "Byte Offset"])
311
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
312
+                 Unknown.4 <- readChar(tempCon, nChars)
313
+                 Unknown.4
314
+             },
315
+             "Unknown.5" = {
316
+                 seek(tempCon, fields["Unknown.5", "Byte Offset"])
317
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
318
+                 Unknown.5 <- readChar(tempCon, nChars)
319
+                 Unknown.5
320
+             },
321
+             "Unknown.6" = {
322
+                 seek(tempCon, fields["Unknown.6", "Byte Offset"])
323
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
324
+                 Unknown.6 <- readChar(tempCon, nChars)
325
+                 Unknown.6
326
+             },
327
+             "Unknown.7" = {
328
+                 seek(tempCon, fields["Unknown.7", "Byte Offset"])
329
+                 nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
330
+                 Unknown.7 <- readChar(tempCon, nChars)
331
+                 Unknown.7
332
+             })
251 333
   }
252 334
 
253
-  seek(tempCon, fields["RedGreen", "Byte Offset"])
254
-  RedGreen <- readBin(tempCon, "numeric", n=1, size=4,
255
-                      endian="little")
256
-  #RedGreen <- readBin(tempCon, "integer", n=4, size=1,
257
-  #                    endian="little")
258
-
259
-  seek(tempCon, fields["MostlyNull", "Byte Offset"])
260
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
261
-  MostlyNull <- readChar(tempCon, nChars)
262
-
263
-  seek(tempCon, fields["Barcode", "Byte Offset"])
264
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
265
-  Barcode <- readChar(tempCon, nChars)
266
-
267
-  seek(tempCon, fields["ChipType", "Byte Offset"])
268
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
269
-  ChipType <- readChar(tempCon, nChars)
270
-
271
-  seek(tempCon, fields["MostlyA", "Byte Offset"])
272
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
273
-  MostlyA <- readChar(tempCon, nChars)
274
-
275
-  seek(tempCon, fields["Unknown.1", "Byte Offset"])
276
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
277
-  Unknown.1 <- readChar(tempCon, nChars)
278
-
279
-  seek(tempCon, fields["Unknown.2", "Byte Offset"])
280
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
281
-  Unknown.2 <- readChar(tempCon, nChars)
282
-
283
-  seek(tempCon, fields["Unknown.3", "Byte Offset"])
284
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
285
-  Unknown.3 <- readChar(tempCon, nChars)
286
-
287
-  seek(tempCon, fields["Unknown.4", "Byte Offset"])
288
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
289
-  Unknown.4 <- readChar(tempCon, nChars)
290
-
291
-  seek(tempCon, fields["Unknown.5", "Byte Offset"])
292
-  nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
293
-  Unknown.5 <- readChar(tempCon, nChars)
335
+  readFields <- setdiff(rownames(fields), "nSNPsRead")
336
+  names(readFields) <- readFields
337
+  
338
+  allFields <- lapply(readFields, readBlock)
294 339
 
295 340
   close(tempCon)
296 341
 
297
-  Unknowns <-
298
-    list(MostlyNull=MostlyNull,
299
-         MostlyA=MostlyA,
300
-         Unknown.1=Unknown.1,
301
-         Unknown.2=Unknown.2,
302
-         Unknown.3=Unknown.3,
303
-         Unknown.4=Unknown.4,
304
-         Unknown.5=Unknown.5)
342
+  UnknownNames <- c("MostlyNull", "MostlyA", "Unknown.1",
343
+                    "Unknown.2", "Unknown.3", "Unknown.4",
344
+                    "Unknown.5", "Unknown.6", "Unknown.7")
345
+  Unknowns <- allFields[intersect(names(allFields), UnknownNames)]
305 346
 
306
-  Quants <- cbind(Mean, SD, NBeads)
347
+  Quants <- cbind(allFields$Mean, allFields$SD, allFields$NBeads)
307 348
   colnames(Quants) <- c("Mean", "SD", "NBeads")
308
-  rownames(Quants) <- as.character(IlluminaID)
349
+  rownames(Quants) <- as.character(allFields$IlluminaID)
309 350
 
351
+  InfoNames <- c("MidBlock", "RunInfo", "RedGreen", "Barcode", "ChipType")
352
+  Info <- allFields[intersect(names(allFields), InfoNames)]
353
+  
310 354
   idatValues <-
311 355
     list(fileSize=fileSize,
312 356
          versionNumber=versionNumber,
313 357
          nFields=nFields,
314 358
          fields=fields,
315 359
          nSNPsRead=nSNPsRead,
316
-         #IlluminaID=IlluminaID,
317
-         #SD=SD,
318
-         #Mean=Mean,
319
-         #NBeads=NBeads,
320
-         Quants=Quants,
321
-         MidBlock=MidBlock,
322
-         RunInfo=RunInfo,
323
-         RedGreen=RedGreen,
324
-         Barcode=Barcode,
325
-         ChipType=ChipType,
326
-         Unknowns=Unknowns)
327
-
360
+         Quants=Quants)
361
+  idatValues <- c(idatValues, Info, list(Unknowns = Unknowns))
328 362
   idatValues
329
-
330 363
 }
331 364
 
365
+
332 366
 readBPM <- function(bpmFile){
333 367
 
334 368
   ## Reads and parses Illumina BPM files