Browse code

Removed tryCatch() statements in readIdatFiles. Restored previous implementation.

Added statements to illumina_copynumber.Rnw that make use of checkExists function.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@49126 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 30/08/2010 19:40:19
Showing 4 changed files

... ...
@@ -2,4 +2,24 @@ setOldClass("ellipse")
2 2
 setOldClass("ff_matrix")
3 3
 setOldClass("ffdf")
4 4
 setClassUnion("ff_or_matrix", c("ff_matrix", "matrix", "ffdf"))
5
+<<<<<<< HEAD
5 6
 
7
+=======
8
+setClass("CNSetLM", contains="CNSet", representation(lM="list_or_ffdf"))
9
+setMethod("initialize", "CNSetLM", function(.Object, lM=new("list"), ...){
10
+	.Object@lM <- lM
11
+	.Object <- callNextMethod(.Object, ...)
12
+})
13
+setValidity("CNSetLM",
14
+	    function(object){
15
+		    if(!"batch" %in% varLabels(protocolData(object)))
16
+			    return("'batch' not defined in protocolData")
17
+		    if(!"chromosome" %in% fvarLabels(object))
18
+			    return("'chromosome' not defined in featureData")
19
+		    if(!"position" %in% fvarLabels(object))
20
+			    return("'position' not defined in featureData")
21
+		    if(!"isSnp" %in% fvarLabels(object))
22
+			    return("'isSnp' not defined in featureData")
23
+		    return(TRUE)
24
+	    })
25
+>>>>>>> Removed tryCatch() statements in readIdatFiles. Restored previous implementation.
... ...
@@ -101,3 +101,4 @@ setGeneric("tau2B<-", function(object, value) standardGeneric("tau2B<-"))
101 101
 setGeneric("flags<-", function(object, value) standardGeneric("flags<-"))
102 102
 
103 103
 
104
+
... ...
@@ -12,7 +12,7 @@ readIdatFiles <- function(sampleSheet=NULL,
12 12
 			  sep="_",
13 13
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
14 14
 			  saveDate=FALSE) {
15
-			  
15
+
16 16
        if(!is.null(arrayNames)) {
17 17
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
18 18
        }
... ...
@@ -22,7 +22,7 @@ readIdatFiles <- function(sampleSheet=NULL,
22 22
 			       barcode = sampleSheet[,arrayInfoColNames$barcode]
23 23
 			       arrayNames=barcode
24 24
 		       }
25
-		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
25
+		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
26 26
 			       position = sampleSheet[,arrayInfoColNames$position]
27 27
 			       if(is.null(arrayNames))
28 28
 				       arrayNames=position
... ...
@@ -36,7 +36,7 @@ readIdatFiles <- function(sampleSheet=NULL,
36 36
 		       }
37 37
 	       }
38 38
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
39
-	       sampleNames(pd) <- basename(arrayNames)               
39
+	       sampleNames(pd) <- basename(arrayNames)
40 40
        }
41 41
        if(is.null(arrayNames)) {
42 42
                arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
... ...
@@ -46,7 +46,7 @@ readIdatFiles <- function(sampleSheet=NULL,
46 46
                }
47 47
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
48 48
        }
49
-       
49
+
50 50
        narrays = length(arrayNames)
51 51
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
52 52
        redfiles = paste(arrayNames, fileExt$red, sep=sep)
... ...
@@ -63,7 +63,7 @@ readIdatFiles <- function(sampleSheet=NULL,
63 63
 	       redidats <- redfiles
64 64
        }
65 65
        if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
66
-       if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")       
66
+       if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
67 67
        headerInfo = list(nProbes = rep(NA, narrays),
68 68
                          Barcode = rep(NA, narrays),
69 69
                          ChipType = rep(NA, narrays),
... ...
@@ -119,12 +119,12 @@ readIdatFiles <- function(sampleSheet=NULL,
119 119
 	       }
120 120
 	       rm(G)
121 121
 	       gc()
122
-         
122
+
123 123
 	       cat(paste(sep, fileExt$red, sep=""), "\n")
124 124
 	       R = readIDAT(redidats[i])
125 125
 	       idsR = rownames(R$Quants)
126
-         
127
-	       if(length(ids)==length(idsG)) {   
126
+
127
+	       if(length(ids)==length(idsG)) {
128 128
 		       if(sum(ids==idsR)==nprobes) {
129 129
 			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
130 130
 		           zeroR = R$Quants[ ,"NBeads"]==0
... ...
@@ -145,7 +145,7 @@ readIdatFiles <- function(sampleSheet=NULL,
145 145
 #       if(class(RG@assayData$R)[1]=="ff_matrix") {
146 146
          close(RG@assayData$R)
147 147
          close(RG@assayData$G)
148
-         close(RG@assayData$zero)         
148
+         close(RG@assayData$zero)
149 149
 #       }
150 150
        RG
151 151
 }
... ...
@@ -161,27 +161,27 @@ readIDAT <- function(idatFile){
161 161
 
162 162
   }
163 163
 
164
-  versionNumber <- readBin(tempCon, "integer", n=1, size=8, 
164
+  versionNumber <- readBin(tempCon, "integer", n=1, size=8,
165 165
                            endian="little", signed=FALSE)
166 166
 
167 167
   if(versionNumber<3)
168 168
 	  stop("Older style IDAT files not supported:  consider updating your scanner settings")
169
-  
170
-  nFields <- readBin(tempCon, "integer", n=1, size=4, 
169
+
170
+  nFields <- readBin(tempCon, "integer", n=1, size=4,
171 171
                      endian="little", signed=FALSE)
172 172
 
173 173
   fields <- matrix(0,nFields,3);
174 174
   colnames(fields) <- c("Field Code", "Byte Offset", "Bytes")
175 175
   for(i1 in 1:nFields){
176
-    fields[i1,"Field Code"] <- 
176
+    fields[i1,"Field Code"] <-
177 177
       readBin(tempCon, "integer", n=1, size=2, endian="little", signed=FALSE)
178
-    fields[i1,"Byte Offset"] <- 
178
+    fields[i1,"Byte Offset"] <-
179 179
       readBin(tempCon, "integer", n=1, size=8, endian="little", signed=FALSE)
180 180
   }
181 181
 
182 182
   knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400,
183 183
                   401, 402, 403, 404, 405, 406, 407, 408, 409)
184
-  names(knownCodes) <- 
184
+  names(knownCodes) <-
185 185
     c("nSNPsRead",  # 1000
186 186
       "IlluminaID", #  102
187 187
       "SD",         #  103
... ...
@@ -214,35 +214,35 @@ readIDAT <- function(idatFile){
214 214
   }
215 215
 
216 216
   seek(tempCon, fields["nSNPsRead", "Byte Offset"])
217
-  nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, 
217
+  nSNPsRead <- readBin(tempCon, "integer", n=1, size=4,
218 218
                        endian="little", signed=FALSE)
219 219
 
220 220
   seek(tempCon, fields["IlluminaID", "Byte Offset"])
221
-  IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, 
221
+  IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4,
222 222
                        endian="little", signed=FALSE)
223 223
 
224 224
   seek(tempCon, fields["SD", "Byte Offset"])
225
-  SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, 
225
+  SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
226 226
                 endian="little", signed=FALSE)
227 227
 
228 228
   seek(tempCon, fields["Mean", "Byte Offset"])
229
-  Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, 
229
+  Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
230 230
                   endian="little", signed=FALSE)
231 231
 
232 232
   seek(tempCon, fields["NBeads", "Byte Offset"])
233 233
   NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE)
234 234
 
235 235
   seek(tempCon, fields["MidBlock", "Byte Offset"])
236
-  nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, 
236
+  nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4,
237 237
                               endian="little", signed=FALSE)
238
-  MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, 
238
+  MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4,
239 239
                       endian="little", signed=FALSE)
240 240
 
241 241
   seek(tempCon, fields["RunInfo", "Byte Offset"])
242
-  nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, 
242
+  nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4,
243 243
                             endian="little", signed=FALSE)
244 244
   RunInfo <- matrix(NA, nRunInfoBlocks, 5)
245
-  colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", 
245
+  colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars",
246 246
                          "BlockCode", "CodeVersion")
247 247
   for(i1 in 1:2) { #nRunInfoBlocks){  ## MR edit
248 248
     for(i2 in 1:5){
... ...
@@ -252,50 +252,50 @@ readIDAT <- function(idatFile){
252 252
   }
253 253
 
254 254
   seek(tempCon, fields["RedGreen", "Byte Offset"])
255
-  RedGreen <- readBin(tempCon, "numeric", n=1, size=4, 
255
+  RedGreen <- readBin(tempCon, "numeric", n=1, size=4,
256 256
                       endian="little", signed=FALSE)
257
-  #RedGreen <- readBin(tempCon, "integer", n=4, size=1, 
257
+  #RedGreen <- readBin(tempCon, "integer", n=4, size=1,
258 258
   #                    endian="little", signed=FALSE)
259 259
 
260 260
   seek(tempCon, fields["MostlyNull", "Byte Offset"])
261 261
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
262
-  MostlyNull <- readChar(tempCon, nChars) 
262
+  MostlyNull <- readChar(tempCon, nChars)
263 263
 
264 264
   seek(tempCon, fields["Barcode", "Byte Offset"])
265 265
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
266
-  Barcode <- readChar(tempCon, nChars) 
266
+  Barcode <- readChar(tempCon, nChars)
267 267
 
268 268
   seek(tempCon, fields["ChipType", "Byte Offset"])
269 269
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
270
-  ChipType <- readChar(tempCon, nChars) 
270
+  ChipType <- readChar(tempCon, nChars)
271 271
 
272 272
   seek(tempCon, fields["MostlyA", "Byte Offset"])
273 273
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
274
-  MostlyA <- readChar(tempCon, nChars) 
274
+  MostlyA <- readChar(tempCon, nChars)
275 275
 
276 276
   seek(tempCon, fields["Unknown.1", "Byte Offset"])
277 277
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
278
-  Unknown.1 <- readChar(tempCon, nChars) 
278
+  Unknown.1 <- readChar(tempCon, nChars)
279 279
 
280 280
   seek(tempCon, fields["Unknown.2", "Byte Offset"])
281 281
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
282
-  Unknown.2 <- readChar(tempCon, nChars) 
282
+  Unknown.2 <- readChar(tempCon, nChars)
283 283
 
284 284
   seek(tempCon, fields["Unknown.3", "Byte Offset"])
285 285
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
286
-  Unknown.3 <- readChar(tempCon, nChars) 
286
+  Unknown.3 <- readChar(tempCon, nChars)
287 287
 
288 288
   seek(tempCon, fields["Unknown.4", "Byte Offset"])
289 289
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
290
-  Unknown.4 <- readChar(tempCon, nChars) 
290
+  Unknown.4 <- readChar(tempCon, nChars)
291 291
 
292 292
   seek(tempCon, fields["Unknown.5", "Byte Offset"])
293 293
   nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
294
-  Unknown.5 <- readChar(tempCon, nChars) 
294
+  Unknown.5 <- readChar(tempCon, nChars)
295 295
 
296 296
   close(tempCon)
297 297
 
298
-  Unknowns <- 
298
+  Unknowns <-
299 299
     list(MostlyNull=MostlyNull,
300 300
          MostlyA=MostlyA,
301 301
          Unknown.1=Unknown.1,
... ...
@@ -308,21 +308,21 @@ readIDAT <- function(idatFile){
308 308
   colnames(Quants) <- c("Mean", "SD", "NBeads")
309 309
   rownames(Quants) <- as.character(IlluminaID)
310 310
 
311
-  idatValues <- 
312
-    list(fileSize=fileSize, 
313
-         versionNumber=versionNumber, 
314
-         nFields=nFields, 
311
+  idatValues <-
312
+    list(fileSize=fileSize,
313
+         versionNumber=versionNumber,
314
+         nFields=nFields,
315 315
          fields=fields,
316
-         nSNPsRead=nSNPsRead, 
317
-         #IlluminaID=IlluminaID, 
318
-         #SD=SD, 
319
-         #Mean=Mean, 
316
+         nSNPsRead=nSNPsRead,
317
+         #IlluminaID=IlluminaID,
318
+         #SD=SD,
319
+         #Mean=Mean,
320 320
          #NBeads=NBeads,
321 321
          Quants=Quants,
322
-         MidBlock=MidBlock, 
323
-         RunInfo=RunInfo, 
324
-         RedGreen=RedGreen, 
325
-         Barcode=Barcode, 
322
+         MidBlock=MidBlock,
323
+         RunInfo=RunInfo,
324
+         RedGreen=RedGreen,
325
+         Barcode=Barcode,
326 326
          ChipType=ChipType,
327 327
          Unknowns=Unknowns)
328 328
 
... ...
@@ -333,20 +333,20 @@ readIDAT <- function(idatFile){
333 333
 readBPM <- function(bpmFile){
334 334
 
335 335
   ## Reads and parses Illumina BPM files
336
-  
336
+
337 337
   fileSize <- file.info(bpmFile)$size
338 338
 
339 339
   tempCon <- file(bpmFile,"rb")
340 340
 
341 341
   # The first few bytes of the egtFile are some type of
342
-  # header, but there's no related byte offset information. 
342
+  # header, but there's no related byte offset information.
343 343
 
344 344
   prefixCheck <- readChar(tempCon,3) ## should be "BPM"
345 345
 
346 346
   null.1 <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
347
-  ## should be 1  
348
-  
349
-  versionNumber <- 
347
+  ## should be 1
348
+
349
+  versionNumber <-
350 350
     readBin(tempCon, "integer", n=1, size=4, endian="little", signed=FALSE)
351 351
   ## should be 4
352 352
 
... ...
@@ -360,14 +360,14 @@ readBPM <- function(bpmFile){
360 360
   entriesByteOffset <- seek(tempCon);
361 361
   nEntries <- readBin(tempCon, "integer", n=1, size=4,
362 362
                       endian="little", signed=FALSE)
363
-    
363
+
364 364
   if(FALSE){
365
-    
365
+
366 366
     snpIndexByteOffset <- seek(tempCon)
367 367
     snpIndex <- readBin(tempCon, "integer", n=nEntries, size=4,
368 368
                         endian="little", signed=FALSE)
369 369
     ## for the 1M array, these are simply in order from 1 to 1072820.
370
-    
370
+
371 371
     snpNamesByteOffset <- seek(tempCon)
372 372
     snpNames <- rep("A", nEntries)
373 373
     for(i1 in 1:nEntries){
... ...
@@ -378,21 +378,21 @@ readBPM <- function(bpmFile){
378 378
   }
379 379
 
380 380
   seek(tempCon, 15278138)
381
-    
381
+
382 382
   normIDByteOffset <- seek(tempCon)
383 383
   normID <- readBin(tempCon, "integer", n=nEntries, size=1, signed=FALSE) + 1
384
-  
384
+
385 385
   newBlockByteOffset <- seek(tempCon)
386 386
   newBlock <- readBin(tempCon, "integer", n=10000, size=1, signed=FALSE)
387
-  
387
+
388 388
   close(tempCon)
389 389
 
390 390
   byteOffsets <- list(entriesByteOffset=entriesByteOffset,
391
-                      #snpIndexByteOffset=snpIndexByteOffset, 
391
+                      #snpIndexByteOffset=snpIndexByteOffset,
392 392
                       #snpNamesByteOffset=snpNamesByteOffset,
393 393
                       normIDByteOffset=normIDByteOffset,
394 394
                       newBlockByteOffset=newBlockByteOffset)
395
-  
395
+
396 396
   allStuff <- list(prefixCheck=prefixCheck,
397 397
                    null.1=null.1,
398 398
                    versionNumber=versionNumber,
... ...
@@ -406,7 +406,7 @@ readBPM <- function(bpmFile){
406 406
                    newBlock=newBlock,
407 407
                    byteOffsets=byteOffsets)
408 408
   allStuff
409
-  
409
+
410 410
 }
411 411
 
412 412
 
... ...
@@ -424,7 +424,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
424 424
   if(missing(chipType)){
425 425
 	  chipType = match.arg(annotation(RG), chipList)
426 426
   } else chipType = match.arg(chipType, chipList)
427
-  
427
+
428 428
   pkgname <- getCrlmmAnnotationName(chipType)
429 429
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
430 430
      suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
... ...
@@ -439,21 +439,21 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
439 439
   bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
440 440
   ids <- names(aids)
441 441
   snpbase <- getVarInEnv("base")
442
-  
442
+
443 443
   nsnps = length(aids)
444 444
   narrays = ncol(RG)
445
-  
445
+
446 446
 #  aidcol = match("AddressA_ID", colnames(annot))
447 447
 #  if(is.na(aidcol))
448 448
 #    aidcol = match("Address", colnames(annot))
449 449
 #  bidcol = match("AddressB_ID", colnames(annot))
450
-#  if(is.na(bidcol)) 
450
+#  if(is.na(bidcol))
451 451
 #    bidcol = match("Address2", colnames(annot))
452 452
 #  aids = annot[, aidcol]
453 453
 #  bids = annot[, bidcol]
454 454
 #  snpids = annot[,"Name"]
455
-#  snpbase = annot[,"SNP"] 
456
-  infI = !is.na(bids) & bids!=0  
455
+#  snpbase = annot[,"SNP"]
456
+  infI = !is.na(bids) & bids!=0
457 457
   aord = match(aids, featureNames(RG)) # NAs are possible here
458 458
   bord = match(bids, featureNames(RG)) # and here
459 459
 #  argrg = aids[rrgg]
... ...
@@ -463,7 +463,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
463 463
 	     X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
464 464
 	     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
465 465
 	     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
466
-	     annotation=chipType, phenoData=RG@phenoData, 
466
+	     annotation=chipType, phenoData=RG@phenoData,
467 467
 	     protocolData=RG@protocolData, storage.mode="environment")
468 468
   featureNames(XY) = ids # featureNames(RG)
469 469
   gc()
... ...
@@ -471,39 +471,39 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
471 471
   XY@assayData$X[1:nsnps,] = 0
472 472
   XY@assayData$Y[1:nsnps,] = 0
473 473
   XY@assayData$zero[1:nsnps,] = 0
474
-  
474
+
475 475
   open(RG@assayData$R)
476 476
   open(RG@assayData$G)
477 477
   open(RG@assayData$zero)
478
-  
478
+
479 479
   # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
480 480
   XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
481 481
   XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
482 482
   XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
483 483
   gc()
484
-  
484
+
485 485
   close(RG@assayData$R)
486 486
   close(RG@assayData$G)
487 487
   close(RG@assayData$zero)
488
-  
488
+
489 489
   ## Warning - not 100% sure that the code below is correct - could be more complicated than this
490
-  
490
+
491 491
   # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
492 492
 #  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
493
-  
493
+
494 494
 #  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
495 495
 #  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
496
-  
496
+
497 497
   # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
498 498
 #  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
499 499
 
500 500
 #  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
501 501
 #  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
502
-    
502
+
503 503
   #  For now zero out Infinium I probes
504 504
   XY@assayData$X[infI,] = 0
505 505
   XY@assayData$Y[infI,] = 0
506
-  XY@assayData$zero[infI,] = 0  
506
+  XY@assayData$zero[infI,] = 0
507 507
   gc()
508 508
 
509 509
 #  if(class(XY@assayData$X)[1]=="ff_matrix") {
... ...
@@ -511,7 +511,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
511 511
     close(XY@assayData$Y)
512 512
     close(XY@assayData$zero)
513 513
 #  }
514
-  
514
+
515 515
 #  storageMode(XY) = "lockedEnvironment"
516 516
   XY
517 517
 }
... ...
@@ -530,15 +530,15 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
530 530
   loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
531 531
 
532 532
   stripnum <- getVarInEnv("stripnum")
533
-  
533
+
534 534
   if(useTarget)
535 535
     targetdist = getVarInEnv("reference")
536
-  
536
+
537 537
   if(verbose){
538 538
     message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
539 539
     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
540 540
   }
541
-  
541
+
542 542
 #  if(class(XY@assayData$X)[1]=="ff_matrix") {
543 543
     open(XY@assayData$X)
544 544
     open(XY@assayData$Y)
... ...
@@ -566,7 +566,7 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
566 566
     close(XY@assayData$X)
567 567
     close(XY@assayData$Y)
568 568
 #  }
569
-  
569
+
570 570
   if(verbose)
571 571
     cat("\n")
572 572
   XY
... ...
@@ -609,7 +609,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
609 609
   SMEDIAN <- getVarInEnv("SMEDIAN")
610 610
   theKnots <- getVarInEnv("theKnots")
611 611
   narrays = ncol(XY)
612
-  
612
+
613 613
   if(save.it & !missing(cnFile)) {
614 614
     # separate out copy number probes
615 615
     npIndex = getVarInEnv("npProbesFid")
... ...
@@ -618,7 +618,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
618 618
       open(XY@assayData$X)
619 619
       open(XY@assayData$Y)
620 620
       open(XY@assayData$zero)
621
-      
621
+
622 622
       A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
623 623
       B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
624 624
 
... ...
@@ -631,26 +631,26 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
631 631
 
632 632
       colnames(A) <- colnames(B) <- colnames(zero) <- sns
633 633
       rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
634
-  
634
+
635 635
       cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
636
-    
637
-      t0 <- proc.time() 
638
-      save(cnAB, file=cnFile) 
636
+
637
+      t0 <- proc.time()
638
+      save(cnAB, file=cnFile)
639 639
       t0 <- proc.time()-t0
640 640
       if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
641 641
        rm(cnAB, B, zero)
642 642
     }
643 643
   }
644
-  
644
+
645 645
   # next process snp probes
646 646
   snpIndex = getVarInEnv("snpProbesFid")
647 647
   nprobes <- length(snpIndex)
648
-  
648
+
649 649
   ##We will read each cel file, summarize, and run EM one by one
650 650
   ##We will save parameters of EM to use later
651 651
   mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
652 652
   SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
653
-  SKW <- initializeBigVector("crlmmSKW-", narrays, "double") 
653
+  SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
654 654
 
655 655
   ## This is the sample for the fitting of splines
656 656
   ## BC: I like better the idea of the user passing the seed,
... ...
@@ -659,7 +659,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
659 659
   set.seed(seed)
660 660
   idx <- sort(sample(autosomeIndex, mixtureSampleSize))
661 661
   idx2 <- sample(nprobes, 10^5)
662
-  
662
+
663 663
   ##S will hold (A+B)/2 and M will hold A-B
664 664
   ##NOTE: We actually dont need to save S. Only for pics etc...
665 665
   ##f is the correction. we save to avoid recomputing
... ...
@@ -667,7 +667,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
667 667
   A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
668 668
   B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
669 669
   zero <- initializeBigMatrix("crlmmZero-", nprobes, narrays, "integer")
670
-  
670
+
671 671
   if(verbose){
672 672
      message("Calibrating ", narrays, " arrays.")
673 673
      if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
... ...
@@ -676,11 +676,11 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
676 676
   open(XY@assayData$X)
677 677
   open(XY@assayData$Y)
678 678
   open(XY@assayData$zero)
679
-  
679
+
680 680
   for(i in 1:narrays){
681 681
      A[,i] = exprs(channel(XY, "X"))[snpIndex,i]
682 682
      B[,i] = exprs(channel(XY, "Y"))[snpIndex,i]
683
-     zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i] 
683
+     zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i]
684 684
 #    SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
685 685
      SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
686 686
     if(fitMixture){
... ...
@@ -707,8 +707,8 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
707 707
 
708 708
   close(XY@assayData$X)
709 709
   close(XY@assayData$Y)
710
-  close(XY@assayData$zero)  
711
-  
710
+  close(XY@assayData$zero)
711
+
712 712
 #  if(class(A)[1]=="ff_matrix") {
713 713
     close(A)
714 714
     close(B)
... ...
@@ -716,8 +716,8 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
716 716
     close(SKW)
717 717
     close(mixtureParams)
718 718
     close(SNR)
719
-#  }  
720
-  
719
+#  }
720
+
721 721
   res = list(A=A, B=B,
722 722
              zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
723 723
              mixtureParams=mixtureParams, cdfName=cdfName)
... ...
@@ -726,11 +726,11 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
726 726
   open(res[["B"]])
727 727
   open(res[["zero"]])
728 728
   open(res[["SNR"]])
729
-  open(res[["mixtureParams"]])  
730
-  
729
+  open(res[["mixtureParams"]])
730
+
731 731
   if(save.it & !missing(snpFile)) {
732
-    t0 <- proc.time() 
733
-    save(res, file=snpFile) 
732
+    t0 <- proc.time()
733
+    save(res, file=snpFile)
734 734
     t0 <- proc.time()-t0
735 735
     if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
736 736
   }
... ...
@@ -770,7 +770,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
770 770
         stop("Both RG and XY specified - please use one or the other")
771 771
     }
772 772
     if (missing(sns)) sns <- sampleNames(XY) #$X
773
-    
773
+
774 774
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
775 775
                         seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
776 776
                         save.it=save.it, snpFile=snpFile, cnFile=cnFile)
... ...
@@ -786,13 +786,13 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
786 786
 #                    alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
787 787
 #                    call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
788 788
 #                    callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
789
-#  	            annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD) 
789
+#  	            annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
790 790
 #    sampleNames(callSet) <- sns
791 791
 #    featureNames(callSet) <- res[["gns"]]
792 792
 #    pData(callSet)$SKW <- rep(NA, length(sns))
793 793
 #    pData(callSet)$SNR <- rep(NA, length(sns))
794 794
 #    pData(callSet)$gender <- rep(NA, length(sns))
795
-					  
795
+
796 796
   }else{
797 797
       if(verbose) message("Loading ", snpFile, ".")
798 798
         obj <- load(snpFile)
... ...
@@ -808,7 +808,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
808 808
 
809 809
 #    rm(phenD, protD , fD)
810 810
 
811
-#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))                
811
+#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
812 812
 #    suppressWarnings(A(callSet) <- res[["A"]])
813 813
 #    suppressWarnings(B(callSet) <- res[["B"]])
814 814
 #    pData(callSet)$SKW <- res$SKW
... ...
@@ -817,7 +817,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
817 817
 #    rm(res); gc()
818 818
   if(row.names) row.names=res$gns else row.names=NULL
819 819
   if(col.names) col.names=res$sns else col.names=NULL
820
-  
820
+
821 821
   res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
822 822
                   B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
823 823
                   SNR=res[["SNR"]], # callSet$SNR, # [j],
... ...
@@ -834,7 +834,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
834 834
                   verbose=verbose,
835 835
                   returnParams=returnParams,
836 836
                   badSNP=badSNP)
837
-#    rm(res); gc()  
837
+#    rm(res); gc()
838 838
 #    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
839 839
 #    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
840 840
 #    callSet$gender[j] <- tmp$gender
... ...
@@ -851,8 +851,8 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
851 851
 }
852 852
 
853 853
 
854
-## MR: Below is a more memory efficient version of crlmmIllumina() which 
855
-## reads in the .idats and genotypes in the one function and removes objects 
854
+## MR: Below is a more memory efficient version of crlmmIllumina() which
855
+## reads in the .idats and genotypes in the one function and removes objects
856 856
 ## after they have been used
857 857
 crlmmIlluminaV2 = function(sampleSheet=NULL,
858 858
 			  arrayNames=NULL,
... ...
@@ -867,7 +867,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
867 867
 #                          rgFile,
868 868
 			  stripNorm=TRUE,
869 869
 			  useTarget=TRUE,
870
-			  row.names=TRUE, 
870
+			  row.names=TRUE,
871 871
 			  col.names=TRUE,
872 872
 			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
873 873
                           seed=1, save.it=FALSE, snpFile, cnFile,
... ...
@@ -878,7 +878,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
878 878
   if(missing(cdfName)) stop("must specify cdfName")
879 879
   if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
880 880
 #  if(missing(sns)) sns <- basename(arrayNames)
881
-			  
881
+
882 882
 #  if (save.rg & missing(rgFile))
883 883
 #    stop("'rgFile' is missing, and you chose save.rg")
884 884
   if (save.it & (missing(snpFile) | missing(cnFile)))
... ...
@@ -914,7 +914,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
914 914
     open(res[["B"]])
915 915
     open(res[["SNR"]])
916 916
     open(res[["mixtureParams"]])
917
-						   
917
+
918 918
 #    fD = featureData(XY)
919 919
 #    phenD = XY@phenoData
920 920
 #    protD = XY@protocolData
... ...
@@ -938,7 +938,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
938 938
 #    featureNames(callSet) <- res[["gns"]]
939 939
 #    pData(callSet)$SKW <- rep(NA, length(sns))
940 940
 #    pData(callSet)$SNR <- rep(NA, length(sns))
941
-#    pData(callSet)$gender <- rep(NA, length(sns))			
941
+#    pData(callSet)$gender <- rep(NA, length(sns))
942 942
 #	}
943 943
 #	pData(callSet)[j,] <- phenD
944 944
 #	pData(protocolData(callSet))[j,] <- protD
... ...
@@ -946,7 +946,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
946 946
 #	pData(protocolData(callSet)) <- protD
947 947
 
948 948
 #    rm(phenD, protD, fD)
949
-	
949
+
950 950
 #    if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
951 951
         ##RS: I don't understand why the IDATS for the
952 952
         ##same platform potentially have different lengths
... ...
@@ -954,7 +954,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
954 954
 #        res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
955 955
 #    }
956 956
 
957
-#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))                
957
+#    snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
958 958
 #    suppressWarnings(A(callSet)[, j] <- res[["A"]])
959 959
 #    suppressWarnings(B(callSet)[, j] <- res[["B"]])
960 960
 #    suppressWarnings(A(callSet) <- res[["A"]])
... ...
@@ -966,7 +966,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
966 966
 #    mixtureParams <- res$mixtureParams
967 967
 #    rm(res); gc()
968 968
   if(row.names) row.names=res$gns else row.names=NULL
969
-  if(col.names) col.names=res$sns else col.names=NULL    
969
+  if(col.names) col.names=res$sns else col.names=NULL
970 970
   res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
971 971
                   B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
972 972
                   SNR=res[["SNR"]], # callSet$SNR, # [j],
... ...
@@ -983,7 +983,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
983 983
                   verbose=verbose,
984 984
                   returnParams=returnParams,
985 985
                   badSNP=badSNP)
986
-#    rm(res); gc()  
986
+#    rm(res); gc()
987 987
 #    suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
988 988
 #    suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
989 989
 #    callSet$gender[j] <- tmp$gender
... ...
@@ -18,9 +18,9 @@
18 18
 
19 19
 \begin{document}
20 20
 \title{Using \crlmm{} for copy number estimation and genotype calling
21
-  with Illumina platforms} 
21
+  with Illumina platforms}
22 22
 
23
-\date{\today} 
23
+\date{\today}
24 24
 
25 25
 \author{Rob Scharpf}
26 26
 \maketitle
... ...
@@ -40,10 +40,11 @@ which annotation packages are currently available. Next we create a
40 40
 directory where output files will be stored and indicate the directory
41 41
 that contains the IDAT files that will be used in our analysis.
42 42
 
43
+
43 44
 <<setup, echo=FALSE, results=hide>>=
44 45
 options(width=70)
45 46
 options(continue=" ")
46
-@ 
47
+@
47 48
 
48 49
 <<libraries>>=
49 50
 library(ff)
... ...
@@ -52,9 +53,9 @@ crlmm:::validCdfNames()
52 53
 if(getRversion() < "2.12.0"){
53 54
 	rpath <- getRversion()
54 55
 } else rpath <- "trunk"
55
-outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/illumina_vignette", sep="")	
56
+outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/illumina_vignette", sep="")
56 57
 datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k"
57
-@ 
58
+@
58 59
 
59 60
 \paragraph{About this vignette.} This vignette was created using
60 61
 Illumina IDAT files that are located in a specific directory on my
... ...
@@ -66,7 +67,7 @@ these directories exist.
66 67
 <<checkSetup>>=
67 68
 if(!file.exists(outdir)) stop("Please specify valid directory for storing output")
68 69
 if(!file.exists(pathToCels)) stop("Please specify the correct path to the CEL files")
69
-@ 
70
+@
70 71
 
71 72
 \paragraph{Preprocessing Illumina IDAT files.}
72 73
 To perform copy number analysis on the Illumina platform, several
... ...
@@ -95,50 +96,51 @@ from the RG object and added to the \Robject{SnpSet} returned by
95 96
 %(Users wanting the \Rpackage{ff} implementation should try
96 97
 %\Rfunction{crlmm:::crlmmIllumina2} with the same arguments.)
97 98
 
99
+
98 100
 <<samplesToProcess>>=
99 101
 samplesheet = read.csv(file.path(datadir, "HumanHap370Duo_Sample_Map.csv"), header=TRUE, as.is=TRUE)
100 102
 samplesheet <- samplesheet[-c(28:46,61:75,78:79), ]
101 103
 arrayNames <- file.path(datadir, unique(samplesheet[, "SentrixPosition"]))
102 104
 grnfiles = all(file.exists(paste(arrayNames, "_Grn.idat", sep="")))
103 105
 redfiles = all(file.exists(paste(arrayNames, "_Red.idat", sep="")))
104
-@ 
106
+@
105 107
 
106 108
 <<samplesToProcess2, echo=FALSE>>=
107
-## To speed up repeated calls to Sweave 
109
+## To speed up repeated calls to Sweave
108 110
 RG <- checkExists("RG", .path=outdir,
109 111
 		  .FUN=readIdatFiles2,
110 112
 		  sampleSheet=samplesheet,
111 113
 		  path=dirname(arrayNames[1]),
112
-		  arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), 
114
+		  arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"),
113 115
 		  saveDate=TRUE)
114 116
 annotation(RG) <- "human370v1c"
115 117
 crlmmResult <- checkExists("crlmmResult",
116 118
 			   .path=outdir,
117 119
 			   .FUN=crlmmIllumina2,
118 120
 			   RG=RG,
119
-			   sns=pData(RG)$ID, 
121
+			   sns=pData(RG)$ID,
120 122
 			   returnParams=TRUE,
121 123
 			   cnFile=file.path(outdir, "cnFile.rda"),
122 124
 			   snpFile=file.path(outdir, "snpFile.rda"),
123 125
 			   save.it=TRUE)
124 126
 protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate
125
-@ 
127
+@
126 128
 
127 129
 <<samplesToProcess3, eval=FALSE>>=
128
-RG <- readIdatFiles2(samplesheet[1:10, ], 
129
-		    path=dirname(arrayNames[1]), 
130
-		    arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), 
130
+RG <- readIdatFiles2(samplesheet[1:10, ],
131
+		    path=dirname(arrayNames[1]),
132
+		    arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"),
131 133
 		    saveDate=TRUE)
132 134
 annotation(RG) <- "human370v1c"
133
-crlmmResult <- crlmmIllumina2(RG=RG, 
134
-			     cdfName="human370v1c", 
135
-			     sns=pData(RG)$ID, 
135
+crlmmResult <- crlmmIllumina2(RG=RG,
136
+			     cdfName="human370v1c",
137
+			     sns=pData(RG)$ID,
136 138
 			     returnParams=TRUE,
137 139
 			     cnFile=file.path(outdir, "cnFile.rda"),
138 140
 			     snpFile=file.path(outdir, "snpFile.rda"),
139 141
 			     save.it=TRUE)
140 142
 protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate
141
-@ 
143
+@
142 144
 
143 145
 %\noindent Finally, we load a few of the intermediate files that were
144 146
 %created during the preprocessing and genotyping.
... ...
@@ -147,13 +149,13 @@ protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate
147 149
 %res <- get("res")
148 150
 %load(file.path(outdir, "cnFile.rda"))
149 151
 %cnAB <- get("cnAB")
150
-%@ 
152
+%@
151 153
 %
152 154
 %<<loadIntermediate, echo=FALSE>>=
153 155
 %trace(checkExists, browser)
154 156
 %res <- checkExists("snpFile", .path=outdir, .FUN=load, file=file.path(outdir, "snpFile.rda"))
155 157
 %cnAB <- checkExists("cnFile", .path=outdir, .FUN=load, file=file.path(outdir, "cnFile.rda"))
156
-%@ 
158
+%@
157 159
 
158 160
 After running the crlmm algorithm, we construct a container for
159 161
 storing the quantile normalized intensities, genotype calls, and
... ...
@@ -172,11 +174,11 @@ if(!file.exists(file.path(outdir, "cnSet.rda"))){
172 174
 				 snpFile=snpFile,
173 175
 				 cnFile=cnFile)
174 176
 }
175
-@ 
177
+@
176 178
 
177 179
 <<constructContainer2, eval=FALSE>>=
178 180
 container <- constructIlluminaCNSet(crlmmResult=crlmmResult, snpFile=snpFile, cnFile=cnFile)
179
-@ 
181
+@
180 182
 
181 183
 As described in the \texttt{copynumber} vignette, two \R{} functions
182 184
 for copy number estimation are available: \Rfunction{crlmmCopynumber}
... ...
@@ -194,11 +196,11 @@ the genotyping and copy number estimation steps.  Here, we use the
194 196
 cnSet <- checkExists("cnSet", .path=outdir,
195 197
 		     .FUN=crlmmCopynumber,
196 198
 		     object=container)
197
-@ 
199
+@
198 200
 
199 201
 <<estimateCopynumber, eval=FALSE>>=
200 202
 cnSet <- crlmmCopynumberLD(container)
201
-@ 
203
+@
202 204
 
203 205
 %In the following code, we define helper functions to construct a
204 206
 %\Robject{featureData} object that chromosome and physical position
... ...
@@ -230,12 +232,12 @@ cb <- CB(cnSet)[marker.index, ]/100
230 232
 missing.index <- which(rowSums(is.na(ca))==ncol(cnSet))
231 233
 ca <- ca[-missing.index, ]
232 234
 cb <- cb[-missing.index, ]
233
-@ 
235
+@
234 236
 \noindent Negating the \Robject{isSnp} function could be used to
235 237
 extract the estimates at nonpolymorphic markers. For instance,
236 238
 <<monomorphic-accessor>>=
237 239
 cn.monomorphic <- CA(cnSet)[which(chromosome(cnSet) == 21 & !isSnp(cnSet)), ]/100
238
-@ 
240
+@
239 241
 
240 242
 At polymorphic loci, the total copy number is the sum of the number of
241 243
 copies of the A allele and the number of copies for the B allele.  At
... ...
@@ -251,7 +253,7 @@ cn.total2 <- totalCopyNumber(cnSet, i=marker.index)
251 253
 cn.total2 <- cn.total2[-missing.index, ]
252 254
 dimnames(cn.total) <- NULL
253 255
 all.equal(cn.total2, cn.total)
254
-@ 
256
+@
255 257
 
256 258
 A few simple visualizations may be helpful at this point. The first
257 259
 plot is a histogram of the signal to noise ratio of the sample -- an
... ...
@@ -260,11 +262,11 @@ statistic tends to be much higher for Illumina than for the Affymetrix
260 262
 platforms.) The second is a visualization of the total copy number
261 263
 estimates plotted versus physical position on chromosome 1 for the two
262 264
 samples with the lowest (top) and highest (bottom) signal to noise
263
-ratios.  
265
+ratios.
264 266
 
265 267
 <<snr, fig=TRUE>>=
266 268
 hist(cnSet$SNR, breaks=15)
267
-@ 
269
+@
268 270
 
269 271
 <<firstSample, fig=TRUE>>=
270 272
 low.snr <- which(cnSet$SNR == min(cnSet$SNR))
... ...
@@ -281,7 +283,7 @@ for(j in c(low.snr, high.snr)){
281 283
 }
282 284
 axis(1, at=pretty(x), labels=pretty(x/1e6))
283 285
 mtext("Mb", 1, outer=TRUE, line=2)
284
-@ 
286
+@
285 287
 
286 288
 Here's a very simple approach to handle outliers by applying a running
287 289
 median using a window of size 3.  Following outlier removal, we
... ...
@@ -303,12 +305,12 @@ for(j in c(low.snr, high.snr)){
303 305
 }
304 306
 axis(1, at=pretty(x), labels=pretty(x/1e6))
305 307
 mtext("Mb", 1, outer=TRUE, line=2)
306
-@ 
308
+@
307 309
 
308 310
 \section{Session information}
309 311
 <<sessionInfo, results=tex>>=
310 312
 toLatex(sessionInfo())
311
-@ 
313
+@
312 314
 
313 315
 
314 316
 \end{document}