Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@40465 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 02/07/2009 07:39:23
Showing6 changed files

... ...
@@ -179,4 +179,10 @@ is decoded and scanned
179 179
 * updated computeCopynumber
180 180
 * export A, B methods for ABset class
181 181
 
182
+2009-07-01 M Ritchie - committed version 1.3.7
183
+
184
+* made memory savings to reading and normalization if Illumina data
185
+* Now use 'scanDates' slot to save scan dates instead of saving them in phenoData slot
186
+* added vignette for genoytping illumina data (crlmmIllumina.pdf in inst/doc and crlmmIllumina.Rnw in inst/scripts)
187
+* Changed stop() to warning() when idats are of different type in readIdatFiles()
182 188
 
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.6
4
+Version: 1.3.7
5 5
 Date: 2009-06-17
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -3,7 +3,7 @@ useDynLib("crlmm", .registration=TRUE)
3 3
 ##importClassesFrom(methods, ANY, character, data.frame, "function", integer, matrix, numeric)
4 4
 
5 5
 importClassesFrom(Biobase, AnnotatedDataFrame, AssayData, eSet,
6
-		  SnpSet, MIAME, Versioned, VersionedBiobase, Versions)
6
+		  SnpSet, NChannelSet, MIAME, Versioned, VersionedBiobase, Versions)
7 7
 
8 8
 importClassesFrom(oligoClasses, SnpLevelSet)
9 9
 
... ...
@@ -13,7 +13,7 @@ importMethodsFrom(Biobase, annotation, "annotation<-", annotatedDataFrameFrom,
13 13
                   fData, featureData, "featureData<-", featureNames,
14 14
 		  fvarMetadata, fvarLabels,
15 15
                   pData, phenoData, "phenoData<-", pubMedIds, rowMedians, sampleNames, scanDates,
16
-                  "scanDates<-", storageMode, updateObject)
16
+                  "scanDates<-", storageMode, "storageMode<-", updateObject)
17 17
 
18 18
 
19 19
 ##importMethodsFrom(oligoClasses, chromosome, copyNumber, position)
... ...
@@ -24,7 +24,7 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
24 24
                arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
25 25
             }
26 26
          }
27
-#         pd = new("AnnotatedDataFrame", data = sampleSheet)
27
+         pd = new("AnnotatedDataFrame", data = sampleSheet)
28 28
        }
29 29
        if(is.null(arrayNames)) {
30 30
          arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
... ...
@@ -32,6 +32,7 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
32 32
            sampleSheet=NULL
33 33
            cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
34 34
          }
35
+         pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
35 36
        }
36 37
        narrays = length(arrayNames)
37 38
        grnfiles = paste(arrayNames, fileExt$green, sep=sep)
... ...
@@ -59,21 +60,10 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
59 60
        for(i in seq(along=arrayNames)) {
60 61
          cat("reading", arrayNames[i], "\t")
61 62
          idsG = idsR = G = R = NULL
62
-#         if(grnfiles[i] %in% dir(path=path)) {
63
+
63 64
          cat(paste(sep, fileExt$green, sep=""), "\t")
64 65
          G = readIDAT(grnidats[i])
65 66
          idsG = rownames(G$Quants)
66
-#         }
67
-#         else
68
-#           stop("Could not find ", grnidats[i])
69
-         
70
-#         if(redfiles[i] %in% dir(path=path)) {
71
-          cat(paste(sep, fileExt$red, sep=""), "\n")
72
-          R = readIDAT(redidats[i])
73
-          idsR = rownames(R$Quants)
74
-#         }
75
-#         else
76
-#           stop("Could not find ", redidats[i])
77 67
          
78 68
          headerInfo$nProbes[i] = G$nSNPsRead
79 69
          headerInfo$Barcode[i] = G$Barcode
... ...
@@ -81,10 +71,13 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
81 71
          headerInfo$Manifest[i] = G$Unknown$MostlyNull
82 72
          headerInfo$Position[i] = G$Unknowns$MostlyA
83 73
          
84
-         if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1])
74
+         if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
85 75
                   # || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
86 76
                   # but differed by a few SNPs for some reason - most of the chip was the same though
87
-           stop("Chips are not of all of the same type - please check your data")
77
+#           stop("Chips are not of all of the same type - please check your data")
78
+           warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
79
+           next()
80
+         }
88 81
 
89 82
          dates$decode[i] = G$RunInfo[1, 1]
90 83
          dates$scan[i] = G$RunInfo[2, 1]
... ...
@@ -96,57 +89,62 @@ readIdatFiles = function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
96 89
              stop("Could not find probe IDs")
97 90
            nprobes = length(ids)
98 91
            narrays = length(arrayNames)
99
-           Gintens = Rintens = Gnobeads = Rnobeads = Gstderr = Rstderr = matrix(NA, nprobes, narrays)
100
-           rownames(Gintens) = rownames(Rintens) = rownames(Gnobeads) = rownames(Rnobeads) = rownames(Gstderr) = rownames(Rstderr) = ids
92
+           
93
+           tmpmat = matrix(NA, nprobes, narrays)
94
+           rownames(tmpmat) = ids
101 95
            if(!is.null(sampleSheet))
102
-             colnames(Gintens) = colnames(Rintens) =  colnames(Gnobeads) = colnames(Rnobeads) = colnames(Gstderr) = colnames(Rstderr) = sampleSheet$Sample_ID
96
+             colnames(tmpmat) = sampleSheet$Sample_ID
103 97
            else
104
-             colnames(Gintens) = colnames(Rintens) =  colnames(Gnobeads) = colnames(Rnobeads) = colnames(Gstderr) = colnames(Rstderr) = arrayNames
98
+             colnames(tmpmat) = arrayNames
99
+
100
+           RG = new("NChannelSet",
101
+                     R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat,
102
+                     Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1],
103
+                     phenoData=pd, storage.mode="environment")
104
+           rm(tmpmat)
105
+           gc()
105 106
          }
106 107
          
107 108
          if(length(ids)==length(idsG)) {
108 109
            if(sum(ids==idsG)==nprobes) {
109
-             Gintens[,i] = G$Quants[, "Mean"]
110
-             Gnobeads[,i] = G$Quants[, "NBeads"]
111
-             Gstderr[,i] = G$Quants[, "SD"]
110
+             RG@assayData$G[,i] = G$Quants[, "Mean"]
111
+             RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
112
+             RG@assayData$Gse[,i] = G$Quants[, "SD"]
112 113
            }
113 114
          }
114 115
          else {
115 116
            indG = match(ids, idsG)
116
-           Gintens[,i] = G$Quants[indG, "Mean"]
117
-           Gnobeads[,i] = G$Quants[indG, "NBeads"]
118
-           Gstderr[,i] = G$Quants[indG, "SD"]
117
+           RG@assayData$G[,i] = G$Quants[indG, "Mean"]
118
+           RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
119
+           RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
119 120
          }
121
+         rm(G)
122
+         gc()
123
+         
124
+         cat(paste(sep, fileExt$red, sep=""), "\n")
125
+         R = readIDAT(redidats[i])
126
+         idsR = rownames(R$Quants)
127
+         
120 128
          if(length(ids)==length(idsG)) {   
121 129
            if(sum(ids==idsR)==nprobes) {
122
-             Rintens[,i] = R$Quants[ ,"Mean"]
123
-             Rnobeads[,i] = R$Quants[ ,"NBeads"]
124
-             Rstderr[,i] = R$Quants[ ,"SD"]
130
+             RG@assayData$R[,i] = R$Quants[ ,"Mean"]
131
+             RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
132
+             RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
125 133
            }
126 134
          }
127 135
          else {
128 136
            indR = match(ids, idsR)
129
-           Rintens[,i] = R$Quants[indR, "Mean"]
130
-           Rnobeads[,i] = R$Quants[indR, "NBeads"]
131
-           Rstderr[,i] = R$Quants[indR, "SD"]
137
+           RG@assayData$R[,i] = R$Quants[indR, "Mean"]
138
+           RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
139
+           RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
132 140
          }
141
+         rm(R)
142
+         gc()
133 143
        }
134
-       if(is.null(sampleSheet)) {
135
-         if(saveDate)
136
-           pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames, DecodeDate=dates$decode, ScanDate=dates$scan))
137
-         else
138
-           pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
139
-       }
140
-       else {
141
-         if(saveDate)
142
-           pd = new("AnnotatedDataFrame", data = cbind(sampleSheet, DecodeDate=dates$decode, ScanDate=dates$scan))
143
-         else
144
-           pd = new("AnnotatedDataFrame", data = sampleSheet)
145
-       }
146
-       RG = new("NChannelSet",
147
-                R=Rintens, G=Gintens, Rnb=Rnobeads, Gnb=Gnobeads,
148
-                Rse=Rstderr, Gse=Gstderr, annotation=headerInfo$Manifest[1],
149
-                phenoData=pd)
144
+       if(saveDate)
145
+         RG@scanDates = dates$scan
146
+       storageMode(RG) = "lockedEnvironment"
147
+       RG
150 148
 }
151 149
 
152 150
 
... ...
@@ -455,17 +453,23 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
455 453
   bord = match(bids, featureNames(RG)) # and here
456 454
 #  argrg = aids[rrgg]
457 455
 #  brgrg = bids[rrgg]
458
-  X = Y = Xnb = Ynb = Xse = Yse = zero = matrix(0, nsnps, narrays)
459
-  rownames(X) = rownames(Y) = rownames(Xnb) = rownames(Ynb) = rownames(Xse) = rownames(Yse) = ids
460
-  colnames(X) = colnames(Y) = colnames(Xnb) = colnames(Ynb) = colnames(Xse) = colnames(Yse) = sampleNames(RG) #$G
461 456
 
457
+  tmpmat = matrix(0, nsnps, narrays)
458
+  rownames(tmpmat) = ids
459
+  colnames(tmpmat) = sampleNames(RG)
460
+  XY = new("NChannelSet", X=tmpmat, Y=tmpmat, Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
461
+                 annotation=chipType, phenoData=RG@phenoData, scanDates=RG@scanDates, storage.mode="environment")
462
+  rm(tmpmat)
463
+  gc()
464
+  
462 465
   # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
463
-  X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
464
-  Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
465
-  Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
466
-  Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
467
-  Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
468
-  Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
466
+  XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
467
+  XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
468
+  XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
469
+  XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
470
+  XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
471
+  XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
472
+  gc()
469 473
   
470 474
   ## Warning - not 100% sure that the code below is correct - could be more complicated than this
471 475
   
... ...
@@ -490,24 +494,18 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
490 494
 #  Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
491 495
     
492 496
   #  For now zero out Infinium I probes
493
-  X[infI,] = 0
494
-  Y[infI,] = 0
495
-  Xnb[infI,] = 0
496
-  Ynb[infI,] = 0
497
-  Xse[infI,] = 0
498
-  Yse[infI,] = 0
499
-
500
-  zero[X==0 | Y==0] = 1
501
-  
497
+  XY@assayData$X[infI,] = 0
498
+  XY@assayData$Y[infI,] = 0
499
+  XY@assayData$Xnb[infI,] = 0
500
+  XY@assayData$Ynb[infI,] = 0
501
+  XY@assayData$Xse[infI,] = 0
502
+  XY@assayData$Yse[infI,] = 0
503
+
504
+  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
502 505
   gc()
503
-  
504
-  XY = new("NChannelSet",
505
-           X=X, Y=Y,
506
-           Xnb=Xnb, Ynb=Ynb,
507
-           Xse=Xse, Yse=Yse,
508
-           zero=zero,
509
-           annotation=chipType,
510
-           phenoData=RG@phenoData)
506
+
507
+#  storageMode(XY) = "lockedEnvironment"
508
+  XY
511 509
 }
512 510
 
513 511
 stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
... ...
@@ -528,9 +526,9 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
528 526
   if(useTarget)
529 527
     targetdist = getVarInEnv("reference")
530 528
   
531
-  Xqws = Yqws = matrix(0, nrow(XY), ncol(XY))
532
-  colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X
533
-  rownames(Xqws) = rownames(Yqws) = featureNames(XY)
529
+#  Xqws = Yqws = matrix(0, nrow(XY), ncol(XY))
530
+#  colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X
531
+#  rownames(Xqws) = rownames(Yqws) = featureNames(XY)
534 532
 
535 533
   if(verbose){
536 534
     message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
... ...
@@ -548,23 +546,26 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
548 546
       tmp = normalize.quantiles.use.target(as.matrix(cbind(subX, subY)), targetdist[[s]])
549 547
     else
550 548
       tmp = normalize.quantiles(as.matrix(cbind(subX, subY)))
551
-    Xqws[sel,] = tmp[,1:(ncol(tmp)/2)]
552
-    Yqws[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
549
+    XY@assayData$X[sel,] = tmp[,1:(ncol(tmp)/2)]
550
+    XY@assayData$Y[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
551
+#    Xqws[sel,] = tmp[,1:(ncol(tmp)/2)]
552
+#    Yqws[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
553 553
     rm(subX, subY, tmp, sel)
554 554
     gc()
555 555
   }
556 556
   if(verbose)
557 557
     cat("\n")
558
-  XYNorm = new("NChannelSet",
559
-                X=Xqws+16,
560
-                Y=Yqws+16,
561
-                Xnb=exprs(channel(XY, "Xnb")),
562
-                Ynb=exprs(channel(XY, "Ynb")),
563
-                Xse=exprs(channel(XY, "Xse")),
564
-                Yse=exprs(channel(XY, "Yse")),
565
-                zero=exprs(channel(XY, "zero")),
566
-                annotation=annotation(XY),
567
-                phenoData=XY@phenoData)
558
+  XY
559
+#  XYNorm = new("NChannelSet",
560
+#                X=Xqws+16,
561
+#                Y=Yqws+16,
562
+#                Xnb=exprs(channel(XY, "Xnb")),
563
+#                Ynb=exprs(channel(XY, "Ynb")),
564
+#                Xse=exprs(channel(XY, "Xse")),
565
+#                Yse=exprs(channel(XY, "Yse")),
566
+#                zero=exprs(channel(XY, "zero")),
567
+#                annotation=annotation(XY),
568
+#                phenoData=XY@phenoData)
568 569
 }
569 570
 
570 571
 
571 572
new file mode 100644
572 573
Binary files /dev/null and b/inst/doc/crlmmIllumina.pdf differ
573 574
new file mode 100644
... ...
@@ -0,0 +1,147 @@
1
+%\VignetteIndexEntry{crlmm Vignette - Illumina 370k chip}
2
+%\VignetteKeywords{genotype, crlmm, Illumina}
3
+%\VignettePackage{crlmm}
4
+
5
+\documentclass[12pt]{article}
6
+
7
+\usepackage{amsmath,pstricks}
8
+\usepackage[authoryear,round]{natbib}
9
+\usepackage{hyperref}
10
+\usepackage{Sweave}
11
+
12
+\textwidth=6.2in
13
+\textheight=8.5in
14
+%\parskip=.3cm
15
+\oddsidemargin=.1in
16
+\evensidemargin=.1in
17
+\headheight=-.3in
18
+
19
+\newcommand{\scscst}{\scriptscriptstyle}
20
+\newcommand{\scst}{\scriptstyle}
21
+
22
+
23
+\newcommand{\Rfunction}[1]{{\texttt{#1}}}
24
+\newcommand{\Robject}[1]{{\texttt{#1}}}
25
+\newcommand{\Rpackage}[1]{{\textit{#1}}}
26
+\newcommand{\Rmethod}[1]{{\texttt{#1}}}
27
+\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
28
+\newcommand{\Rclass}[1]{{\textit{#1}}}
29
+
30
+\textwidth=6.2in
31
+
32
+\bibliographystyle{plainnat} 
33
+ 
34
+\begin{document}
35
+%\setkeys{Gin}{width=0.55\textwidth}
36
+
37
+\title{Using \Rpackage{crlmm} to genotype data from Illumina's Infinium BeadChips}
38
+\author{Matt Ritchie}
39
+\maketitle
40
+
41
+\section{Getting started}
42
+
43
+In this user guide we read in and genotype data from 40 HapMap samples 
44
+which have been analyzed using Illumina's 370k Duo BeadChips.
45
+This data is available in the \Rpackage{hapmap370k} package.  
46
+Additional chip-specific model parameters and basic SNP annotation 
47
+information used by CRLMM is stored in the \Rpackage{human370v1c} package.
48
+These can be downloaded from \href{http://rafalab.jhsph.edu/software.html}{http://rafalab.jhsph.edu/software.html}
49
+and must be installed for the following code to work.
50
+
51
+\section{Reading in data}
52
+
53
+The function \Rfunction{readIdatFiles} extracts the Red and Green intensities 
54
+from the binary {\tt idat} files output by Illumina's scanning device.
55
+The file {\tt samples370k.csv} contains information about each sample.
56
+
57
+<<<echo=FALSE, results=hide, eval=TRUE>>=
58
+options(width=50)
59
+@ 
60
+
61
+<<read>>=
62
+library(Biobase)
63
+library(crlmm)
64
+library(hapmap370k)
65
+
66
+data.dir = system.file("idatFiles", package="hapmap370k")
67
+
68
+# Read in sample annotation info
69
+samples = read.csv(file.path(data.dir, "samples370k.csv"), as.is=TRUE)
70
+samples[1:5,]
71
+@ 
72
+
73
+<<read2, results=hide, cache=TRUE>>=
74
+# Read in .idats using sampleSheet information
75
+RG = readIdatFiles(samples, path=data.dir, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE)
76
+@
77
+
78
+Reading in this data takes approximately 90 seconds and peak memory usage 
79
+was 1.2 GB of RAM on our linux system.
80
+The \Robject{RG} object is an \Rclass{NChannelSet} which stores the 
81
+Red and Green intensities, the number of beads and standard errors for 
82
+each bead-type.  
83
+The scanning date of each array is stored in the \Robject{scanDates} slot.
84
+
85
+<<explore>>=
86
+class(RG)
87
+dim(RG)
88
+slotNames(RG)
89
+channelNames(RG)
90
+exprs(channel(RG, "R"))[1:5,1:5]
91
+exprs(channel(RG, "G"))[1:5,1:5]
92
+pd = pData(RG)
93
+pd[1:5,]
94
+
95
+scandatetime = strptime(scanDates(RG), "%m/%d/%Y %H:%M:%S %p")
96
+datescanned = substr(scandatetime, 1, 10)
97
+scanbatch =  factor(datescanned)
98
+levels(scanbatch) = 1:16
99
+scanbatch = as.numeric(scanbatch)
100
+@
101
+
102
+Plots of the summarised data can be easily generated to check for arrays 
103
+with poor signal.
104
+
105
+<<boxplots, fig=TRUE, width=8, height=8>>=
106
+par(mfrow=c(2,1), mai=c(0.4,0.4,0.4,0.1), oma=c(1,1,0,0))
107
+boxplot(log2(exprs(channel(RG, "R"))), xlab="Array", ylab="", main="Red channel", outline=FALSE, las=2)
108
+boxplot(log2(exprs(channel(RG, "G"))), xlab="Array", ylab="", main="Green channel", outline=FALSE, las=2)
109
+mtext(expression(log[2](intensity)), side=2, outer=TRUE)
110
+mtext("Array", side=1, outer=TRUE)
111
+@
112
+
113
+\section{Genotyping}
114
+
115
+Next we use the function \Rfunction{crlmmIllumina} which performs preprocessing followed by genotyping
116
+using the CRLMM algorithm.
117
+
118
+<<genotype, results=hide, cache=TRUE>>=
119
+crlmmResult = crlmmIllumina(RG=RG, cdfName="human370v1c", sns=pData(RG)$ID, returnParams=TRUE)
120
+@ 
121
+
122
+This analysis took 470 seconds to complete and peak memory usage was 3.3 GB on our system.
123
+The output stored in \Robject{crlmmResult} is a \Rclass{SnpSet} object.                                                                                                                                                         
124
+<<explore2>>=
125
+  class(crlmmResult)
126
+  dim(crlmmResult)
127
+  slotNames(crlmmResult)
128
+  calls(crlmmResult)[1:10, 1:5]
129
+@ 
130
+
131
+Plotting the {\it SNR} reveals no obvious batch effects in this data set (different symbols are used for 
132
+arrays scanned on different days).
133
+
134
+<<snr,  fig=TRUE, width=8, height=6>>=
135
+plot(crlmmResult[["SNR"]], pch=scanbatch, xlab="Array", ylab="SNR", main="Signal-to-noise ratio per array", las=2)
136
+@
137
+
138
+\section{System information}
139
+
140
+This analysis was carried out on a linux machine with 32GB of RAM 
141
+using the following packages:
142
+
143
+<<session>>=
144
+sessionInfo()
145
+@
146
+
147
+\end{document}