Added statements to illumina_copynumber.Rnw that make use of checkExists function.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@49126 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -2,4 +2,24 @@ setOldClass("ellipse") |
2 | 2 |
setOldClass("ff_matrix") |
3 | 3 |
setOldClass("ffdf") |
4 | 4 |
setClassUnion("ff_or_matrix", c("ff_matrix", "matrix", "ffdf")) |
5 |
+<<<<<<< HEAD |
|
5 | 6 |
|
7 |
+======= |
|
8 |
+setClass("CNSetLM", contains="CNSet", representation(lM="list_or_ffdf")) |
|
9 |
+setMethod("initialize", "CNSetLM", function(.Object, lM=new("list"), ...){ |
|
10 |
+ .Object@lM <- lM |
|
11 |
+ .Object <- callNextMethod(.Object, ...) |
|
12 |
+}) |
|
13 |
+setValidity("CNSetLM", |
|
14 |
+ function(object){ |
|
15 |
+ if(!"batch" %in% varLabels(protocolData(object))) |
|
16 |
+ return("'batch' not defined in protocolData") |
|
17 |
+ if(!"chromosome" %in% fvarLabels(object)) |
|
18 |
+ return("'chromosome' not defined in featureData") |
|
19 |
+ if(!"position" %in% fvarLabels(object)) |
|
20 |
+ return("'position' not defined in featureData") |
|
21 |
+ if(!"isSnp" %in% fvarLabels(object)) |
|
22 |
+ return("'isSnp' not defined in featureData") |
|
23 |
+ return(TRUE) |
|
24 |
+ }) |
|
25 |
+>>>>>>> Removed tryCatch() statements in readIdatFiles. Restored previous implementation. |
... | ... |
@@ -12,7 +12,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
12 | 12 |
sep="_", |
13 | 13 |
fileExt=list(green="Grn.idat", red="Red.idat"), |
14 | 14 |
saveDate=FALSE) { |
15 |
- |
|
15 |
+ |
|
16 | 16 |
if(!is.null(arrayNames)) { |
17 | 17 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
18 | 18 |
} |
... | ... |
@@ -22,7 +22,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
22 | 22 |
barcode = sampleSheet[,arrayInfoColNames$barcode] |
23 | 23 |
arrayNames=barcode |
24 | 24 |
} |
25 |
- if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
25 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
26 | 26 |
position = sampleSheet[,arrayInfoColNames$position] |
27 | 27 |
if(is.null(arrayNames)) |
28 | 28 |
arrayNames=position |
... | ... |
@@ -36,7 +36,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
36 | 36 |
} |
37 | 37 |
} |
38 | 38 |
pd = new("AnnotatedDataFrame", data = sampleSheet) |
39 |
- sampleNames(pd) <- basename(arrayNames) |
|
39 |
+ sampleNames(pd) <- basename(arrayNames) |
|
40 | 40 |
} |
41 | 41 |
if(is.null(arrayNames)) { |
42 | 42 |
arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
... | ... |
@@ -46,7 +46,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
46 | 46 |
} |
47 | 47 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
48 | 48 |
} |
49 |
- |
|
49 |
+ |
|
50 | 50 |
narrays = length(arrayNames) |
51 | 51 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
52 | 52 |
redfiles = paste(arrayNames, fileExt$red, sep=sep) |
... | ... |
@@ -63,7 +63,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
63 | 63 |
redidats <- redfiles |
64 | 64 |
} |
65 | 65 |
if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files") |
66 |
- if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
66 |
+ if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
67 | 67 |
headerInfo = list(nProbes = rep(NA, narrays), |
68 | 68 |
Barcode = rep(NA, narrays), |
69 | 69 |
ChipType = rep(NA, narrays), |
... | ... |
@@ -119,12 +119,12 @@ readIdatFiles <- function(sampleSheet=NULL, |
119 | 119 |
} |
120 | 120 |
rm(G) |
121 | 121 |
gc() |
122 |
- |
|
122 |
+ |
|
123 | 123 |
cat(paste(sep, fileExt$red, sep=""), "\n") |
124 | 124 |
R = readIDAT(redidats[i]) |
125 | 125 |
idsR = rownames(R$Quants) |
126 |
- |
|
127 |
- if(length(ids)==length(idsG)) { |
|
126 |
+ |
|
127 |
+ if(length(ids)==length(idsG)) { |
|
128 | 128 |
if(sum(ids==idsR)==nprobes) { |
129 | 129 |
RG@assayData$R[,i] = R$Quants[ ,"Mean"] |
130 | 130 |
zeroR = R$Quants[ ,"NBeads"]==0 |
... | ... |
@@ -145,7 +145,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
145 | 145 |
# if(class(RG@assayData$R)[1]=="ff_matrix") { |
146 | 146 |
close(RG@assayData$R) |
147 | 147 |
close(RG@assayData$G) |
148 |
- close(RG@assayData$zero) |
|
148 |
+ close(RG@assayData$zero) |
|
149 | 149 |
# } |
150 | 150 |
RG |
151 | 151 |
} |
... | ... |
@@ -161,27 +161,27 @@ readIDAT <- function(idatFile){ |
161 | 161 |
|
162 | 162 |
} |
163 | 163 |
|
164 |
- versionNumber <- readBin(tempCon, "integer", n=1, size=8, |
|
164 |
+ versionNumber <- readBin(tempCon, "integer", n=1, size=8, |
|
165 | 165 |
endian="little", signed=FALSE) |
166 | 166 |
|
167 | 167 |
if(versionNumber<3) |
168 | 168 |
stop("Older style IDAT files not supported: consider updating your scanner settings") |
169 |
- |
|
170 |
- nFields <- readBin(tempCon, "integer", n=1, size=4, |
|
169 |
+ |
|
170 |
+ nFields <- readBin(tempCon, "integer", n=1, size=4, |
|
171 | 171 |
endian="little", signed=FALSE) |
172 | 172 |
|
173 | 173 |
fields <- matrix(0,nFields,3); |
174 | 174 |
colnames(fields) <- c("Field Code", "Byte Offset", "Bytes") |
175 | 175 |
for(i1 in 1:nFields){ |
176 |
- fields[i1,"Field Code"] <- |
|
176 |
+ fields[i1,"Field Code"] <- |
|
177 | 177 |
readBin(tempCon, "integer", n=1, size=2, endian="little", signed=FALSE) |
178 |
- fields[i1,"Byte Offset"] <- |
|
178 |
+ fields[i1,"Byte Offset"] <- |
|
179 | 179 |
readBin(tempCon, "integer", n=1, size=8, endian="little", signed=FALSE) |
180 | 180 |
} |
181 | 181 |
|
182 | 182 |
knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400, |
183 | 183 |
401, 402, 403, 404, 405, 406, 407, 408, 409) |
184 |
- names(knownCodes) <- |
|
184 |
+ names(knownCodes) <- |
|
185 | 185 |
c("nSNPsRead", # 1000 |
186 | 186 |
"IlluminaID", # 102 |
187 | 187 |
"SD", # 103 |
... | ... |
@@ -214,35 +214,35 @@ readIDAT <- function(idatFile){ |
214 | 214 |
} |
215 | 215 |
|
216 | 216 |
seek(tempCon, fields["nSNPsRead", "Byte Offset"]) |
217 |
- nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, |
|
217 |
+ nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, |
|
218 | 218 |
endian="little", signed=FALSE) |
219 | 219 |
|
220 | 220 |
seek(tempCon, fields["IlluminaID", "Byte Offset"]) |
221 |
- IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, |
|
221 |
+ IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, |
|
222 | 222 |
endian="little", signed=FALSE) |
223 | 223 |
|
224 | 224 |
seek(tempCon, fields["SD", "Byte Offset"]) |
225 |
- SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
225 |
+ SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
226 | 226 |
endian="little", signed=FALSE) |
227 | 227 |
|
228 | 228 |
seek(tempCon, fields["Mean", "Byte Offset"]) |
229 |
- Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
229 |
+ Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
230 | 230 |
endian="little", signed=FALSE) |
231 | 231 |
|
232 | 232 |
seek(tempCon, fields["NBeads", "Byte Offset"]) |
233 | 233 |
NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE) |
234 | 234 |
|
235 | 235 |
seek(tempCon, fields["MidBlock", "Byte Offset"]) |
236 |
- nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, |
|
236 |
+ nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, |
|
237 | 237 |
endian="little", signed=FALSE) |
238 |
- MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, |
|
238 |
+ MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, |
|
239 | 239 |
endian="little", signed=FALSE) |
240 | 240 |
|
241 | 241 |
seek(tempCon, fields["RunInfo", "Byte Offset"]) |
242 |
- nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, |
|
242 |
+ nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, |
|
243 | 243 |
endian="little", signed=FALSE) |
244 | 244 |
RunInfo <- matrix(NA, nRunInfoBlocks, 5) |
245 |
- colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", |
|
245 |
+ colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", |
|
246 | 246 |
"BlockCode", "CodeVersion") |
247 | 247 |
for(i1 in 1:2) { #nRunInfoBlocks){ ## MR edit |
248 | 248 |
for(i2 in 1:5){ |
... | ... |
@@ -252,50 +252,50 @@ readIDAT <- function(idatFile){ |
252 | 252 |
} |
253 | 253 |
|
254 | 254 |
seek(tempCon, fields["RedGreen", "Byte Offset"]) |
255 |
- RedGreen <- readBin(tempCon, "numeric", n=1, size=4, |
|
255 |
+ RedGreen <- readBin(tempCon, "numeric", n=1, size=4, |
|
256 | 256 |
endian="little", signed=FALSE) |
257 |
- #RedGreen <- readBin(tempCon, "integer", n=4, size=1, |
|
257 |
+ #RedGreen <- readBin(tempCon, "integer", n=4, size=1, |
|
258 | 258 |
# endian="little", signed=FALSE) |
259 | 259 |
|
260 | 260 |
seek(tempCon, fields["MostlyNull", "Byte Offset"]) |
261 | 261 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
262 |
- MostlyNull <- readChar(tempCon, nChars) |
|
262 |
+ MostlyNull <- readChar(tempCon, nChars) |
|
263 | 263 |
|
264 | 264 |
seek(tempCon, fields["Barcode", "Byte Offset"]) |
265 | 265 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
266 |
- Barcode <- readChar(tempCon, nChars) |
|
266 |
+ Barcode <- readChar(tempCon, nChars) |
|
267 | 267 |
|
268 | 268 |
seek(tempCon, fields["ChipType", "Byte Offset"]) |
269 | 269 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
270 |
- ChipType <- readChar(tempCon, nChars) |
|
270 |
+ ChipType <- readChar(tempCon, nChars) |
|
271 | 271 |
|
272 | 272 |
seek(tempCon, fields["MostlyA", "Byte Offset"]) |
273 | 273 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
274 |
- MostlyA <- readChar(tempCon, nChars) |
|
274 |
+ MostlyA <- readChar(tempCon, nChars) |
|
275 | 275 |
|
276 | 276 |
seek(tempCon, fields["Unknown.1", "Byte Offset"]) |
277 | 277 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
278 |
- Unknown.1 <- readChar(tempCon, nChars) |
|
278 |
+ Unknown.1 <- readChar(tempCon, nChars) |
|
279 | 279 |
|
280 | 280 |
seek(tempCon, fields["Unknown.2", "Byte Offset"]) |
281 | 281 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
282 |
- Unknown.2 <- readChar(tempCon, nChars) |
|
282 |
+ Unknown.2 <- readChar(tempCon, nChars) |
|
283 | 283 |
|
284 | 284 |
seek(tempCon, fields["Unknown.3", "Byte Offset"]) |
285 | 285 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
286 |
- Unknown.3 <- readChar(tempCon, nChars) |
|
286 |
+ Unknown.3 <- readChar(tempCon, nChars) |
|
287 | 287 |
|
288 | 288 |
seek(tempCon, fields["Unknown.4", "Byte Offset"]) |
289 | 289 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
290 |
- Unknown.4 <- readChar(tempCon, nChars) |
|
290 |
+ Unknown.4 <- readChar(tempCon, nChars) |
|
291 | 291 |
|
292 | 292 |
seek(tempCon, fields["Unknown.5", "Byte Offset"]) |
293 | 293 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
294 |
- Unknown.5 <- readChar(tempCon, nChars) |
|
294 |
+ Unknown.5 <- readChar(tempCon, nChars) |
|
295 | 295 |
|
296 | 296 |
close(tempCon) |
297 | 297 |
|
298 |
- Unknowns <- |
|
298 |
+ Unknowns <- |
|
299 | 299 |
list(MostlyNull=MostlyNull, |
300 | 300 |
MostlyA=MostlyA, |
301 | 301 |
Unknown.1=Unknown.1, |
... | ... |
@@ -308,21 +308,21 @@ readIDAT <- function(idatFile){ |
308 | 308 |
colnames(Quants) <- c("Mean", "SD", "NBeads") |
309 | 309 |
rownames(Quants) <- as.character(IlluminaID) |
310 | 310 |
|
311 |
- idatValues <- |
|
312 |
- list(fileSize=fileSize, |
|
313 |
- versionNumber=versionNumber, |
|
314 |
- nFields=nFields, |
|
311 |
+ idatValues <- |
|
312 |
+ list(fileSize=fileSize, |
|
313 |
+ versionNumber=versionNumber, |
|
314 |
+ nFields=nFields, |
|
315 | 315 |
fields=fields, |
316 |
- nSNPsRead=nSNPsRead, |
|
317 |
- #IlluminaID=IlluminaID, |
|
318 |
- #SD=SD, |
|
319 |
- #Mean=Mean, |
|
316 |
+ nSNPsRead=nSNPsRead, |
|
317 |
+ #IlluminaID=IlluminaID, |
|
318 |
+ #SD=SD, |
|
319 |
+ #Mean=Mean, |
|
320 | 320 |
#NBeads=NBeads, |
321 | 321 |
Quants=Quants, |
322 |
- MidBlock=MidBlock, |
|
323 |
- RunInfo=RunInfo, |
|
324 |
- RedGreen=RedGreen, |
|
325 |
- Barcode=Barcode, |
|
322 |
+ MidBlock=MidBlock, |
|
323 |
+ RunInfo=RunInfo, |
|
324 |
+ RedGreen=RedGreen, |
|
325 |
+ Barcode=Barcode, |
|
326 | 326 |
ChipType=ChipType, |
327 | 327 |
Unknowns=Unknowns) |
328 | 328 |
|
... | ... |
@@ -333,20 +333,20 @@ readIDAT <- function(idatFile){ |
333 | 333 |
readBPM <- function(bpmFile){ |
334 | 334 |
|
335 | 335 |
## Reads and parses Illumina BPM files |
336 |
- |
|
336 |
+ |
|
337 | 337 |
fileSize <- file.info(bpmFile)$size |
338 | 338 |
|
339 | 339 |
tempCon <- file(bpmFile,"rb") |
340 | 340 |
|
341 | 341 |
# The first few bytes of the egtFile are some type of |
342 |
- # header, but there's no related byte offset information. |
|
342 |
+ # header, but there's no related byte offset information. |
|
343 | 343 |
|
344 | 344 |
prefixCheck <- readChar(tempCon,3) ## should be "BPM" |
345 | 345 |
|
346 | 346 |
null.1 <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
347 |
- ## should be 1 |
|
348 |
- |
|
349 |
- versionNumber <- |
|
347 |
+ ## should be 1 |
|
348 |
+ |
|
349 |
+ versionNumber <- |
|
350 | 350 |
readBin(tempCon, "integer", n=1, size=4, endian="little", signed=FALSE) |
351 | 351 |
## should be 4 |
352 | 352 |
|
... | ... |
@@ -360,14 +360,14 @@ readBPM <- function(bpmFile){ |
360 | 360 |
entriesByteOffset <- seek(tempCon); |
361 | 361 |
nEntries <- readBin(tempCon, "integer", n=1, size=4, |
362 | 362 |
endian="little", signed=FALSE) |
363 |
- |
|
363 |
+ |
|
364 | 364 |
if(FALSE){ |
365 |
- |
|
365 |
+ |
|
366 | 366 |
snpIndexByteOffset <- seek(tempCon) |
367 | 367 |
snpIndex <- readBin(tempCon, "integer", n=nEntries, size=4, |
368 | 368 |
endian="little", signed=FALSE) |
369 | 369 |
## for the 1M array, these are simply in order from 1 to 1072820. |
370 |
- |
|
370 |
+ |
|
371 | 371 |
snpNamesByteOffset <- seek(tempCon) |
372 | 372 |
snpNames <- rep("A", nEntries) |
373 | 373 |
for(i1 in 1:nEntries){ |
... | ... |
@@ -378,21 +378,21 @@ readBPM <- function(bpmFile){ |
378 | 378 |
} |
379 | 379 |
|
380 | 380 |
seek(tempCon, 15278138) |
381 |
- |
|
381 |
+ |
|
382 | 382 |
normIDByteOffset <- seek(tempCon) |
383 | 383 |
normID <- readBin(tempCon, "integer", n=nEntries, size=1, signed=FALSE) + 1 |
384 |
- |
|
384 |
+ |
|
385 | 385 |
newBlockByteOffset <- seek(tempCon) |
386 | 386 |
newBlock <- readBin(tempCon, "integer", n=10000, size=1, signed=FALSE) |
387 |
- |
|
387 |
+ |
|
388 | 388 |
close(tempCon) |
389 | 389 |
|
390 | 390 |
byteOffsets <- list(entriesByteOffset=entriesByteOffset, |
391 |
- #snpIndexByteOffset=snpIndexByteOffset, |
|
391 |
+ #snpIndexByteOffset=snpIndexByteOffset, |
|
392 | 392 |
#snpNamesByteOffset=snpNamesByteOffset, |
393 | 393 |
normIDByteOffset=normIDByteOffset, |
394 | 394 |
newBlockByteOffset=newBlockByteOffset) |
395 |
- |
|
395 |
+ |
|
396 | 396 |
allStuff <- list(prefixCheck=prefixCheck, |
397 | 397 |
null.1=null.1, |
398 | 398 |
versionNumber=versionNumber, |
... | ... |
@@ -406,7 +406,7 @@ readBPM <- function(bpmFile){ |
406 | 406 |
newBlock=newBlock, |
407 | 407 |
byteOffsets=byteOffsets) |
408 | 408 |
allStuff |
409 |
- |
|
409 |
+ |
|
410 | 410 |
} |
411 | 411 |
|
412 | 412 |
|
... | ... |
@@ -424,7 +424,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
424 | 424 |
if(missing(chipType)){ |
425 | 425 |
chipType = match.arg(annotation(RG), chipList) |
426 | 426 |
} else chipType = match.arg(chipType, chipList) |
427 |
- |
|
427 |
+ |
|
428 | 428 |
pkgname <- getCrlmmAnnotationName(chipType) |
429 | 429 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
430 | 430 |
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
... | ... |
@@ -439,21 +439,21 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
439 | 439 |
bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest |
440 | 440 |
ids <- names(aids) |
441 | 441 |
snpbase <- getVarInEnv("base") |
442 |
- |
|
442 |
+ |
|
443 | 443 |
nsnps = length(aids) |
444 | 444 |
narrays = ncol(RG) |
445 |
- |
|
445 |
+ |
|
446 | 446 |
# aidcol = match("AddressA_ID", colnames(annot)) |
447 | 447 |
# if(is.na(aidcol)) |
448 | 448 |
# aidcol = match("Address", colnames(annot)) |
449 | 449 |
# bidcol = match("AddressB_ID", colnames(annot)) |
450 |
-# if(is.na(bidcol)) |
|
450 |
+# if(is.na(bidcol)) |
|
451 | 451 |
# bidcol = match("Address2", colnames(annot)) |
452 | 452 |
# aids = annot[, aidcol] |
453 | 453 |
# bids = annot[, bidcol] |
454 | 454 |
# snpids = annot[,"Name"] |
455 |
-# snpbase = annot[,"SNP"] |
|
456 |
- infI = !is.na(bids) & bids!=0 |
|
455 |
+# snpbase = annot[,"SNP"] |
|
456 |
+ infI = !is.na(bids) & bids!=0 |
|
457 | 457 |
aord = match(aids, featureNames(RG)) # NAs are possible here |
458 | 458 |
bord = match(bids, featureNames(RG)) # and here |
459 | 459 |
# argrg = aids[rrgg] |
... | ... |
@@ -463,7 +463,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
463 | 463 |
X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
464 | 464 |
Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
465 | 465 |
zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"), |
466 |
- annotation=chipType, phenoData=RG@phenoData, |
|
466 |
+ annotation=chipType, phenoData=RG@phenoData, |
|
467 | 467 |
protocolData=RG@protocolData, storage.mode="environment") |
468 | 468 |
featureNames(XY) = ids # featureNames(RG) |
469 | 469 |
gc() |
... | ... |
@@ -471,39 +471,39 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
471 | 471 |
XY@assayData$X[1:nsnps,] = 0 |
472 | 472 |
XY@assayData$Y[1:nsnps,] = 0 |
473 | 473 |
XY@assayData$zero[1:nsnps,] = 0 |
474 |
- |
|
474 |
+ |
|
475 | 475 |
open(RG@assayData$R) |
476 | 476 |
open(RG@assayData$G) |
477 | 477 |
open(RG@assayData$zero) |
478 |
- |
|
478 |
+ |
|
479 | 479 |
# First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
480 | 480 |
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red |
481 | 481 |
XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green |
482 | 482 |
XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green |
483 | 483 |
gc() |
484 |
- |
|
484 |
+ |
|
485 | 485 |
close(RG@assayData$R) |
486 | 486 |
close(RG@assayData$G) |
487 | 487 |
close(RG@assayData$zero) |
488 |
- |
|
488 |
+ |
|
489 | 489 |
## Warning - not 100% sure that the code below is correct - could be more complicated than this |
490 |
- |
|
490 |
+ |
|
491 | 491 |
# Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe |
492 | 492 |
# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]") |
493 |
- |
|
493 |
+ |
|
494 | 494 |
# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
495 | 495 |
# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
496 |
- |
|
496 |
+ |
|
497 | 497 |
# Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
498 | 498 |
# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
499 | 499 |
|
500 | 500 |
# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red |
501 | 501 |
# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green |
502 |
- |
|
502 |
+ |
|
503 | 503 |
# For now zero out Infinium I probes |
504 | 504 |
XY@assayData$X[infI,] = 0 |
505 | 505 |
XY@assayData$Y[infI,] = 0 |
506 |
- XY@assayData$zero[infI,] = 0 |
|
506 |
+ XY@assayData$zero[infI,] = 0 |
|
507 | 507 |
gc() |
508 | 508 |
|
509 | 509 |
# if(class(XY@assayData$X)[1]=="ff_matrix") { |
... | ... |
@@ -511,7 +511,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
511 | 511 |
close(XY@assayData$Y) |
512 | 512 |
close(XY@assayData$zero) |
513 | 513 |
# } |
514 |
- |
|
514 |
+ |
|
515 | 515 |
# storageMode(XY) = "lockedEnvironment" |
516 | 516 |
XY |
517 | 517 |
} |
... | ... |
@@ -530,15 +530,15 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
530 | 530 |
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
531 | 531 |
|
532 | 532 |
stripnum <- getVarInEnv("stripnum") |
533 |
- |
|
533 |
+ |
|
534 | 534 |
if(useTarget) |
535 | 535 |
targetdist = getVarInEnv("reference") |
536 |
- |
|
536 |
+ |
|
537 | 537 |
if(verbose){ |
538 | 538 |
message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.") |
539 | 539 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3) |
540 | 540 |
} |
541 |
- |
|
541 |
+ |
|
542 | 542 |
# if(class(XY@assayData$X)[1]=="ff_matrix") { |
543 | 543 |
open(XY@assayData$X) |
544 | 544 |
open(XY@assayData$Y) |
... | ... |
@@ -566,7 +566,7 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
566 | 566 |
close(XY@assayData$X) |
567 | 567 |
close(XY@assayData$Y) |
568 | 568 |
# } |
569 |
- |
|
569 |
+ |
|
570 | 570 |
if(verbose) |
571 | 571 |
cat("\n") |
572 | 572 |
XY |
... | ... |
@@ -609,7 +609,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
609 | 609 |
SMEDIAN <- getVarInEnv("SMEDIAN") |
610 | 610 |
theKnots <- getVarInEnv("theKnots") |
611 | 611 |
narrays = ncol(XY) |
612 |
- |
|
612 |
+ |
|
613 | 613 |
if(save.it & !missing(cnFile)) { |
614 | 614 |
# separate out copy number probes |
615 | 615 |
npIndex = getVarInEnv("npProbesFid") |
... | ... |
@@ -618,7 +618,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
618 | 618 |
open(XY@assayData$X) |
619 | 619 |
open(XY@assayData$Y) |
620 | 620 |
open(XY@assayData$zero) |
621 |
- |
|
621 |
+ |
|
622 | 622 |
A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays) |
623 | 623 |
B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays) |
624 | 624 |
|
... | ... |
@@ -631,26 +631,26 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
631 | 631 |
|
632 | 632 |
colnames(A) <- colnames(B) <- colnames(zero) <- sns |
633 | 633 |
rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex) |
634 |
- |
|
634 |
+ |
|
635 | 635 |
cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName) |
636 |
- |
|
637 |
- t0 <- proc.time() |
|
638 |
- save(cnAB, file=cnFile) |
|
636 |
+ |
|
637 |
+ t0 <- proc.time() |
|
638 |
+ save(cnAB, file=cnFile) |
|
639 | 639 |
t0 <- proc.time()-t0 |
640 | 640 |
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".") |
641 | 641 |
rm(cnAB, B, zero) |
642 | 642 |
} |
643 | 643 |
} |
644 |
- |
|
644 |
+ |
|
645 | 645 |
# next process snp probes |
646 | 646 |
snpIndex = getVarInEnv("snpProbesFid") |
647 | 647 |
nprobes <- length(snpIndex) |
648 |
- |
|
648 |
+ |
|
649 | 649 |
##We will read each cel file, summarize, and run EM one by one |
650 | 650 |
##We will save parameters of EM to use later |
651 | 651 |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double") |
652 | 652 |
SNR <- initializeBigVector("crlmmSNR-", narrays, "double") |
653 |
- SKW <- initializeBigVector("crlmmSKW-", narrays, "double") |
|
653 |
+ SKW <- initializeBigVector("crlmmSKW-", narrays, "double") |
|
654 | 654 |
|
655 | 655 |
## This is the sample for the fitting of splines |
656 | 656 |
## BC: I like better the idea of the user passing the seed, |
... | ... |
@@ -659,7 +659,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
659 | 659 |
set.seed(seed) |
660 | 660 |
idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
661 | 661 |
idx2 <- sample(nprobes, 10^5) |
662 |
- |
|
662 |
+ |
|
663 | 663 |
##S will hold (A+B)/2 and M will hold A-B |
664 | 664 |
##NOTE: We actually dont need to save S. Only for pics etc... |
665 | 665 |
##f is the correction. we save to avoid recomputing |
... | ... |
@@ -667,7 +667,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
667 | 667 |
A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer") |
668 | 668 |
B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer") |
669 | 669 |
zero <- initializeBigMatrix("crlmmZero-", nprobes, narrays, "integer") |
670 |
- |
|
670 |
+ |
|
671 | 671 |
if(verbose){ |
672 | 672 |
message("Calibrating ", narrays, " arrays.") |
673 | 673 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3) |
... | ... |
@@ -676,11 +676,11 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
676 | 676 |
open(XY@assayData$X) |
677 | 677 |
open(XY@assayData$Y) |
678 | 678 |
open(XY@assayData$zero) |
679 |
- |
|
679 |
+ |
|
680 | 680 |
for(i in 1:narrays){ |
681 | 681 |
A[,i] = exprs(channel(XY, "X"))[snpIndex,i] |
682 | 682 |
B[,i] = exprs(channel(XY, "Y"))[snpIndex,i] |
683 |
- zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i] |
|
683 |
+ zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i] |
|
684 | 684 |
# SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3) |
685 | 685 |
SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3) |
686 | 686 |
if(fitMixture){ |
... | ... |
@@ -707,8 +707,8 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
707 | 707 |
|
708 | 708 |
close(XY@assayData$X) |
709 | 709 |
close(XY@assayData$Y) |
710 |
- close(XY@assayData$zero) |
|
711 |
- |
|
710 |
+ close(XY@assayData$zero) |
|
711 |
+ |
|
712 | 712 |
# if(class(A)[1]=="ff_matrix") { |
713 | 713 |
close(A) |
714 | 714 |
close(B) |
... | ... |
@@ -716,8 +716,8 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
716 | 716 |
close(SKW) |
717 | 717 |
close(mixtureParams) |
718 | 718 |
close(SNR) |
719 |
-# } |
|
720 |
- |
|
719 |
+# } |
|
720 |
+ |
|
721 | 721 |
res = list(A=A, B=B, |
722 | 722 |
zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW, |
723 | 723 |
mixtureParams=mixtureParams, cdfName=cdfName) |
... | ... |
@@ -726,11 +726,11 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
726 | 726 |
open(res[["B"]]) |
727 | 727 |
open(res[["zero"]]) |
728 | 728 |
open(res[["SNR"]]) |
729 |
- open(res[["mixtureParams"]]) |
|
730 |
- |
|
729 |
+ open(res[["mixtureParams"]]) |
|
730 |
+ |
|
731 | 731 |
if(save.it & !missing(snpFile)) { |
732 |
- t0 <- proc.time() |
|
733 |
- save(res, file=snpFile) |
|
732 |
+ t0 <- proc.time() |
|
733 |
+ save(res, file=snpFile) |
|
734 | 734 |
t0 <- proc.time()-t0 |
735 | 735 |
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
736 | 736 |
} |
... | ... |
@@ -770,7 +770,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
770 | 770 |
stop("Both RG and XY specified - please use one or the other") |
771 | 771 |
} |
772 | 772 |
if (missing(sns)) sns <- sampleNames(XY) #$X |
773 |
- |
|
773 |
+ |
|
774 | 774 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
775 | 775 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, |
776 | 776 |
save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
... | ... |
@@ -786,13 +786,13 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
786 | 786 |
# alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)), |
787 | 787 |
# call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)), |
788 | 788 |
# callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)), |
789 |
-# annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD) |
|
789 |
+# annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD) |
|
790 | 790 |
# sampleNames(callSet) <- sns |
791 | 791 |
# featureNames(callSet) <- res[["gns"]] |
792 | 792 |
# pData(callSet)$SKW <- rep(NA, length(sns)) |
793 | 793 |
# pData(callSet)$SNR <- rep(NA, length(sns)) |
794 | 794 |
# pData(callSet)$gender <- rep(NA, length(sns)) |
795 |
- |
|
795 |
+ |
|
796 | 796 |
}else{ |
797 | 797 |
if(verbose) message("Loading ", snpFile, ".") |
798 | 798 |
obj <- load(snpFile) |
... | ... |
@@ -808,7 +808,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
808 | 808 |
|
809 | 809 |
# rm(phenD, protD , fD) |
810 | 810 |
|
811 |
-# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
811 |
+# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
812 | 812 |
# suppressWarnings(A(callSet) <- res[["A"]]) |
813 | 813 |
# suppressWarnings(B(callSet) <- res[["B"]]) |
814 | 814 |
# pData(callSet)$SKW <- res$SKW |
... | ... |
@@ -817,7 +817,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
817 | 817 |
# rm(res); gc() |
818 | 818 |
if(row.names) row.names=res$gns else row.names=NULL |
819 | 819 |
if(col.names) col.names=res$sns else col.names=NULL |
820 |
- |
|
820 |
+ |
|
821 | 821 |
res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
822 | 822 |
B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
823 | 823 |
SNR=res[["SNR"]], # callSet$SNR, # [j], |
... | ... |
@@ -834,7 +834,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
834 | 834 |
verbose=verbose, |
835 | 835 |
returnParams=returnParams, |
836 | 836 |
badSNP=badSNP) |
837 |
-# rm(res); gc() |
|
837 |
+# rm(res); gc() |
|
838 | 838 |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]]) |
839 | 839 |
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]) |
840 | 840 |
# callSet$gender[j] <- tmp$gender |
... | ... |
@@ -851,8 +851,8 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
851 | 851 |
} |
852 | 852 |
|
853 | 853 |
|
854 |
-## MR: Below is a more memory efficient version of crlmmIllumina() which |
|
855 |
-## reads in the .idats and genotypes in the one function and removes objects |
|
854 |
+## MR: Below is a more memory efficient version of crlmmIllumina() which |
|
855 |
+## reads in the .idats and genotypes in the one function and removes objects |
|
856 | 856 |
## after they have been used |
857 | 857 |
crlmmIlluminaV2 = function(sampleSheet=NULL, |
858 | 858 |
arrayNames=NULL, |
... | ... |
@@ -867,7 +867,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
867 | 867 |
# rgFile, |
868 | 868 |
stripNorm=TRUE, |
869 | 869 |
useTarget=TRUE, |
870 |
- row.names=TRUE, |
|
870 |
+ row.names=TRUE, |
|
871 | 871 |
col.names=TRUE, |
872 | 872 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
873 | 873 |
seed=1, save.it=FALSE, snpFile, cnFile, |
... | ... |
@@ -878,7 +878,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
878 | 878 |
if(missing(cdfName)) stop("must specify cdfName") |
879 | 879 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
880 | 880 |
# if(missing(sns)) sns <- basename(arrayNames) |
881 |
- |
|
881 |
+ |
|
882 | 882 |
# if (save.rg & missing(rgFile)) |
883 | 883 |
# stop("'rgFile' is missing, and you chose save.rg") |
884 | 884 |
if (save.it & (missing(snpFile) | missing(cnFile))) |
... | ... |
@@ -914,7 +914,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
914 | 914 |
open(res[["B"]]) |
915 | 915 |
open(res[["SNR"]]) |
916 | 916 |
open(res[["mixtureParams"]]) |
917 |
- |
|
917 |
+ |
|
918 | 918 |
# fD = featureData(XY) |
919 | 919 |
# phenD = XY@phenoData |
920 | 920 |
# protD = XY@protocolData |
... | ... |
@@ -938,7 +938,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
938 | 938 |
# featureNames(callSet) <- res[["gns"]] |
939 | 939 |
# pData(callSet)$SKW <- rep(NA, length(sns)) |
940 | 940 |
# pData(callSet)$SNR <- rep(NA, length(sns)) |
941 |
-# pData(callSet)$gender <- rep(NA, length(sns)) |
|
941 |
+# pData(callSet)$gender <- rep(NA, length(sns)) |
|
942 | 942 |
# } |
943 | 943 |
# pData(callSet)[j,] <- phenD |
944 | 944 |
# pData(protocolData(callSet))[j,] <- protD |
... | ... |
@@ -946,7 +946,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
946 | 946 |
# pData(protocolData(callSet)) <- protD |
947 | 947 |
|
948 | 948 |
# rm(phenD, protD, fD) |
949 |
- |
|
949 |
+ |
|
950 | 950 |
# if(k > 1 & nrow(res[[1]]) != nrow(callSet)){ |
951 | 951 |
##RS: I don't understand why the IDATS for the |
952 | 952 |
##same platform potentially have different lengths |
... | ... |
@@ -954,7 +954,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
954 | 954 |
# res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ] |
955 | 955 |
# } |
956 | 956 |
|
957 |
-# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
957 |
+# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
958 | 958 |
# suppressWarnings(A(callSet)[, j] <- res[["A"]]) |
959 | 959 |
# suppressWarnings(B(callSet)[, j] <- res[["B"]]) |
960 | 960 |
# suppressWarnings(A(callSet) <- res[["A"]]) |
... | ... |
@@ -966,7 +966,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
966 | 966 |
# mixtureParams <- res$mixtureParams |
967 | 967 |
# rm(res); gc() |
968 | 968 |
if(row.names) row.names=res$gns else row.names=NULL |
969 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
969 |
+ if(col.names) col.names=res$sns else col.names=NULL |
|
970 | 970 |
res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
971 | 971 |
B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
972 | 972 |
SNR=res[["SNR"]], # callSet$SNR, # [j], |
... | ... |
@@ -983,7 +983,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
983 | 983 |
verbose=verbose, |
984 | 984 |
returnParams=returnParams, |
985 | 985 |
badSNP=badSNP) |
986 |
-# rm(res); gc() |
|
986 |
+# rm(res); gc() |
|
987 | 987 |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]]) |
988 | 988 |
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]) |
989 | 989 |
# callSet$gender[j] <- tmp$gender |
... | ... |
@@ -18,9 +18,9 @@ |
18 | 18 |
|
19 | 19 |
\begin{document} |
20 | 20 |
\title{Using \crlmm{} for copy number estimation and genotype calling |
21 |
- with Illumina platforms} |
|
21 |
+ with Illumina platforms} |
|
22 | 22 |
|
23 |
-\date{\today} |
|
23 |
+\date{\today} |
|
24 | 24 |
|
25 | 25 |
\author{Rob Scharpf} |
26 | 26 |
\maketitle |
... | ... |
@@ -40,10 +40,11 @@ which annotation packages are currently available. Next we create a |
40 | 40 |
directory where output files will be stored and indicate the directory |
41 | 41 |
that contains the IDAT files that will be used in our analysis. |
42 | 42 |
|
43 |
+ |
|
43 | 44 |
<<setup, echo=FALSE, results=hide>>= |
44 | 45 |
options(width=70) |
45 | 46 |
options(continue=" ") |
46 |
-@ |
|
47 |
+@ |
|
47 | 48 |
|
48 | 49 |
<<libraries>>= |
49 | 50 |
library(ff) |
... | ... |
@@ -52,9 +53,9 @@ crlmm:::validCdfNames() |
52 | 53 |
if(getRversion() < "2.12.0"){ |
53 | 54 |
rpath <- getRversion() |
54 | 55 |
} else rpath <- "trunk" |
55 |
-outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/illumina_vignette", sep="") |
|
56 |
+outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/illumina_vignette", sep="") |
|
56 | 57 |
datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k" |
57 |
-@ |
|
58 |
+@ |
|
58 | 59 |
|
59 | 60 |
\paragraph{About this vignette.} This vignette was created using |
60 | 61 |
Illumina IDAT files that are located in a specific directory on my |
... | ... |
@@ -66,7 +67,7 @@ these directories exist. |
66 | 67 |
<<checkSetup>>= |
67 | 68 |
if(!file.exists(outdir)) stop("Please specify valid directory for storing output") |
68 | 69 |
if(!file.exists(pathToCels)) stop("Please specify the correct path to the CEL files") |
69 |
-@ |
|
70 |
+@ |
|
70 | 71 |
|
71 | 72 |
\paragraph{Preprocessing Illumina IDAT files.} |
72 | 73 |
To perform copy number analysis on the Illumina platform, several |
... | ... |
@@ -95,50 +96,51 @@ from the RG object and added to the \Robject{SnpSet} returned by |
95 | 96 |
%(Users wanting the \Rpackage{ff} implementation should try |
96 | 97 |
%\Rfunction{crlmm:::crlmmIllumina2} with the same arguments.) |
97 | 98 |
|
99 |
+ |
|
98 | 100 |
<<samplesToProcess>>= |
99 | 101 |
samplesheet = read.csv(file.path(datadir, "HumanHap370Duo_Sample_Map.csv"), header=TRUE, as.is=TRUE) |
100 | 102 |
samplesheet <- samplesheet[-c(28:46,61:75,78:79), ] |
101 | 103 |
arrayNames <- file.path(datadir, unique(samplesheet[, "SentrixPosition"])) |
102 | 104 |
grnfiles = all(file.exists(paste(arrayNames, "_Grn.idat", sep=""))) |
103 | 105 |
redfiles = all(file.exists(paste(arrayNames, "_Red.idat", sep=""))) |
104 |
-@ |
|
106 |
+@ |
|
105 | 107 |
|
106 | 108 |
<<samplesToProcess2, echo=FALSE>>= |
107 |
-## To speed up repeated calls to Sweave |
|
109 |
+## To speed up repeated calls to Sweave |
|
108 | 110 |
RG <- checkExists("RG", .path=outdir, |
109 | 111 |
.FUN=readIdatFiles2, |
110 | 112 |
sampleSheet=samplesheet, |
111 | 113 |
path=dirname(arrayNames[1]), |
112 |
- arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
114 |
+ arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
113 | 115 |
saveDate=TRUE) |
114 | 116 |
annotation(RG) <- "human370v1c" |
115 | 117 |
crlmmResult <- checkExists("crlmmResult", |
116 | 118 |
.path=outdir, |
117 | 119 |
.FUN=crlmmIllumina2, |
118 | 120 |
RG=RG, |
119 |
- sns=pData(RG)$ID, |
|
121 |
+ sns=pData(RG)$ID, |
|
120 | 122 |
returnParams=TRUE, |
121 | 123 |
cnFile=file.path(outdir, "cnFile.rda"), |
122 | 124 |
snpFile=file.path(outdir, "snpFile.rda"), |
123 | 125 |
save.it=TRUE) |
124 | 126 |
protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate |
125 |
-@ |
|
127 |
+@ |
|
126 | 128 |
|
127 | 129 |
<<samplesToProcess3, eval=FALSE>>= |
128 |
-RG <- readIdatFiles2(samplesheet[1:10, ], |
|
129 |
- path=dirname(arrayNames[1]), |
|
130 |
- arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
130 |
+RG <- readIdatFiles2(samplesheet[1:10, ], |
|
131 |
+ path=dirname(arrayNames[1]), |
|
132 |
+ arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
131 | 133 |
saveDate=TRUE) |
132 | 134 |
annotation(RG) <- "human370v1c" |
133 |
-crlmmResult <- crlmmIllumina2(RG=RG, |
|
134 |
- cdfName="human370v1c", |
|
135 |
- sns=pData(RG)$ID, |
|
135 |
+crlmmResult <- crlmmIllumina2(RG=RG, |
|
136 |
+ cdfName="human370v1c", |
|
137 |
+ sns=pData(RG)$ID, |
|
136 | 138 |
returnParams=TRUE, |
137 | 139 |
cnFile=file.path(outdir, "cnFile.rda"), |
138 | 140 |
snpFile=file.path(outdir, "snpFile.rda"), |
139 | 141 |
save.it=TRUE) |
140 | 142 |
protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate |
141 |
-@ |
|
143 |
+@ |
|
142 | 144 |
|
143 | 145 |
%\noindent Finally, we load a few of the intermediate files that were |
144 | 146 |
%created during the preprocessing and genotyping. |
... | ... |
@@ -147,13 +149,13 @@ protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate |
147 | 149 |
%res <- get("res") |
148 | 150 |
%load(file.path(outdir, "cnFile.rda")) |
149 | 151 |
%cnAB <- get("cnAB") |
150 |
-%@ |
|
152 |
+%@ |
|
151 | 153 |
% |
152 | 154 |
%<<loadIntermediate, echo=FALSE>>= |
153 | 155 |
%trace(checkExists, browser) |
154 | 156 |
%res <- checkExists("snpFile", .path=outdir, .FUN=load, file=file.path(outdir, "snpFile.rda")) |
155 | 157 |
%cnAB <- checkExists("cnFile", .path=outdir, .FUN=load, file=file.path(outdir, "cnFile.rda")) |
156 |
-%@ |
|
158 |
+%@ |
|
157 | 159 |
|
158 | 160 |
After running the crlmm algorithm, we construct a container for |
159 | 161 |
storing the quantile normalized intensities, genotype calls, and |
... | ... |
@@ -172,11 +174,11 @@ if(!file.exists(file.path(outdir, "cnSet.rda"))){ |
172 | 174 |
snpFile=snpFile, |
173 | 175 |
cnFile=cnFile) |
174 | 176 |
} |
175 |
-@ |
|
177 |
+@ |
|
176 | 178 |
|
177 | 179 |
<<constructContainer2, eval=FALSE>>= |
178 | 180 |
container <- constructIlluminaCNSet(crlmmResult=crlmmResult, snpFile=snpFile, cnFile=cnFile) |
179 |
-@ |
|
181 |
+@ |
|
180 | 182 |
|
181 | 183 |
As described in the \texttt{copynumber} vignette, two \R{} functions |
182 | 184 |
for copy number estimation are available: \Rfunction{crlmmCopynumber} |
... | ... |
@@ -194,11 +196,11 @@ the genotyping and copy number estimation steps. Here, we use the |
194 | 196 |
cnSet <- checkExists("cnSet", .path=outdir, |
195 | 197 |
.FUN=crlmmCopynumber, |
196 | 198 |
object=container) |
197 |
-@ |
|
199 |
+@ |
|
198 | 200 |
|
199 | 201 |
<<estimateCopynumber, eval=FALSE>>= |
200 | 202 |
cnSet <- crlmmCopynumberLD(container) |
201 |
-@ |
|
203 |
+@ |
|
202 | 204 |
|
203 | 205 |
%In the following code, we define helper functions to construct a |
204 | 206 |
%\Robject{featureData} object that chromosome and physical position |
... | ... |
@@ -230,12 +232,12 @@ cb <- CB(cnSet)[marker.index, ]/100 |
230 | 232 |
missing.index <- which(rowSums(is.na(ca))==ncol(cnSet)) |
231 | 233 |
ca <- ca[-missing.index, ] |
232 | 234 |
cb <- cb[-missing.index, ] |
233 |
-@ |
|
235 |
+@ |
|
234 | 236 |
\noindent Negating the \Robject{isSnp} function could be used to |
235 | 237 |
extract the estimates at nonpolymorphic markers. For instance, |
236 | 238 |
<<monomorphic-accessor>>= |
237 | 239 |
cn.monomorphic <- CA(cnSet)[which(chromosome(cnSet) == 21 & !isSnp(cnSet)), ]/100 |
238 |
-@ |
|
240 |
+@ |
|
239 | 241 |
|
240 | 242 |
At polymorphic loci, the total copy number is the sum of the number of |
241 | 243 |
copies of the A allele and the number of copies for the B allele. At |
... | ... |
@@ -251,7 +253,7 @@ cn.total2 <- totalCopyNumber(cnSet, i=marker.index) |
251 | 253 |
cn.total2 <- cn.total2[-missing.index, ] |
252 | 254 |
dimnames(cn.total) <- NULL |
253 | 255 |
all.equal(cn.total2, cn.total) |
254 |
-@ |
|
256 |
+@ |
|
255 | 257 |
|
256 | 258 |
A few simple visualizations may be helpful at this point. The first |
257 | 259 |
plot is a histogram of the signal to noise ratio of the sample -- an |
... | ... |
@@ -260,11 +262,11 @@ statistic tends to be much higher for Illumina than for the Affymetrix |
260 | 262 |
platforms.) The second is a visualization of the total copy number |
261 | 263 |
estimates plotted versus physical position on chromosome 1 for the two |
262 | 264 |
samples with the lowest (top) and highest (bottom) signal to noise |
263 |
-ratios. |
|
265 |
+ratios. |
|
264 | 266 |
|
265 | 267 |
<<snr, fig=TRUE>>= |
266 | 268 |
hist(cnSet$SNR, breaks=15) |
267 |
-@ |
|
269 |
+@ |
|
268 | 270 |
|
269 | 271 |
<<firstSample, fig=TRUE>>= |
270 | 272 |
low.snr <- which(cnSet$SNR == min(cnSet$SNR)) |
... | ... |
@@ -281,7 +283,7 @@ for(j in c(low.snr, high.snr)){ |
281 | 283 |
} |
282 | 284 |
axis(1, at=pretty(x), labels=pretty(x/1e6)) |
283 | 285 |
mtext("Mb", 1, outer=TRUE, line=2) |
284 |
-@ |
|
286 |
+@ |
|
285 | 287 |
|
286 | 288 |
Here's a very simple approach to handle outliers by applying a running |
287 | 289 |
median using a window of size 3. Following outlier removal, we |
... | ... |
@@ -303,12 +305,12 @@ for(j in c(low.snr, high.snr)){ |
303 | 305 |
} |
304 | 306 |
axis(1, at=pretty(x), labels=pretty(x/1e6)) |
305 | 307 |
mtext("Mb", 1, outer=TRUE, line=2) |
306 |
-@ |
|
308 |
+@ |
|
307 | 309 |
|
308 | 310 |
\section{Session information} |
309 | 311 |
<<sessionInfo, results=tex>>= |
310 | 312 |
toLatex(sessionInfo()) |
311 |
-@ |
|
313 |
+@ |
|
312 | 314 |
|
313 | 315 |
|
314 | 316 |
\end{document} |