- added platform to validCdfNames()
- added platform to chipList in RGtoXY and RGtoXY2
Other minor changes to copynumber vignette
- removed eol whitespace
- removed some commented lines towards the end
- added a message to indicate loading a saved object
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/branches/RELEASE_2_6/madman/Rpacks/crlmm@49102 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -30,7 +30,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
30 | 30 |
barcode = sampleSheet[,arrayInfoColNames$barcode] |
31 | 31 |
arrayNames=barcode |
32 | 32 |
} |
33 |
- if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
33 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
34 | 34 |
position = sampleSheet[,arrayInfoColNames$position] |
35 | 35 |
if(is.null(arrayNames)) |
36 | 36 |
arrayNames=position |
... | ... |
@@ -44,7 +44,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
44 | 44 |
} |
45 | 45 |
} |
46 | 46 |
pd = new("AnnotatedDataFrame", data = sampleSheet) |
47 |
- sampleNames(pd) <- basename(arrayNames) |
|
47 |
+ sampleNames(pd) <- basename(arrayNames) |
|
48 | 48 |
} |
49 | 49 |
if(is.null(arrayNames)) { |
50 | 50 |
arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
... | ... |
@@ -54,7 +54,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
54 | 54 |
} |
55 | 55 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
56 | 56 |
} |
57 |
- |
|
57 |
+ |
|
58 | 58 |
narrays = length(arrayNames) |
59 | 59 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
60 | 60 |
redfiles = paste(arrayNames, fileExt$red, sep=sep) |
... | ... |
@@ -71,7 +71,7 @@ readIdatFiles <- function(sampleSheet=NULL, |
71 | 71 |
redidats <- redfiles |
72 | 72 |
} |
73 | 73 |
if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files") |
74 |
- if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
74 |
+ if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
75 | 75 |
## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){ |
76 | 76 |
## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n", |
77 | 77 |
## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" ")) |
... | ... |
@@ -146,12 +146,12 @@ readIdatFiles <- function(sampleSheet=NULL, |
146 | 146 |
} |
147 | 147 |
rm(G) |
148 | 148 |
gc() |
149 |
- |
|
149 |
+ |
|
150 | 150 |
cat(paste(sep, fileExt$red, sep=""), "\n") |
151 | 151 |
R = readIDAT(redidats[i]) |
152 | 152 |
idsR = rownames(R$Quants) |
153 |
- |
|
154 |
- if(length(ids)==length(idsG)) { |
|
153 |
+ |
|
154 |
+ if(length(ids)==length(idsG)) { |
|
155 | 155 |
if(sum(ids==idsR)==nprobes) { |
156 | 156 |
RG@assayData$R[,i] = R$Quants[ ,"Mean"] |
157 | 157 |
zeroR = R$Quants[ ,"NBeads"]==0 |
... | ... |
@@ -204,7 +204,7 @@ readIdatFiles2 <- function(sampleSheet=NULL, |
204 | 204 |
barcode = sampleSheet[,arrayInfoColNames$barcode] |
205 | 205 |
arrayNames=barcode |
206 | 206 |
} |
207 |
- if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
207 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
208 | 208 |
position = sampleSheet[,arrayInfoColNames$position] |
209 | 209 |
if(is.null(arrayNames)) |
210 | 210 |
arrayNames=position |
... | ... |
@@ -218,7 +218,7 @@ readIdatFiles2 <- function(sampleSheet=NULL, |
218 | 218 |
} |
219 | 219 |
} |
220 | 220 |
pd = new("AnnotatedDataFrame", data = sampleSheet) |
221 |
- sampleNames(pd) <- basename(arrayNames) |
|
221 |
+ sampleNames(pd) <- basename(arrayNames) |
|
222 | 222 |
} |
223 | 223 |
if(is.null(arrayNames)) { |
224 | 224 |
arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
... | ... |
@@ -228,7 +228,7 @@ readIdatFiles2 <- function(sampleSheet=NULL, |
228 | 228 |
} |
229 | 229 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
230 | 230 |
} |
231 |
- |
|
231 |
+ |
|
232 | 232 |
narrays = length(arrayNames) |
233 | 233 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
234 | 234 |
redfiles = paste(arrayNames, fileExt$red, sep=sep) |
... | ... |
@@ -245,7 +245,7 @@ readIdatFiles2 <- function(sampleSheet=NULL, |
245 | 245 |
redidats <- redfiles |
246 | 246 |
} |
247 | 247 |
if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files") |
248 |
- if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
248 |
+ if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
249 | 249 |
## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){ |
250 | 250 |
## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n", |
251 | 251 |
## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" ")) |
... | ... |
@@ -327,12 +327,12 @@ readIdatFiles2 <- function(sampleSheet=NULL, |
327 | 327 |
} |
328 | 328 |
rm(G) |
329 | 329 |
gc() |
330 |
- |
|
330 |
+ |
|
331 | 331 |
cat(paste(sep, fileExt$red, sep=""), "\n") |
332 | 332 |
R = readIDAT(redidats[i]) |
333 | 333 |
idsR = rownames(R$Quants) |
334 |
- |
|
335 |
- if(length(ids)==length(idsG)) { |
|
334 |
+ |
|
335 |
+ if(length(ids)==length(idsG)) { |
|
336 | 336 |
if(sum(ids==idsR)==nprobes) { |
337 | 337 |
RG@assayData$R[,i] = R$Quants[ ,"Mean"] |
338 | 338 |
zeroR = R$Quants[ ,"NBeads"]==0 |
... | ... |
@@ -369,24 +369,24 @@ readIDAT <- function(idatFile){ |
369 | 369 |
|
370 | 370 |
} |
371 | 371 |
|
372 |
- versionNumber <- readBin(tempCon, "integer", n=1, size=8, |
|
372 |
+ versionNumber <- readBin(tempCon, "integer", n=1, size=8, |
|
373 | 373 |
endian="little", signed=FALSE) |
374 |
- |
|
375 |
- nFields <- readBin(tempCon, "integer", n=1, size=4, |
|
374 |
+ |
|
375 |
+ nFields <- readBin(tempCon, "integer", n=1, size=4, |
|
376 | 376 |
endian="little", signed=FALSE) |
377 | 377 |
|
378 | 378 |
fields <- matrix(0,nFields,3); |
379 | 379 |
colnames(fields) <- c("Field Code", "Byte Offset", "Bytes") |
380 | 380 |
for(i1 in 1:nFields){ |
381 |
- fields[i1,"Field Code"] <- |
|
381 |
+ fields[i1,"Field Code"] <- |
|
382 | 382 |
readBin(tempCon, "integer", n=1, size=2, endian="little", signed=FALSE) |
383 |
- fields[i1,"Byte Offset"] <- |
|
383 |
+ fields[i1,"Byte Offset"] <- |
|
384 | 384 |
readBin(tempCon, "integer", n=1, size=8, endian="little", signed=FALSE) |
385 | 385 |
} |
386 | 386 |
|
387 | 387 |
knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400, |
388 | 388 |
401, 402, 403, 404, 405, 406, 407, 408, 409) |
389 |
- names(knownCodes) <- |
|
389 |
+ names(knownCodes) <- |
|
390 | 390 |
c("nSNPsRead", # 1000 |
391 | 391 |
"IlluminaID", # 102 |
392 | 392 |
"SD", # 103 |
... | ... |
@@ -419,35 +419,35 @@ readIDAT <- function(idatFile){ |
419 | 419 |
} |
420 | 420 |
|
421 | 421 |
seek(tempCon, fields["nSNPsRead", "Byte Offset"]) |
422 |
- nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, |
|
422 |
+ nSNPsRead <- readBin(tempCon, "integer", n=1, size=4, |
|
423 | 423 |
endian="little", signed=FALSE) |
424 | 424 |
|
425 | 425 |
seek(tempCon, fields["IlluminaID", "Byte Offset"]) |
426 |
- IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, |
|
426 |
+ IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4, |
|
427 | 427 |
endian="little", signed=FALSE) |
428 | 428 |
|
429 | 429 |
seek(tempCon, fields["SD", "Byte Offset"]) |
430 |
- SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
430 |
+ SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
431 | 431 |
endian="little", signed=FALSE) |
432 | 432 |
|
433 | 433 |
seek(tempCon, fields["Mean", "Byte Offset"]) |
434 |
- Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
434 |
+ Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2, |
|
435 | 435 |
endian="little", signed=FALSE) |
436 | 436 |
|
437 | 437 |
seek(tempCon, fields["NBeads", "Byte Offset"]) |
438 | 438 |
NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE) |
439 | 439 |
|
440 | 440 |
seek(tempCon, fields["MidBlock", "Byte Offset"]) |
441 |
- nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, |
|
441 |
+ nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4, |
|
442 | 442 |
endian="little", signed=FALSE) |
443 |
- MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, |
|
443 |
+ MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4, |
|
444 | 444 |
endian="little", signed=FALSE) |
445 | 445 |
|
446 | 446 |
seek(tempCon, fields["RunInfo", "Byte Offset"]) |
447 |
- nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, |
|
447 |
+ nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4, |
|
448 | 448 |
endian="little", signed=FALSE) |
449 | 449 |
RunInfo <- matrix(NA, nRunInfoBlocks, 5) |
450 |
- colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", |
|
450 |
+ colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars", |
|
451 | 451 |
"BlockCode", "CodeVersion") |
452 | 452 |
for(i1 in 1:2) { #nRunInfoBlocks){ ## MR edit |
453 | 453 |
for(i2 in 1:5){ |
... | ... |
@@ -457,50 +457,50 @@ readIDAT <- function(idatFile){ |
457 | 457 |
} |
458 | 458 |
|
459 | 459 |
seek(tempCon, fields["RedGreen", "Byte Offset"]) |
460 |
- RedGreen <- readBin(tempCon, "numeric", n=1, size=4, |
|
460 |
+ RedGreen <- readBin(tempCon, "numeric", n=1, size=4, |
|
461 | 461 |
endian="little", signed=FALSE) |
462 |
- #RedGreen <- readBin(tempCon, "integer", n=4, size=1, |
|
462 |
+ #RedGreen <- readBin(tempCon, "integer", n=4, size=1, |
|
463 | 463 |
# endian="little", signed=FALSE) |
464 | 464 |
|
465 | 465 |
seek(tempCon, fields["MostlyNull", "Byte Offset"]) |
466 | 466 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
467 |
- MostlyNull <- readChar(tempCon, nChars) |
|
467 |
+ MostlyNull <- readChar(tempCon, nChars) |
|
468 | 468 |
|
469 | 469 |
seek(tempCon, fields["Barcode", "Byte Offset"]) |
470 | 470 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
471 |
- Barcode <- readChar(tempCon, nChars) |
|
471 |
+ Barcode <- readChar(tempCon, nChars) |
|
472 | 472 |
|
473 | 473 |
seek(tempCon, fields["ChipType", "Byte Offset"]) |
474 | 474 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
475 |
- ChipType <- readChar(tempCon, nChars) |
|
475 |
+ ChipType <- readChar(tempCon, nChars) |
|
476 | 476 |
|
477 | 477 |
seek(tempCon, fields["MostlyA", "Byte Offset"]) |
478 | 478 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
479 |
- MostlyA <- readChar(tempCon, nChars) |
|
479 |
+ MostlyA <- readChar(tempCon, nChars) |
|
480 | 480 |
|
481 | 481 |
seek(tempCon, fields["Unknown.1", "Byte Offset"]) |
482 | 482 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
483 |
- Unknown.1 <- readChar(tempCon, nChars) |
|
483 |
+ Unknown.1 <- readChar(tempCon, nChars) |
|
484 | 484 |
|
485 | 485 |
seek(tempCon, fields["Unknown.2", "Byte Offset"]) |
486 | 486 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
487 |
- Unknown.2 <- readChar(tempCon, nChars) |
|
487 |
+ Unknown.2 <- readChar(tempCon, nChars) |
|
488 | 488 |
|
489 | 489 |
seek(tempCon, fields["Unknown.3", "Byte Offset"]) |
490 | 490 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
491 |
- Unknown.3 <- readChar(tempCon, nChars) |
|
491 |
+ Unknown.3 <- readChar(tempCon, nChars) |
|
492 | 492 |
|
493 | 493 |
seek(tempCon, fields["Unknown.4", "Byte Offset"]) |
494 | 494 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
495 |
- Unknown.4 <- readChar(tempCon, nChars) |
|
495 |
+ Unknown.4 <- readChar(tempCon, nChars) |
|
496 | 496 |
|
497 | 497 |
seek(tempCon, fields["Unknown.5", "Byte Offset"]) |
498 | 498 |
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
499 |
- Unknown.5 <- readChar(tempCon, nChars) |
|
499 |
+ Unknown.5 <- readChar(tempCon, nChars) |
|
500 | 500 |
|
501 | 501 |
close(tempCon) |
502 | 502 |
|
503 |
- Unknowns <- |
|
503 |
+ Unknowns <- |
|
504 | 504 |
list(MostlyNull=MostlyNull, |
505 | 505 |
MostlyA=MostlyA, |
506 | 506 |
Unknown.1=Unknown.1, |
... | ... |
@@ -513,21 +513,21 @@ readIDAT <- function(idatFile){ |
513 | 513 |
colnames(Quants) <- c("Mean", "SD", "NBeads") |
514 | 514 |
rownames(Quants) <- as.character(IlluminaID) |
515 | 515 |
|
516 |
- idatValues <- |
|
517 |
- list(fileSize=fileSize, |
|
518 |
- versionNumber=versionNumber, |
|
519 |
- nFields=nFields, |
|
516 |
+ idatValues <- |
|
517 |
+ list(fileSize=fileSize, |
|
518 |
+ versionNumber=versionNumber, |
|
519 |
+ nFields=nFields, |
|
520 | 520 |
fields=fields, |
521 |
- nSNPsRead=nSNPsRead, |
|
522 |
- #IlluminaID=IlluminaID, |
|
523 |
- #SD=SD, |
|
524 |
- #Mean=Mean, |
|
521 |
+ nSNPsRead=nSNPsRead, |
|
522 |
+ #IlluminaID=IlluminaID, |
|
523 |
+ #SD=SD, |
|
524 |
+ #Mean=Mean, |
|
525 | 525 |
#NBeads=NBeads, |
526 | 526 |
Quants=Quants, |
527 |
- MidBlock=MidBlock, |
|
528 |
- RunInfo=RunInfo, |
|
529 |
- RedGreen=RedGreen, |
|
530 |
- Barcode=Barcode, |
|
527 |
+ MidBlock=MidBlock, |
|
528 |
+ RunInfo=RunInfo, |
|
529 |
+ RedGreen=RedGreen, |
|
530 |
+ Barcode=Barcode, |
|
531 | 531 |
ChipType=ChipType, |
532 | 532 |
Unknowns=Unknowns) |
533 | 533 |
|
... | ... |
@@ -538,20 +538,20 @@ readIDAT <- function(idatFile){ |
538 | 538 |
readBPM <- function(bpmFile){ |
539 | 539 |
|
540 | 540 |
## Reads and parses Illumina BPM files |
541 |
- |
|
541 |
+ |
|
542 | 542 |
fileSize <- file.info(bpmFile)$size |
543 | 543 |
|
544 | 544 |
tempCon <- file(bpmFile,"rb") |
545 | 545 |
|
546 | 546 |
# The first few bytes of the egtFile are some type of |
547 |
- # header, but there's no related byte offset information. |
|
547 |
+ # header, but there's no related byte offset information. |
|
548 | 548 |
|
549 | 549 |
prefixCheck <- readChar(tempCon,3) ## should be "BPM" |
550 | 550 |
|
551 | 551 |
null.1 <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE) |
552 |
- ## should be 1 |
|
553 |
- |
|
554 |
- versionNumber <- |
|
552 |
+ ## should be 1 |
|
553 |
+ |
|
554 |
+ versionNumber <- |
|
555 | 555 |
readBin(tempCon, "integer", n=1, size=4, endian="little", signed=FALSE) |
556 | 556 |
## should be 4 |
557 | 557 |
|
... | ... |
@@ -565,14 +565,14 @@ readBPM <- function(bpmFile){ |
565 | 565 |
entriesByteOffset <- seek(tempCon); |
566 | 566 |
nEntries <- readBin(tempCon, "integer", n=1, size=4, |
567 | 567 |
endian="little", signed=FALSE) |
568 |
- |
|
568 |
+ |
|
569 | 569 |
if(FALSE){ |
570 |
- |
|
570 |
+ |
|
571 | 571 |
snpIndexByteOffset <- seek(tempCon) |
572 | 572 |
snpIndex <- readBin(tempCon, "integer", n=nEntries, size=4, |
573 | 573 |
endian="little", signed=FALSE) |
574 | 574 |
## for the 1M array, these are simply in order from 1 to 1072820. |
575 |
- |
|
575 |
+ |
|
576 | 576 |
snpNamesByteOffset <- seek(tempCon) |
577 | 577 |
snpNames <- rep("A", nEntries) |
578 | 578 |
for(i1 in 1:nEntries){ |
... | ... |
@@ -583,21 +583,21 @@ readBPM <- function(bpmFile){ |
583 | 583 |
} |
584 | 584 |
|
585 | 585 |
seek(tempCon, 15278138) |
586 |
- |
|
586 |
+ |
|
587 | 587 |
normIDByteOffset <- seek(tempCon) |
588 | 588 |
normID <- readBin(tempCon, "integer", n=nEntries, size=1, signed=FALSE) + 1 |
589 |
- |
|
589 |
+ |
|
590 | 590 |
newBlockByteOffset <- seek(tempCon) |
591 | 591 |
newBlock <- readBin(tempCon, "integer", n=10000, size=1, signed=FALSE) |
592 |
- |
|
592 |
+ |
|
593 | 593 |
close(tempCon) |
594 | 594 |
|
595 | 595 |
byteOffsets <- list(entriesByteOffset=entriesByteOffset, |
596 |
- #snpIndexByteOffset=snpIndexByteOffset, |
|
596 |
+ #snpIndexByteOffset=snpIndexByteOffset, |
|
597 | 597 |
#snpNamesByteOffset=snpNamesByteOffset, |
598 | 598 |
normIDByteOffset=normIDByteOffset, |
599 | 599 |
newBlockByteOffset=newBlockByteOffset) |
600 |
- |
|
600 |
+ |
|
601 | 601 |
allStuff <- list(prefixCheck=prefixCheck, |
602 | 602 |
null.1=null.1, |
603 | 603 |
versionNumber=versionNumber, |
... | ... |
@@ -611,7 +611,7 @@ readBPM <- function(bpmFile){ |
611 | 611 |
newBlock=newBlock, |
612 | 612 |
byteOffsets=byteOffsets) |
613 | 613 |
allStuff |
614 |
- |
|
614 |
+ |
|
615 | 615 |
} |
616 | 616 |
|
617 | 617 |
RGtoXY = function(RG, chipType, verbose=TRUE) { |
... | ... |
@@ -623,11 +623,12 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
623 | 623 |
"human370quadv3c", # 370CNV quad |
624 | 624 |
"human550v3b", # 550K |
625 | 625 |
"human1mduov3b", # 1M Duo |
626 |
- "humanomni1quadv1b") # Omni1 quad |
|
626 |
+ "humanomni1quadv1b", # Omni1 quad |
|
627 |
+ "humanomniexpress12v1b") # Omni express 12 |
|
627 | 628 |
if(missing(chipType)){ |
628 | 629 |
chipType = match.arg(annotation(RG), chipList) |
629 | 630 |
} else chipType = match.arg(chipType, chipList) |
630 |
- |
|
631 |
+ |
|
631 | 632 |
pkgname <- getCrlmmAnnotationName(chipType) |
632 | 633 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
633 | 634 |
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
... | ... |
@@ -643,21 +644,21 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
643 | 644 |
bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest |
644 | 645 |
ids <- names(aids) |
645 | 646 |
snpbase <- getVarInEnv("base") |
646 |
- |
|
647 |
+ |
|
647 | 648 |
nsnps = length(aids) |
648 | 649 |
narrays = ncol(RG) |
649 |
- |
|
650 |
+ |
|
650 | 651 |
# aidcol = match("AddressA_ID", colnames(annot)) |
651 | 652 |
# if(is.na(aidcol)) |
652 | 653 |
# aidcol = match("Address", colnames(annot)) |
653 | 654 |
# bidcol = match("AddressB_ID", colnames(annot)) |
654 |
-# if(is.na(bidcol)) |
|
655 |
+# if(is.na(bidcol)) |
|
655 | 656 |
# bidcol = match("Address2", colnames(annot)) |
656 | 657 |
# aids = annot[, aidcol] |
657 | 658 |
# bids = annot[, bidcol] |
658 | 659 |
# snpids = annot[,"Name"] |
659 |
-# snpbase = annot[,"SNP"] |
|
660 |
- infI = !is.na(bids) & bids!=0 |
|
660 |
+# snpbase = annot[,"SNP"] |
|
661 |
+ infI = !is.na(bids) & bids!=0 |
|
661 | 662 |
aord = match(aids, featureNames(RG)) # NAs are possible here |
662 | 663 |
bord = match(bids, featureNames(RG)) # and here |
663 | 664 |
# argrg = aids[rrgg] |
... | ... |
@@ -670,7 +671,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
670 | 671 |
annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment") |
671 | 672 |
rm(tmpmat) |
672 | 673 |
gc() |
673 |
- |
|
674 |
+ |
|
674 | 675 |
# First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
675 | 676 |
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red |
676 | 677 |
XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green |
... | ... |
@@ -680,19 +681,19 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
680 | 681 |
# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],] |
681 | 682 |
# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],] |
682 | 683 |
gc() |
683 |
- |
|
684 |
+ |
|
684 | 685 |
## Warning - not 100% sure that the code below is correct - could be more complicated than this |
685 |
- |
|
686 |
+ |
|
686 | 687 |
# Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe |
687 | 688 |
# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]") |
688 |
- |
|
689 |
+ |
|
689 | 690 |
# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
690 | 691 |
# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
691 | 692 |
# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],] |
692 | 693 |
# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],] |
693 | 694 |
# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],] |
694 | 695 |
# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],] |
695 |
- |
|
696 |
+ |
|
696 | 697 |
# Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
697 | 698 |
# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
698 | 699 |
|
... | ... |
@@ -702,11 +703,11 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
702 | 703 |
# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],] |
703 | 704 |
# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],] |
704 | 705 |
# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],] |
705 |
- |
|
706 |
+ |
|
706 | 707 |
# For now zero out Infinium I probes |
707 | 708 |
XY@assayData$X[infI,] = 0 |
708 | 709 |
XY@assayData$Y[infI,] = 0 |
709 |
- XY@assayData$zero[infI,] = 0 |
|
710 |
+ XY@assayData$zero[infI,] = 0 |
|
710 | 711 |
# XY@assayData$Xnb[infI,] = 0 |
711 | 712 |
# XY@assayData$Ynb[infI,] = 0 |
712 | 713 |
# XY@assayData$Xse[infI,] = 0 |
... | ... |
@@ -721,7 +722,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
721 | 722 |
|
722 | 723 |
|
723 | 724 |
RGtoXY2 = function(RG, chipType, verbose=TRUE) { |
724 |
- chipList = c("human1mv1c", # 1M |
|
725 |
+ chipList = c("human1mv1c", # 1M |
|
725 | 726 |
"human370v1c", # 370CNV |
726 | 727 |
"human650v3a", # 650Y |
727 | 728 |
"human610quadv1b", # 610 quad |
... | ... |
@@ -729,11 +730,12 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) { |
729 | 730 |
"human370quadv3c", # 370CNV quad |
730 | 731 |
"human550v3b", # 550K |
731 | 732 |
"human1mduov3b", # 1M Duo |
732 |
- "humanomni1quadv1b") # Omni1 quad |
|
733 |
+ "humanomni1quadv1b", # Omni1 quad |
|
734 |
+ "humanomniexpress12v1b") # Omni express 12 |
|
733 | 735 |
if(missing(chipType)){ |
734 | 736 |
chipType = match.arg(annotation(RG), chipList) |
735 | 737 |
} else chipType = match.arg(chipType, chipList) |
736 |
- |
|
738 |
+ |
|
737 | 739 |
pkgname <- getCrlmmAnnotationName(chipType) |
738 | 740 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
739 | 741 |
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
... | ... |
@@ -749,21 +751,21 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) { |
749 | 751 |
bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest |
750 | 752 |
ids <- names(aids) |
751 | 753 |
snpbase <- getVarInEnv("base") |
752 |
- |
|
754 |
+ |
|
753 | 755 |
nsnps = length(aids) |
754 | 756 |
narrays = ncol(RG) |
755 |
- |
|
757 |
+ |
|
756 | 758 |
# aidcol = match("AddressA_ID", colnames(annot)) |
757 | 759 |
# if(is.na(aidcol)) |
758 | 760 |
# aidcol = match("Address", colnames(annot)) |
759 | 761 |
# bidcol = match("AddressB_ID", colnames(annot)) |
760 |
-# if(is.na(bidcol)) |
|
762 |
+# if(is.na(bidcol)) |
|
761 | 763 |
# bidcol = match("Address2", colnames(annot)) |
762 | 764 |
# aids = annot[, aidcol] |
763 | 765 |
# bids = annot[, bidcol] |
764 | 766 |
# snpids = annot[,"Name"] |
765 |
-# snpbase = annot[,"SNP"] |
|
766 |
- infI = !is.na(bids) & bids!=0 |
|
767 |
+# snpbase = annot[,"SNP"] |
|
768 |
+ infI = !is.na(bids) & bids!=0 |
|
767 | 769 |
aord = match(aids, featureNames(RG)) # NAs are possible here |
768 | 770 |
bord = match(bids, featureNames(RG)) # and here |
769 | 771 |
# argrg = aids[rrgg] |
... | ... |
@@ -782,7 +784,7 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) { |
782 | 784 |
XY@assayData$X[1:nsnps,] = 0 |
783 | 785 |
XY@assayData$Y[1:nsnps,] = 0 |
784 | 786 |
XY@assayData$zero[1:nsnps,] = 0 |
785 |
- |
|
787 |
+ |
|
786 | 788 |
# First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
787 | 789 |
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red |
788 | 790 |
XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green |
... | ... |
@@ -792,19 +794,19 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) { |
792 | 794 |
# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],] |
793 | 795 |
# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],] |
794 | 796 |
gc() |
795 |
- |
|
797 |
+ |
|
796 | 798 |
## Warning - not 100% sure that the code below is correct - could be more complicated than this |
797 |
- |
|
799 |
+ |
|
798 | 800 |
# Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe |
799 | 801 |
# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]") |
800 |
- |
|
802 |
+ |
|
801 | 803 |
# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
802 | 804 |
# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
803 | 805 |
# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],] |
804 | 806 |
# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],] |
805 | 807 |
# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],] |
806 | 808 |
# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],] |
807 |
- |
|
809 |
+ |
|
808 | 810 |
# Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
809 | 811 |
# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
810 | 812 |
|
... | ... |
@@ -814,11 +816,11 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) { |
814 | 816 |
# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],] |
815 | 817 |
# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],] |
816 | 818 |
# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],] |
817 |
- |
|
819 |
+ |
|
818 | 820 |
# For now zero out Infinium I probes |
819 | 821 |
XY@assayData$X[infI,] = 0 |
820 | 822 |
XY@assayData$Y[infI,] = 0 |
821 |
- XY@assayData$zero[infI,] = 0 |
|
823 |
+ XY@assayData$zero[infI,] = 0 |
|
822 | 824 |
# XY@assayData$Xnb[infI,] = 0 |
823 | 825 |
# XY@assayData$Ynb[infI,] = 0 |
824 | 826 |
# XY@assayData$Xse[infI,] = 0 |
... | ... |
@@ -846,10 +848,10 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
846 | 848 |
# data(preprocStuff, package=pkgname, envir=.crlmmPkgEnv) |
847 | 849 |
|
848 | 850 |
stripnum <- getVarInEnv("stripnum") |
849 |
- |
|
851 |
+ |
|
850 | 852 |
if(useTarget) |
851 | 853 |
targetdist = getVarInEnv("reference") |
852 |
- |
|
854 |
+ |
|
853 | 855 |
# Xqws = Yqws = matrix(0, nrow(XY), ncol(XY)) |
854 | 856 |
# colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X |
855 | 857 |
# rownames(Xqws) = rownames(Yqws) = featureNames(XY) |
... | ... |
@@ -857,7 +859,7 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
857 | 859 |
if(verbose){ |
858 | 860 |
message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.") |
859 | 861 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3) |
860 |
- } |
|
862 |
+ } |
|
861 | 863 |
for(s in 1:max(stripnum)) { |
862 | 864 |
if(verbose) { |
863 | 865 |
if (getRversion() > '2.7.0') setTxtProgressBar(pb, s) |
... | ... |
@@ -937,7 +939,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
937 | 939 |
SMEDIAN <- getVarInEnv("SMEDIAN") |
938 | 940 |
theKnots <- getVarInEnv("theKnots") |
939 | 941 |
gns <- getVarInEnv("gns") |
940 |
- |
|
942 |
+ |
|
941 | 943 |
# separate out copy number probes |
942 | 944 |
npIndex = getVarInEnv("npProbesFid") |
943 | 945 |
nprobes = length(npIndex) |
... | ... |
@@ -946,24 +948,24 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
946 | 948 |
B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays) |
947 | 949 |
|
948 | 950 |
# new lines below - useful to keep track of zeroed out probes |
949 |
- zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) |
|
951 |
+ zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) |
|
950 | 952 |
|
951 | 953 |
colnames(A) <- colnames(B) <- colnames(zero) <- sns |
952 | 954 |
rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex) |
953 |
- |
|
955 |
+ |
|
954 | 956 |
cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName) |
955 | 957 |
if(save.it & !missing(cnFile)) { |
956 |
- t0 <- proc.time() |
|
957 |
- save(cnAB, file=cnFile) |
|
958 |
+ t0 <- proc.time() |
|
959 |
+ save(cnAB, file=cnFile) |
|
958 | 960 |
t0 <- proc.time()-t0 |
959 | 961 |
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".") |
960 | 962 |
} |
961 | 963 |
rm(cnAB, B, zero) |
962 |
- |
|
964 |
+ |
|
963 | 965 |
# next process snp probes |
964 | 966 |
snpIndex = getVarInEnv("snpProbesFid") |
965 | 967 |
nprobes <- length(snpIndex) |
966 |
- |
|
968 |
+ |
|
967 | 969 |
##We will read each cel file, summarize, and run EM one by one |
968 | 970 |
##We will save parameters of EM to use later |
969 | 971 |
mixtureParams <- matrix(0, 4, narrays) |
... | ... |
@@ -977,7 +979,7 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
977 | 979 |
set.seed(seed) |
978 | 980 |
idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
979 | 981 |
idx2 <- sample(nprobes, 10^5) |
980 |
- |
|
982 |
+ |
|
981 | 983 |
##S will hold (A+B)/2 and M will hold A-B |
982 | 984 |
##NOTE: We actually dont need to save S. Only for pics etc... |
983 | 985 |
##f is the correction. we save to avoid recomputing |
... | ... |
@@ -986,10 +988,10 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
986 | 988 |
|
987 | 989 |
# new lines below - useful to keep track of zeroed out SNPs |
988 | 990 |
zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays) |
989 |
- |
|
991 |
+ |
|
990 | 992 |
colnames(A) <- colnames(B) <- colnames(zero) <- sns |
991 | 993 |
rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY) |
992 |
- |
|
994 |
+ |
|
993 | 995 |
if(verbose){ |
994 | 996 |
message("Calibrating ", narrays, " arrays.") |
995 | 997 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3) |
... | ... |
@@ -1022,8 +1024,8 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
1022 | 1024 |
res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
1023 | 1025 |
|
1024 | 1026 |
if(save.it & !missing(snpFile)) { |
1025 |
- t0 <- proc.time() |
|
1026 |
- save(res, file=snpFile) |
|
1027 |
+ t0 <- proc.time() |
|
1028 |
+ save(res, file=snpFile) |
|
1027 | 1029 |
t0 <- proc.time()-t0 |
1028 | 1030 |
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
1029 | 1031 |
} |
... | ... |
@@ -1076,7 +1078,7 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5, |
1076 | 1078 |
theKnots <- getVarInEnv("theKnots") |
1077 | 1079 |
# gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps |
1078 | 1080 |
narrays = ncol(XY) |
1079 |
- |
|
1081 |
+ |
|
1080 | 1082 |
if(save.it & !missing(cnFile)) { |
1081 | 1083 |
# separate out copy number probes |
1082 | 1084 |
npIndex = getVarInEnv("npProbesFid") |
... | ... |
@@ -1085,29 +1087,29 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5, |
1085 | 1087 |
B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays) |
1086 | 1088 |
|
1087 | 1089 |
# new lines below - useful to keep track of zeroed out probes |
1088 |
- zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) |
|
1090 |
+ zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) |
|
1089 | 1091 |
|
1090 | 1092 |
colnames(A) <- colnames(B) <- colnames(zero) <- sns |
1091 | 1093 |
rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex) |
1092 |
- |
|
1094 |
+ |
|
1093 | 1095 |
cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName) |
1094 |
- |
|
1095 |
- t0 <- proc.time() |
|
1096 |
- save(cnAB, file=cnFile) |
|
1096 |
+ |
|
1097 |
+ t0 <- proc.time() |
|
1098 |
+ save(cnAB, file=cnFile) |
|
1097 | 1099 |
t0 <- proc.time()-t0 |
1098 | 1100 |
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".") |
1099 | 1101 |
rm(cnAB, B, zero) |
1100 | 1102 |
} |
1101 |
- |
|
1103 |
+ |
|
1102 | 1104 |
# next process snp probes |
1103 | 1105 |
snpIndex = getVarInEnv("snpProbesFid") |
1104 | 1106 |
nprobes <- length(snpIndex) |
1105 |
- |
|
1107 |
+ |
|
1106 | 1108 |
##We will read each cel file, summarize, and run EM one by one |
1107 | 1109 |
##We will save parameters of EM to use later |
1108 | 1110 |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double") |
1109 | 1111 |
SNR <- initializeBigVector("crlmmSNR-", narrays, "double") |
1110 |
- SKW <- initializeBigVector("crlmmSKW-", narrays, "double") |
|
1112 |
+ SKW <- initializeBigVector("crlmmSKW-", narrays, "double") |
|
1111 | 1113 |
# mixtureParams <- matrix(0, 4, narrays) |
1112 | 1114 |
# SNR <- vector("numeric", narrays) |
1113 | 1115 |
# SKW <- vector("numeric", narrays) |
... | ... |
@@ -1119,7 +1121,7 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5, |
1119 | 1121 |
set.seed(seed) |
1120 | 1122 |
idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
1121 | 1123 |
idx2 <- sample(nprobes, 10^5) |
1122 |
- |
|
1124 |
+ |
|
1123 | 1125 |
##S will hold (A+B)/2 and M will hold A-B |
1124 | 1126 |
##NOTE: We actually dont need to save S. Only for pics etc... |
1125 | 1127 |
##f is the correction. we save to avoid recomputing |
... | ... |
@@ -1138,15 +1140,15 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5, |
1138 | 1140 |
# if(!is.integer(A)) { |
1139 | 1141 |
# A = matrix(as.integer(A), nrow(A), ncol(A)) |
1140 | 1142 |
# B = matrix(as.integer(B), nrow(B), ncol(B)) |
1141 |
-# } |
|
1142 |
- |
|
1143 |
+# } |
|
1144 |
+ |
|
1143 | 1145 |
# colnames(A) <- colnames(B) <- colnames(zero) <- sns |
1144 | 1146 |
# rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY) |
1145 | 1147 |
|
1146 | 1148 |
A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer") |
1147 | 1149 |
B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer") |
1148 | 1150 |
zero <- initializeBigMatrix("crlmmZero-", nprobes, narrays, "integer") |
1149 |
- |
|
1151 |
+ |
|
1150 | 1152 |
if(verbose){ |
1151 | 1153 |
message("Calibrating ", narrays, " arrays.") |
1152 | 1154 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3) |
... | ... |
@@ -1155,7 +1157,7 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5, |
1155 | 1157 |
for(i in 1:narrays){ |
1156 | 1158 |
A[,i] = exprs(channel(XY, "X"))[snpIndex,i] |
1157 | 1159 |
B[,i] = exprs(channel(XY, "Y"))[snpIndex,i] |
1158 |
- zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i] |
|
1160 |
+ zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i] |
|
1159 | 1161 |
# SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3) |
1160 | 1162 |
SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3) |
1161 | 1163 |
if(fitMixture){ |
... | ... |
@@ -1184,8 +1186,8 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5, |
1184 | 1186 |
mixtureParams=mixtureParams, cdfName=cdfName) # , snpIndex=snpIndex, npIndex=npIndex) |
1185 | 1187 |
|
1186 | 1188 |
if(save.it & !missing(snpFile)) { |
1187 |
- t0 <- proc.time() |
|
1188 |
- save(res, file=snpFile) |
|
1189 |
+ t0 <- proc.time() |
|
1190 |
+ save(res, file=snpFile) |
|
1189 | 1191 |
t0 <- proc.time()-t0 |
1190 | 1192 |
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
1191 | 1193 |
} |
... | ... |
@@ -1224,7 +1226,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1224 | 1226 |
stop("Both RG and XY specified - please use one or the other") |
1225 | 1227 |
} |
1226 | 1228 |
if (missing(sns)) sns <- sampleNames(XY) #$X |
1227 |
- |
|
1229 |
+ |
|
1228 | 1230 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
1229 | 1231 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, |
1230 | 1232 |
save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
... | ... |
@@ -1245,7 +1247,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1245 | 1247 |
recallRegMin=1000, SNRMin=SNRMin, |
1246 | 1248 |
returnParams=returnParams, badSNP=badSNP, |
1247 | 1249 |
verbose=verbose) |
1248 |
- |
|
1250 |
+ |
|
1249 | 1251 |
res2[["SNR"]] <- res[["SNR"]] |
1250 | 1252 |
res2[["SKW"]] <- res[["SKW"]] |
1251 | 1253 |
rm(res) |
... | ... |
@@ -1279,7 +1281,7 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1279 | 1281 |
stop("Both RG and XY specified - please use one or the other") |
1280 | 1282 |
} |
1281 | 1283 |
if (missing(sns)) sns <- sampleNames(XY) #$X |
1282 |
- |
|
1284 |
+ |
|
1283 | 1285 |
res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
1284 | 1286 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, |
1285 | 1287 |
# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
... | ... |
@@ -1295,13 +1297,13 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1295 | 1297 |
# alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)), |
1296 | 1298 |
# call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)), |
1297 | 1299 |
# callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)), |
1298 |
-# annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD) |
|
1300 |
+# annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD) |
|
1299 | 1301 |
# sampleNames(callSet) <- sns |
1300 | 1302 |
# featureNames(callSet) <- res[["gns"]] |
1301 | 1303 |
# pData(callSet)$SKW <- rep(NA, length(sns)) |
1302 | 1304 |
# pData(callSet)$SNR <- rep(NA, length(sns)) |
1303 | 1305 |
# pData(callSet)$gender <- rep(NA, length(sns)) |
1304 |
- |
|
1306 |
+ |
|
1305 | 1307 |
}else{ |
1306 | 1308 |
if(verbose) message("Loading ", snpFile, ".") |
1307 | 1309 |
obj <- load(snpFile) |
... | ... |
@@ -1311,8 +1313,8 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1311 | 1313 |
} |
1312 | 1314 |
|
1313 | 1315 |
# rm(phenD, protD , fD) |
1314 |
- |
|
1315 |
-# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
1316 |
+ |
|
1317 |
+# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
1316 | 1318 |
# suppressWarnings(A(callSet) <- res[["A"]]) |
1317 | 1319 |
# suppressWarnings(B(callSet) <- res[["B"]]) |
1318 | 1320 |
# pData(callSet)$SKW <- res$SKW |
... | ... |
@@ -1321,7 +1323,7 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1321 | 1323 |
# rm(res); gc() |
1322 | 1324 |
if(row.names) row.names=res$gns else row.names=NULL |
1323 | 1325 |
if(col.names) col.names=res$sns else col.names=NULL |
1324 |
- |
|
1326 |
+ |
|
1325 | 1327 |
res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
1326 | 1328 |
B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
1327 | 1329 |
SNR=res[["SNR"]], # callSet$SNR, # [j], |
... | ... |
@@ -1338,7 +1340,7 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1338 | 1340 |
verbose=verbose, |
1339 | 1341 |
returnParams=returnParams, |
1340 | 1342 |
badSNP=badSNP) |
1341 |
-# rm(res); gc() |
|
1343 |
+# rm(res); gc() |
|
1342 | 1344 |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]]) |
1343 | 1345 |
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]) |
1344 | 1346 |
# callSet$gender[j] <- tmp$gender |
... | ... |
@@ -1355,8 +1357,8 @@ crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
1355 | 1357 |
} |
1356 | 1358 |
|
1357 | 1359 |
|
1358 |
-## MR: Below is a more memory efficient version of crlmmIllumina() which |
|
1359 |
-## reads in the .idats and genotypes in the one function and removes objects |
|
1360 |
+## MR: Below is a more memory efficient version of crlmmIllumina() which |
|
1361 |
+## reads in the .idats and genotypes in the one function and removes objects |
|
1360 | 1362 |
## after they have been used |
1361 | 1363 |
crlmmIlluminaV2 = function(sampleSheet=NULL, |
1362 | 1364 |
arrayNames=NULL, |
... | ... |
@@ -1371,7 +1373,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1371 | 1373 |
# rgFile, |
1372 | 1374 |
stripNorm=TRUE, |
1373 | 1375 |
useTarget=TRUE, |
1374 |
- row.names=TRUE, |
|
1376 |
+ row.names=TRUE, |
|
1375 | 1377 |
col.names=TRUE, |
1376 | 1378 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
1377 | 1379 |
seed=1, save.ab=FALSE, snpFile, cnFile, |
... | ... |
@@ -1382,7 +1384,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1382 | 1384 |
if(missing(cdfName)) stop("must specify cdfName") |
1383 | 1385 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
1384 | 1386 |
# if(missing(sns)) sns <- basename(arrayNames) |
1385 |
- |
|
1387 |
+ |
|
1386 | 1388 |
# if (save.rg & missing(rgFile)) |
1387 | 1389 |
# stop("'rgFile' is missing, and you chose save.rg") |
1388 | 1390 |
if (save.ab & (missing(snpFile) | missing(cnFile))) |
... | ... |
@@ -1437,7 +1439,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1437 | 1439 |
# featureNames(callSet) <- res[["gns"]] |
1438 | 1440 |
# pData(callSet)$SKW <- rep(NA, length(sns)) |
1439 | 1441 |
# pData(callSet)$SNR <- rep(NA, length(sns)) |
1440 |
-# pData(callSet)$gender <- rep(NA, length(sns)) |
|
1442 |
+# pData(callSet)$gender <- rep(NA, length(sns)) |
|
1441 | 1443 |
# } |
1442 | 1444 |
# pData(callSet)[j,] <- phenD |
1443 | 1445 |
# pData(protocolData(callSet))[j,] <- protD |
... | ... |
@@ -1445,7 +1447,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1445 | 1447 |
# pData(protocolData(callSet)) <- protD |
1446 | 1448 |
|
1447 | 1449 |
# rm(phenD, protD, fD) |
1448 |
- |
|
1450 |
+ |
|
1449 | 1451 |
# if(k > 1 & nrow(res[[1]]) != nrow(callSet)){ |
1450 | 1452 |
##RS: I don't understand why the IDATS for the |
1451 | 1453 |
##same platform potentially have different lengths |
... | ... |
@@ -1453,7 +1455,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1453 | 1455 |
# res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ] |
1454 | 1456 |
# } |
1455 | 1457 |
|
1456 |
-# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
1458 |
+# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
|
1457 | 1459 |
# suppressWarnings(A(callSet)[, j] <- res[["A"]]) |
1458 | 1460 |
# suppressWarnings(B(callSet)[, j] <- res[["B"]]) |
1459 | 1461 |
# suppressWarnings(A(callSet) <- res[["A"]]) |
... | ... |
@@ -1465,7 +1467,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1465 | 1467 |
# mixtureParams <- res$mixtureParams |
1466 | 1468 |
# rm(res); gc() |
1467 | 1469 |
if(row.names) row.names=res$gns else row.names=NULL |
1468 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
1470 |
+ if(col.names) col.names=res$sns else col.names=NULL |
|
1469 | 1471 |
res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
1470 | 1472 |
B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
1471 | 1473 |
SNR=res[["SNR"]], # callSet$SNR, # [j], |
... | ... |
@@ -1482,7 +1484,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
1482 | 1484 |
verbose=verbose, |
1483 | 1485 |
returnParams=returnParams, |
1484 | 1486 |
badSNP=badSNP) |
1485 |
-# rm(res); gc() |
|
1487 |
+# rm(res); gc() |
|
1486 | 1488 |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]]) |
1487 | 1489 |
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]]) |
1488 | 1490 |
# callSet$gender[j] <- tmp$gender |
... | ... |
@@ -165,8 +165,10 @@ validCdfNames <- function(){ |
165 | 165 |
"human610quadv1b", |
166 | 166 |
"human660quadv1a", |
167 | 167 |
"human1mduov3b", |
168 |
- "humanomni1quadv1b") |
|
168 |
+ "humanomni1quadv1b", |
|
169 |
+ "humanomniexpress12v1b") |
|
169 | 170 |
} |
171 |
+ |
|
170 | 172 |
isValidCdfName <- function(cdfName){ |
171 | 173 |
chipList <- validCdfNames() |
172 | 174 |
result <- cdfName %in% chipList |
... | ... |
@@ -24,18 +24,18 @@ |
24 | 24 |
|
25 | 25 |
<<setup, echo=FALSE, results=hide>>= |
26 | 26 |
options(continue=" ", width=70) |
27 |
-@ |
|
27 |
+@ |
|
28 | 28 |
|
29 | 29 |
%\section{Estimating copy number} |
30 | 30 |
|
31 | 31 |
%At present, software for copy number estimation is provided only for the |
32 |
-%Affymetrix 6.0 platform. |
|
32 |
+%Affymetrix 6.0 platform. |
|
33 | 33 |
|
34 | 34 |
\begin{abstract} |
35 | 35 |
This vignette estimates copy number for HapMap samples on the |
36 | 36 |
Affymetrix 6.0 platform. See \citep{Scharpf2010} for the working |
37 | 37 |
paper. |
38 |
- |
|
38 |
+ |
|
39 | 39 |
\end{abstract} |
40 | 40 |
|
41 | 41 |
\section{Simple usage} |
... | ... |
@@ -47,7 +47,7 @@ library(oligoClasses) |
47 | 47 |
library(crlmm) |
48 | 48 |
crlmm:::validCdfNames() |
49 | 49 |
cdfName <- "genomewidesnp6" |
50 |
-@ |
|
50 |
+@ |
|
51 | 51 |
|
52 | 52 |
\paragraph{Preprocess and genotype.} |
53 | 53 |
|
... | ... |
@@ -57,7 +57,7 @@ intensities and genotype data, and define a 'batch' variable. The |
57 | 57 |
batch variable will later be useful when we estimate copy number. We |
58 | 58 |
typically use the 96 well chemistry plate or the scan date of the |
59 | 59 |
array as a surrogate for batch. Adjusting by date or chemistry plate |
60 |
-can be helpful for limiting the influence of batch effects. |
|
60 |
+can be helpful for limiting the influence of batch effects. |
|
61 | 61 |
|
62 | 62 |
<<celfiles>>= |
63 | 63 |
celFiles <- list.celfiles("/thumper/ctsa/snpmicroarray/hapmap/raw/affy/1m", full.names=TRUE, pattern=".CEL") |
... | ... |
@@ -68,7 +68,7 @@ batch <- substr(basename(celFiles), 13, 13) |
68 | 68 |
celFiles <- celFiles[batch %in% c("C", "Y")] |
69 | 69 |
batch <- batch[batch %in% c("C", "Y")] |
70 | 70 |
stopifnot(length(batch) == length(celFiles)) |
71 |
-@ |
|
71 |
+@ |
|
72 | 72 |
|
73 | 73 |
The preprocessing steps for copy number estimation includes quantile |
74 | 74 |
normalization of the polymorphic and nonpolymorphic markers and a |
... | ... |
@@ -95,9 +95,12 @@ if(!exists("cnSet")){ |
95 | 95 |
if(!file.exists(file.path(outdir, "cnSet.rda"))){ |
96 | 96 |
cnSet <- genotype(celFiles[1:50], cdfName=cdfName, copynumber=TRUE, batch=batch[1:50]) |
97 | 97 |
save(cnSet, file=file.path(outdir, "cnSet.rda")) |
98 |
- } else load(file.path(outdir, "cnSet.rda")) |
|
98 |
+ } else{ |
|
99 |
+ message("Loading cnSet container") |
|
100 |
+ load(file.path(outdir, "cnSet.rda")) |
|
101 |
+ } |
|
99 | 102 |
} |
100 |
-@ |
|
103 |
+@ |
|
101 | 104 |
|
102 | 105 |
A more memory efficient approach to preprocessing and genotyping is |
103 | 106 |
implemented in the \R{} function \Rfunction{genotype2}. The arguments |
... | ... |
@@ -114,13 +117,18 @@ library(ff) |
114 | 117 |
ld.path <- file.path(outdir, "ffobjs") |
115 | 118 |
if(!file.exists(ld.path)) dir.create(ld.path) |
116 | 119 |
ldPath(ld.path) |
120 |
+ocProbesets(100000) |
|
121 |
+ocSamples(300) |
|
117 | 122 |
if(!exists("cnSet2")){ |
118 | 123 |
if(!file.exists(file.path(outdir, "cnSet2.rda"))){ |
119 | 124 |
cnSet2 <- genotype2(celFiles, cdfName=cdfName, copynumber=TRUE, batch=batch) |
120 | 125 |
save(cnSet2, file=file.path(outdir, "cnSet2.rda")) |
121 |
- } else load(file.path(outdir, "cnSet2.rda")) |
|
126 |
+ } else{ |
|
127 |
+ message("Loading cnSet2 container") |
|
128 |
+ load(file.path(outdir, "cnSet2.rda")) |
|
129 |
+ } |
|
122 | 130 |
} |
123 |
-@ |
|
131 |
+@ |
|
124 | 132 |
|
125 | 133 |
The objects returned by the \Rfunction{genotype} and |
126 | 134 |
\Rfunction{genotype2} differ in size as the former returns an object |
... | ... |
@@ -151,7 +159,7 @@ if(FALSE){ |
151 | 159 |
gt1 <- calls(cnSet)[, 1:50] |
152 | 160 |
gt2 <- calls(cnSet2)[, 1:50] |
153 | 161 |
all.equal(gt1, gt2) |
154 |
-@ |
|
162 |
+@ |
|
155 | 163 |
|
156 | 164 |
\noindent With \Robject{ff} objects the additional I/O time required |
157 | 165 |
for extracting data is substantial and will increase for larger |
... | ... |
@@ -194,7 +202,7 @@ open(snpCallProbability(cnSet2)) |
194 | 202 |
posterior.prob2 <- integerScoreToProbability(snpCallProbability(cnSet2)[rows, cols]) |
195 | 203 |
close(snpCallProbability(cnSet2)) |
196 | 204 |
all.equal(posterior.prob1, posterior.prob2) |
197 |
-@ |
|
205 |
+@ |
|
198 | 206 |
|
199 | 207 |
Next, we obtain locus-level estimates of copy number by fitting a |
200 | 208 |
linear model to each SNP. A variable named 'batch' must be indicated |
... | ... |
@@ -212,14 +220,16 @@ latter. As with the preprocessing and genotyping, the following code |
212 | 220 |
should be submitted as part of the batch job as it is too slow for |
213 | 221 |
interactive analysis. |
214 | 222 |
|
223 |
+ |
|
224 |
+ |
|
215 | 225 |
<<cn, echo=FALSE, eval=FALSE>>= |
216 | 226 |
##Matrix format |
217 | 227 |
cnSet <- crlmmCopynumber(cnSet) |
218 |
-@ |
|
228 |
+@ |
|
219 | 229 |
|
220 | 230 |
<<save.cnset, echo=FALSE, eval=FALSE>>= |
221 | 231 |
save(cnSet, file=file.path(outdir, "cnSet.rda")) |
222 |
-@ |
|
232 |
+@ |
|
223 | 233 |
|
224 | 234 |
<<cn.ff, echo=FALSE, eval=FALSE>>= |
225 | 235 |
rm(cnSet); gc() |
... | ... |
@@ -227,13 +237,13 @@ ocProbesets(75e3) |
227 | 237 |
##ff objects |
228 | 238 |
system.time(cnSet2 <- crlmmCopynumber2(cnSet2)) |
229 | 239 |
save(cnSet2, file=file.path(outdir, "cnSet2.rda")) |
230 |
-@ |
|
240 |
+@ |
|
231 | 241 |
|
232 | 242 |
<<timings, eval=FALSE>>= |
233 | 243 |
tryCatch(print(paste("Time for matrix version:", time1)), error=function(e) print("timing for CN estimation not available")) |
234 | 244 |
tryCatch(print(paste("Time for ff version:", time2)), error=function(e) print("timing for CN estimation not available")) |
235 | 245 |
rm(cnSet2); gc() |
236 |
-@ |
|
246 |
+@ |
|
237 | 247 |
|
238 | 248 |
|
239 | 249 |
\section{Accessors} |
... | ... |
@@ -245,7 +255,7 @@ visualizations using the sample object provided in the |
245 | 255 |
<<sampleset>>= |
246 | 256 |
data(sample.CNSetLM) |
247 | 257 |
x <- sample.CNSetLM |
248 |
-@ |
|
258 |
+@ |
|
249 | 259 |
|
250 | 260 |
|
251 | 261 |
\subsection{Quantile-normalized intensities} |
... | ... |
@@ -258,7 +268,7 @@ snp.index <- which(isSnp(x)) |
258 | 268 |
np.index <- which(!isSnp(x)) |
259 | 269 |
a <- (A(x))[snp.index, ] |
260 | 270 |
dim(a) |
261 |
-@ |
|
271 |
+@ |
|
262 | 272 |
|
263 | 273 |
The extra set of parentheses surrounding \Robject{A(cnSet2)} above is |
264 | 274 |
added to emphasize the appropriate order of operations. Subsetting the |
... | ... |
@@ -267,54 +277,54 @@ large datasets. |
267 | 277 |
|
268 | 278 |
<<eval=FALSE>>= |
269 | 279 |
a <- A(x[snp.index, ]) |
270 |
-@ |
|
280 |
+@ |
|
271 | 281 |
|
272 | 282 |
The quantile-normalized intensities for nonpolymorphic loci are obtained |
273 | 283 |
by: |
274 | 284 |
|
275 | 285 |
<<>>= |
276 | 286 |
npIntensities <- (A(x))[np.index, ] |
277 |
-@ |
|
287 |
+@ |
|
278 | 288 |
|
279 | 289 |
Quantile normalized intensities for the B allele at polymorphic loci: |
280 | 290 |
|
281 | 291 |
<<B.polymorphic>>= |
282 | 292 |
b.snps <- (B(x))[snp.index, ] |
283 |
-@ |
|
293 |
+@ |
|
284 | 294 |
|
285 | 295 |
Note that NAs are recorded in the 'B' assay data element for |
286 | 296 |
nonpolymorphic loci: |
287 | 297 |
|
288 | 298 |
<<B.NAs>>= |
289 | 299 |
all(is.na((B(x))[np.index, ])) |
290 |
-@ |
|
300 |
+@ |
|
291 | 301 |
|
292 | 302 |
<<clean, echo=FALSE>>= |
293 | 303 |
rm(b.snps, a, npIntensities); gc() |
294 |
-@ |
|
304 |
+@ |
|
295 | 305 |
|
296 | 306 |
\paragraph{\Robject{SnpSet}: Genotype calls and confidence scores} |
297 | 307 |
|
298 | 308 |
Genotype calls: |
299 | 309 |
<<genotypes>>= |
300 | 310 |
genotypes <- (snpCall(x))[snp.index, ] |
301 |
-@ |
|
311 |
+@ |
|
302 | 312 |
Confidence scores of the genotype calls: |
303 | 313 |
<<confidenceScores>>= |
304 | 314 |
genotypeConf <- integerScoreToProbability(snpCallProbability(x)[snp.index[1:10], ]) |
305 |
-@ |
|
315 |
+@ |
|
306 | 316 |
|
307 | 317 |
\paragraph{\Robject{CopyNumberSet}: allele-specific copy number} |
308 | 318 |
|
309 | 319 |
Allele-specific copy number at polymorphic loci: |
310 | 320 |
<<ca>>= |
311 | 321 |
ca <- CA(x[snp.index, ])/100 |
312 |
-@ |
|
322 |
+@ |
|
313 | 323 |
|
314 | 324 |
Total copy number at nonpolymorphic loci: |
315 | 325 |
<<ca>>= |
316 | 326 |
cn.nonpolymorphic <- CA(x[np.index, ])/100 |
317 |
-@ |
|
327 |
+@ |
|
318 | 328 |
|
319 | 329 |
Total copy number at both polymorphic and nonpolymorphic loci: |
320 | 330 |
<<totalCopynumber>>= |
... | ... |
@@ -322,7 +332,7 @@ path <- system.file("scripts", package="crlmm") |
322 | 332 |
source(file.path(path, "helperFunctions.R")) |
323 | 333 |
cn <- totalCopyNumber(x, i=c(snp.index, np.index)) |
324 | 334 |
apply(cn, 2, median, na.rm=TRUE) |
325 |
-@ |
|
335 |
+@ |
|
326 | 336 |
|
327 | 337 |
(Note: the accessor totalCopyNumber method is sourced from a few |
328 | 338 |
helper functions. Documentation will be available in the devel version |
... | ... |
@@ -335,15 +345,15 @@ Information on physical position and chromosome can be accessed as follows: |
335 | 345 |
<<positionChromosome>>= |
336 | 346 |
xx <- position(x) |
337 | 347 |
yy <- chromosome(x) |
338 |
-@ |
|
348 |
+@ |
|
339 | 349 |
|
340 | 350 |
Parameters from the linear model used to estimate copy number are |
341 |
-stored in the slot \Robject{lM}. |
|
351 |
+stored in the slot \Robject{lM}. |
|
342 | 352 |
|
343 | 353 |
<<copynumberParameters>>= |
344 | 354 |
names(lM(x)) |
345 | 355 |
dim(lM(x)[[1]]) |
346 |
-@ |
|
356 |
+@ |
|
347 | 357 |
|
348 | 358 |
|
349 | 359 |
|
... | ... |
@@ -355,7 +365,7 @@ A histogram of the signal to noise ratio for the HapMap samples: |
355 | 365 |
|
356 | 366 |
<<plotSnr, fig=TRUE, include=FALSE>>= |
357 | 367 |
hist(x$SNR, xlab="SNR", main="", breaks=25) |
358 |
-@ |
|
368 |
+@ |
|
359 | 369 |
|
360 | 370 |
\begin{figure} |
361 | 371 |
\centering |
... | ... |
@@ -371,24 +381,24 @@ Figure \ref{fig:oneSample} plots physical position (horizontal axis) |
371 | 381 |
versus copy number (vertical axis) for the first sample. There is less |
372 | 382 |
information to estimate copy number at nonpolymorphic loci; improvements |
373 | 383 |
to the univariate prediction regions at nonpolymorphic loci are a future |
374 |
-area of research. |
|
384 |
+area of research. |
|
375 | 385 |
|
376 | 386 |
<<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>= |
377 | 387 |
par(las=1, mar=c(4, 5, 4, 2)) |
378 |
-plot(position(x), copyNumber(x)[, 1]/100, pch=21, |
|
379 |
- cex=0.4, xaxt="n", col="grey20", ylim=c(0,5), |
|
388 |
+plot(position(x), copyNumber(x)[, 1]/100, pch=21, |
|
389 |
+ cex=0.4, xaxt="n", col="grey20", ylim=c(0,5), |
|
380 | 390 |
ylab="copy number", xlab="physical position (Mb)", |
381 | 391 |
main=paste(sampleNames(x)[1], ", CHR:", unique(chromosome(x)))) |
382 | 392 |
points(position(x)[!isSnp(x)], copyNumber(x)[!isSnp(x), 1]/100, |
383 | 393 |
pch=21, cex=0.4, col="lightblue", bg="lightblue") |
384 | 394 |
axis(1, at=pretty(range(position(x))), labels=pretty(range(position(x)))/1e6) |
385 | 395 |
abline(h=2) |
386 |
-@ |
|
396 |
+@ |
|
387 | 397 |
|
388 | 398 |
<<idiogram, eval=FALSE, echo=FALSE>>= |
389 | 399 |
require(SNPchip) |
390 | 400 |
plotCytoband(22, new=FALSE, cytoband.ycoords=c(3.8, 4), label.cytoband=FALSE) |
391 |
-@ |
|
401 |
+@ |
|
392 | 402 |
|
393 | 403 |
\begin{figure} |
394 | 404 |
\includegraphics[width=0.9\textwidth]{copynumber-oneSample} |
... | ... |
@@ -408,14 +418,14 @@ plotCytoband(22, new=FALSE, cytoband.ycoords=c(3.8, 4), label.cytoband=FALSE) |
408 | 418 |
%xlim <- c(10*1e6, max(position(cnSet2))) |
409 | 419 |
%cols <- brewer.pal(8, "Dark2")[1:4] |
410 | 420 |
%for(j in 1:3){ |
411 |
-% plot(position(cnSet2), cnState[, j], pch=".", col=cols[2], xaxt="n", |
|
421 |
+% plot(position(cnSet2), cnState[, j], pch=".", col=cols[2], xaxt="n", |
|
412 | 422 |
% ylab="copy number", xlab="Physical position (Mb)", type="s", lwd=2, |
413 | 423 |
% ylim=c(0,6), xlim=xlim) |
414 | 424 |
% points(position(cnSet2), cns[, j], pch=".", col=cols[3]) |
415 | 425 |
% lines(position(cnSet2), cnState[,j], lwd=2, col=cols[2]) |
416 |
-% axis(1, at=pretty(position(cnSet)), |
|
426 |
+% axis(1, at=pretty(position(cnSet)), |
|
417 | 427 |
% labels=pretty(position(cnSet))/1e6) |
418 |
-% abline(h=c(1,3), lty=2, col=cols[1]) |
|
428 |
+% abline(h=c(1,3), lty=2, col=cols[1]) |
|
419 | 429 |
% legend("topright", bty="n", legend=sampleNames(cnSet)[j]) |
420 | 430 |
% legend("topleft", lty=1, col=cols[2], legend="copy number state", |
421 | 431 |
% bty="n", lwd=2) |
... | ... |
@@ -423,7 +433,7 @@ plotCytoband(22, new=FALSE, cytoband.ycoords=c(3.8, 4), label.cytoband=FALSE) |
423 | 433 |
% label.cytoband=FALSE, xlim=xlim) |
424 | 434 |
%} |
425 | 435 |
%par(op) |
426 |
-%@ |
|
436 |
+%@ |
|
427 | 437 |
% |
428 | 438 |
%\begin{figure} |
429 | 439 |
% \includegraphics[width=\textwidth]{copynumber-overlayHmmPredictions} |
... | ... |
@@ -462,13 +472,13 @@ plot(i, x, copynumber=2) |
462 | 472 |
##for(i in index){ |
463 | 473 |
## gt <- calls(cnSet)[i, ] |
464 | 474 |
## if(i != 89){ |
465 |
-## myScatter(cnSet[i, ], |
|
466 |
-## pch=pch, |
|
467 |
-## col=colors[snpCall(cnSet)[i, ]], |
|
475 |
+## myScatter(cnSet[i, ], |
|
476 |
+## pch=pch, |
|
477 |
+## col=colors[snpCall(cnSet)[i, ]], |
|
468 | 478 |
## bg=colors[snpCall(cnSet)[i, ]], cex=cex, |
469 | 479 |
## xlim=xlim, ylim=ylim) |
470 | 480 |
## mtext("A", 1, outer=TRUE, line=1) |
471 |
-## mtext("B", 2, outer=TRUE, line=1) |
|
481 |
+## mtext("B", 2, outer=TRUE, line=1) |
|
472 | 482 |
## crlmm:::ellipse.CNSet(cnSet[i, ], copynumber=2, batch="C", lwd=2, col="black") |
473 | 483 |
## crlmm:::ellipse.CNSet(cnSet[i, ], copynumber=2, batch="Y", lwd=2, col="grey50") |
474 | 484 |
## } else { |
... | ... |
@@ -493,10 +503,87 @@ plot(i, x, copynumber=2) |
493 | 503 |
% |
494 | 504 |
%See the technical report \citep{Scharpf2009}. |
495 | 505 |
|
506 |
+\section{Trouble shooting} |
|
507 |
+ |
|
508 |
+Suppose that you are only interested in estimating copy number at |
|
509 |
+autosomal markers and you encountered an error using the |
|
510 |
+crlmmCopynumber2 function for estimating copy number at nonpolymorphic |
|
511 |
+markers on chromosome X. |
|
512 |
+ |
|
513 |
+Making a subset of a cnSet2 without pulling all the data from disk to |
|
514 |
+memory. |
|
515 |
+<<>>= |
|
516 |
+cnSet <- cnSet2 |
|
517 |
+@ |
|
518 |
+ |
|
519 |
+<<>>= |
|
520 |
+cnSet <- crlmmCopynumber2(cnSet) |
|
521 |
+@ |
|
522 |
+ |
|
523 |
+ |
|
524 |
+<<>>= |
|
525 |
+marker.index <- which(chromosome(cnSet) < 23) |
|
526 |
+nr <- length(marker.index) |
|
527 |
+nc <- ncol(cnSet) |
|
528 |
+@ |
|
529 |
+ |
|
530 |
+Set up a new directory for storing ff objects so that we can |
|
531 |
+completely remove all the ff objects in the old directory when we're |
|
532 |
+finished. |
|
533 |
+ |
|
534 |
+<<>>= |
|
535 |
+library(ff) |
|
536 |
+ldPath("newDirectory") |
|
537 |
+dir.create("newDirectory") |
|
538 |
+@ |
|
539 |
+ |
|
540 |
+Next, initialize the container using utilities from the oligoClasses package. |
|
541 |
+ |
|
542 |
+<<>>= |
|
543 |
+CA <- initializeBigMatrix("CA", nr, nc) |
|
544 |
+CB <- initializeBigMatrix("CB", nr, nc) |
|
545 |
+A <- initializeBigMatrix("A", nr, nc) |
|
546 |
+B <- initializeBigMatrix("B", nr, nc) |
|
547 |
+GT <- initializeBigMatrix("GT", nr, nc) |
|
548 |
+PP <- initializeBigMatrix("confs", nr, nc) |
|
549 |
+cnSet.autosomes <- new("CNSetLM", |
|
550 |
+ alleleA=A, |
|
551 |
+ alleleB=B, |
|
552 |
+ call=GT, |
|
553 |
+ callProbability=PP, |
|
554 |
+ CA=CA, |
|
555 |
+ CB=CB, |
|
556 |
+ protocolData=protocolData(cnSet), |
|
557 |
+ featureData=featureData(cnSet)[marker.index, ], |
|
558 |
+ phenoData=phenoData(cnSet), |
|
559 |
+ annotation=annotation(cnSet)) |
|
560 |
+@ |
|
561 |
+ |
|
562 |
+Next, we need to populate the ff objects with data. The easiest way to |
|
563 |
+do this without requiring too much RAM is to loop through the |
|
564 |
+samples. Lets do 5 samples at a time. |
|
565 |
+ |
|
566 |
+<<populateWithData>>= |
|
567 |
+sample.index <- splitIndicesByLength(1:ncol(cnSet), 5) |
|
568 |
+for(j in sample.index){ |
|
569 |
+ A(cnSet.autosomes)[, j] <- A(cnSet)[marker.index, j] |
|
570 |
+ B(cnSet.autosomes)[, j] <- B(cnSet)[marker.index, j] |
|
571 |
+ calls(cnSet.autosomes)[, j] <- calls(cnSet)[marker.index, j] |
|
572 |
+ snpCallProbability(cnSet.autosomes)[, j] <- snpCallProbability(cnSet)[marker.index, j] |
|
573 |
+} |
|
574 |
+@ |
|
575 |
+ |
|
576 |
+Check to see that the data is there. |
|
577 |
+ |
|
578 |
+The subset operation would pull all the data from disk and create a |
|
579 |
+cnSet object with matrices. If the dataset is very large, the subset |
|
580 |
+operation would be very slow and potentially create a large amount of |
|
581 |
+RAM. |
|
582 |
+ |
|
496 | 583 |
\section{Session information} |
497 | 584 |
<<sessionInfo, results=tex>>= |
498 | 585 |
toLatex(sessionInfo()) |
499 |
-@ |
|
586 |
+@ |
|
500 | 587 |
|
501 | 588 |
\section*{References} |
502 | 589 |
|