... | ... |
@@ -2,7 +2,7 @@ Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for |
4 | 4 |
Affymetrix SNP 5.0 and 6.0 and Illumina arrays |
5 |
-Version: 1.37.0 |
|
5 |
+Version: 1.37.1 |
|
6 | 6 |
Author: Benilton S Carvalho, Robert Scharpf, Matt Ritchie, Ingo |
7 | 7 |
Ruczinski, Rafael A Irizarry |
8 | 8 |
Maintainer: Benilton S Carvalho <benilton@unicamp.br>, |
... | ... |
@@ -324,186 +324,139 @@ readGenCallOutput = function(filenames, path=".", cdfName, |
324 | 324 |
|
325 | 325 |
|
326 | 326 |
|
327 |
-RGtoXY = function(RG, chipType, anno, verbose=TRUE) { |
|
328 |
- if(chipType!="nopackage") { |
|
329 |
- needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded)) |
|
330 |
- if(needToLoad){ |
|
331 |
- chipList = c("human1mv1c",# 1M |
|
332 |
- "human370v1c", # 370CNV |
|
333 |
- "human650v3a", # 650Y |
|
334 |
- "human610quadv1b", # 610 quad |
|
335 |
- "human660quadv1a", # 660 quad |
|
336 |
- "human370quadv3c", # 370CNV quad |
|
337 |
- "human550v3b", # 550K |
|
338 |
- "human1mduov3b", # 1M Duo |
|
339 |
- "humanomni1quadv1b", # Omni1 quad |
|
340 |
- "humanomni25quadv1b", # Omni2.5 quad |
|
341 |
- "humanomni258v1a", # Omni2.5 8 v1 A |
|
342 |
- "humanomni258v1p1b", # Omni2.5 8 v1.1 B |
|
343 |
- "humanomni5quadv1b", # Omni5 quad |
|
344 |
- "humanomniexpress12v1b", # Omni express 12 |
|
345 |
- "humanimmuno12v1b", # Immuno chip 12 |
|
346 |
- "humancytosnp12v2p1h", # CytoSNP 12 |
|
347 |
- "humanexome12v1p2a", # Exome 12 v1.2 A |
|
348 |
- "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b |
|
349 |
- ## RS: added cleancdfname() |
|
350 |
- if(missing(chipType)){ |
|
351 |
- chipType = match.arg(cleancdfname(annotation(RG)), chipList) |
|
352 |
- } else chipType = match.arg(cleancdfname(chipType), chipList) |
|
353 |
- pkgname = getCrlmmAnnotationName(chipType) |
|
354 |
- if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
355 |
- suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
356 |
- msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
357 |
- message(strwrap(msg)) |
|
358 |
- stop("Package ", pkgname, " could not be found.") |
|
359 |
- rm(suggCall, msg) |
|
360 |
- } |
|
361 |
- if(verbose) message("Loading chip annotation information.") |
|
362 |
- loader("address.rda", .crlmmPkgEnv, pkgname) |
|
363 |
- } |
|
364 |
- aids = getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest |
|
365 |
- bids = getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest |
|
366 |
- snpbase = getVarInEnv("base") |
|
367 |
-## loader(‘file.rda’) |
|
368 |
-## x = getVarInEnv(‘x’) |
|
369 |
-## y = getVarInEnv(‘y’) |
|
370 |
-## |
|
371 |
-## I’d consider using something like: |
|
372 |
-## |
|
373 |
-## needToLoad = !all(sapply(c(‘x’, ‘y’), isLoaded)) |
|
374 |
-## if (needToLoad){ |
|
375 |
-## loader(‘file.rda’) |
|
376 |
-## x = getVarInEnv(‘x’) |
|
377 |
-## y = getVarInEnv(‘y’) |
|
378 |
-## } |
|
379 |
- ids = names(aids) |
|
380 |
- nsnps = length(aids) |
|
381 |
- narrays = ncol(RG) |
|
382 |
-# aidcol = match("AddressA_ID", colnames(annot)) |
|
383 |
-# if(is.na(aidcol)) |
|
384 |
-# aidcol = match("Address", colnames(annot)) |
|
385 |
-# if(is.na(bidcol)) |
|
386 |
-# bidcol = match("Address2", colnames(annot)) |
|
387 |
-# aids = annot[, aidcol] |
|
388 |
-# bids = annot[, bidcol] |
|
389 |
-# snpids = annot[,"Name"] |
|
390 |
-# snpbase = annot[,"SNP"] |
|
391 |
- infI = !is.na(bids) & bids!=0 |
|
392 |
- aord = match(aids, featureNames(RG)) # NAs are possible here |
|
393 |
- bord = match(bids, featureNames(RG)) # and here |
|
394 |
-# argrg = aids[rrgg] |
|
395 |
-# brgrg = bids[rrgg] |
|
396 |
- xyPhenoData = AnnotatedDataFrame(data=RG@phenoData@data,varMetadata=RG@phenoData@varMetadata) |
|
397 |
- levels(xyPhenoData@varMetadata$channel) = c("X","Y","zero","_ALL_") |
|
398 |
- XY <- new("NChannelSet", |
|
399 |
- assayDataNew(X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
|
400 |
- Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
|
401 |
- zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")), |
|
402 |
- phenoData=xyPhenoData, protocolData=RG@protocolData, annotation=chipType) |
|
403 |
- storageMode(XY) = "environment" |
|
404 |
-# XY <- new("NChannelSet", |
|
405 |
-# X=matrix(0, nsnps, narrays), |
|
406 |
-# Y=matrix(0, nsnps, narrays), |
|
407 |
-# zero=matrix(0, nsnps, narrays), |
|
408 |
-# annotation=chipType, phenoData=RG@phenoData, |
|
409 |
-# protocolData=RG@protocolData, storage.mode="environment") |
|
410 |
- featureNames(XY) = ids |
|
411 |
- sampleNames(XY) = sampleNames(RG) |
|
412 |
- ## RS |
|
413 |
- rm(list=c("bord", "infI", "aids", "bids", "ids", "snpbase")) |
|
414 |
-# print(gc()) |
|
415 |
- gc(verbose=FALSE) |
|
416 |
- # Need to initialize - matrices filled with NAs to begin with |
|
417 |
-# XY@assayData$X[1:nsnps,] = 0 |
|
418 |
-# XY@assayData$Y[1:nsnps,] = 0 |
|
419 |
-# XY@assayData$zero[1:nsnps,] = 0 |
|
420 |
- |
|
421 |
- # First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
|
422 |
- ## RS added |
|
423 |
- not.na <- !is.na(aord) |
|
424 |
- not.na.aord <- aord[not.na] |
|
425 |
- ## RS substitution !is.na(aord) -> not.na |
|
426 |
- ##r <- as.matrix(exprs(channel(RG, "R"))[not.na.aord,]) # mostly red |
|
427 |
- r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
428 |
- XY@assayData$X[not.na,] <- r |
|
429 |
- rm(r);gc(verbose=FALSE) |
|
430 |
- g <- as.matrix(assayData(RG)[["G"]][not.na.aord,]) # mostly green |
|
431 |
- XY@assayData$Y[not.na,] <- g |
|
432 |
- rm(g); gc(verbose=FALSE) |
|
433 |
- ##z <- as.matrix(exprs(channel(RG, "zero"))[not.na.aord,]) # mostly green |
|
434 |
- z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
435 |
- XY@assayData$zero[not.na,] <- z |
|
436 |
- rm(z); gc(verbose=FALSE) |
|
437 |
- ##RS added |
|
438 |
- rm(RG) |
|
439 |
-# print(gc()) |
|
440 |
- gc(verbose=FALSE) |
|
441 |
- ## Warning - not 100% sure that the code below is correct - could be more complicated than this |
|
442 |
-# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]") |
|
443 |
- |
|
444 |
-# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
|
445 |
-# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
|
446 |
- |
|
447 |
- # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
|
448 |
-# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
|
449 |
- |
|
450 |
-# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red |
|
451 |
-# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green |
|
327 |
+RGtoXY = function (RG, chipType, anno, verbose = TRUE) |
|
328 |
+{ |
|
329 |
+ if (chipType != "nopackage") { |
|
330 |
+ needToLoad <- !all(sapply(c("addressA", "addressB", "base"), |
|
331 |
+ isLoaded)) |
|
332 |
+ if (needToLoad) { |
|
333 |
+ chipList = c("human1mv1c", "human370v1c", "human650v3a", |
|
334 |
+ "human610quadv1b", "human660quadv1a", "human370quadv3c", |
|
335 |
+ "human550v3b", "human1mduov3b", "humanomni1quadv1b", |
|
336 |
+ "humanomni25quadv1b", "humanomni258v1a", "humanomni258v1p1b", |
|
337 |
+ "humanomni5quadv1b", "humanomniexpress12v1b", |
|
338 |
+ "humanimmuno12v1b", "humancytosnp12v2p1h", "humanexome12v1p2a", |
|
339 |
+ "humanomniexpexome8v1p1b") |
|
340 |
+ if (missing(chipType)) { |
|
341 |
+ chipType = match.arg(cleancdfname(annotation(RG)), |
|
342 |
+ chipList) |
|
343 |
+ } |
|
344 |
+ else chipType = match.arg(cleancdfname(chipType), |
|
345 |
+ chipList) |
|
346 |
+ pkgname = getCrlmmAnnotationName(chipType) |
|
347 |
+ if (!require(pkgname, character.only = TRUE, quietly = !verbose)) { |
|
348 |
+ suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", |
|
349 |
+ sep = "") |
|
350 |
+ msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", |
|
351 |
+ suggCall) |
|
352 |
+ message(strwrap(msg)) |
|
353 |
+ stop("Package ", pkgname, " could not be found.") |
|
354 |
+ rm(suggCall, msg) |
|
355 |
+ } |
|
356 |
+ if (verbose) |
|
357 |
+ message("Loading chip annotation information.") |
|
358 |
+ loader("address.rda", .crlmmPkgEnv, pkgname) |
|
359 |
+ } |
|
360 |
+ aids = getVarInEnv("addressA") |
|
361 |
+ bids = getVarInEnv("addressB") |
|
362 |
+ snpbase = getVarInEnv("base") |
|
363 |
+ ids = names(aids) |
|
364 |
+ nsnps = length(aids) |
|
365 |
+ narrays = ncol(RG) |
|
366 |
+ infI = !is.na(bids) & bids != 0 |
|
367 |
+ aord = match(aids, featureNames(RG)) |
|
368 |
+ bord = match(bids, featureNames(RG)) |
|
369 |
+ xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, |
|
370 |
+ varMetadata = RG@phenoData@varMetadata) |
|
371 |
+ levels(xyPhenoData@varMetadata$channel) = c("X", "Y", |
|
372 |
+ "zero", "_ALL_") |
|
373 |
+ XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", |
|
374 |
+ nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", |
|
375 |
+ nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", |
|
376 |
+ nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, |
|
377 |
+ protocolData = RG@protocolData, annotation = chipType) |
|
378 |
+ storageMode(XY) = "environment" |
|
379 |
+ featureNames(XY) = ids |
|
380 |
+ sampleNames(XY) = sampleNames(RG) |
|
381 |
+ rm(list = c("bord", "infI", "aids", "bids", "ids", "snpbase")) |
|
382 |
+ gc(verbose = FALSE) |
|
383 |
+ not.na <- !is.na(aord) |
|
384 |
+ not.na.aord <- aord[not.na] |
|
385 |
+ r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
386 |
+ XY@assayData$X[not.na, ] <- r |
|
387 |
+ rm(r) |
|
388 |
+ gc(verbose = FALSE) |
|
389 |
+ g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ]) |
|
390 |
+ XY@assayData$Y[not.na, ] <- g |
|
391 |
+ rm(g) |
|
392 |
+ gc(verbose = FALSE) |
|
393 |
+ z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
394 |
+ XY@assayData$zero[not.na, ] <- z |
|
395 |
+ rm(z) |
|
396 |
+ gc(verbose = FALSE) |
|
397 |
+ rm(RG) |
|
398 |
+ gc(verbose = FALSE) |
|
399 |
+ } |
|
400 |
+ if (chipType == "nopackage" && !is.null(anno)) { |
|
401 |
+ pkgname = NULL |
|
402 |
+ if(sum(colnames(anno@data)=="AddressA_ID")!=1){ |
|
403 |
+ |
|
404 |
+ message("annotation format is wrong, could not find a column with name: AddressA_ID") |
|
405 |
+ }else{ |
|
406 |
+ anno2=anno@data |
|
407 |
+ # chr = as.character(anno$Chr) |
|
408 |
+ #chr[chr=="X"] = 23 |
|
409 |
+ # chr[chr=="Y"] = 24 |
|
410 |
+ # chr[chr=="XY"] = 25 |
|
411 |
+#chr[chr=="MT"] = 26 |
|
412 |
+#anno[,"chromosome"]=as.integer(chr) |
|
452 | 413 |
|
453 |
- # For now zero out Infinium I probes |
|
454 |
-# XY@assayData$X[infI,] = 0 |
|
455 |
-# XY@assayData$Y[infI,] = 0 |
|
456 |
-# XY@assayData$zero[infI,] = 0 |
|
457 |
-# gc(verbose=FALSE) |
|
458 |
- } |
|
459 |
- if(chipType=="nopackage" && !is.null(anno)) { |
|
460 |
- pkgname=NULL |
|
461 |
-# anno=read.csv(annot,header=TRUE, as.is=TRUE, check.names=FALSE,skip=7) |
|
462 |
- aids=anno[,"AddressA_ID"] |
|
463 |
- bids=anno[,"AddressB_ID"] |
|
464 |
- snpbase=anno[,"SNP"] |
|
465 |
- names(aids)=ids=anno[,"Name"] |
|
466 |
-# m=match(aids,rownames(RG@assayData$R)) |
|
467 |
-# ARG=RG[m[!is.na(m)],] |
|
468 |
-# mm=match(rownames(ARG@assayData$R),aids) |
|
469 |
-# aids=aids[mm] |
|
470 |
-# bids=bids[mm] |
|
471 |
-# snpbas=snpbase[mm] |
|
472 |
- nsnps = length(aids) |
|
473 |
- narrays = ncol(RG) |
|
474 |
- infI = !is.na(bids) & bids != 0 |
|
475 |
- aord = match(aids, featureNames(RG)) |
|
476 |
- bord = match(bids, featureNames(RG)) |
|
477 |
- xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, |
|
478 |
- varMetadata = RG@phenoData@varMetadata) |
|
479 |
- levels(xyPhenoData@varMetadata$channel) = c("X", "Y", "zero", "_ALL_") |
|
480 |
- XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
|
481 |
- Y = initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
|
482 |
- zero = initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")), |
|
483 |
- phenoData = xyPhenoData, protocolData = RG@protocolData) |
|
484 |
- storageMode(XY) = "environment" |
|
485 |
- featureNames(XY) = names(aids) |
|
486 |
- sampleNames(XY) = sampleNames(RG) |
|
487 |
- rm(list = c("bord", "infI", "aids", "bids", "snpbase")) |
|
488 |
- gc(verbose = FALSE) |
|
489 |
- not.na <- !is.na(aord) |
|
490 |
- not.na.aord <- aord[not.na] |
|
491 |
- r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
492 |
- XY@assayData$X[not.na, ] <- r |
|
493 |
- rm(r) |
|
494 |
- gc(verbose = FALSE) |
|
495 |
- g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ]) |
|
496 |
- XY@assayData$Y[not.na, ] <- g |
|
497 |
- rm(g) |
|
498 |
- gc(verbose = FALSE) |
|
499 |
- z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
500 |
- XY@assayData$zero[not.na, ] <- z |
|
501 |
- rm(z) |
|
502 |
- gc(verbose = FALSE) |
|
503 |
- rm(RG) |
|
504 |
- gc(verbose = FALSE) |
|
505 |
- } |
|
506 |
- XY |
|
414 |
+ aids = anno2[, "AddressA_ID"] |
|
415 |
+ bids = anno2[, "AddressB_ID"] |
|
416 |
+ snpbase = anno2[, "SNP"] |
|
417 |
+ names(aids) = ids = anno2[, "featureNames"] |
|
418 |
+ nsnps = length(aids) |
|
419 |
+ narrays = ncol(RG) |
|
420 |
+ infI = !is.na(bids) & bids != 0 |
|
421 |
+ aord = match(aids, featureNames(RG)) |
|
422 |
+ bord = match(bids,featureNames(RG)) |
|
423 |
+ xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, |
|
424 |
+ varMetadata = RG@phenoData@varMetadata) |
|
425 |
+ levels(xyPhenoData@varMetadata$channel) = c("X", "Y", |
|
426 |
+ "zero", "_ALL_") |
|
427 |
+ XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", |
|
428 |
+ nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", |
|
429 |
+ nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", |
|
430 |
+ nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, |
|
431 |
+ protocolData = RG@protocolData) |
|
432 |
+ storageMode(XY) = "environment" |
|
433 |
+ # featureNames(XY) = names(aids) |
|
434 |
+ sampleNames(XY) = sampleNames(RG) |
|
435 |
+ rm(list = c("bord", "infI", "aids", "bids", "snpbase")) |
|
436 |
+ gc(verbose = FALSE) |
|
437 |
+ not.na <- !is.na(aord) |
|
438 |
+ not.na.aord <- aord[not.na] |
|
439 |
+ r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
440 |
+ XY@assayData$X[not.na, ] <- r |
|
441 |
+ rm(r) |
|
442 |
+ gc(verbose = FALSE) |
|
443 |
+ g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ]) |
|
444 |
+ XY@assayData$Y[not.na, ] <- g |
|
445 |
+ rm(g) |
|
446 |
+ gc(verbose = FALSE) |
|
447 |
+ z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
448 |
+ XY@assayData$zero[not.na, ] <- z |
|
449 |
+ rm(z) |
|
450 |
+ gc(verbose = FALSE) |
|
451 |
+ rm(RG) |
|
452 |
+ gc(verbose = FALSE) |
|
453 |
+ |
|
454 |
+ |
|
455 |
+ |
|
456 |
+ |
|
457 |
+ } |
|
458 |
+ } |
|
459 |
+ XY |
|
507 | 460 |
} |
508 | 461 |
|
509 | 462 |
|
... | ... |
@@ -1155,7 +1108,10 @@ constructInf <- function(sampleSheet=NULL, |
1155 | 1108 |
genome <- getAvailableIlluminaGenomeBuild(path) |
1156 | 1109 |
featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome) |
1157 | 1110 |
} else { |
1158 |
- featureData=anno |
|
1111 |
+ anno = as(anno, "AnnotatedDataFrame") |
|
1112 |
+ anno = as(anno, "GenomeAnnotatedDataFrame") |
|
1113 |
+featureData=anno |
|
1114 |
+ |
|
1159 | 1115 |
} |
1160 | 1116 |
nr <- nrow(featureData) |
1161 | 1117 |
nc <- narrays |
... | ... |
@@ -144,7 +144,11 @@ priormeans=prepriormeans |
144 | 144 |
XIndex = seq(1:nrow(cnSet))[chromosome(cnSet)==23 & isSnp(cnSet)] |
145 | 145 |
### impute gender if gender information not provided |
146 | 146 |
if (is.null(gender)) { |
147 |
- gender = krlmmImputeGender(cnSet, annotation, YIndex, verbose, offset=offset) |
|
147 |
+ if(sum(is.na(YIndex))==length(YIndex)){ |
|
148 |
+ gender=NULL |
|
149 |
+ }else{ |
|
150 |
+ gender = krlmmImputeGender(cnSet, annotation, YIndex, verbose, offset=offset) |
|
151 |
+ } |
|
148 | 152 |
} |
149 | 153 |
|
150 | 154 |
### double-check ChrY ChrX SNP cluster, impute gender if gender information not provided |