... | ... |
@@ -324,186 +324,139 @@ readGenCallOutput = function(filenames, path=".", cdfName, |
324 | 324 |
|
325 | 325 |
|
326 | 326 |
|
327 |
-RGtoXY = function(RG, chipType, anno, verbose=TRUE) { |
|
328 |
- if(chipType!="nopackage") { |
|
329 |
- needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded)) |
|
330 |
- if(needToLoad){ |
|
331 |
- chipList = c("human1mv1c",# 1M |
|
332 |
- "human370v1c", # 370CNV |
|
333 |
- "human650v3a", # 650Y |
|
334 |
- "human610quadv1b", # 610 quad |
|
335 |
- "human660quadv1a", # 660 quad |
|
336 |
- "human370quadv3c", # 370CNV quad |
|
337 |
- "human550v3b", # 550K |
|
338 |
- "human1mduov3b", # 1M Duo |
|
339 |
- "humanomni1quadv1b", # Omni1 quad |
|
340 |
- "humanomni25quadv1b", # Omni2.5 quad |
|
341 |
- "humanomni258v1a", # Omni2.5 8 v1 A |
|
342 |
- "humanomni258v1p1b", # Omni2.5 8 v1.1 B |
|
343 |
- "humanomni5quadv1b", # Omni5 quad |
|
344 |
- "humanomniexpress12v1b", # Omni express 12 |
|
345 |
- "humanimmuno12v1b", # Immuno chip 12 |
|
346 |
- "humancytosnp12v2p1h", # CytoSNP 12 |
|
347 |
- "humanexome12v1p2a", # Exome 12 v1.2 A |
|
348 |
- "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b |
|
349 |
- ## RS: added cleancdfname() |
|
350 |
- if(missing(chipType)){ |
|
351 |
- chipType = match.arg(cleancdfname(annotation(RG)), chipList) |
|
352 |
- } else chipType = match.arg(cleancdfname(chipType), chipList) |
|
353 |
- pkgname = getCrlmmAnnotationName(chipType) |
|
354 |
- if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
355 |
- suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
356 |
- msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
357 |
- message(strwrap(msg)) |
|
358 |
- stop("Package ", pkgname, " could not be found.") |
|
359 |
- rm(suggCall, msg) |
|
360 |
- } |
|
361 |
- if(verbose) message("Loading chip annotation information.") |
|
362 |
- loader("address.rda", .crlmmPkgEnv, pkgname) |
|
363 |
- } |
|
364 |
- aids = getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest |
|
365 |
- bids = getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest |
|
366 |
- snpbase = getVarInEnv("base") |
|
367 |
-## loader(‘file.rda’) |
|
368 |
-## x = getVarInEnv(‘x’) |
|
369 |
-## y = getVarInEnv(‘y’) |
|
370 |
-## |
|
371 |
-## I’d consider using something like: |
|
372 |
-## |
|
373 |
-## needToLoad = !all(sapply(c(‘x’, ‘y’), isLoaded)) |
|
374 |
-## if (needToLoad){ |
|
375 |
-## loader(‘file.rda’) |
|
376 |
-## x = getVarInEnv(‘x’) |
|
377 |
-## y = getVarInEnv(‘y’) |
|
378 |
-## } |
|
379 |
- ids = names(aids) |
|
380 |
- nsnps = length(aids) |
|
381 |
- narrays = ncol(RG) |
|
382 |
-# aidcol = match("AddressA_ID", colnames(annot)) |
|
383 |
-# if(is.na(aidcol)) |
|
384 |
-# aidcol = match("Address", colnames(annot)) |
|
385 |
-# if(is.na(bidcol)) |
|
386 |
-# bidcol = match("Address2", colnames(annot)) |
|
387 |
-# aids = annot[, aidcol] |
|
388 |
-# bids = annot[, bidcol] |
|
389 |
-# snpids = annot[,"Name"] |
|
390 |
-# snpbase = annot[,"SNP"] |
|
391 |
- infI = !is.na(bids) & bids!=0 |
|
392 |
- aord = match(aids, featureNames(RG)) # NAs are possible here |
|
393 |
- bord = match(bids, featureNames(RG)) # and here |
|
394 |
-# argrg = aids[rrgg] |
|
395 |
-# brgrg = bids[rrgg] |
|
396 |
- xyPhenoData = AnnotatedDataFrame(data=RG@phenoData@data,varMetadata=RG@phenoData@varMetadata) |
|
397 |
- levels(xyPhenoData@varMetadata$channel) = c("X","Y","zero","_ALL_") |
|
398 |
- XY <- new("NChannelSet", |
|
399 |
- assayDataNew(X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
|
400 |
- Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
|
401 |
- zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")), |
|
402 |
- phenoData=xyPhenoData, protocolData=RG@protocolData, annotation=chipType) |
|
403 |
- storageMode(XY) = "environment" |
|
404 |
-# XY <- new("NChannelSet", |
|
405 |
-# X=matrix(0, nsnps, narrays), |
|
406 |
-# Y=matrix(0, nsnps, narrays), |
|
407 |
-# zero=matrix(0, nsnps, narrays), |
|
408 |
-# annotation=chipType, phenoData=RG@phenoData, |
|
409 |
-# protocolData=RG@protocolData, storage.mode="environment") |
|
410 |
- featureNames(XY) = ids |
|
411 |
- sampleNames(XY) = sampleNames(RG) |
|
412 |
- ## RS |
|
413 |
- rm(list=c("bord", "infI", "aids", "bids", "ids", "snpbase")) |
|
414 |
-# print(gc()) |
|
415 |
- gc(verbose=FALSE) |
|
416 |
- # Need to initialize - matrices filled with NAs to begin with |
|
417 |
-# XY@assayData$X[1:nsnps,] = 0 |
|
418 |
-# XY@assayData$Y[1:nsnps,] = 0 |
|
419 |
-# XY@assayData$zero[1:nsnps,] = 0 |
|
420 |
- |
|
421 |
- # First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
|
422 |
- ## RS added |
|
423 |
- not.na <- !is.na(aord) |
|
424 |
- not.na.aord <- aord[not.na] |
|
425 |
- ## RS substitution !is.na(aord) -> not.na |
|
426 |
- ##r <- as.matrix(exprs(channel(RG, "R"))[not.na.aord,]) # mostly red |
|
427 |
- r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
428 |
- XY@assayData$X[not.na,] <- r |
|
429 |
- rm(r);gc(verbose=FALSE) |
|
430 |
- g <- as.matrix(assayData(RG)[["G"]][not.na.aord,]) # mostly green |
|
431 |
- XY@assayData$Y[not.na,] <- g |
|
432 |
- rm(g); gc(verbose=FALSE) |
|
433 |
- ##z <- as.matrix(exprs(channel(RG, "zero"))[not.na.aord,]) # mostly green |
|
434 |
- z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
435 |
- XY@assayData$zero[not.na,] <- z |
|
436 |
- rm(z); gc(verbose=FALSE) |
|
437 |
- ##RS added |
|
438 |
- rm(RG) |
|
439 |
-# print(gc()) |
|
440 |
- gc(verbose=FALSE) |
|
441 |
- ## Warning - not 100% sure that the code below is correct - could be more complicated than this |
|
442 |
-# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]") |
|
443 |
- |
|
444 |
-# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
|
445 |
-# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
|
446 |
- |
|
447 |
- # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
|
448 |
-# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
|
449 |
- |
|
450 |
-# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red |
|
451 |
-# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green |
|
327 |
+RGtoXY = function (RG, chipType, anno, verbose = TRUE) |
|
328 |
+{ |
|
329 |
+ if (chipType != "nopackage") { |
|
330 |
+ needToLoad <- !all(sapply(c("addressA", "addressB", "base"), |
|
331 |
+ isLoaded)) |
|
332 |
+ if (needToLoad) { |
|
333 |
+ chipList = c("human1mv1c", "human370v1c", "human650v3a", |
|
334 |
+ "human610quadv1b", "human660quadv1a", "human370quadv3c", |
|
335 |
+ "human550v3b", "human1mduov3b", "humanomni1quadv1b", |
|
336 |
+ "humanomni25quadv1b", "humanomni258v1a", "humanomni258v1p1b", |
|
337 |
+ "humanomni5quadv1b", "humanomniexpress12v1b", |
|
338 |
+ "humanimmuno12v1b", "humancytosnp12v2p1h", "humanexome12v1p2a", |
|
339 |
+ "humanomniexpexome8v1p1b") |
|
340 |
+ if (missing(chipType)) { |
|
341 |
+ chipType = match.arg(cleancdfname(annotation(RG)), |
|
342 |
+ chipList) |
|
343 |
+ } |
|
344 |
+ else chipType = match.arg(cleancdfname(chipType), |
|
345 |
+ chipList) |
|
346 |
+ pkgname = getCrlmmAnnotationName(chipType) |
|
347 |
+ if (!require(pkgname, character.only = TRUE, quietly = !verbose)) { |
|
348 |
+ suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", |
|
349 |
+ sep = "") |
|
350 |
+ msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", |
|
351 |
+ suggCall) |
|
352 |
+ message(strwrap(msg)) |
|
353 |
+ stop("Package ", pkgname, " could not be found.") |
|
354 |
+ rm(suggCall, msg) |
|
355 |
+ } |
|
356 |
+ if (verbose) |
|
357 |
+ message("Loading chip annotation information.") |
|
358 |
+ loader("address.rda", .crlmmPkgEnv, pkgname) |
|
359 |
+ } |
|
360 |
+ aids = getVarInEnv("addressA") |
|
361 |
+ bids = getVarInEnv("addressB") |
|
362 |
+ snpbase = getVarInEnv("base") |
|
363 |
+ ids = names(aids) |
|
364 |
+ nsnps = length(aids) |
|
365 |
+ narrays = ncol(RG) |
|
366 |
+ infI = !is.na(bids) & bids != 0 |
|
367 |
+ aord = match(aids, featureNames(RG)) |
|
368 |
+ bord = match(bids, featureNames(RG)) |
|
369 |
+ xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, |
|
370 |
+ varMetadata = RG@phenoData@varMetadata) |
|
371 |
+ levels(xyPhenoData@varMetadata$channel) = c("X", "Y", |
|
372 |
+ "zero", "_ALL_") |
|
373 |
+ XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", |
|
374 |
+ nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", |
|
375 |
+ nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", |
|
376 |
+ nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, |
|
377 |
+ protocolData = RG@protocolData, annotation = chipType) |
|
378 |
+ storageMode(XY) = "environment" |
|
379 |
+ featureNames(XY) = ids |
|
380 |
+ sampleNames(XY) = sampleNames(RG) |
|
381 |
+ rm(list = c("bord", "infI", "aids", "bids", "ids", "snpbase")) |
|
382 |
+ gc(verbose = FALSE) |
|
383 |
+ not.na <- !is.na(aord) |
|
384 |
+ not.na.aord <- aord[not.na] |
|
385 |
+ r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
386 |
+ XY@assayData$X[not.na, ] <- r |
|
387 |
+ rm(r) |
|
388 |
+ gc(verbose = FALSE) |
|
389 |
+ g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ]) |
|
390 |
+ XY@assayData$Y[not.na, ] <- g |
|
391 |
+ rm(g) |
|
392 |
+ gc(verbose = FALSE) |
|
393 |
+ z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
394 |
+ XY@assayData$zero[not.na, ] <- z |
|
395 |
+ rm(z) |
|
396 |
+ gc(verbose = FALSE) |
|
397 |
+ rm(RG) |
|
398 |
+ gc(verbose = FALSE) |
|
399 |
+ } |
|
400 |
+ if (chipType == "nopackage" && !is.null(anno)) { |
|
401 |
+ pkgname = NULL |
|
402 |
+ if(sum(colnames(anno@data)=="AddressA_ID")!=1){ |
|
403 |
+ |
|
404 |
+ message("annotation format is wrong, could not find a column with name: AddressA_ID") |
|
405 |
+ }else{ |
|
406 |
+ anno2=anno@data |
|
407 |
+ # chr = as.character(anno$Chr) |
|
408 |
+ #chr[chr=="X"] = 23 |
|
409 |
+ # chr[chr=="Y"] = 24 |
|
410 |
+ # chr[chr=="XY"] = 25 |
|
411 |
+#chr[chr=="MT"] = 26 |
|
412 |
+#anno[,"chromosome"]=as.integer(chr) |
|
452 | 413 |
|
453 |
- # For now zero out Infinium I probes |
|
454 |
-# XY@assayData$X[infI,] = 0 |
|
455 |
-# XY@assayData$Y[infI,] = 0 |
|
456 |
-# XY@assayData$zero[infI,] = 0 |
|
457 |
-# gc(verbose=FALSE) |
|
458 |
- } |
|
459 |
- if(chipType=="nopackage" && !is.null(anno)) { |
|
460 |
- pkgname=NULL |
|
461 |
-# anno=read.csv(annot,header=TRUE, as.is=TRUE, check.names=FALSE,skip=7) |
|
462 |
- aids=anno[,"AddressA_ID"] |
|
463 |
- bids=anno[,"AddressB_ID"] |
|
464 |
- snpbase=anno[,"SNP"] |
|
465 |
- names(aids)=ids=anno[,"Name"] |
|
466 |
-# m=match(aids,rownames(RG@assayData$R)) |
|
467 |
-# ARG=RG[m[!is.na(m)],] |
|
468 |
-# mm=match(rownames(ARG@assayData$R),aids) |
|
469 |
-# aids=aids[mm] |
|
470 |
-# bids=bids[mm] |
|
471 |
-# snpbas=snpbase[mm] |
|
472 |
- nsnps = length(aids) |
|
473 |
- narrays = ncol(RG) |
|
474 |
- infI = !is.na(bids) & bids != 0 |
|
475 |
- aord = match(aids, featureNames(RG)) |
|
476 |
- bord = match(bids, featureNames(RG)) |
|
477 |
- xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, |
|
478 |
- varMetadata = RG@phenoData@varMetadata) |
|
479 |
- levels(xyPhenoData@varMetadata$channel) = c("X", "Y", "zero", "_ALL_") |
|
480 |
- XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
|
481 |
- Y = initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
|
482 |
- zero = initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")), |
|
483 |
- phenoData = xyPhenoData, protocolData = RG@protocolData) |
|
484 |
- storageMode(XY) = "environment" |
|
485 |
- featureNames(XY) = names(aids) |
|
486 |
- sampleNames(XY) = sampleNames(RG) |
|
487 |
- rm(list = c("bord", "infI", "aids", "bids", "snpbase")) |
|
488 |
- gc(verbose = FALSE) |
|
489 |
- not.na <- !is.na(aord) |
|
490 |
- not.na.aord <- aord[not.na] |
|
491 |
- r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
492 |
- XY@assayData$X[not.na, ] <- r |
|
493 |
- rm(r) |
|
494 |
- gc(verbose = FALSE) |
|
495 |
- g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ]) |
|
496 |
- XY@assayData$Y[not.na, ] <- g |
|
497 |
- rm(g) |
|
498 |
- gc(verbose = FALSE) |
|
499 |
- z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
500 |
- XY@assayData$zero[not.na, ] <- z |
|
501 |
- rm(z) |
|
502 |
- gc(verbose = FALSE) |
|
503 |
- rm(RG) |
|
504 |
- gc(verbose = FALSE) |
|
505 |
- } |
|
506 |
- XY |
|
414 |
+ aids = anno2[, "AddressA_ID"] |
|
415 |
+ bids = anno2[, "AddressB_ID"] |
|
416 |
+ snpbase = anno2[, "SNP"] |
|
417 |
+ names(aids) = ids = anno2[, "featureNames"] |
|
418 |
+ nsnps = length(aids) |
|
419 |
+ narrays = ncol(RG) |
|
420 |
+ infI = !is.na(bids) & bids != 0 |
|
421 |
+ aord = match(aids, featureNames(RG)) |
|
422 |
+ bord = match(bids,featureNames(RG)) |
|
423 |
+ xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, |
|
424 |
+ varMetadata = RG@phenoData@varMetadata) |
|
425 |
+ levels(xyPhenoData@varMetadata$channel) = c("X", "Y", |
|
426 |
+ "zero", "_ALL_") |
|
427 |
+ XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", |
|
428 |
+ nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", |
|
429 |
+ nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", |
|
430 |
+ nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, |
|
431 |
+ protocolData = RG@protocolData) |
|
432 |
+ storageMode(XY) = "environment" |
|
433 |
+ # featureNames(XY) = names(aids) |
|
434 |
+ sampleNames(XY) = sampleNames(RG) |
|
435 |
+ rm(list = c("bord", "infI", "aids", "bids", "snpbase")) |
|
436 |
+ gc(verbose = FALSE) |
|
437 |
+ not.na <- !is.na(aord) |
|
438 |
+ not.na.aord <- aord[not.na] |
|
439 |
+ r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
440 |
+ XY@assayData$X[not.na, ] <- r |
|
441 |
+ rm(r) |
|
442 |
+ gc(verbose = FALSE) |
|
443 |
+ g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ]) |
|
444 |
+ XY@assayData$Y[not.na, ] <- g |
|
445 |
+ rm(g) |
|
446 |
+ gc(verbose = FALSE) |
|
447 |
+ z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
448 |
+ XY@assayData$zero[not.na, ] <- z |
|
449 |
+ rm(z) |
|
450 |
+ gc(verbose = FALSE) |
|
451 |
+ rm(RG) |
|
452 |
+ gc(verbose = FALSE) |
|
453 |
+ |
|
454 |
+ |
|
455 |
+ |
|
456 |
+ |
|
457 |
+ } |
|
458 |
+ } |
|
459 |
+ XY |
|
507 | 460 |
} |
508 | 461 |
|
509 | 462 |
|
... | ... |
@@ -1155,7 +1108,10 @@ constructInf <- function(sampleSheet=NULL, |
1155 | 1108 |
genome <- getAvailableIlluminaGenomeBuild(path) |
1156 | 1109 |
featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome) |
1157 | 1110 |
} else { |
1158 |
- featureData=anno |
|
1111 |
+ anno = as(anno, "AnnotatedDataFrame") |
|
1112 |
+ anno = as(anno, "GenomeAnnotatedDataFrame") |
|
1113 |
+featureData=anno |
|
1114 |
+ |
|
1159 | 1115 |
} |
1160 | 1116 |
nr <- nrow(featureData) |
1161 | 1117 |
nc <- narrays |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@117087 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -12,7 +12,7 @@ readIdatFiles = function(sampleSheet=NULL, |
12 | 12 |
sep="_", |
13 | 13 |
fileExt=list(green="Grn.idat", red="Red.idat"), |
14 | 14 |
saveDate=FALSE, verbose=FALSE) { |
15 |
- verbose <- FALSE |
|
15 |
+ verbose <- FALSE |
|
16 | 16 |
if(!is.null(arrayNames)) { |
17 | 17 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
18 | 18 |
} |
... | ... |
@@ -57,7 +57,7 @@ readIdatFiles = function(sampleSheet=NULL, |
57 | 57 |
grnidats = file.path(path, grnfiles) |
58 | 58 |
redidats = file.path(path, redfiles) |
59 | 59 |
} else { |
60 |
- message("path arg not set. Assuming files are in local directory, or that complete path is provided") |
|
60 |
+ message("'path' arg not set. Assuming files are in local directory, or that complete path is provided in 'arrayNames'") |
|
61 | 61 |
grnidats = grnfiles |
62 | 62 |
redidats = redfiles |
63 | 63 |
} |
... | ... |
@@ -102,13 +102,23 @@ readIdatFiles = function(sampleSheet=NULL, |
102 | 102 |
} # else stop("Could not find probe IDs") |
103 | 103 |
nprobes = length(ids) |
104 | 104 |
narrays = length(arrayNames) |
105 |
- RG = new("NChannelSet", |
|
106 |
- R=matrix(0, nprobes, narrays), |
|
107 |
- G=matrix(0, nprobes, narrays), |
|
108 |
- zero=matrix(1, nprobes, narrays), |
|
105 |
+# if(cdfName=="nopackage") { |
|
106 |
+ RG = new("NChannelSet", |
|
107 |
+ R = initializeBigMatrix(name = "R", nr = nprobes, nc = narrays, vmode = "integer"), |
|
108 |
+ G = initializeBigMatrix(name = "G", nr = nprobes, nc = narrays, vmode = "integer"), |
|
109 |
+ zero = initializeBigMatrix(name = "zero", nr = nprobes, nc = narrays, vmode = "integer"), |
|
109 | 110 |
annotation=headerInfo$Manifest[1], |
110 | 111 |
phenoData=pd, storage.mode="environment") |
112 |
+# } else { |
|
113 |
+# RG = new("NChannelSet", |
|
114 |
+# R = matrix(0, nprobes, narrays), |
|
115 |
+# G = matrix(0, nprobes, narrays), |
|
116 |
+# zero = matrix(1, nprobes, narrays), |
|
117 |
+# annotation=headerInfo$Manifest[1], |
|
118 |
+# phenoData=pd, storage.mode="environment") |
|
119 |
+# } |
|
111 | 120 |
featureNames(RG) = ids |
121 |
+ |
|
112 | 122 |
if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){ |
113 | 123 |
sampleNames(RG) = make.unique(sampleSheet$Sample_ID) |
114 | 124 |
} else sampleNames(RG) = make.unique(basename(arrayNames)) |
... | ... |
@@ -289,6 +299,9 @@ readGenCallOutput = function(filenames, path=".", cdfName, |
289 | 299 |
# matching on SNP names? now assume they come in order |
290 | 300 |
} |
291 | 301 |
maxIndex = baseIndex + sampleCounts[i] - 1 |
302 |
+ m=match(totSNPNames, rownames(valuesThisFile$Xvalues)) |
|
303 |
+ valuesThisFile$Xvalues=valuesThisFile$Xvalues[m,] |
|
304 |
+ valuesThisFile$Yvalues=valuesThisFile$Yvalues[m,] |
|
292 | 305 |
X[, baseIndex:maxIndex] = valuesThisFile$Xvalues |
293 | 306 |
Y[, baseIndex:maxIndex] = valuesThisFile$Yvalues |
294 | 307 |
zero[, baseIndex:maxIndex] = (X[, baseIndex:maxIndex] == 0) || (Y[, baseIndex:maxIndex] == 0) |
... | ... |
@@ -311,7 +324,8 @@ readGenCallOutput = function(filenames, path=".", cdfName, |
311 | 324 |
|
312 | 325 |
|
313 | 326 |
|
314 |
-RGtoXY = function(RG, chipType, verbose=TRUE) { |
|
327 |
+RGtoXY = function(RG, chipType, anno, verbose=TRUE) { |
|
328 |
+ if(chipType!="nopackage") { |
|
315 | 329 |
needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded)) |
316 | 330 |
if(needToLoad){ |
317 | 331 |
chipList = c("human1mv1c",# 1M |
... | ... |
@@ -368,7 +382,6 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
368 | 382 |
# aidcol = match("AddressA_ID", colnames(annot)) |
369 | 383 |
# if(is.na(aidcol)) |
370 | 384 |
# aidcol = match("Address", colnames(annot)) |
371 |
-# bidcol = match("AddressB_ID", colnames(annot)) |
|
372 | 385 |
# if(is.na(bidcol)) |
373 | 386 |
# bidcol = match("Address2", colnames(annot)) |
374 | 387 |
# aids = annot[, aidcol] |
... | ... |
@@ -383,7 +396,9 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
383 | 396 |
xyPhenoData = AnnotatedDataFrame(data=RG@phenoData@data,varMetadata=RG@phenoData@varMetadata) |
384 | 397 |
levels(xyPhenoData@varMetadata$channel) = c("X","Y","zero","_ALL_") |
385 | 398 |
XY <- new("NChannelSet", |
386 |
- assayDataNew(X=matrix(0, nsnps, narrays),Y=matrix(0, nsnps, narrays),zero=matrix(0, nsnps, narrays)), |
|
399 |
+ assayDataNew(X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
|
400 |
+ Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
|
401 |
+ zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")), |
|
387 | 402 |
phenoData=xyPhenoData, protocolData=RG@protocolData, annotation=chipType) |
388 | 403 |
storageMode(XY) = "environment" |
389 | 404 |
# XY <- new("NChannelSet", |
... | ... |
@@ -440,7 +455,55 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
440 | 455 |
# XY@assayData$Y[infI,] = 0 |
441 | 456 |
# XY@assayData$zero[infI,] = 0 |
442 | 457 |
# gc(verbose=FALSE) |
443 |
- XY |
|
458 |
+ } |
|
459 |
+ if(chipType=="nopackage" && !is.null(anno)) { |
|
460 |
+ pkgname=NULL |
|
461 |
+# anno=read.csv(annot,header=TRUE, as.is=TRUE, check.names=FALSE,skip=7) |
|
462 |
+ aids=anno[,"AddressA_ID"] |
|
463 |
+ bids=anno[,"AddressB_ID"] |
|
464 |
+ snpbase=anno[,"SNP"] |
|
465 |
+ names(aids)=ids=anno[,"Name"] |
|
466 |
+# m=match(aids,rownames(RG@assayData$R)) |
|
467 |
+# ARG=RG[m[!is.na(m)],] |
|
468 |
+# mm=match(rownames(ARG@assayData$R),aids) |
|
469 |
+# aids=aids[mm] |
|
470 |
+# bids=bids[mm] |
|
471 |
+# snpbas=snpbase[mm] |
|
472 |
+ nsnps = length(aids) |
|
473 |
+ narrays = ncol(RG) |
|
474 |
+ infI = !is.na(bids) & bids != 0 |
|
475 |
+ aord = match(aids, featureNames(RG)) |
|
476 |
+ bord = match(bids, featureNames(RG)) |
|
477 |
+ xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, |
|
478 |
+ varMetadata = RG@phenoData@varMetadata) |
|
479 |
+ levels(xyPhenoData@varMetadata$channel) = c("X", "Y", "zero", "_ALL_") |
|
480 |
+ XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"), |
|
481 |
+ Y = initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"), |
|
482 |
+ zero = initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")), |
|
483 |
+ phenoData = xyPhenoData, protocolData = RG@protocolData) |
|
484 |
+ storageMode(XY) = "environment" |
|
485 |
+ featureNames(XY) = names(aids) |
|
486 |
+ sampleNames(XY) = sampleNames(RG) |
|
487 |
+ rm(list = c("bord", "infI", "aids", "bids", "snpbase")) |
|
488 |
+ gc(verbose = FALSE) |
|
489 |
+ not.na <- !is.na(aord) |
|
490 |
+ not.na.aord <- aord[not.na] |
|
491 |
+ r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ]) |
|
492 |
+ XY@assayData$X[not.na, ] <- r |
|
493 |
+ rm(r) |
|
494 |
+ gc(verbose = FALSE) |
|
495 |
+ g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ]) |
|
496 |
+ XY@assayData$Y[not.na, ] <- g |
|
497 |
+ rm(g) |
|
498 |
+ gc(verbose = FALSE) |
|
499 |
+ z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ]) |
|
500 |
+ XY@assayData$zero[not.na, ] <- z |
|
501 |
+ rm(z) |
|
502 |
+ gc(verbose = FALSE) |
|
503 |
+ rm(RG) |
|
504 |
+ gc(verbose = FALSE) |
|
505 |
+ } |
|
506 |
+ XY |
|
444 | 507 |
} |
445 | 508 |
|
446 | 509 |
|
... | ... |
@@ -704,186 +767,188 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
704 | 767 |
return(res) |
705 | 768 |
} |
706 | 769 |
|
707 |
- |
|
708 |
-crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
|
709 |
- row.names=TRUE, col.names=TRUE, |
|
710 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
711 |
- seed=1, |
|
712 |
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
713 |
- cdfName, sns, recallMin=10, recallRegMin=1000, |
|
714 |
- returnParams=FALSE, badSNP=.7) { |
|
715 |
- if(missing(cdfName)) { |
|
716 |
- if(!missing(RG)) |
|
717 |
- cdfName = annotation(RG) |
|
718 |
- if(!missing(XY)) |
|
719 |
- cdfName = annotation(XY) |
|
720 |
- } |
|
721 |
- if(!isValidCdfName(cdfName)) |
|
722 |
- stop("cdfName not valid. see validCdfNames") |
|
723 |
- if(!missing(RG)) { |
|
724 |
- if(missing(XY)) |
|
725 |
- XY = RGtoXY(RG, chipType=cdfName) |
|
726 |
- else |
|
727 |
- stop("Both RG and XY specified - please use one or the other") |
|
728 |
- } |
|
729 |
- if (missing(sns)) sns = sampleNames(XY) |
|
730 |
- res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, |
|
731 |
- fitMixture=TRUE, verbose=verbose, |
|
732 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, |
|
733 |
- stripNorm=stripNorm, useTarget=useTarget) #, |
|
734 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
735 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
736 |
- res2 <- crlmmGT(A=res[["A"]], |
|
737 |
- B=res[["B"]], |
|
738 |
- SNR=res[["SNR"]], |
|
739 |
- mixtureParams=res[["mixtureParams"]], |
|
740 |
- cdfName=cdfName, |
|
741 |
- row.names=row.names, |
|
742 |
- col.names=col.names, |
|
743 |
- probs=probs, |
|
744 |
- DF=DF, |
|
745 |
- SNRMin=SNRMin, |
|
746 |
- recallMin=recallMin, |
|
747 |
- recallRegMin=recallRegMin, |
|
748 |
- gender=gender, |
|
749 |
- verbose=verbose, |
|
750 |
- returnParams=returnParams, |
|
751 |
- badSNP=badSNP) |
|
752 |
- res2[["SNR"]] = res[["SNR"]] |
|
753 |
- res2[["SKW"]] = res[["SKW"]] |
|
754 |
- rm(res); gc(verbose=FALSE) |
|
755 |
- return(list2SnpSet(res2, returnParams=returnParams)) |
|
756 |
-} |
|
757 |
- |
|
758 |
- |
|
770 |
+## crlmmIllumina and genotype.Illumina are now the same function |
|
771 |
+## Use genotype.Illumina instead |
|
772 |
+#crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
|
773 |
+# row.names=TRUE, col.names=TRUE, |
|
774 |
+# probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
775 |
+# seed=1, |
|
776 |
+# mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
777 |
+# cdfName, sns, recallMin=10, recallRegMin=1000, |
|
778 |
+# returnParams=FALSE, badSNP=.7) { |
|
779 |
+# if(missing(cdfName)) { |
|
780 |
+# if(!missing(RG)) |
|
781 |
+# cdfName = annotation(RG) |
|
782 |
+# if(!missing(XY)) |
|
783 |
+# cdfName = annotation(XY) |
|
784 |
+# } |
|
785 |
+# if(!isValidCdfName(cdfName)) |
|
786 |
+# stop("cdfName not valid. see validCdfNames") |
|
787 |
+# if(!missing(RG)) { |
|
788 |
+# if(missing(XY)) |
|
789 |
+# XY = RGtoXY(RG, chipType=cdfName) |
|
790 |
+# else |
|
791 |
+# stop("Both RG and XY specified - please use one or the other") |
|
792 |
+# } |
|
793 |
+# if (missing(sns)) sns = sampleNames(XY) |
|
794 |
+# res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, |
|
795 |
+# fitMixture=TRUE, verbose=verbose, |
|
796 |
+# seed=seed, eps=eps, cdfName=cdfName, sns=sns, |
|
797 |
+# stripNorm=stripNorm, useTarget=useTarget) #, |
|
798 |
+# if(row.names) row.names=res$gns else row.names=NULL |
|
799 |
+# if(col.names) col.names=res$sns else col.names=NULL |
|
800 |
+# res2 <- crlmmGT(A=res[["A"]], |
|
801 |
+# B=res[["B"]], |
|
802 |
+# SNR=res[["SNR"]], |
|
803 |
+# mixtureParams=res[["mixtureParams"]], |
|
804 |
+# cdfName=cdfName, |
|
805 |
+# row.names=row.names, |
|
806 |
+# col.names=col.names, |
|
807 |
+# probs=probs, |
|
808 |
+# DF=DF, |
|
809 |
+# SNRMin=SNRMin, |
|
810 |
+# recallMin=recallMin, |
|
811 |
+# recallRegMin=recallRegMin, |
|
812 |
+# gender=gender, |
|
813 |
+# verbose=verbose, |
|
814 |
+# returnParams=returnParams, |
|
815 |
+# badSNP=badSNP) |
|
816 |
+# res2[["SNR"]] = res[["SNR"]] |
|
817 |
+# res2[["SKW"]] = res[["SKW"]] |
|
818 |
+# rm(res); gc(verbose=FALSE) |
|
819 |
+# return(list2SnpSet(res2, returnParams=returnParams)) |
|
820 |
+#} |
|
821 |
+ |
|
822 |
+ |
|
823 |
+## Deprecate crlmmIlluminaV2 function |
|
759 | 824 |
## MR: Below is a more memory efficient version of crlmmIllumina() which |
760 | 825 |
## reads in the .idats and genotypes in the one function and removes objects |
761 | 826 |
## after they have been used |
762 |
-crlmmIlluminaV2 = function(sampleSheet=NULL, |
|
763 |
- arrayNames=NULL, |
|
764 |
- ids=NULL, |
|
765 |
- path=".", |
|
766 |
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
767 |
- highDensity=FALSE, |
|
768 |
- sep="_", |
|
769 |
- fileExt=list(green="Grn.idat", red="Red.idat"), |
|
770 |
- saveDate=FALSE, |
|
771 |
- stripNorm=TRUE, |
|
772 |
- useTarget=TRUE, |
|
773 |
- # outdir=".", |
|
774 |
- row.names=TRUE, |
|
775 |
- col.names=TRUE, |
|
776 |
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
777 |
- seed=1, # save.it=FALSE, snpFile, cnFile, |
|
778 |
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
779 |
- cdfName, sns, recallMin=10, recallRegMin=1000, |
|
780 |
- returnParams=FALSE, badSNP=.7) { |
|
781 |
- |
|
782 |
- if(missing(cdfName)) stop("must specify cdfName") |
|
783 |
- if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
|
784 |
- |
|
785 |
-# is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
786 |
- is.lds=FALSE |
|
787 |
- RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
788 |
- ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
789 |
- highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate) |
|
790 |
- |
|
791 |
- |
|
792 |
- XY = RGtoXY(RG, chipType=cdfName) |
|
793 |
-# if(is.lds) { |
|
794 |
-# open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero) |
|
795 |
-# delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero) |
|
796 |
-# } |
|
797 |
- rm(RG); gc(verbose=FALSE) |
|
798 |
- |
|
799 |
- if (missing(sns)) { sns = sampleNames(XY) |
|
800 |
- } |
|
801 |
- res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
802 |
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir) #, |
|
803 |
-# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
804 |
- |
|
805 |
-# if(is.lds) { |
|
806 |
-# open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero) |
|
807 |
-# delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero) |
|
808 |
-# } |
|
809 |
- rm(XY); gc(verbose=FALSE) |
|
810 |
- |
|
811 |
- if(row.names) row.names=res$gns else row.names=NULL |
|
812 |
- if(col.names) col.names=res$sns else col.names=NULL |
|
813 |
-## |
|
814 |
-## if(is.lds){ |
|
815 |
-## res2 <- crlmmGT2(A=res[["A"]], |
|
816 |
-## B=res[["B"]], |
|
817 |
-## SNR=res[["SNR"]], |
|
818 |
-## mixtureParams=res[["mixtureParams"]], |
|
819 |
-## cdfName=cdfName, |
|
820 |
-## row.names=row.names, |
|
821 |
-## col.names=col.names, |
|
822 |
-## probs=probs, |
|
823 |
-## DF=DF, |
|
824 |
-## SNRMin=SNRMin, |
|
825 |
-## recallMin=recallMin, |
|
826 |
-## recallRegMin=recallRegMin, |
|
827 |
-## gender=gender, |
|
828 |
-## verbose=verbose, |
|
829 |
-## returnParams=returnParams, |
|
830 |
-## badSNP=badSNP) |
|
831 |
-## } else { |
|
832 |
- res2 <- crlmmGT(A=res[["A"]], |
|
833 |
- B=res[["B"]], |
|
834 |
- SNR=res[["SNR"]], |
|
835 |
- mixtureParams=res[["mixtureParams"]], |
|
836 |
- cdfName=cdfName, |
|
837 |
- row.names=row.names, |
|
838 |
- col.names=col.names, |
|
839 |
- probs=probs, |
|
840 |
- DF=DF, |
|
841 |
- SNRMin=SNRMin, |
|
842 |
- recallMin=recallMin, |
|
843 |
- recallRegMin=recallRegMin, |
|
844 |
- gender=gender, |
|
845 |
- verbose=verbose, |
|
846 |
- returnParams=returnParams, |
|
847 |
- badSNP=badSNP) |
|
848 |
-## } |
|
849 |
- |
|
850 |
-## FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT") |
|
851 |
-## ## genotyping |
|
852 |
-## crlmmGTfxn = function(FUN,...){ |
|
853 |
-## switch(FUN, |
|
854 |
-## crlmmGT2=crlmmGT2(...), |
|
855 |
-## crlmmGT=crlmmGT(...)) |
|
856 |
-## } |
|
857 |
-## res2 = crlmmGTfxn(FUN, |
|
858 |
-## A=res[["A"]], |
|
859 |
-## B=res[["B"]], |
|
860 |
-## SNR=res[["SNR"]], |
|
861 |
-## mixtureParams=res[["mixtureParams"]], |
|
862 |
-## cdfName=cdfName, |
|
863 |
-## row.names=row.names, |
|
864 |
-## col.names=col.names, |
|
865 |
-## probs=probs, |
|
866 |
-## DF=DF, |
|
867 |
-## SNRMin=SNRMin, |
|
868 |
-## recallMin=recallMin, |
|
869 |
-## recallRegMin=recallRegMin, |
|
870 |
-## gender=gender, |
|
871 |
-## verbose=verbose, |
|
872 |
-## returnParams=returnParams, |
|
873 |
-## badSNP=badSNP) |
|
874 |
- |
|
875 |
-# if(is.lds) { |
|
876 |
-# open(res[["SNR"]]); open(res[["SKW"]]) |
|
827 |
+#crlmmIlluminaV2 = function(sampleSheet=NULL, |
|
828 |
+# arrayNames=NULL, |
|
829 |
+# ids=NULL, |
|
830 |
+# path=".", |
|
831 |
+# arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
832 |
+# highDensity=FALSE, |
|
833 |
+# sep="_", |
|
834 |
+# fileExt=list(green="Grn.idat", red="Red.idat"), |
|
835 |
+# saveDate=FALSE, |
|
836 |
+# stripNorm=TRUE, |
|
837 |
+# useTarget=TRUE, |
|
838 |
+# # outdir=".", |
|
839 |
+# row.names=TRUE, |
|
840 |
+# col.names=TRUE, |
|
841 |
+# probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
842 |
+# seed=1, # save.it=FALSE, snpFile, cnFile, |
|
843 |
+# mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
844 |
+# cdfName, sns, recallMin=10, recallRegMin=1000, |
|
845 |
+# returnParams=FALSE, badSNP=.7) { |
|
846 |
+# |
|
847 |
+# if(missing(cdfName)) stop("must specify cdfName") |
|
848 |
+# if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
|
849 |
+# |
|
850 |
+## is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
851 |
+# is.lds=FALSE |
|
852 |
+# RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames, |
|
853 |
+# ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
|
854 |
+# highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate) |
|
855 |
+# |
|
856 |
+# |
|
857 |
+# XY = RGtoXY(RG, chipType=cdfName) |
|
858 |
+## if(is.lds) { |
|
859 |
+## open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero) |
|
860 |
+## delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero) |
|
861 |
+## } |
|
862 |
+# rm(RG); gc(verbose=FALSE) |
|
863 |
+# |
|
864 |
+# if (missing(sns)) { sns = sampleNames(XY) |
|
877 | 865 |
# } |
878 |
- res2[["SNR"]] = res[["SNR"]] |
|
879 |
- res2[["SKW"]] = res[["SKW"]] |
|
880 |
- # if(is.lds) { |
|
881 |
- # delete(res[["A"]]); delete(res[["B"]]) |
|
882 |
- # delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]]) |
|
883 |
- # } |
|
884 |
- rm(res); gc(verbose=FALSE) |
|
885 |
- return(list2SnpSet(res2, returnParams=returnParams)) |
|
886 |
-} |
|
866 |
+# res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
867 |
+# seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir) #, |
|
868 |
+## save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
869 |
+# |
|
870 |
+## if(is.lds) { |
|
871 |
+## open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero) |
|
872 |
+## delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero) |
|
873 |
+## } |
|
874 |
+# rm(XY); gc(verbose=FALSE) |
|
875 |
+# |
|
876 |
+# if(row.names) row.names=res$gns else row.names=NULL |
|
877 |
+# if(col.names) col.names=res$sns else col.names=NULL |
|
878 |
+### |
|
879 |
+### if(is.lds){ |
|
880 |
+### res2 <- crlmmGT2(A=res[["A"]], |
|
881 |
+### B=res[["B"]], |
|
882 |
+### SNR=res[["SNR"]], |
|
883 |
+### mixtureParams=res[["mixtureParams"]], |
|
884 |
+### cdfName=cdfName, |
|
885 |
+### row.names=row.names, |
|
886 |
+### col.names=col.names, |
|
887 |
+### probs=probs, |
|
888 |
+### DF=DF, |
|
889 |
+### SNRMin=SNRMin, |
|
890 |
+### recallMin=recallMin, |
|
891 |
+### recallRegMin=recallRegMin, |
|
892 |
+### gender=gender, |
|
893 |
+### verbose=verbose, |
|
894 |
+### returnParams=returnParams, |
|
895 |
+### badSNP=badSNP) |
|
896 |
+### } else { |
|
897 |
+# res2 <- crlmmGT(A=res[["A"]], |
|
898 |
+# B=res[["B"]], |
|
899 |
+# SNR=res[["SNR"]], |
|
900 |
+# mixtureParams=res[["mixtureParams"]], |
|
901 |
+# cdfName=cdfName, |
|
902 |
+# row.names=row.names, |
|
903 |
+# col.names=col.names, |
|
904 |
+# probs=probs, |
|
905 |
+# DF=DF, |
|
906 |
+# SNRMin=SNRMin, |
|
907 |
+# recallMin=recallMin, |
|
908 |
+# recallRegMin=recallRegMin, |
|
909 |
+# gender=gender, |
|
910 |
+# verbose=verbose, |
|
911 |
+# returnParams=returnParams, |
|
912 |
+# badSNP=badSNP) |
|
913 |
+### } |
|
914 |
+# |
|
915 |
+### FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT") |
|
916 |
+### ## genotyping |
|
917 |
+### crlmmGTfxn = function(FUN,...){ |
|
918 |
+### switch(FUN, |
|
919 |
+### crlmmGT2=crlmmGT2(...), |
|
920 |
+### crlmmGT=crlmmGT(...)) |
|
921 |
+### } |
|
922 |
+### res2 = crlmmGTfxn(FUN, |
|
923 |
+### A=res[["A"]], |
|
924 |
+### B=res[["B"]], |
|
925 |
+### SNR=res[["SNR"]], |
|
926 |
+### mixtureParams=res[["mixtureParams"]], |
|
927 |
+### cdfName=cdfName, |
|
928 |
+### row.names=row.names, |
|
929 |
+### col.names=col.names, |
|
930 |
+### probs=probs, |
|
931 |
+### DF=DF, |
|
932 |
+### SNRMin=SNRMin, |
|
933 |
+### recallMin=recallMin, |
|
934 |
+### recallRegMin=recallRegMin, |
|
935 |
+### gender=gender, |
|
936 |
+### verbose=verbose, |
|
937 |
+### returnParams=returnParams, |
|
938 |
+### badSNP=badSNP) |
|
939 |
+# |
|
940 |
+## if(is.lds) { |
|
941 |
+## open(res[["SNR"]]); open(res[["SKW"]]) |
|
942 |
+## } |
|
943 |
+# res2[["SNR"]] = res[["SNR"]] |
|
944 |
+# res2[["SKW"]] = res[["SKW"]] |
|
945 |
+# # if(is.lds) { |
|
946 |
+# # delete(res[["A"]]); delete(res[["B"]]) |
|
947 |
+# # delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]]) |
|
948 |
+# # } |
|
949 |
+# rm(res); gc(verbose=FALSE) |
|
950 |
+# return(list2SnpSet(res2, returnParams=returnParams)) |
|
951 |
+#} |
|
887 | 952 |
|
888 | 953 |
# Functions analogous to Rob's Affy functions to set up container |
889 | 954 |
getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat", verbose=FALSE) { |
... | ... |
@@ -950,6 +1015,8 @@ constructInf <- function(sampleSheet=NULL, |
950 | 1015 |
fileExt=list(green="Grn.idat", red="Red.idat"), |
951 | 1016 |
XY, |
952 | 1017 |
cdfName, |
1018 |
+ anno, |
|
1019 |
+ genome, |
|
953 | 1020 |
verbose=FALSE, |
954 | 1021 |
batch=NULL, #fns, |
955 | 1022 |
saveDate=TRUE) { #, outdir="."){ |
... | ... |
@@ -1015,16 +1082,31 @@ constructInf <- function(sampleSheet=NULL, |
1015 | 1082 |
if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
1016 | 1083 |
|
1017 | 1084 |
if(verbose) message("Initializing container for genotyping and copy number estimation") |
1018 |
- pkgname <- getCrlmmAnnotationName(cdfName) |
|
1019 |
- path <- system.file("extdata", package=pkgname) |
|
1020 |
- genome <- getAvailableIlluminaGenomeBuild(path) |
|
1021 |
- featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome) |
|
1022 |
- nr = nrow(featureData); nc = narrays |
|
1023 |
- sns <- if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) { |
|
1085 |
+ if(cdfName!="nopackage") { |
|
1086 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
1087 |
+ path <- system.file("extdata", package=pkgname) |
|
1088 |
+ genome <- getAvailableIlluminaGenomeBuild(path) |
|
1089 |
+ featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome) |
|
1090 |
+ } else { |
|
1091 |
+ if(!is.integer(anno$chr)) { |
|
1092 |
+ chr = as.character(anno$chromosome) |
|
1093 |
+ chr[chr=="X"] = 23 |
|
1094 |
+ chr[chr=="Y"] = 24 |
|
1095 |
+ chr[chr=="XY"] = 25 |
|
1096 |
+ } |
|
1097 |
+ featureData = new("GenomeAnnotatedDataFrame", |
|
1098 |
+ isSnp=as.logical(anno$isSnp), |
|
1099 |
+ position=as.integer(anno$position), |
|
1100 |
+ chromosome=as.integer(chr), |
|
1101 |
+ row.names=anno$featureNames) |
|
1102 |
+ } |
|
1103 |
+ nr <- nrow(featureData) |
|
1104 |
+ nc <- narrays |
|
1105 |
+ sns <- if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) { |
|
1024 | 1106 |
make.unique(sampleSheet$Sample_ID) |
1025 |
- } else{ |
|
1107 |
+ } else{ |
|
1026 | 1108 |
make.unique(basename(arrayNames)) |
1027 |
- } |
|
1109 |
+ } |
|
1028 | 1110 |
biga <- initializeBigMatrix(name="A", nr, nc) |
1029 | 1111 |
bigb <- initializeBigMatrix(name="B", nr, nc) |
1030 | 1112 |
bigc <- initializeBigMatrix(name="call", nr, nc) |
... | ... |
@@ -1067,11 +1149,16 @@ constructInf <- function(sampleSheet=NULL, |
1067 | 1149 |
batch = rep("batch1", narrays) # assume only one batch stop("Must specify 'batch'") |
1068 | 1150 |
} |
1069 | 1151 |
if(is(batch, "factor")) batch = as.character(batch) |
1070 |
- pkgname <- getCrlmmAnnotationName(cdfName) |
|
1071 |
- path <- system.file("extdata", package=pkgname) |
|
1072 |
- genome <- getAvailableIlluminaGenomeBuild(path) |
|
1073 |
- featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome) |
|