... |
... |
@@ -49,22 +49,24 @@ getFeatureData.Affy <- function(cdfName, copynumber=FALSE){
|
49 |
49 |
##A few of the snpProbes do not match -- I think it is chromosome Y.
|
50 |
50 |
M[is.na(M[, "isSnp"]), "isSnp"] <- 1L
|
51 |
51 |
return(new("AnnotatedDataFrame", data=data.frame(M)))
|
52 |
|
- ##list(snpIndex, npIndex, fns)
|
53 |
|
- ##crlmmOpts$snpRange <- range(snpIndex)
|
54 |
|
- ##crlmmOpts$npRange <- range(npIndex)
|
55 |
52 |
}
|
56 |
53 |
|
57 |
54 |
construct <- function(filenames, cdfName, copynumber=FALSE,
|
58 |
55 |
sns, verbose=TRUE, batch, fns){
|
|
56 |
+ if(!missing(batch)){
|
|
57 |
+ stopifnot(length(batch) == length(sns))
|
|
58 |
+ }
|
|
59 |
+ if(missing(sns) & missing(filenames)) stop("one of filenames or samplenames (sns) must be provided")
|
59 |
60 |
if(verbose) message("Initializing container for assay data elements alleleA, alleleB, call, callProbability, CA, CB")
|
60 |
|
- if(missing(sns)) sns <- basename(filenames)
|
61 |
|
- protocolData <- getProtocolData.Affy(filenames)
|
62 |
|
- if(missing(batch)){
|
|
61 |
+ if(!missing(filenames)){
|
|
62 |
+ if(missing(sns)) sns <- basename(filenames)
|
|
63 |
+ protocolData <- getProtocolData.Affy(filenames)
|
63 |
64 |
protocolData$batch <- as.numeric(as.factor(protocolData$ScanDate))
|
64 |
|
- } else {
|
65 |
|
- if(length(batch) != length(filenames))
|
66 |
|
- stop("batch variable must be the same length as the filenames")
|
67 |
|
- protocolData$batch <- batch
|
|
65 |
+ } else{
|
|
66 |
+ protocolData <- new("AnnotatedDataFrame",
|
|
67 |
+ data=data.frame(batch=batch),
|
|
68 |
+ varMetadata=data.frame(labelDescription="batch",
|
|
69 |
+ row.names="batch"))
|
68 |
70 |
}
|
69 |
71 |
rownames(pData(protocolData)) <- sns
|
70 |
72 |
featureData <- getFeatureData.Affy(cdfName, copynumber=copynumber)
|
... |
... |
@@ -73,7 +75,7 @@ construct <- function(filenames, cdfName, copynumber=FALSE,
|
73 |
75 |
if(all(is.na(index))) stop("fns not in featureNames")
|
74 |
76 |
featureData <- featureData[index, ]
|
75 |
77 |
}
|
76 |
|
- nr <- nrow(featureData); nc <- length(filenames)
|
|
78 |
+ nr <- nrow(featureData); nc <- length(sns)
|
77 |
79 |
ffObjects <- list(alleleA=initializeBigMatrix(name="A", nr, nc),
|
78 |
80 |
alleleB=initializeBigMatrix(name="B", nr, nc),
|
79 |
81 |
call=initializeBigMatrix(name="call", nr, nc),
|
... |
... |
@@ -99,8 +101,6 @@ construct <- function(filenames, cdfName, copynumber=FALSE,
|
99 |
101 |
return(callSet)
|
100 |
102 |
}
|
101 |
103 |
|
102 |
|
-
|
103 |
|
-
|
104 |
104 |
genotype <- function(filenames,
|
105 |
105 |
cdfName,
|
106 |
106 |
batch,
|
... |
... |
@@ -313,364 +313,6 @@ genotypeLD <- function(filenames,
|
313 |
313 |
}
|
314 |
314 |
genotype2 <- genotypeLD
|
315 |
315 |
|
316 |
|
-
|
317 |
|
-genotype3 <- function(filenames,
|
318 |
|
- cdfName,
|
319 |
|
- batch,
|
320 |
|
- mixtureSampleSize=10^5,
|
321 |
|
- eps=0.1,
|
322 |
|
- verbose=TRUE,
|
323 |
|
- seed=1,
|
324 |
|
- sns,
|
325 |
|
- copynumber=FALSE,
|
326 |
|
- probs=rep(1/3, 3),
|
327 |
|
- DF=6,
|
328 |
|
- SNRMin=5,
|
329 |
|
- recallMin=10,
|
330 |
|
- recallRegMin=1000,
|
331 |
|
- gender=NULL,
|
332 |
|
- returnParams=TRUE,
|
333 |
|
- badSNP=0.7){
|
334 |
|
- if(!isPackageLoaded("ff")) stop("Must load package 'ff'")
|
335 |
|
- if(!copynumber){
|
336 |
|
- callSet <- crlmm2(filenames=filenames,
|
337 |
|
- cdfName=cdfName,
|
338 |
|
- mixtureSampleSize=mixtureSampleSize,
|
339 |
|
- eps=eps,
|
340 |
|
- verbose=verbose,
|
341 |
|
- sns=sns,
|
342 |
|
- probs=probs,
|
343 |
|
- DF=DF,
|
344 |
|
- SNRMin=SNRMin,
|
345 |
|
- recallMin=recallMin,
|
346 |
|
- recallRegMin=recallRegMin,
|
347 |
|
- gender=gender,
|
348 |
|
- returnParams=returnParams,
|
349 |
|
- badSNP=badSNP)
|
350 |
|
- return(callSet)
|
351 |
|
- }
|
352 |
|
- if(missing(cdfName)) stop("must specify cdfName")
|
353 |
|
- if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
|
354 |
|
- if(missing(batch)){
|
355 |
|
- warning("The batch variable is not specified. The scan date of the array will be used as a surrogate for batch. The batch variable does not affect the preprocessing or genotyping, but is important for copy number estimation.")
|
356 |
|
- } else {
|
357 |
|
- if(length(batch) != length(filenames))
|
358 |
|
- stop("batch variable must be the same length as the filenames")
|
359 |
|
- }
|
360 |
|
- if(missing(sns)) sns <- basename(filenames)
|
361 |
|
- ## callSet contains potentially very big matrices
|
362 |
|
- callSet <- construct(filenames=filenames,
|
363 |
|
- cdfName=cdfName,
|
364 |
|
- copynumber=TRUE,
|
365 |
|
- sns=sns,
|
366 |
|
- verbose=verbose)
|
367 |
|
- if(missing(batch)){
|
368 |
|
- protocolData(callSet)$batch <- as.numeric(as.factor(protocolData(callSet)$ScanDate))
|
369 |
|
- }
|
370 |
|
- if(!missing(batch)) protocolData(callSet)$batch <- batch
|
371 |
|
- ##lM(callSet) <- initializeParamObject(list(featureNames(callSet), unique(protocolData(callSet)$batch)))
|
372 |
|
- mixtureParams <- matrix(NA, 4, length(filenames))
|
373 |
|
- snp.index <- which(isSnp(callSet)==1)
|
374 |
|
-## snprmaRes <- snprma2(filenames=filenames,
|
375 |
|
-## mixtureSampleSize=mixtureSampleSize,
|
376 |
|
-## fitMixture=TRUE,
|
377 |
|
-## eps=eps,
|
378 |
|
-## verbose=verbose,
|
379 |
|
-## seed=seed,
|
380 |
|
-## cdfName=cdfName,
|
381 |
|
-## sns=sns)
|
382 |
|
-## if(verbose) message("Finished preprocessing.")
|
383 |
|
-## open(snprmaRes[["A"]])
|
384 |
|
-## open(snprmaRes[["B"]])
|
385 |
|
-## open(snprmaRes[["SNR"]])
|
386 |
|
-## open(snprmaRes[["SKW"]])
|
387 |
|
-## open(snprmaRes[["mixtureParams"]])
|
388 |
|
-## if(verbose) message("Updating elements of callSet")
|
389 |
|
-#### bb = ocProbesets()*ncol(A)*8
|
390 |
|
-#### ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]], BATCHBYTES=bb)
|
391 |
|
-#### ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]], BATCHBYTES=bb)
|
392 |
|
-## ##batches <- splitIndicesByLength(1:nrow(snprmaRes[["A"]]), ocProbesets())
|
393 |
|
-## for(j in 1:ncol(callSet)){
|
394 |
|
-## A(callSet)[snp.index, j] <- snprmaRes[["A"]][, j]
|
395 |
|
-## B(callSet)[snp.index, j] <- snprmaRes[["B"]][, j]
|
396 |
|
-## }
|
397 |
|
-## if(verbose) message("Finished updating elements of callSet")
|
398 |
|
-## stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns))
|
399 |
|
-## pData(callSet)$SKW <- snprmaRes$SKW
|
400 |
|
-## pData(callSet)$SNR <- snprmaRes$SNR
|
401 |
|
-## mixtureParams <- snprmaRes$mixtureParams
|
402 |
|
- np.index <- which(isSnp(callSet) == 0)
|
403 |
|
- cnrmaRes <- cnrma2(A=A(callSet),
|
404 |
|
- filenames=filenames,
|
405 |
|
- row.names=featureNames(callSet)[np.index],
|
406 |
|
- cdfName=cdfName,
|
407 |
|
- sns=sns,
|
408 |
|
- seed=seed,
|
409 |
|
- verbose=verbose)
|
410 |
|
-## if(verbose) message("Entering crlmmGT2...")
|
411 |
|
-## rm(cnrmaRes); gc()
|
412 |
|
-## ## as.matrix needed when ffdf is used
|
413 |
|
-## tmp <- crlmmGT2(A=snprmaRes[["A"]],
|
414 |
|
-## B=snprmaRes[["B"]],
|
415 |
|
-## SNR=snprmaRes[["SNR"]],
|
416 |
|
-## mixtureParams=snprmaRes[["mixtureParams"]],
|
417 |
|
-## cdfName=cdfName,
|
418 |
|
-## row.names=NULL, ##featureNames(callSet),##[snp.index],
|
419 |
|
-## col.names=sampleNames(callSet),
|
420 |
|
-## probs=probs,
|
421 |
|
-## DF=DF,
|
422 |
|
-## SNRMin=SNRMin,
|
423 |
|
-## recallMin=recallMin,
|
424 |
|
-## recallRegMin=recallRegMin,
|
425 |
|
-## gender=gender,
|
426 |
|
-## verbose=verbose,
|
427 |
|
-## returnParams=returnParams,
|
428 |
|
-## badSNP=badSNP)
|
429 |
|
-## if(verbose) message("Leaving crlmmGT2")
|
430 |
|
-## open(tmp[["calls"]])
|
431 |
|
-## open(tmp[["confs"]])
|
432 |
|
-## ##bb = ocProbesets()*ncol(A)*8
|
433 |
|
-## for(j in 1:ncol(callSet)){
|
434 |
|
-## snpCall(callSet)[snp.index, j] <- tmp[["calls"]][, j]
|
435 |
|
-## snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]][, j]
|
436 |
|
-## }
|
437 |
|
-#### ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb)
|
438 |
|
-#### ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb)
|
439 |
|
-## callSet$gender <- tmp$gender
|
440 |
|
-#### cnSet <- as(callSet, "CNSetLM")
|
441 |
|
-## return(callSet)
|
442 |
|
- return(cnrmaRes)
|
443 |
|
-}
|
444 |
|
-
|
445 |
|
-
|
446 |
|
-
|
447 |
|
-##---------------------------------------------------------------------------
|
448 |
|
-##---------------------------------------------------------------------------
|
449 |
|
-## For Illumina
|
450 |
|
-##---------------------------------------------------------------------------
|
451 |
|
-##---------------------------------------------------------------------------
|
452 |
|
-##getPhenoData <- function(sampleSheet=NULL, arrayNames=NULL,
|
453 |
|
-## arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A")){
|
454 |
|
-## if(!is.null(arrayNames)) {
|
455 |
|
-## pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
456 |
|
-## }
|
457 |
|
-## if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
|
458 |
|
-## if(is.null(arrayNames)){
|
459 |
|
-## ##arrayNames=NULL
|
460 |
|
-## if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
|
461 |
|
-## barcode = sampleSheet[,arrayInfoColNames$barcode]
|
462 |
|
-## arrayNames=barcode
|
463 |
|
-## }
|
464 |
|
-## if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
|
465 |
|
-## position = sampleSheet[,arrayInfoColNames$position]
|
466 |
|
-## if(is.null(arrayNames))
|
467 |
|
-## arrayNames=position
|
468 |
|
-## else
|
469 |
|
-## arrayNames = paste(arrayNames, position, sep=sep)
|
470 |
|
-## if(highDensity) {
|
471 |
|
-## hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
|
472 |
|
-## for(i in names(hdExt))
|
473 |
|
-## arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
|
474 |
|
-## }
|
475 |
|
-## }
|
476 |
|
-## }
|
477 |
|
-## pd = new("AnnotatedDataFrame", data = sampleSheet)
|
478 |
|
-## sampleNames(pd) <- basename(arrayNames)
|
479 |
|
-## }
|
480 |
|
-## if(is.null(arrayNames)) {
|
481 |
|
-## arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
482 |
|
-## if(!is.null(sampleSheet)) {
|
483 |
|
-## sampleSheet=NULL
|
484 |
|
-## cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
485 |
|
-## }
|
486 |
|
-## pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
487 |
|
-## }
|
488 |
|
-## return(pd)
|
489 |
|
-##}
|
490 |
|
-##constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep, sampleSheet, arrayInfoColNames){
|
491 |
|
-## if(verbose) message("reading first idat file to extract feature data")
|
492 |
|
-## grnfile <- paste(filenames[1], fileExt$green, sep=sep)
|
493 |
|
-## if(!file.exists(grnfile)){
|
494 |
|
-## stop(paste(grnfile, " does not exist. Check fileExt argument"))
|
495 |
|
-## }
|
496 |
|
-## G <- readIDAT(grnfile)
|
497 |
|
-## idsG = rownames(G$Quants)
|
498 |
|
-## nr <- length(idsG)
|
499 |
|
-## fD <- new("AnnotatedDataFrame", data=data.frame(row.names=idsG))##, varMetadata=data.frame(labelDescript
|
500 |
|
-## nr <- nrow(fD)
|
501 |
|
-## dns <- list(featureNames(fD), basename(filenames))
|
502 |
|
-## RG <- new("NChannelSet",
|
503 |
|
-## R=initializeBigMatrix(name="R", nr=nr, nc=length(filenames)),
|
504 |
|
-## G=initializeBigMatrix(name="G", nr=nr, nc=length(filenames)),
|
505 |
|
-## zero=initializeBigMatrix(name="zero", nr=nr, nc=length(filenames)),
|
506 |
|
-## featureData=fD,
|
507 |
|
-## annotation=cdfName)
|
508 |
|
-## phenoData(RG) <- getPhenoData(sampleSheet=sampleSheet, arrayNames=filenames,
|
509 |
|
-## arrayInfoColNames=arrayInfoColNames)
|
510 |
|
-#### pD <- data.frame(matrix(NA, length(sampleNames(RG)), 12), row.names=sampleNames(RG))
|
511 |
|
-#### colnames(pD) <- c("Index","HapMap.Name","Name","ID",
|
512 |
|
-#### "Gender", "Plate", "Well", "Group", "Parent1",
|
513 |
|
-#### "Parent2","Replicate","SentrixPosition")
|
514 |
|
-## ##phenoData(RG) <- new("AnnotatedDataFrame", data=pD)
|
515 |
|
-## pD <- data.frame(matrix(NA, length(sampleNames(RG)), 1), row.names=sampleNames(RG))
|
516 |
|
-## colnames(pD) <- "ScanDate"
|
517 |
|
-## protocolData(RG) <- new("AnnotatedDataFrame", data=pD)
|
518 |
|
-## sampleNames(RG) <- basename(filenames)
|
519 |
|
-## storageMode(RG) <- "environment"
|
520 |
|
-## RG##featureData=ops$illuminaOpts[["featureData"]])
|
521 |
|
-##}
|
522 |
|
-##crlmmIlluminaRS <- function(sampleSheet=NULL,
|
523 |
|
-## arrayNames=NULL,
|
524 |
|
-## batch,
|
525 |
|
-## ids=NULL,
|
526 |
|
-## path=".",
|
527 |
|
-## arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
|
528 |
|
-## highDensity=FALSE,
|
529 |
|
-## sep="_",
|
530 |
|
-## fileExt=list(green="Grn.idat", red="Red.idat"),
|
531 |
|
-## stripNorm=TRUE,
|
532 |
|
-## useTarget=TRUE,
|
533 |
|
-## row.names=TRUE,
|
534 |
|
-## col.names=TRUE,
|
535 |
|
-## probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
536 |
|
-## seed=1, save.ab=FALSE, snpFile, cnFile,
|
537 |
|
-## mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
538 |
|
-## cdfName, sns, recallMin=10, recallRegMin=1000,
|
539 |
|
-## returnParams=FALSE, badSNP=.7,
|
540 |
|
-## copynumber=FALSE,
|
541 |
|
-## load.it=TRUE) {
|
542 |
|
-## if(missing(cdfName)) stop("must specify cdfName")
|
543 |
|
-## if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
|
544 |
|
-## if(missing(sns)) sns <- basename(arrayNames)
|
545 |
|
-## if(missing(batch)){
|
546 |
|
-## warning("The batch variable is not specified. The scan date of the array will be used as a surrogate for batch. The batch variable does not affect the preprocessing or genotyping, but is important for copy number estimation.")
|
547 |
|
-## } else {
|
548 |
|
-## if(length(batch) != length(sns))
|
549 |
|
-## stop("batch variable must be the same length as the filenames")
|
550 |
|
-## }
|
551 |
|
-## batches <- splitIndicesByLength(seq(along=arrayNames), ocSamples())
|
552 |
|
-## k <- 1
|
553 |
|
-## for(j in batches){
|
554 |
|
-## if(verbose) message("Batch ", k, " of ", length(batches))
|
555 |
|
-## RG <- readIdatFiles(sampleSheet=sampleSheet[j, ],
|
556 |
|
-## arrayNames=arrayNames[j],
|
557 |
|
-## ids=ids,
|
558 |
|
-## path=path,
|
559 |
|
-## arrayInfoColNames=arrayInfoColNames,
|
560 |
|
-## highDensity=highDensity,
|
561 |
|
-## sep=sep,
|
562 |
|
-## fileExt=fileExt,
|
563 |
|
-## saveDate=TRUE)
|
564 |
|
-## RG <- RGtoXY(RG, chipType=cdfName)
|
565 |
|
-## protocolData <- protocolData(RG)
|
566 |
|
-## res <- preprocessInfinium2(RG,
|
567 |
|
-## mixtureSampleSize=mixtureSampleSize,
|
568 |
|
-## fitMixture=TRUE,
|
569 |
|
-## verbose=verbose,
|
570 |
|
-## seed=seed,
|
571 |
|
-## eps=eps,
|
572 |
|
-## cdfName=cdfName,
|
573 |
|
-## sns=sns[j],
|
574 |
|
-## stripNorm=stripNorm,
|
575 |
|
-## useTarget=useTarget)
|
576 |
|
-## rm(RG); gc()
|
577 |
|
-## ## MR: number of rows should be number of SNPs + number of nonpolymorphic markers.
|
578 |
|
-## ## Here, I'm just using the # of rows returned from the above function
|
579 |
|
-## if(k == 1){
|
580 |
|
-## if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
|
581 |
|
-## callSet <- new("SnpSuperSet",
|
582 |
|
-## alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
|
583 |
|
-## alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
|
584 |
|
-## call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
|
585 |
|
-## callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
|
586 |
|
-## annotation=cdfName)
|
587 |
|
-## sampleNames(callSet) <- sns
|
588 |
|
-## phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
|
589 |
|
-## arrayNames=sns,
|
590 |
|
-## arrayInfoColNames=arrayInfoColNames)
|
591 |
|
-## pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns)
|
592 |
|
-## colnames(pD) <- "ScanDate"
|
593 |
|
-## protocolData(callSet) <- new("AnnotatedDataFrame", data=pD)
|
594 |
|
-## pData(protocolData(callSet))[j, ] <- pData(protocolData)
|
595 |
|
-## featureNames(callSet) <- res[["gns"]]
|
596 |
|
-## pData(callSet)$SNR <- initializeBigVector("crlmmSNR-", length(sns), "double")
|
597 |
|
-## pData(callSet)$SKW <- initializeBigVector("crlmmSKW-", length(sns), "double")
|
598 |
|
-## ##pData(callSet)$SKW <- rep(NA, length(sns))
|
599 |
|
-## ##pData(callSet)$SNR <- rep(NA, length(sns))
|
600 |
|
-## pData(callSet)$gender <- rep(NA, length(sns))
|
601 |
|
-## mixtureParams <- initializeBigMatrix("crlmmMixt-", nr=4, nc=ncol(callSet), vmode="double")
|
602 |
|
-## save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda"))
|
603 |
|
-## if(missing(batch)){
|
604 |
|
-## protocolData(callSet)$batch <- rep(NA, length(sns))
|
605 |
|
-## } else{
|
606 |
|
-## protocolData(callSet)$batch <- batch
|
607 |
|
-## }
|
608 |
|
-## featureData(callSet) <- addFeatureAnnotation(callSet)
|
609 |
|
-## open(mixtureParams)
|
610 |
|
-## open(callSet$SNR)
|
611 |
|
-## open(callSet$SKW)
|
612 |
|
-## }
|
613 |
|
-## if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
|
614 |
|
-## ##RS: I don't understand why the IDATS for the
|
615 |
|
-## ##same platform potentially have different lengths
|
616 |
|
-## res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ]
|
617 |
|
-## res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
|
618 |
|
-## }
|
619 |
|
-## if(missing(batch)){
|
620 |
|
-## protocolData(callSet)$batch[j] <- as.numeric(as.factor(protocolData$ScanDate))
|
621 |
|
-## }
|
622 |
|
-## ## MR: we need to define a snp.index vs np.index
|
623 |
|
-## snp.index <- match(res$gns, featureNames(callSet))
|
624 |
|
-## A(callSet)[snp.index, j] <- res[["A"]]
|
625 |
|
-## B(callSet)[snp.index, j] <- res[["B"]]
|
626 |
|
-## pData(callSet)$SKW[j] <- res$SKW
|
627 |
|
-## pData(callSet)$SNR[j] <- res$SNR
|
628 |
|
-## mixtureParams[, j] <- res$mixtureParams
|
629 |
|
-## rm(res); gc()
|
630 |
|
-## k <- k+1
|
631 |
|
-## }
|
632 |
|
-## save(callSet, file=file.path(ldPath(), "callSet.rda"))
|
633 |
|
-## ##otherwise, A and B get overwritten
|
634 |
|
-## ##AA <- initializeBigMatrix("crlmmA", nrow(callSet), ncol(callSet), "integer")
|
635 |
|
-## ##BB <- initializeBigMatrix("crlmmB", nrow(callSet), ncol(callSet), "integer")
|
636 |
|
-## ##bb = ocProbesets()*ncol(A)*8
|
637 |
|
-## AA <- clone(A(callSet))
|
638 |
|
-## BB <- clone(B(callSet))
|
639 |
|
-## ##ffrowapply(AA[i1:i2, ] <- A(callSet)[i1:i2, ], X=A(callSet), BATCHBYTES=bb)
|
640 |
|
-## ##ffrowapply(BB[i1:i2, ] <- B(callSet)[i1:i2, ], X=B(callSet), BATCHBYTES=bb)
|
641 |
|
-## ##crlmmGT2 overwrites A and B.
|
642 |
|
-## tmp <- crlmmGT2(A=A(callSet),
|
643 |
|
-## B=B(callSet),
|
644 |
|
-## SNR=callSet$SNR,
|
645 |
|
-## mixtureParams=mixtureParams,
|
646 |
|
-## cdfName=annotation(callSet),
|
647 |
|
-## row.names=featureNames(callSet),
|
648 |
|
-## col.names=sampleNames(callSet),
|
649 |
|
-## probs=probs,
|
650 |
|
-## DF=DF,
|
651 |
|
-## SNRMin=SNRMin,
|
652 |
|
-## recallMin=recallMin,
|
653 |
|
-## recallRegMin=recallRegMin,
|
654 |
|
-## gender=gender,
|
655 |
|
-## verbose=verbose,
|
656 |
|
-## returnParams=returnParams,
|
657 |
|
-## badSNP=badSNP)
|
658 |
|
-## open(tmp[["calls"]])
|
659 |
|
-## open(tmp[["confs"]])
|
660 |
|
-## A(callSet) <- AA
|
661 |
|
-## B(callSet) <- BB
|
662 |
|
-## snpCall(callSet) <- tmp[["calls"]]
|
663 |
|
-## ## MR: many zeros in the conf. scores (?)
|
664 |
|
-## snpCallProbability(callSet) <- tmp[["confs"]]
|
665 |
|
-## callSet$gender <- tmp$gender
|
666 |
|
-## if(copynumber) cnSet <- as(callSet, "CNSetLM")
|
667 |
|
-## close(mixtureParams)
|
668 |
|
-## rm(tmp); gc()
|
669 |
|
-## return(cnSet)
|
670 |
|
-##}
|
671 |
|
-##---------------------------------------------------------------------------
|
672 |
|
-##---------------------------------------------------------------------------
|
673 |
|
-
|
674 |
316 |
rowCovs <- function(x, y, ...){
|
675 |
317 |
notna <- !is.na(x)
|
676 |
318 |
N <- rowSums(notna)
|
... |
... |
@@ -706,8 +348,6 @@ applyByGenotype <- function(x, FUN, G){
|
706 |
348 |
tmp
|
707 |
349 |
}
|
708 |
350 |
|
709 |
|
-
|
710 |
|
-
|
711 |
351 |
rowCors <- function(x, y, ...){
|
712 |
352 |
N <- rowSums(!is.na(x))
|
713 |
353 |
x <- suppressWarnings(log2(x))
|
... |
... |
@@ -751,12 +391,13 @@ nuphiAllele2 <- function(allele, Ystar, W, Ns){
|
751 |
391 |
ses <- matrix(NA, 2, nrow(Ystar))
|
752 |
392 |
for(i in 1:nrow(Ystar)){
|
753 |
393 |
betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ]))
|
754 |
|
- ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
|
755 |
|
- ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
|
|
394 |
+ ##ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
|
|
395 |
+ ##ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
|
756 |
396 |
}
|
757 |
|
- nu <- betahat[1, ]
|
758 |
|
- phi <- betahat[2, ]
|
759 |
|
- return(list(nu, phi))
|
|
397 |
+## nu <- betahat[1, ]
|
|
398 |
+## phi <- betahat[2, ]
|
|
399 |
+## return(list(nu, phi))
|
|
400 |
+ return(betahat)
|
760 |
401 |
}
|
761 |
402 |
|
762 |
403 |
## linear regression without weights -- design matrix is same for all snps
|
... |
... |
@@ -1206,7 +847,7 @@ fit.lm1 <- function(idxBatch,
|
1206 |
847 |
MIN.PHI,
|
1207 |
848 |
verbose,
|
1208 |
849 |
weighted.lm, ...){
|
1209 |
|
- physical <- get("physical")
|
|
850 |
+ if(isPackageLoaded("ff")) physical <- get("physical")
|
1210 |
851 |
if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches))
|
1211 |
852 |
snps <- snpBatches[[idxBatch]]
|
1212 |
853 |
batches <- split(seq(along=batch(object)), batch(object))
|
... |
... |
@@ -1323,8 +964,10 @@ fit.lm1 <- function(idxBatch,
|
1323 |
964 |
if(!weighted.lm){
|
1324 |
965 |
res <- nuphiAllele2(allele="B", Ystar=YB, W=wB, Ns=Ns)
|
1325 |
966 |
} else res <- linearModel.noweights(allele="B", Ystar=YB, W=wB, Ns=Ns)
|
1326 |
|
- nuB[, J] <- res[[1]]
|
1327 |
|
- phiB[, J] <- res[[2]]
|
|
967 |
+ ##nuB[, J] <- res[[1]]
|
|
968 |
+ nuB[, J] <- res[1, ]
|
|
969 |
+ ##phiB[, J] <- res[[2]]
|
|
970 |
+ phiB[, J] <- res[2, ]
|
1328 |
971 |
if(THR.NU.PHI){
|
1329 |
972 |
nuA[nuA[, J] < MIN.NU, J] <- MIN.NU
|
1330 |
973 |
nuB[nuB[, J] < MIN.NU, J] <- MIN.NU
|
... |
... |
@@ -1337,7 +980,6 @@ fit.lm1 <- function(idxBatch,
|
1337 |
980 |
rm(G, A, B, NORM, wA, wB, YA,YB, res, negA, negB, Np, Ns)
|
1338 |
981 |
gc()
|
1339 |
982 |
}
|
1340 |
|
-
|
1341 |
983 |
cA[cA < 0.05] <- 0.05
|
1342 |
984 |
cB[cB < 0.05] <- 0.05
|
1343 |
985 |
cA[cA > 5] <- 5
|
... |
... |
@@ -3238,8 +2880,8 @@ ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){
|
3238 |
2880 |
return(centers)
|
3239 |
2881 |
}
|
3240 |
2882 |
|
3241 |
|
-computeCN <- function(filenames,
|
3242 |
|
- object,
|
|
2883 |
+computeCN <- function(object,
|
|
2884 |
+ filenames,
|
3243 |
2885 |
which.batches,
|
3244 |
2886 |
MIN.SAMPLES=10,
|
3245 |
2887 |
SNRMin=5,
|
... |
... |
@@ -3281,7 +2923,7 @@ computeCN <- function(filenames,
|
3281 |
2923 |
load(file.path(ldPath(), "flags.snps.rda"))
|
3282 |
2924 |
}
|
3283 |
2925 |
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3284 |
|
- FUN <- "fit.lm1"
|
|
2926 |
+ FUN <- fit.lm1
|
3285 |
2927 |
}
|
3286 |
2928 |
if(type=="autosome.nps"){
|
3287 |
2929 |
marker.index <- which(chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object)))
|
... |
... |
@@ -3302,7 +2944,7 @@ computeCN <- function(filenames,
|
3302 |
2944 |
load(file.path(ldPath(), "flags.nps.rda"))
|
3303 |
2945 |
}
|
3304 |
2946 |
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3305 |
|
- FUN <- "fit.lm2"
|
|
2947 |
+ FUN <- fit.lm2
|
3306 |
2948 |
}
|
3307 |
2949 |
if(type=="X.snps"){
|
3308 |
2950 |
marker.index <- which(chromosome(object) == 23 & isSnp(object) & !is.na(chromosome(object)))
|
... |
... |
@@ -3323,7 +2965,7 @@ computeCN <- function(filenames,
|
3323 |
2965 |
load(file.path(ldPath(), "flags.X.snps.rda"))
|
3324 |
2966 |
}
|
3325 |
2967 |
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3326 |
|
- FUN <- "fit.lm3"
|
|
2968 |
+ FUN <- fit.lm3
|
3327 |
2969 |
}
|
3328 |
2970 |
if(type=="X.nps"){
|
3329 |
2971 |
marker.index <- which(chromosome(object) == 23 & !isSnp(object) & !is.na(chromosome(object)))
|
... |
... |
@@ -3343,7 +2985,7 @@ computeCN <- function(filenames,
|
3343 |
2985 |
load(file.path(ldPath(), "flags.X.nps.rda"))
|
3344 |
2986 |
}
|
3345 |
2987 |
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3346 |
|
- FUN <- "fit.lm4"
|
|
2988 |
+ FUN <- fit.lm4
|
3347 |
2989 |
}
|
3348 |
2990 |
index.strata <- splitIndicesByLength(marker.index, ocProbesets())
|
3349 |
2991 |
obj <- construct(filenames=filenames,
|
... |
... |
@@ -3353,9 +2995,10 @@ computeCN <- function(filenames,
|
3353 |
2995 |
sns=sampleNames(object),
|
3354 |
2996 |
fns=featureNames(object)[marker.index])
|
3355 |
2997 |
ocLapply(seq(along=index.strata),
|
3356 |
|
- match.fun(FUN),
|
|
2998 |
+ ##match.fun(FUN),
|
|
2999 |
+ FUN,
|
3357 |
3000 |
marker.index=marker.index,
|
3358 |
|
- object=object,
|
|
3001 |
+ object=obj,
|
3359 |
3002 |
Ns=Ns,
|
3360 |
3003 |
normal=normal,
|
3361 |
3004 |
snpflags=flags,
|