...
|
...
|
@@ -49,22 +49,24 @@ getFeatureData.Affy <- function(cdfName, copynumber=FALSE){
|
49
|
49
|
##A few of the snpProbes do not match -- I think it is chromosome Y.
|
50
|
50
|
M[is.na(M[, "isSnp"]), "isSnp"] <- 1L
|
51
|
51
|
return(new("AnnotatedDataFrame", data=data.frame(M)))
|
52
|
|
- ##list(snpIndex, npIndex, fns)
|
53
|
|
- ##crlmmOpts$snpRange <- range(snpIndex)
|
54
|
|
- ##crlmmOpts$npRange <- range(npIndex)
|
55
|
52
|
}
|
56
|
53
|
|
57
|
54
|
construct <- function(filenames, cdfName, copynumber=FALSE,
|
58
|
55
|
sns, verbose=TRUE, batch, fns){
|
|
56
|
+ if(!missing(batch)){
|
|
57
|
+ stopifnot(length(batch) == length(sns))
|
|
58
|
+ }
|
|
59
|
+ if(missing(sns) & missing(filenames)) stop("one of filenames or samplenames (sns) must be provided")
|
59
|
60
|
if(verbose) message("Initializing container for assay data elements alleleA, alleleB, call, callProbability, CA, CB")
|
60
|
|
- if(missing(sns)) sns <- basename(filenames)
|
61
|
|
- protocolData <- getProtocolData.Affy(filenames)
|
62
|
|
- if(missing(batch)){
|
|
61
|
+ if(!missing(filenames)){
|
|
62
|
+ if(missing(sns)) sns <- basename(filenames)
|
|
63
|
+ protocolData <- getProtocolData.Affy(filenames)
|
63
|
64
|
protocolData$batch <- as.numeric(as.factor(protocolData$ScanDate))
|
64
|
|
- } else {
|
65
|
|
- if(length(batch) != length(filenames))
|
66
|
|
- stop("batch variable must be the same length as the filenames")
|
67
|
|
- protocolData$batch <- batch
|
|
65
|
+ } else{
|
|
66
|
+ protocolData <- new("AnnotatedDataFrame",
|
|
67
|
+ data=data.frame(batch=batch),
|
|
68
|
+ varMetadata=data.frame(labelDescription="batch",
|
|
69
|
+ row.names="batch"))
|
68
|
70
|
}
|
69
|
71
|
rownames(pData(protocolData)) <- sns
|
70
|
72
|
featureData <- getFeatureData.Affy(cdfName, copynumber=copynumber)
|
...
|
...
|
@@ -73,7 +75,7 @@ construct <- function(filenames, cdfName, copynumber=FALSE,
|
73
|
75
|
if(all(is.na(index))) stop("fns not in featureNames")
|
74
|
76
|
featureData <- featureData[index, ]
|
75
|
77
|
}
|
76
|
|
- nr <- nrow(featureData); nc <- length(filenames)
|
|
78
|
+ nr <- nrow(featureData); nc <- length(sns)
|
77
|
79
|
ffObjects <- list(alleleA=initializeBigMatrix(name="A", nr, nc),
|
78
|
80
|
alleleB=initializeBigMatrix(name="B", nr, nc),
|
79
|
81
|
call=initializeBigMatrix(name="call", nr, nc),
|
...
|
...
|
@@ -99,8 +101,6 @@ construct <- function(filenames, cdfName, copynumber=FALSE,
|
99
|
101
|
return(callSet)
|
100
|
102
|
}
|
101
|
103
|
|
102
|
|
-
|
103
|
|
-
|
104
|
104
|
genotype <- function(filenames,
|
105
|
105
|
cdfName,
|
106
|
106
|
batch,
|
...
|
...
|
@@ -313,364 +313,6 @@ genotypeLD <- function(filenames,
|
313
|
313
|
}
|
314
|
314
|
genotype2 <- genotypeLD
|
315
|
315
|
|
316
|
|
-
|
317
|
|
-genotype3 <- function(filenames,
|
318
|
|
- cdfName,
|
319
|
|
- batch,
|
320
|
|
- mixtureSampleSize=10^5,
|
321
|
|
- eps=0.1,
|
322
|
|
- verbose=TRUE,
|
323
|
|
- seed=1,
|
324
|
|
- sns,
|
325
|
|
- copynumber=FALSE,
|
326
|
|
- probs=rep(1/3, 3),
|
327
|
|
- DF=6,
|
328
|
|
- SNRMin=5,
|
329
|
|
- recallMin=10,
|
330
|
|
- recallRegMin=1000,
|
331
|
|
- gender=NULL,
|
332
|
|
- returnParams=TRUE,
|
333
|
|
- badSNP=0.7){
|
334
|
|
- if(!isPackageLoaded("ff")) stop("Must load package 'ff'")
|
335
|
|
- if(!copynumber){
|
336
|
|
- callSet <- crlmm2(filenames=filenames,
|
337
|
|
- cdfName=cdfName,
|
338
|
|
- mixtureSampleSize=mixtureSampleSize,
|
339
|
|
- eps=eps,
|
340
|
|
- verbose=verbose,
|
341
|
|
- sns=sns,
|
342
|
|
- probs=probs,
|
343
|
|
- DF=DF,
|
344
|
|
- SNRMin=SNRMin,
|
345
|
|
- recallMin=recallMin,
|
346
|
|
- recallRegMin=recallRegMin,
|
347
|
|
- gender=gender,
|
348
|
|
- returnParams=returnParams,
|
349
|
|
- badSNP=badSNP)
|
350
|
|
- return(callSet)
|
351
|
|
- }
|
352
|
|
- if(missing(cdfName)) stop("must specify cdfName")
|
353
|
|
- if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
|
354
|
|
- if(missing(batch)){
|
355
|
|
- warning("The batch variable is not specified. The scan date of the array will be used as a surrogate for batch. The batch variable does not affect the preprocessing or genotyping, but is important for copy number estimation.")
|
356
|
|
- } else {
|
357
|
|
- if(length(batch) != length(filenames))
|
358
|
|
- stop("batch variable must be the same length as the filenames")
|
359
|
|
- }
|
360
|
|
- if(missing(sns)) sns <- basename(filenames)
|
361
|
|
- ## callSet contains potentially very big matrices
|
362
|
|
- callSet <- construct(filenames=filenames,
|
363
|
|
- cdfName=cdfName,
|
364
|
|
- copynumber=TRUE,
|
365
|
|
- sns=sns,
|
366
|
|
- verbose=verbose)
|
367
|
|
- if(missing(batch)){
|
368
|
|
- protocolData(callSet)$batch <- as.numeric(as.factor(protocolData(callSet)$ScanDate))
|
369
|
|
- }
|
370
|
|
- if(!missing(batch)) protocolData(callSet)$batch <- batch
|
371
|
|
- ##lM(callSet) <- initializeParamObject(list(featureNames(callSet), unique(protocolData(callSet)$batch)))
|
372
|
|
- mixtureParams <- matrix(NA, 4, length(filenames))
|
373
|
|
- snp.index <- which(isSnp(callSet)==1)
|
374
|
|
-## snprmaRes <- snprma2(filenames=filenames,
|
375
|
|
-## mixtureSampleSize=mixtureSampleSize,
|
376
|
|
-## fitMixture=TRUE,
|
377
|
|
-## eps=eps,
|
378
|
|
-## verbose=verbose,
|
379
|
|
-## seed=seed,
|
380
|
|
-## cdfName=cdfName,
|
381
|
|
-## sns=sns)
|
382
|
|
-## if(verbose) message("Finished preprocessing.")
|
383
|
|
-## open(snprmaRes[["A"]])
|
384
|
|
-## open(snprmaRes[["B"]])
|
385
|
|
-## open(snprmaRes[["SNR"]])
|
386
|
|
-## open(snprmaRes[["SKW"]])
|
387
|
|
-## open(snprmaRes[["mixtureParams"]])
|
388
|
|
-## if(verbose) message("Updating elements of callSet")
|
389
|
|
-#### bb = ocProbesets()*ncol(A)*8
|
390
|
|
-#### ffrowapply(A(callSet)[i1:i2, ] <- snprmaRes[["A"]][i1:i2, ], X=snprmaRes[["A"]], BATCHBYTES=bb)
|
391
|
|
-#### ffrowapply(B(callSet)[i1:i2, ] <- snprmaRes[["B"]][i1:i2, ], X=snprmaRes[["B"]], BATCHBYTES=bb)
|
392
|
|
-## ##batches <- splitIndicesByLength(1:nrow(snprmaRes[["A"]]), ocProbesets())
|
393
|
|
-## for(j in 1:ncol(callSet)){
|
394
|
|
-## A(callSet)[snp.index, j] <- snprmaRes[["A"]][, j]
|
395
|
|
-## B(callSet)[snp.index, j] <- snprmaRes[["B"]][, j]
|
396
|
|
-## }
|
397
|
|
-## if(verbose) message("Finished updating elements of callSet")
|
398
|
|
-## stopifnot(identical(featureNames(callSet)[snp.index], snprmaRes$gns))
|
399
|
|
-## pData(callSet)$SKW <- snprmaRes$SKW
|
400
|
|
-## pData(callSet)$SNR <- snprmaRes$SNR
|
401
|
|
-## mixtureParams <- snprmaRes$mixtureParams
|
402
|
|
- np.index <- which(isSnp(callSet) == 0)
|
403
|
|
- cnrmaRes <- cnrma2(A=A(callSet),
|
404
|
|
- filenames=filenames,
|
405
|
|
- row.names=featureNames(callSet)[np.index],
|
406
|
|
- cdfName=cdfName,
|
407
|
|
- sns=sns,
|
408
|
|
- seed=seed,
|
409
|
|
- verbose=verbose)
|
410
|
|
-## if(verbose) message("Entering crlmmGT2...")
|
411
|
|
-## rm(cnrmaRes); gc()
|
412
|
|
-## ## as.matrix needed when ffdf is used
|
413
|
|
-## tmp <- crlmmGT2(A=snprmaRes[["A"]],
|
414
|
|
-## B=snprmaRes[["B"]],
|
415
|
|
-## SNR=snprmaRes[["SNR"]],
|
416
|
|
-## mixtureParams=snprmaRes[["mixtureParams"]],
|
417
|
|
-## cdfName=cdfName,
|
418
|
|
-## row.names=NULL, ##featureNames(callSet),##[snp.index],
|
419
|
|
-## col.names=sampleNames(callSet),
|
420
|
|
-## probs=probs,
|
421
|
|
-## DF=DF,
|
422
|
|
-## SNRMin=SNRMin,
|
423
|
|
-## recallMin=recallMin,
|
424
|
|
-## recallRegMin=recallRegMin,
|
425
|
|
-## gender=gender,
|
426
|
|
-## verbose=verbose,
|
427
|
|
-## returnParams=returnParams,
|
428
|
|
-## badSNP=badSNP)
|
429
|
|
-## if(verbose) message("Leaving crlmmGT2")
|
430
|
|
-## open(tmp[["calls"]])
|
431
|
|
-## open(tmp[["confs"]])
|
432
|
|
-## ##bb = ocProbesets()*ncol(A)*8
|
433
|
|
-## for(j in 1:ncol(callSet)){
|
434
|
|
-## snpCall(callSet)[snp.index, j] <- tmp[["calls"]][, j]
|
435
|
|
-## snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]][, j]
|
436
|
|
-## }
|
437
|
|
-#### ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb)
|
438
|
|
-#### ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb)
|
439
|
|
-## callSet$gender <- tmp$gender
|
440
|
|
-#### cnSet <- as(callSet, "CNSetLM")
|
441
|
|
-## return(callSet)
|
442
|
|
- return(cnrmaRes)
|
443
|
|
-}
|
444
|
|
-
|
445
|
|
-
|
446
|
|
-
|
447
|
|
-##---------------------------------------------------------------------------
|
448
|
|
-##---------------------------------------------------------------------------
|
449
|
|
-## For Illumina
|
450
|
|
-##---------------------------------------------------------------------------
|
451
|
|
-##---------------------------------------------------------------------------
|
452
|
|
-##getPhenoData <- function(sampleSheet=NULL, arrayNames=NULL,
|
453
|
|
-## arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A")){
|
454
|
|
-## if(!is.null(arrayNames)) {
|
455
|
|
-## pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
456
|
|
-## }
|
457
|
|
-## if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
|
458
|
|
-## if(is.null(arrayNames)){
|
459
|
|
-## ##arrayNames=NULL
|
460
|
|
-## if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
|
461
|
|
-## barcode = sampleSheet[,arrayInfoColNames$barcode]
|
462
|
|
-## arrayNames=barcode
|
463
|
|
-## }
|
464
|
|
-## if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
|
465
|
|
-## position = sampleSheet[,arrayInfoColNames$position]
|
466
|
|
-## if(is.null(arrayNames))
|
467
|
|
-## arrayNames=position
|
468
|
|
-## else
|
469
|
|
-## arrayNames = paste(arrayNames, position, sep=sep)
|
470
|
|
-## if(highDensity) {
|
471
|
|
-## hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
|
472
|
|
-## for(i in names(hdExt))
|
473
|
|
-## arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
|
474
|
|
-## }
|
475
|
|
-## }
|
476
|
|
-## }
|
477
|
|
-## pd = new("AnnotatedDataFrame", data = sampleSheet)
|
478
|
|
-## sampleNames(pd) <- basename(arrayNames)
|
479
|
|
-## }
|
480
|
|
-## if(is.null(arrayNames)) {
|
481
|
|
-## arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
482
|
|
-## if(!is.null(sampleSheet)) {
|
483
|
|
-## sampleSheet=NULL
|
484
|
|
-## cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
485
|
|
-## }
|
486
|
|
-## pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
487
|
|
-## }
|
488
|
|
-## return(pd)
|
489
|
|
-##}
|
490
|
|
-##constructRG <- function(filenames, cdfName, sns, verbose, fileExt, sep, sampleSheet, arrayInfoColNames){
|
491
|
|
-## if(verbose) message("reading first idat file to extract feature data")
|
492
|
|
-## grnfile <- paste(filenames[1], fileExt$green, sep=sep)
|
493
|
|
-## if(!file.exists(grnfile)){
|
494
|
|
-## stop(paste(grnfile, " does not exist. Check fileExt argument"))
|
495
|
|
-## }
|
496
|
|
-## G <- readIDAT(grnfile)
|
497
|
|
-## idsG = rownames(G$Quants)
|
498
|
|
-## nr <- length(idsG)
|
499
|
|
-## fD <- new("AnnotatedDataFrame", data=data.frame(row.names=idsG))##, varMetadata=data.frame(labelDescript
|
500
|
|
-## nr <- nrow(fD)
|
501
|
|
-## dns <- list(featureNames(fD), basename(filenames))
|
502
|
|
-## RG <- new("NChannelSet",
|
503
|
|
-## R=initializeBigMatrix(name="R", nr=nr, nc=length(filenames)),
|
504
|
|
-## G=initializeBigMatrix(name="G", nr=nr, nc=length(filenames)),
|
505
|
|
-## zero=initializeBigMatrix(name="zero", nr=nr, nc=length(filenames)),
|
506
|
|
-## featureData=fD,
|
507
|
|
-## annotation=cdfName)
|
508
|
|
-## phenoData(RG) <- getPhenoData(sampleSheet=sampleSheet, arrayNames=filenames,
|
509
|
|
-## arrayInfoColNames=arrayInfoColNames)
|
510
|
|
-#### pD <- data.frame(matrix(NA, length(sampleNames(RG)), 12), row.names=sampleNames(RG))
|
511
|
|
-#### colnames(pD) <- c("Index","HapMap.Name","Name","ID",
|
512
|
|
-#### "Gender", "Plate", "Well", "Group", "Parent1",
|
513
|
|
-#### "Parent2","Replicate","SentrixPosition")
|
514
|
|
-## ##phenoData(RG) <- new("AnnotatedDataFrame", data=pD)
|
515
|
|
-## pD <- data.frame(matrix(NA, length(sampleNames(RG)), 1), row.names=sampleNames(RG))
|
516
|
|
-## colnames(pD) <- "ScanDate"
|
517
|
|
-## protocolData(RG) <- new("AnnotatedDataFrame", data=pD)
|
518
|
|
-## sampleNames(RG) <- basename(filenames)
|
519
|
|
-## storageMode(RG) <- "environment"
|
520
|
|
-## RG##featureData=ops$illuminaOpts[["featureData"]])
|
521
|
|
-##}
|
522
|
|
-##crlmmIlluminaRS <- function(sampleSheet=NULL,
|
523
|
|
-## arrayNames=NULL,
|
524
|
|
-## batch,
|
525
|
|
-## ids=NULL,
|
526
|
|
-## path=".",
|
527
|
|
-## arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
|
528
|
|
-## highDensity=FALSE,
|
529
|
|
-## sep="_",
|
530
|
|
-## fileExt=list(green="Grn.idat", red="Red.idat"),
|
531
|
|
-## stripNorm=TRUE,
|
532
|
|
-## useTarget=TRUE,
|
533
|
|
-## row.names=TRUE,
|
534
|
|
-## col.names=TRUE,
|
535
|
|
-## probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
536
|
|
-## seed=1, save.ab=FALSE, snpFile, cnFile,
|
537
|
|
-## mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
538
|
|
-## cdfName, sns, recallMin=10, recallRegMin=1000,
|
539
|
|
-## returnParams=FALSE, badSNP=.7,
|
540
|
|
-## copynumber=FALSE,
|
541
|
|
-## load.it=TRUE) {
|
542
|
|
-## if(missing(cdfName)) stop("must specify cdfName")
|
543
|
|
-## if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
|
544
|
|
-## if(missing(sns)) sns <- basename(arrayNames)
|
545
|
|
-## if(missing(batch)){
|
546
|
|
-## warning("The batch variable is not specified. The scan date of the array will be used as a surrogate for batch. The batch variable does not affect the preprocessing or genotyping, but is important for copy number estimation.")
|
547
|
|
-## } else {
|
548
|
|
-## if(length(batch) != length(sns))
|
549
|
|
-## stop("batch variable must be the same length as the filenames")
|
550
|
|
-## }
|
551
|
|
-## batches <- splitIndicesByLength(seq(along=arrayNames), ocSamples())
|
552
|
|
-## k <- 1
|
553
|
|
-## for(j in batches){
|
554
|
|
-## if(verbose) message("Batch ", k, " of ", length(batches))
|
555
|
|
-## RG <- readIdatFiles(sampleSheet=sampleSheet[j, ],
|
556
|
|
-## arrayNames=arrayNames[j],
|
557
|
|
-## ids=ids,
|
558
|
|
-## path=path,
|
559
|
|
-## arrayInfoColNames=arrayInfoColNames,
|
560
|
|
-## highDensity=highDensity,
|
561
|
|
-## sep=sep,
|
562
|
|
-## fileExt=fileExt,
|
563
|
|
-## saveDate=TRUE)
|
564
|
|
-## RG <- RGtoXY(RG, chipType=cdfName)
|
565
|
|
-## protocolData <- protocolData(RG)
|
566
|
|
-## res <- preprocessInfinium2(RG,
|
567
|
|
-## mixtureSampleSize=mixtureSampleSize,
|
568
|
|
-## fitMixture=TRUE,
|
569
|
|
-## verbose=verbose,
|
570
|
|
-## seed=seed,
|
571
|
|
-## eps=eps,
|
572
|
|
-## cdfName=cdfName,
|
573
|
|
-## sns=sns[j],
|
574
|
|
-## stripNorm=stripNorm,
|
575
|
|
-## useTarget=useTarget)
|
576
|
|
-## rm(RG); gc()
|
577
|
|
-## ## MR: number of rows should be number of SNPs + number of nonpolymorphic markers.
|
578
|
|
-## ## Here, I'm just using the # of rows returned from the above function
|
579
|
|
-## if(k == 1){
|
580
|
|
-## if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
|
581
|
|
-## callSet <- new("SnpSuperSet",
|
582
|
|
-## alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
|
583
|
|
-## alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
|
584
|
|
-## call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
|
585
|
|
-## callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
|
586
|
|
-## annotation=cdfName)
|
587
|
|
-## sampleNames(callSet) <- sns
|
588
|
|
-## phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
|
589
|
|
-## arrayNames=sns,
|
590
|
|
-## arrayInfoColNames=arrayInfoColNames)
|
591
|
|
-## pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns)
|
592
|
|
-## colnames(pD) <- "ScanDate"
|
593
|
|
-## protocolData(callSet) <- new("AnnotatedDataFrame", data=pD)
|
594
|
|
-## pData(protocolData(callSet))[j, ] <- pData(protocolData)
|
595
|
|
-## featureNames(callSet) <- res[["gns"]]
|
596
|
|
-## pData(callSet)$SNR <- initializeBigVector("crlmmSNR-", length(sns), "double")
|
597
|
|
-## pData(callSet)$SKW <- initializeBigVector("crlmmSKW-", length(sns), "double")
|
598
|
|
-## ##pData(callSet)$SKW <- rep(NA, length(sns))
|
599
|
|
-## ##pData(callSet)$SNR <- rep(NA, length(sns))
|
600
|
|
-## pData(callSet)$gender <- rep(NA, length(sns))
|
601
|
|
-## mixtureParams <- initializeBigMatrix("crlmmMixt-", nr=4, nc=ncol(callSet), vmode="double")
|
602
|
|
-## save(mixtureParams, file=file.path(ldPath(), "mixtureParams.rda"))
|
603
|
|
-## if(missing(batch)){
|
604
|
|
-## protocolData(callSet)$batch <- rep(NA, length(sns))
|
605
|
|
-## } else{
|
606
|
|
-## protocolData(callSet)$batch <- batch
|
607
|
|
-## }
|
608
|
|
-## featureData(callSet) <- addFeatureAnnotation(callSet)
|
609
|
|
-## open(mixtureParams)
|
610
|
|
-## open(callSet$SNR)
|
611
|
|
-## open(callSet$SKW)
|
612
|
|
-## }
|
613
|
|
-## if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
|
614
|
|
-## ##RS: I don't understand why the IDATS for the
|
615
|
|
-## ##same platform potentially have different lengths
|
616
|
|
-## res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ]
|
617
|
|
-## res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
|
618
|
|
-## }
|
619
|
|
-## if(missing(batch)){
|
620
|
|
-## protocolData(callSet)$batch[j] <- as.numeric(as.factor(protocolData$ScanDate))
|
621
|
|
-## }
|
622
|
|
-## ## MR: we need to define a snp.index vs np.index
|
623
|
|
-## snp.index <- match(res$gns, featureNames(callSet))
|
624
|
|
-## A(callSet)[snp.index, j] <- res[["A"]]
|
625
|
|
-## B(callSet)[snp.index, j] <- res[["B"]]
|
626
|
|
-## pData(callSet)$SKW[j] <- res$SKW
|
627
|
|
-## pData(callSet)$SNR[j] <- res$SNR
|
628
|
|
-## mixtureParams[, j] <- res$mixtureParams
|
629
|
|
-## rm(res); gc()
|
630
|
|
-## k <- k+1
|
631
|
|
-## }
|
632
|
|
-## save(callSet, file=file.path(ldPath(), "callSet.rda"))
|
633
|
|
-## ##otherwise, A and B get overwritten
|
634
|
|
-## ##AA <- initializeBigMatrix("crlmmA", nrow(callSet), ncol(callSet), "integer")
|
635
|
|
-## ##BB <- initializeBigMatrix("crlmmB", nrow(callSet), ncol(callSet), "integer")
|
636
|
|
-## ##bb = ocProbesets()*ncol(A)*8
|
637
|
|
-## AA <- clone(A(callSet))
|
638
|
|
-## BB <- clone(B(callSet))
|
639
|
|
-## ##ffrowapply(AA[i1:i2, ] <- A(callSet)[i1:i2, ], X=A(callSet), BATCHBYTES=bb)
|
640
|
|
-## ##ffrowapply(BB[i1:i2, ] <- B(callSet)[i1:i2, ], X=B(callSet), BATCHBYTES=bb)
|
641
|
|
-## ##crlmmGT2 overwrites A and B.
|
642
|
|
-## tmp <- crlmmGT2(A=A(callSet),
|
643
|
|
-## B=B(callSet),
|
644
|
|
-## SNR=callSet$SNR,
|
645
|
|
-## mixtureParams=mixtureParams,
|
646
|
|
-## cdfName=annotation(callSet),
|
647
|
|
-## row.names=featureNames(callSet),
|
648
|
|
-## col.names=sampleNames(callSet),
|
649
|
|
-## probs=probs,
|
650
|
|
-## DF=DF,
|
651
|
|
-## SNRMin=SNRMin,
|
652
|
|
-## recallMin=recallMin,
|
653
|
|
-## recallRegMin=recallRegMin,
|
654
|
|
-## gender=gender,
|
655
|
|
-## verbose=verbose,
|
656
|
|
-## returnParams=returnParams,
|
657
|
|
-## badSNP=badSNP)
|
658
|
|
-## open(tmp[["calls"]])
|
659
|
|
-## open(tmp[["confs"]])
|
660
|
|
-## A(callSet) <- AA
|
661
|
|
-## B(callSet) <- BB
|
662
|
|
-## snpCall(callSet) <- tmp[["calls"]]
|
663
|
|
-## ## MR: many zeros in the conf. scores (?)
|
664
|
|
-## snpCallProbability(callSet) <- tmp[["confs"]]
|
665
|
|
-## callSet$gender <- tmp$gender
|
666
|
|
-## if(copynumber) cnSet <- as(callSet, "CNSetLM")
|
667
|
|
-## close(mixtureParams)
|
668
|
|
-## rm(tmp); gc()
|
669
|
|
-## return(cnSet)
|
670
|
|
-##}
|
671
|
|
-##---------------------------------------------------------------------------
|
672
|
|
-##---------------------------------------------------------------------------
|
673
|
|
-
|
674
|
316
|
rowCovs <- function(x, y, ...){
|
675
|
317
|
notna <- !is.na(x)
|
676
|
318
|
N <- rowSums(notna)
|
...
|
...
|
@@ -706,8 +348,6 @@ applyByGenotype <- function(x, FUN, G){
|
706
|
348
|
tmp
|
707
|
349
|
}
|
708
|
350
|
|
709
|
|
-
|
710
|
|
-
|
711
|
351
|
rowCors <- function(x, y, ...){
|
712
|
352
|
N <- rowSums(!is.na(x))
|
713
|
353
|
x <- suppressWarnings(log2(x))
|
...
|
...
|
@@ -751,12 +391,13 @@ nuphiAllele2 <- function(allele, Ystar, W, Ns){
|
751
|
391
|
ses <- matrix(NA, 2, nrow(Ystar))
|
752
|
392
|
for(i in 1:nrow(Ystar)){
|
753
|
393
|
betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ]))
|
754
|
|
- ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
|
755
|
|
- ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
|
|
394
|
+ ##ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
|
|
395
|
+ ##ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
|
756
|
396
|
}
|
757
|
|
- nu <- betahat[1, ]
|
758
|
|
- phi <- betahat[2, ]
|
759
|
|
- return(list(nu, phi))
|
|
397
|
+## nu <- betahat[1, ]
|
|
398
|
+## phi <- betahat[2, ]
|
|
399
|
+## return(list(nu, phi))
|
|
400
|
+ return(betahat)
|
760
|
401
|
}
|
761
|
402
|
|
762
|
403
|
## linear regression without weights -- design matrix is same for all snps
|
...
|
...
|
@@ -1206,7 +847,7 @@ fit.lm1 <- function(idxBatch,
|
1206
|
847
|
MIN.PHI,
|
1207
|
848
|
verbose,
|
1208
|
849
|
weighted.lm, ...){
|
1209
|
|
- physical <- get("physical")
|
|
850
|
+ if(isPackageLoaded("ff")) physical <- get("physical")
|
1210
|
851
|
if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches))
|
1211
|
852
|
snps <- snpBatches[[idxBatch]]
|
1212
|
853
|
batches <- split(seq(along=batch(object)), batch(object))
|
...
|
...
|
@@ -1323,8 +964,10 @@ fit.lm1 <- function(idxBatch,
|
1323
|
964
|
if(!weighted.lm){
|
1324
|
965
|
res <- nuphiAllele2(allele="B", Ystar=YB, W=wB, Ns=Ns)
|
1325
|
966
|
} else res <- linearModel.noweights(allele="B", Ystar=YB, W=wB, Ns=Ns)
|
1326
|
|
- nuB[, J] <- res[[1]]
|
1327
|
|
- phiB[, J] <- res[[2]]
|
|
967
|
+ ##nuB[, J] <- res[[1]]
|
|
968
|
+ nuB[, J] <- res[1, ]
|
|
969
|
+ ##phiB[, J] <- res[[2]]
|
|
970
|
+ phiB[, J] <- res[2, ]
|
1328
|
971
|
if(THR.NU.PHI){
|
1329
|
972
|
nuA[nuA[, J] < MIN.NU, J] <- MIN.NU
|
1330
|
973
|
nuB[nuB[, J] < MIN.NU, J] <- MIN.NU
|
...
|
...
|
@@ -1337,7 +980,6 @@ fit.lm1 <- function(idxBatch,
|
1337
|
980
|
rm(G, A, B, NORM, wA, wB, YA,YB, res, negA, negB, Np, Ns)
|
1338
|
981
|
gc()
|
1339
|
982
|
}
|
1340
|
|
-
|
1341
|
983
|
cA[cA < 0.05] <- 0.05
|
1342
|
984
|
cB[cB < 0.05] <- 0.05
|
1343
|
985
|
cA[cA > 5] <- 5
|
...
|
...
|
@@ -3238,8 +2880,8 @@ ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){
|
3238
|
2880
|
return(centers)
|
3239
|
2881
|
}
|
3240
|
2882
|
|
3241
|
|
-computeCN <- function(filenames,
|
3242
|
|
- object,
|
|
2883
|
+computeCN <- function(object,
|
|
2884
|
+ filenames,
|
3243
|
2885
|
which.batches,
|
3244
|
2886
|
MIN.SAMPLES=10,
|
3245
|
2887
|
SNRMin=5,
|
...
|
...
|
@@ -3281,7 +2923,7 @@ computeCN <- function(filenames,
|
3281
|
2923
|
load(file.path(ldPath(), "flags.snps.rda"))
|
3282
|
2924
|
}
|
3283
|
2925
|
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3284
|
|
- FUN <- "fit.lm1"
|
|
2926
|
+ FUN <- fit.lm1
|
3285
|
2927
|
}
|
3286
|
2928
|
if(type=="autosome.nps"){
|
3287
|
2929
|
marker.index <- which(chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object)))
|
...
|
...
|
@@ -3302,7 +2944,7 @@ computeCN <- function(filenames,
|
3302
|
2944
|
load(file.path(ldPath(), "flags.nps.rda"))
|
3303
|
2945
|
}
|
3304
|
2946
|
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3305
|
|
- FUN <- "fit.lm2"
|
|
2947
|
+ FUN <- fit.lm2
|
3306
|
2948
|
}
|
3307
|
2949
|
if(type=="X.snps"){
|
3308
|
2950
|
marker.index <- which(chromosome(object) == 23 & isSnp(object) & !is.na(chromosome(object)))
|
...
|
...
|
@@ -3323,7 +2965,7 @@ computeCN <- function(filenames,
|
3323
|
2965
|
load(file.path(ldPath(), "flags.X.snps.rda"))
|
3324
|
2966
|
}
|
3325
|
2967
|
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3326
|
|
- FUN <- "fit.lm3"
|
|
2968
|
+ FUN <- fit.lm3
|
3327
|
2969
|
}
|
3328
|
2970
|
if(type=="X.nps"){
|
3329
|
2971
|
marker.index <- which(chromosome(object) == 23 & !isSnp(object) & !is.na(chromosome(object)))
|
...
|
...
|
@@ -3343,7 +2985,7 @@ computeCN <- function(filenames,
|
3343
|
2985
|
load(file.path(ldPath(), "flags.X.nps.rda"))
|
3344
|
2986
|
}
|
3345
|
2987
|
if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
|
3346
|
|
- FUN <- "fit.lm4"
|
|
2988
|
+ FUN <- fit.lm4
|
3347
|
2989
|
}
|
3348
|
2990
|
index.strata <- splitIndicesByLength(marker.index, ocProbesets())
|
3349
|
2991
|
obj <- construct(filenames=filenames,
|
...
|
...
|
@@ -3353,9 +2995,10 @@ computeCN <- function(filenames,
|
3353
|
2995
|
sns=sampleNames(object),
|
3354
|
2996
|
fns=featureNames(object)[marker.index])
|
3355
|
2997
|
ocLapply(seq(along=index.strata),
|
3356
|
|
- match.fun(FUN),
|
|
2998
|
+ ##match.fun(FUN),
|
|
2999
|
+ FUN,
|
3357
|
3000
|
marker.index=marker.index,
|
3358
|
|
- object=object,
|
|
3001
|
+ object=obj,
|
3359
|
3002
|
Ns=Ns,
|
3360
|
3003
|
normal=normal,
|
3361
|
3004
|
snpflags=flags,
|