git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@49131 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -71,13 +71,20 @@ export(crlmm, |
71 | 71 |
snprma, |
72 | 72 |
snprma2, |
73 | 73 |
crlmm2, |
74 |
- genotype2, |
|
75 |
- batch, |
|
76 |
- crlmmCopynumber2, |
|
77 |
- constructCNSetForIllumina, |
|
78 |
- validCdfNames) |
|
79 |
-##export(initializeParamObject, biasAdjNP2) |
|
80 |
-##export(construct) |
|
81 |
- |
|
82 |
-export(plotCNSetLM) |
|
74 |
+ genotype2, genotypeLD, |
|
75 |
+ crlmmCopynumber2, crlmmCopynumberLD, crlmmCopynumber) |
|
76 |
+export(constructIlluminaCNSet) |
|
77 |
+export(totalCopynumber) |
|
78 |
+export(cnrma, cnrma2) |
|
79 |
+exportMethods(A, B, nuA, nuB, phiA, phiB, corr, tau2, Ns, medians, mads) |
|
80 |
+export(genotypeSummary, |
|
81 |
+ shrinkSummary, |
|
82 |
+ estimateCnParameters, |
|
83 |
+ shrinkGenotypeSummaries, indexComplete) |
|
84 |
+## export(fit.lm1, fit.lm2, fit.lm3, fit.lm4) |
|
85 |
+ |
|
86 |
+## For debugging |
|
87 |
+## exportPattern("^[^\\.]") |
|
88 |
+ |
|
89 |
+ |
|
83 | 90 |
|
... | ... |
@@ -522,7 +522,7 @@ fit.lm2 <- function(strata, |
522 | 522 |
B <- batches[[k]] |
523 | 523 |
this.batch <- unique(as.character(batch(object)[B])) |
524 | 524 |
X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k]))) |
525 |
- Y <- log2(c(phiA.snp, phiB.snp)) |
|
525 |
+ Y <- log2(c(phiA.snp[, k], phiB.snp[, k])) |
|
526 | 526 |
betahat <- solve(crossprod(X), crossprod(X, Y)) |
527 | 527 |
crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0) |
528 | 528 |
X <- cbind(1, log2(medianA.AA.np[, k] + crosshyb)) |
... | ... |
@@ -833,7 +833,6 @@ fit.lm4 <- function(strata, |
833 | 833 |
batches <- split(seq_along(batch(object)), as.character(batch(object))) |
834 | 834 |
batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
835 | 835 |
nc <- length(batchNames(object)) |
836 |
- |
|
837 | 836 |
if(enough.males){ |
838 | 837 |
res <- summarizeMaleXNps(marker.index=marker.index, |
839 | 838 |
batches=batches, |
... | ... |
@@ -851,7 +850,6 @@ fit.lm4 <- function(strata, |
851 | 850 |
nuB <- as.matrix(nuB(object)[marker.index, ]) |
852 | 851 |
phiA <- as.matrix(phiA(object)[marker.index, ]) |
853 | 852 |
phiB <- as.matrix(phiB(object)[marker.index, ]) |
854 |
- |
|
855 | 853 |
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object)) |
856 | 854 |
fns <- featureNames(object)[ii] |
857 | 855 |
flags <- as.matrix(flags(object)[ii, ]) |
... | ... |
@@ -1382,18 +1380,47 @@ constructIlluminaAssayData <- function(np, snp, object, storage.mode="environmen |
1382 | 1380 |
} |
1383 | 1381 |
np <- lapply(np, stripnames) |
1384 | 1382 |
snp <- lapply(snp, stripnames) |
1385 |
- A <- rbind(snp[[1]], np[[1]], deparse.level=0)[order.index, ] |
|
1386 |
- B <- rbind(snp[[2]], np[[2]], deparse.level=0)[order.index, ] |
|
1387 |
- gt <- stripnames(calls(object)) |
|
1388 |
- emptyMatrix <- matrix(integer(), nrow(np[[1]]), ncol(A)) |
|
1389 |
- gt <- rbind(gt, emptyMatrix, deparse.level=0)[order.index,] |
|
1390 |
- pr <- stripnames(snpCallProbability(object)) |
|
1391 |
- pr <- rbind(pr, emptyMatrix, deparse.level=0)[order.index, ] |
|
1392 |
- aD <- assayDataNew(storage.mode, |
|
1393 |
- alleleA=A, |
|
1394 |
- alleleB=B, |
|
1395 |
- call=gt, |
|
1396 |
- callProbability=pr) |
|
1383 |
+ if(is(snp[[1]], "ff")){ |
|
1384 |
+ lapply(snp, open) |
|
1385 |
+ open(calls(object)) |
|
1386 |
+ open(snpCallProbability(object)) |
|
1387 |
+## lapply(np, open) |
|
1388 |
+ } |
|
1389 |
+ ##tmp <- rbind(as.matrix(snp[[1]]), as.matrix(np[[1]]), deparse.level=0) |
|
1390 |
+ A.snp <- snp[[1]] |
|
1391 |
+ B.snp <- snp[[2]] |
|
1392 |
+ ##Why is np not a ff object? |
|
1393 |
+ A.np <- np[[1]] |
|
1394 |
+ B.np <- np[[2]] |
|
1395 |
+ nr <- length(order.index) |
|
1396 |
+ nc <- ncol(object) |
|
1397 |
+ if(is(A.snp, "ff")){ |
|
1398 |
+ NA.vec <- rep(NA, nrow(A.np)) |
|
1399 |
+ AA <- initializeBigMatrix("A", nr, nc, vmode="integer") |
|
1400 |
+ BB <- initializeBigMatrix("B", nr, nc, vmode="integer") |
|
1401 |
+ GG <- initializeBigMatrix("calls", nr, nc, vmode="integer") |
|
1402 |
+ PP <- initializeBigMatrix("confs", nr, nc, vmode="integer") |
|
1403 |
+ for(j in 1:ncol(object)){ |
|
1404 |
+ AA[, j] <- c(snp[[1]][, j], np[[1]][, j])[order.index] |
|
1405 |
+ BB[, j] <- c(snp[[2]][, j], np[[2]][, j])[order.index] |
|
1406 |
+ GG[, j] <- c(calls(object)[, j], NA.vec)[order.index] |
|
1407 |
+ PP[, j] <- c(snpCallProbability(object)[, j], NA.vec)[order.index] |
|
1408 |
+ |
|
1409 |
+ } |
|
1410 |
+ } else { |
|
1411 |
+ AA <- rbind(snp[[1]], np[[1]], deparse.level=0)[order.index, ] |
|
1412 |
+ BB <- rbind(snp[[2]], np[[2]], deparse.level=0)[order.index, ] |
|
1413 |
+ gt <- stripnames(calls(object)) |
|
1414 |
+ emptyMatrix <- matrix(integer(), nrow(np[[1]]), ncol(A)) |
|
1415 |
+ GG <- rbind(gt, emptyMatrix, deparse.level=0)[order.index,] |
|
1416 |
+ pr <- stripnames(snpCallProbability(object)) |
|
1417 |
+ PP <- rbind(pr, emptyMatrix, deparse.level=0)[order.index, ] |
|
1418 |
+ } |
|
1419 |
+ assayDataNew(storage.mode, |
|
1420 |
+ alleleA=AA, |
|
1421 |
+ alleleB=BB, |
|
1422 |
+ call=GG, |
|
1423 |
+ callProbability=PP) |
|
1397 | 1424 |
} |
1398 | 1425 |
constructIlluminaCNSet <- function(crlmmResult, |
1399 | 1426 |
path, |
... | ... |
@@ -1408,20 +1435,16 @@ constructIlluminaCNSet <- function(crlmmResult, |
1408 | 1435 |
fD <- fD[new.order, ] |
1409 | 1436 |
aD <- constructIlluminaAssayData(cnAB, res, crlmmResult, order.index=new.order) |
1410 | 1437 |
##protocolData(crlmmResult)$batch <- vector("integer", ncol(crlmmResult)) |
1411 |
- batch <- vector("integer", ncol(crlmmResult)) |
|
1412 |
- container <- new("CNSet", |
|
1413 |
- call=aD[["call"]], |
|
1414 |
- callProbability=aD[["callProbability"]], |
|
1415 |
- alleleA=aD[["alleleA"]], |
|
1416 |
- alleleB=aD[["alleleB"]], |
|
1417 |
- phenoData=phenoData(crlmmResult), |
|
1418 |
- protocolData=protocolData(crlmmResult), |
|
1419 |
- featureData=fD, |
|
1420 |
- batch=batch, |
|
1421 |
- annotation="human370v1c") |
|
1422 |
-## lM(container) <- initializeParamObject(list(featureNames(container), unique(protocolData(container)$batch))) |
|
1423 |
-## lM(container) <- initializeLmFrom(container) |
|
1424 |
- container |
|
1438 |
+ new("CNSet", |
|
1439 |
+ call=aD[["call"]], |
|
1440 |
+ callProbability=aD[["callProbability"]], |
|
1441 |
+ alleleA=aD[["alleleA"]], |
|
1442 |
+ alleleB=aD[["alleleB"]], |
|
1443 |
+ phenoData=phenoData(crlmmResult), |
|
1444 |
+ protocolData=protocolData(crlmmResult), |
|
1445 |
+ featureData=fD, |
|
1446 |
+ batch=batch, |
|
1447 |
+ annotation="human370v1c") |
|
1425 | 1448 |
} |
1426 | 1449 |
|
1427 | 1450 |
|
... | ... |
@@ -1456,8 +1479,8 @@ shrinkSummary <- function(object, |
1456 | 1479 |
verbose=TRUE, |
1457 | 1480 |
marker.index, |
1458 | 1481 |
is.lds){ |
1459 |
- stopifnot(type %in% c("SNP", "X.SNP")) |
|
1460 |
- if(type == "X.SNP"){ |
|
1482 |
+ stopifnot(type[[1]] %in% c("SNP", "X.SNP")) |
|
1483 |
+ if(type[[1]] == "X.SNP"){ |
|
1461 | 1484 |
gender <- object$gender |
1462 | 1485 |
if(sum(gender == 2) < 3) { |
1463 | 1486 |
return("too few females to estimate within genotype summary statistics on CHR X") |
... | ... |
@@ -1474,17 +1497,10 @@ shrinkSummary <- function(object, |
1474 | 1497 |
if(is.lds) stopifnot(isPackageLoaded("ff")) |
1475 | 1498 |
marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X) |
1476 | 1499 |
} |
1477 |
- summaryFxn <- function(type){ |
|
1478 |
- switch(type, |
|
1479 |
- SNP="shrinkGenotypeSummaries", |
|
1480 |
- X.SNP="shrinkGenotypeSummaries", ## this shrinks for the females only |
|
1481 |
- stop()) |
|
1482 |
- } |
|
1483 |
- FUN <- summaryFxn(type[[1]]) |
|
1484 | 1500 |
if(is.lds){ |
1485 | 1501 |
index.list <- splitIndicesByLength(marker.index, ocProbesets()) |
1486 | 1502 |
ocLapply(seq(along=index.list), |
1487 |
- FUN, |
|
1503 |
+ shrinkGenotypeSummaries, |
|
1488 | 1504 |
index.list=index.list, |
1489 | 1505 |
object=object, |
1490 | 1506 |
verbose=verbose, |
... | ... |
@@ -1494,8 +1510,7 @@ shrinkSummary <- function(object, |
1494 | 1510 |
is.lds=is.lds, |
1495 | 1511 |
neededPkgs="crlmm") |
1496 | 1512 |
} else { |
1497 |
- FUN <- match.fun(FUN) |
|
1498 |
- object <- FUN(strata=1, |
|
1513 |
+ object <- shrinkGenotypeSummaries(strata=1, |
|
1499 | 1514 |
index.list=list(marker.index), |
1500 | 1515 |
object=object, |
1501 | 1516 |
verbose=verbose, |
... | ... |
@@ -1750,7 +1765,7 @@ crlmmCopynumber <- function(object, |
1750 | 1765 |
X.SNP="chromosome X SNPs", |
1751 | 1766 |
X.NP="chromosome X nonpolymorphic markers") |
1752 | 1767 |
} |
1753 |
- if(verbose) message("Computing summary statistics for genotype clusters for each batch") |
|
1768 |
+ if(verbose) message("Computing summary statistics of the genotype clusters for each batch") |
|
1754 | 1769 |
for(i in seq_along(type)){ |
1755 | 1770 |
## do all types |
1756 | 1771 |
marker.type <- type[i] |
... | ... |
@@ -1765,7 +1780,7 @@ crlmmCopynumber <- function(object, |
1765 | 1780 |
marker.index=marker.index, |
1766 | 1781 |
is.lds=is.lds) |
1767 | 1782 |
} |
1768 |
- if(verbose) message("Imputing unobserved cluster medians and shrinking the variances at polymorphic loci")##SNPs only |
|
1783 |
+ if(verbose) message("Imputing unobserved genotype medians and shrinking the variances (within-batch, across loci) ")##SNPs only |
|
1769 | 1784 |
for(i in seq_along(type)){ |
1770 | 1785 |
marker.type <- type[i] |
1771 | 1786 |
if(!marker.type %in% c("SNP", "X.SNP")) next() |
... | ... |
@@ -12,7 +12,187 @@ readIdatFiles <- function(sampleSheet=NULL, |
12 | 12 |
sep="_", |
13 | 13 |
fileExt=list(green="Grn.idat", red="Red.idat"), |
14 | 14 |
saveDate=FALSE) { |
15 |
+# if(!is.null(arrayNames)) { |
|
16 |
+# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
17 |
+# if(!is.null(sampleSheet)) { |
|
18 |
+# sampleSheet=NULL |
|
19 |
+# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
20 |
+# } |
|
21 |
+# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
22 |
+# } |
|
23 |
+ if(!is.null(arrayNames)) { |
|
24 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
25 |
+ } |
|
26 |
+ if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet |
|
27 |
+ if(is.null(arrayNames)){ |
|
28 |
+ ##arrayNames=NULL |
|
29 |
+ if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) { |
|
30 |
+ barcode = sampleSheet[,arrayInfoColNames$barcode] |
|
31 |
+ arrayNames=barcode |
|
32 |
+ } |
|
33 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) { |
|
34 |
+ position = sampleSheet[,arrayInfoColNames$position] |
|
35 |
+ if(is.null(arrayNames)) |
|
36 |
+ arrayNames=position |
|
37 |
+ else |
|
38 |
+ arrayNames = paste(arrayNames, position, sep=sep) |
|
39 |
+ if(highDensity) { |
|
40 |
+ hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02") |
|
41 |
+ for(i in names(hdExt)) |
|
42 |
+ arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames) |
|
43 |
+ } |
|
44 |
+ } |
|
45 |
+ } |
|
46 |
+ pd = new("AnnotatedDataFrame", data = sampleSheet) |
|
47 |
+ sampleNames(pd) <- basename(arrayNames) |
|
48 |
+ } |
|
49 |
+ if(is.null(arrayNames)) { |
|
50 |
+ arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
51 |
+ if(!is.null(sampleSheet)) { |
|
52 |
+ sampleSheet=NULL |
|
53 |
+ cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
54 |
+ } |
|
55 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
56 |
+ } |
|
57 |
+ narrays = length(arrayNames) |
|
58 |
+ grnfiles = paste(arrayNames, fileExt$green, sep=sep) |
|
59 |
+ redfiles = paste(arrayNames, fileExt$red, sep=sep) |
|
60 |
+ if(length(grnfiles)==0 || length(redfiles)==0) |
|
61 |
+ stop("Cannot find .idat files") |
|
62 |
+ if(length(grnfiles)!=length(redfiles)) |
|
63 |
+ stop("Cannot find matching .idat files") |
|
64 |
+ if(path[1] != "."){ |
|
65 |
+ grnidats = file.path(path, grnfiles) |
|
66 |
+ redidats = file.path(path, redfiles) |
|
67 |
+ } else { |
|
68 |
+ message("path arg not set. Assuming files are in local directory") |
|
69 |
+ grnidats <- grnfiles |
|
70 |
+ redidats <- redfiles |
|
71 |
+ } |
|
72 |
+ if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files") |
|
73 |
+ if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
|
74 |
+## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){ |
|
75 |
+## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n", |
|
76 |
+## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" ")) |
|
77 |
+## } |
|
78 |
+ headerInfo = list(nProbes = rep(NA, narrays), |
|
79 |
+ Barcode = rep(NA, narrays), |
|
80 |
+ ChipType = rep(NA, narrays), |
|
81 |
+ Manifest = rep(NA, narrays), # not sure about this one - sometimes blank |
|
82 |
+ Position = rep(NA, narrays)) # this may also vary a bit |
|
83 |
+ dates = list(decode=rep(NA, narrays), |
|
84 |
+ scan=rep(NA, narrays)) |
|
85 |
+ ## read in the data |
|
86 |
+ for(i in seq(along=arrayNames)) { |
|
87 |
+ cat("reading", arrayNames[i], "\t") |
|
88 |
+ idsG = idsR = G = R = NULL |
|
89 |
+ cat(paste(sep, fileExt$green, sep=""), "\t") |
|
90 |
+ G = readIDAT(grnidats[i]) |
|
91 |
+ idsG = rownames(G$Quants) |
|
92 |
+ headerInfo$nProbes[i] = G$nSNPsRead |
|
93 |
+ headerInfo$Barcode[i] = G$Barcode |
|
94 |
+ headerInfo$ChipType[i] = G$ChipType |
|
95 |
+ headerInfo$Manifest[i] = G$Unknown$MostlyNull |
|
96 |
+ headerInfo$Position[i] = G$Unknowns$MostlyA |
|
97 |
+ if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) { |
|
98 |
+ ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) { |
|
99 |
+ ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest |
|
100 |
+ ## but differed by a few SNPs for some reason - most of the chip was the same though |
|
101 |
+ ## stop("Chips are not of all of the same type - please check your data") |
|
102 |
+ warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i])) |
|
103 |
+ next() |
|
104 |
+ } |
|
105 |
+ dates$decode[i] = G$RunInfo[1, 1] |
|
106 |
+ dates$scan[i] = G$RunInfo[2, 1] |
|
107 |
+ if(i==1) { |
|
108 |
+ if(is.null(ids) && !is.null(G)){ |
|
109 |
+ ids = idsG |
|
110 |
+ }else stop("Could not find probe IDs") |
|
111 |
+ nprobes = length(ids) |
|
112 |
+ narrays = length(arrayNames) |
|
113 |
+ tmpmat = matrix(NA, nprobes, narrays) |
|
114 |
+ rownames(tmpmat) = ids |
|
115 |
+ if(!is.null(sampleSheet)){ |
|
116 |
+ colnames(tmpmat) = sampleSheet$Sample_ID |
|
117 |
+ }else colnames(tmpmat) = arrayNames |
|
118 |
+ RG <- new("NChannelSet", |
|
119 |
+ R=tmpmat, |
|
120 |
+ G=tmpmat, |
|
121 |
+ zero=tmpmat, |
|
122 |
+# Rnb=tmpmat, |
|
123 |
+# Gnb=tmpmat, |
|
124 |
+# Rse=tmpmat, |
|
125 |
+# Gse=tmpmat, |
|
126 |
+ annotation=headerInfo$Manifest[1], |
|
127 |
+ phenoData=pd, |
|
128 |
+ storage.mode="environment") |
|
129 |
+ rm(tmpmat) |
|
130 |
+ gc() |
|
131 |
+ } |
|
132 |
+ if(length(ids)==length(idsG)) { |
|
133 |
+ if(sum(ids==idsG)==nprobes) { |
|
134 |
+ RG@assayData$G[,i] = G$Quants[, "Mean"] |
|
135 |
+ zeroG = G$Quants[, "NBeads"]==0 |
|
136 |
+# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"] |
|
137 |
+# RG@assayData$Gse[,i] = G$Quants[, "SD"] |
|
138 |
+ } |
|
139 |
+ } else { |
|
140 |
+ indG = match(ids, idsG) |
|
141 |
+ RG@assayData$G[,i] = G$Quants[indG, "Mean"] |
|
142 |
+ zeroG = G$Quants[indG, "NBeads"]==0 |
|
143 |
+# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"] |
|
144 |
+# RG@assayData$Gse[,i] = G$Quants[indG, "SD"] |
|
145 |
+ } |
|
146 |
+ rm(G) |
|
147 |
+ gc() |
|
148 |
+ |
|
149 |
+ cat(paste(sep, fileExt$red, sep=""), "\n") |
|
150 |
+ R = readIDAT(redidats[i]) |
|
151 |
+ idsR = rownames(R$Quants) |
|
15 | 152 |
|
153 |
+ if(length(ids)==length(idsG)) { |
|
154 |
+ if(sum(ids==idsR)==nprobes) { |
|
155 |
+ RG@assayData$R[,i] = R$Quants[ ,"Mean"] |
|
156 |
+ zeroR = R$Quants[ ,"NBeads"]==0 |
|
157 |
+# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"] |
|
158 |
+# RG@assayData$Rse[,i] = R$Quants[ ,"SD"] |
|
159 |
+ } |
|
160 |
+ } else { |
|
161 |
+ indR = match(ids, idsR) |
|
162 |
+ RG@assayData$R[,i] = R$Quants[indR, "Mean"] |
|
163 |
+ zeroR = R$Quants[indR, "NBeads"]==0 |
|
164 |
+# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"] |
|
165 |
+# RG@assayData$Rse[,i] = R$Quants[indR, "SD"] |
|
166 |
+ } |
|
167 |
+ RG@assayData$zero[,i] = zeroG | zeroR |
|
168 |
+ rm(R, zeroG, zeroR) |
|
169 |
+ gc() |
|
170 |
+ } |
|
171 |
+ if(saveDate) { |
|
172 |
+ protocolData(RG)[["ScanDate"]] = dates$scan |
|
173 |
+ } |
|
174 |
+ storageMode(RG) = "lockedEnvironment" |
|
175 |
+ RG |
|
176 |
+} |
|
177 |
+ |
|
178 |
+ |
|
179 |
+readIdatFiles2 <- function(sampleSheet=NULL, |
|
180 |
+ arrayNames=NULL, |
|
181 |
+ ids=NULL, |
|
182 |
+ path=".", |
|
183 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"), |
|
184 |
+ highDensity=FALSE, |
|
185 |
+ sep="_", |
|
186 |
+ fileExt=list(green="Grn.idat", red="Red.idat"), |
|
187 |
+ saveDate=FALSE) { |
|
188 |
+# if(!is.null(arrayNames)) { |
|
189 |
+# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path)) |
|
190 |
+# if(!is.null(sampleSheet)) { |
|
191 |
+# sampleSheet=NULL |
|
192 |
+# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n") |
|
193 |
+# } |
|
194 |
+# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
|
195 |
+# } |
|
16 | 196 |
if(!is.null(arrayNames)) { |
17 | 197 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames)) |
18 | 198 |
} |
... | ... |
@@ -64,6 +244,10 @@ readIdatFiles <- function(sampleSheet=NULL, |
64 | 244 |
} |
65 | 245 |
if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files") |
66 | 246 |
if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files") |
247 |
+## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){ |
|
248 |
+## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n", |
|
249 |
+## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" ")) |
|
250 |
+## } |
|
67 | 251 |
headerInfo = list(nProbes = rep(NA, narrays), |
68 | 252 |
Barcode = rep(NA, narrays), |
69 | 253 |
ChipType = rep(NA, narrays), |
... | ... |
@@ -410,6 +594,113 @@ readBPM <- function(bpmFile){ |
410 | 594 |
} |
411 | 595 |
|
412 | 596 |
|
597 |
+RGtoXY = function(RG, chipType, verbose=TRUE) { |
|
598 |
+ chipList = c("human1mv1c", # 1M |
|
599 |
+ "human370v1c", # 370CNV |
|
600 |
+ "human650v3a", # 650Y |
|
601 |
+ "human610quadv1b", # 610 quad |
|
602 |
+ "human660quadv1a", # 660 quad |
|
603 |
+ "human370quadv3c", # 370CNV quad |
|
604 |
+ "human550v3b", # 550K |
|
605 |
+ "human1mduov3b", # 1M Duo |
|
606 |
+ "humanomni1quadv1b", # Omni1 quad |
|
607 |
+ "humanomniexpress12v1b") # Omni express 12 |
|
608 |
+ if(missing(chipType)){ |
|
609 |
+ chipType = match.arg(annotation(RG), chipList) |
|
610 |
+ } else chipType = match.arg(chipType, chipList) |
|
611 |
+ |
|
612 |
+ pkgname <- getCrlmmAnnotationName(chipType) |
|
613 |
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
614 |
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
615 |
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
616 |
+ message(strwrap(msg)) |
|
617 |
+ stop("Package ", pkgname, " could not be found.") |
|
618 |
+ rm(suggCall, msg) |
|
619 |
+ } |
|
620 |
+ if(verbose) message("Loading chip annotation information.") |
|
621 |
+ loader("address.rda", .crlmmPkgEnv, pkgname) |
|
622 |
+# data(annotation, package=pkgname, envir=.crlmmPkgEnv) |
|
623 |
+ aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest |
|
624 |
+ bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest |
|
625 |
+ ids <- names(aids) |
|
626 |
+ snpbase <- getVarInEnv("base") |
|
627 |
+ |
|
628 |
+ nsnps = length(aids) |
|
629 |
+ narrays = ncol(RG) |
|
630 |
+ |
|
631 |
+# aidcol = match("AddressA_ID", colnames(annot)) |
|
632 |
+# if(is.na(aidcol)) |
|
633 |
+# aidcol = match("Address", colnames(annot)) |
|
634 |
+# bidcol = match("AddressB_ID", colnames(annot)) |
|
635 |
+# if(is.na(bidcol)) |
|
636 |
+# bidcol = match("Address2", colnames(annot)) |
|
637 |
+# aids = annot[, aidcol] |
|
638 |
+# bids = annot[, bidcol] |
|
639 |
+# snpids = annot[,"Name"] |
|
640 |
+# snpbase = annot[,"SNP"] |
|
641 |
+ infI = !is.na(bids) & bids!=0 |
|
642 |
+ aord = match(aids, featureNames(RG)) # NAs are possible here |
|
643 |
+ bord = match(bids, featureNames(RG)) # and here |
|
644 |
+# argrg = aids[rrgg] |
|
645 |
+# brgrg = bids[rrgg] |
|
646 |
+ |
|
647 |
+ tmpmat = matrix(as.integer(0), nsnps, narrays) |
|
648 |
+ rownames(tmpmat) = ids |
|
649 |
+ colnames(tmpmat) = sampleNames(RG) |
|
650 |
+ XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat, |
|
651 |
+ annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment") |
|
652 |
+ rm(tmpmat) |
|
653 |
+ gc() |
|
654 |
+ |
|
655 |
+ # First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
|
656 |
+ XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red |
|
657 |
+ XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green |
|
658 |
+ XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green |
|
659 |
+# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],] |
|
660 |
+# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],] |
|
661 |
+# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],] |
|
662 |
+# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],] |
|
663 |
+ gc() |
|
664 |
+ |
|
665 |
+ ## Warning - not 100% sure that the code below is correct - could be more complicated than this |
|
666 |
+ |
|
667 |
+ # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe |
|
668 |
+# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]") |
|
669 |
+ |
|
670 |
+# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
|
671 |
+# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
|
672 |
+# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],] |
|
673 |
+# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],] |
|
674 |
+# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],] |
|
675 |
+# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],] |
|
676 |
+ |
|
677 |
+ # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
|
678 |
+# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
|
679 |
+ |
|
680 |
+# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red |
|
681 |
+# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green |
|
682 |
+# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],] |
|
683 |
+# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],] |
|
684 |
+# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],] |
|
685 |
+# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],] |
|
686 |
+ |
|
687 |
+ # For now zero out Infinium I probes |
|
688 |
+ XY@assayData$X[infI,] = 0 |
|
689 |
+ XY@assayData$Y[infI,] = 0 |
|
690 |
+ XY@assayData$zero[infI,] = 0 |
|
691 |
+# XY@assayData$Xnb[infI,] = 0 |
|
692 |
+# XY@assayData$Ynb[infI,] = 0 |
|
693 |
+# XY@assayData$Xse[infI,] = 0 |
|
694 |
+# XY@assayData$Yse[infI,] = 0 |
|
695 |
+ |
|
696 |
+# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1 |
|
697 |
+ gc() |
|
698 |
+ |
|
699 |
+# storageMode(XY) = "lockedEnvironment" |
|
700 |
+ XY |
|
701 |
+} |
|
702 |
+ |
|
703 |
+ |
|
413 | 704 |
RGtoXY = function(RG, chipType, verbose=TRUE) { |
414 | 705 |
chipList = c("human1mv1c", # 1M |
415 | 706 |
"human370v1c", # 370CNV |
... | ... |
@@ -472,9 +763,6 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
472 | 763 |
XY@assayData$Y[1:nsnps,] = 0 |
473 | 764 |
XY@assayData$zero[1:nsnps,] = 0 |
474 | 765 |
|
475 |
- open(RG@assayData$R) |
|
476 |
- open(RG@assayData$G) |
|
477 |
- open(RG@assayData$zero) |
|
478 | 766 |
|
479 | 767 |
# First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe |
480 | 768 |
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red |
... | ... |
@@ -482,10 +770,6 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
482 | 770 |
XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green |
483 | 771 |
gc() |
484 | 772 |
|
485 |
- close(RG@assayData$R) |
|
486 |
- close(RG@assayData$G) |
|
487 |
- close(RG@assayData$zero) |
|
488 |
- |
|
489 | 773 |
## Warning - not 100% sure that the code below is correct - could be more complicated than this |
490 | 774 |
|
491 | 775 |
# Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe |
... | ... |
@@ -493,17 +777,31 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
493 | 777 |
|
494 | 778 |
# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red |
495 | 779 |
# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green |
780 |
+# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],] |
|
781 |
+# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],] |
|
782 |
+# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],] |
|
783 |
+# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],] |
|
496 | 784 |
|
497 | 785 |
# Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe |
498 | 786 |
# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]") |
499 | 787 |
|
500 | 788 |
# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red |
501 | 789 |
# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green |
790 |
+# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],] |
|
791 |
+# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],] |
|
792 |
+# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],] |
|
793 |
+# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],] |
|
502 | 794 |
|
503 | 795 |
# For now zero out Infinium I probes |
504 | 796 |
XY@assayData$X[infI,] = 0 |
505 | 797 |
XY@assayData$Y[infI,] = 0 |
506 | 798 |
XY@assayData$zero[infI,] = 0 |
799 |
+# XY@assayData$Xnb[infI,] = 0 |
|
800 |
+# XY@assayData$Ynb[infI,] = 0 |
|
801 |
+# XY@assayData$Xse[infI,] = 0 |
|
802 |
+# XY@assayData$Yse[infI,] = 0 |
|
803 |
+ |
|
804 |
+# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1 |
|
507 | 805 |
gc() |
508 | 806 |
|
509 | 807 |
# if(class(XY@assayData$X)[1]=="ff_matrix") { |
... | ... |
@@ -534,11 +832,14 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
534 | 832 |
if(useTarget) |
535 | 833 |
targetdist = getVarInEnv("reference") |
536 | 834 |
|
835 |
+# Xqws = Yqws = matrix(0, nrow(XY), ncol(XY)) |
|
836 |
+# colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X |
|
837 |
+# rownames(Xqws) = rownames(Yqws) = featureNames(XY) |
|
838 |
+ |
|
537 | 839 |
if(verbose){ |
538 | 840 |
message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.") |
539 | 841 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3) |
540 | 842 |
} |
541 |
- |
|
542 | 843 |
# if(class(XY@assayData$X)[1]=="ff_matrix") { |
543 | 844 |
open(XY@assayData$X) |
544 | 845 |
open(XY@assayData$Y) |
... | ... |
@@ -574,6 +875,146 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
574 | 875 |
|
575 | 876 |
|
576 | 877 |
preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
878 |
+ fitMixture=TRUE, |
|
879 |
+ eps=0.1, |
|
880 |
+ verbose=TRUE, |
|
881 |
+ seed=1, |
|
882 |
+ cdfName, |
|
883 |
+ sns, |
|
884 |
+ stripNorm=TRUE, |
|
885 |
+ useTarget=TRUE, |
|
886 |
+ save.it=FALSE, |
|
887 |
+ snpFile, |
|
888 |
+ cnFile) { |
|
889 |
+ if(stripNorm) |
|
890 |
+ XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose) |
|
891 |
+ |
|
892 |
+## MR: the code below is mostly straight from snprma.R |
|
893 |
+ if (missing(sns)) sns <- sampleNames(XY) #$X |
|
894 |
+ if(missing(cdfName)) |
|
895 |
+ cdfName <- annotation(XY) |
|
896 |
+## stuffDir <- changeToCrlmmAnnotationName(cdfName) |
|
897 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
898 |
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
899 |
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
900 |
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
901 |
+ message(strwrap(msg)) |
|
902 |
+ stop("Package ", pkgname, " could not be found.") |
|
903 |
+ rm(suggCall, msg) |
|
904 |
+ } |
|
905 |
+ if(verbose) message("Loading snp annotation and mixture model parameters.") |
|
906 |
+ loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
907 |
+ loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
908 |
+ loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
909 |
+ loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
910 |
+# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv) |
|
911 |
+ autosomeIndex <- getVarInEnv("autosomeIndex") |
|
912 |
+ # pnsa <- getVarInEnv("pnsa") |
|
913 |
+ # pnsb <- getVarInEnv("pnsb") |
|
914 |
+ # fid <- getVarInEnv("fid") |
|
915 |
+ # reference <- getVarInEnv("reference") |
|
916 |
+ # aIndex <- getVarInEnv("aIndex") |
|
917 |
+ # bIndex <- getVarInEnv("bIndex") |
|
918 |
+ SMEDIAN <- getVarInEnv("SMEDIAN") |
|
919 |
+ theKnots <- getVarInEnv("theKnots") |
|
920 |
+ gns <- getVarInEnv("gns") |
|
921 |
+ |
|
922 |
+ # separate out copy number probes |
|
923 |
+ npIndex = getVarInEnv("npProbesFid") |
|
924 |
+ nprobes = length(npIndex) |
|
925 |
+ if(length(nprobes)>0) { |
|
926 |
+ narrays = ncol(XY) |
|
927 |
+ A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays) |
|
928 |
+ B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays) |
|
929 |
+ |
|
930 |
+ # new lines below - useful to keep track of zeroed out probes |
|
931 |
+ zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) |
|
932 |
+ |
|
933 |
+ colnames(A) <- colnames(B) <- colnames(zero) <- sns |
|
934 |
+ rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex) |
|
935 |
+ |
|
936 |
+ cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName) |
|
937 |
+ if(save.it & !missing(cnFile)) { |
|
938 |
+ t0 <- proc.time() |
|
939 |
+ save(cnAB, file=cnFile) |
|
940 |
+ t0 <- proc.time()-t0 |
|
941 |
+ if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".") |
|
942 |
+ } |
|
943 |
+ rm(cnAB, B, zero) |
|
944 |
+ } |
|
945 |
+ |
|
946 |
+ # next process snp probes |
|
947 |
+ snpIndex = getVarInEnv("snpProbesFid") |
|
948 |
+ nprobes <- length(snpIndex) |
|
949 |
+ |
|
950 |
+ ##We will read each cel file, summarize, and run EM one by one |
|
951 |
+ ##We will save parameters of EM to use later |
|
952 |
+ mixtureParams <- matrix(0, 4, narrays) |
|
953 |
+ SNR <- vector("numeric", narrays) |
|
954 |
+ SKW <- vector("numeric", narrays) |
|
955 |
+ |
|
956 |
+ ## This is the sample for the fitting of splines |
|
957 |
+ ## BC: I like better the idea of the user passing the seed, |
|
958 |
+ ## because this might intefere with other analyses |
|
959 |
+ ## (like what happened to GCRMA) |
|
960 |
+ set.seed(seed) |
|
961 |
+ idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
|
962 |
+ idx2 <- sample(nprobes, 10^5) |
|
963 |
+ |
|
964 |
+ ##S will hold (A+B)/2 and M will hold A-B |
|
965 |
+ ##NOTE: We actually dont need to save S. Only for pics etc... |
|
966 |
+ ##f is the correction. we save to avoid recomputing |
|
967 |
+ A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames)) |
|
968 |
+ B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames)) |
|
969 |
+ |
|
970 |
+ # new lines below - useful to keep track of zeroed out SNPs |
|
971 |
+ zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays) |
|
972 |
+ |
|
973 |
+ colnames(A) <- colnames(B) <- colnames(zero) <- sns |
|
974 |
+ rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY) |
|
975 |
+ |
|
976 |
+ if(verbose){ |
|
977 |
+ message("Calibrating ", narrays, " arrays.") |
|
978 |
+ if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3) |
|
979 |
+ } |
|
980 |
+ |
|
981 |
+ |
|
982 |
+ for(i in 1:narrays){ |
|
983 |
+ SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3) |
|
984 |
+ if(fitMixture){ |
|
985 |
+ S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN |
|
986 |
+ M <- log2(A[idx, i])-log2(B[idx, i]) |
|
987 |
+ |
|
988 |
+ ##we need to test the choice of eps.. it is not the max diff between funcs |
|
989 |
+ tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps) |
|
990 |
+ |
|
991 |
+ mixtureParams[, i] <- tmp[["coef"]] |
|
992 |
+ SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
|
993 |
+ } |
|
994 |
+ if(verbose) { |
|
995 |
+ if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
996 |
+ else cat(".") |
|
997 |
+ } |
|
998 |
+ } |
|
999 |
+ if (verbose) { |
|
1000 |
+ if (getRversion() > '2.7.0') close(pb) |
|
1001 |
+ else cat("\n") |
|
1002 |
+ } |
|
1003 |
+ if (!fitMixture) SNR <- mixtureParams <- NA |
|
1004 |
+ ## gns comes from preprocStuff.rda |
|
1005 |
+ res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
|
1006 |
+ |
|
1007 |
+ if(save.it & !missing(snpFile)) { |
|
1008 |
+ t0 <- proc.time() |
|
1009 |
+ save(res, file=snpFile) |
|
1010 |
+ t0 <- proc.time()-t0 |
|
1011 |
+ if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
|
1012 |
+ } |
|
1013 |
+ return(res) |
|
1014 |
+} |
|
1015 |
+ |
|
1016 |
+ |
|
1017 |
+preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5, |
|
577 | 1018 |
fitMixture=TRUE, |
578 | 1019 |
eps=0.1, |
579 | 1020 |
verbose=TRUE, |
... | ... |
@@ -625,10 +1066,6 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
625 | 1066 |
# new lines below - useful to keep track of zeroed out probes |
626 | 1067 |
zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays) |
627 | 1068 |
|
628 |
- close(XY@assayData$X) |
|
629 |
- close(XY@assayData$Y) |
|
630 |
- close(XY@assayData$zero) |
|
631 |
- |
|
632 | 1069 |
colnames(A) <- colnames(B) <- colnames(zero) <- sns |
633 | 1070 |
rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex) |
634 | 1071 |
|
... | ... |
@@ -651,6 +1088,9 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
651 | 1088 |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double") |
652 | 1089 |
SNR <- initializeBigVector("crlmmSNR-", narrays, "double") |
653 | 1090 |
SKW <- initializeBigVector("crlmmSKW-", narrays, "double") |
1091 |
+# mixtureParams <- matrix(0, 4, narrays) |
|
1092 |
+# SNR <- vector("numeric", narrays) |
|
1093 |
+# SKW <- vector("numeric", narrays) |
|
654 | 1094 |
|
655 | 1095 |
## This is the sample for the fitting of splines |
656 | 1096 |
## BC: I like better the idea of the user passing the seed, |
... | ... |
@@ -663,6 +1103,25 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
663 | 1103 |
##S will hold (A+B)/2 and M will hold A-B |
664 | 1104 |
##NOTE: We actually dont need to save S. Only for pics etc... |
665 | 1105 |
##f is the correction. we save to avoid recomputing |
1106 |
+# A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames)) |
|
1107 |
+# B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames)) |
|
1108 |
+ |
|
1109 |
+ # new lines below - useful to keep track of zeroed out SNPs |
|
1110 |
+# zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays) |
|
1111 |
+ |
|
1112 |
+# if(!is.matrix(A)) { |
|
1113 |
+# A = A[,] |
|
1114 |
+# B = B[,] |
|
1115 |
+# zero = zero[,] |
|
1116 |
+# } |
|
1117 |
+ |
|
1118 |
+# if(!is.integer(A)) { |
|
1119 |
+# A = matrix(as.integer(A), nrow(A), ncol(A)) |
|
1120 |
+# B = matrix(as.integer(B), nrow(B), ncol(B)) |
|
1121 |
+# } |
|
1122 |
+ |
|
1123 |
+# colnames(A) <- colnames(B) <- colnames(zero) <- sns |
|
1124 |
+# rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY) |
|
666 | 1125 |
|
667 | 1126 |
A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer") |
668 | 1127 |
B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer") |
... | ... |
@@ -705,10 +1164,12 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5, |
705 | 1164 |
if (!fitMixture) SNR <- mixtureParams <- NA |
706 | 1165 |
## gns comes from preprocStuff.rda |
707 | 1166 |
|
708 |
- close(XY@assayData$X) |
|
709 |
- close(XY@assayData$Y) |
|
710 |
- close(XY@assayData$zero) |
|
711 |
- |
|
1167 |
+ if(save.it & !missing(snpFile)) { |
|
1168 |
+ t0 <- proc.time() |
|
1169 |
+ save(res, file=snpFile) |
|
1170 |
+ t0 <- proc.time()-t0 |
|
1171 |
+ if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".") |
|
1172 |
+ } |
|
712 | 1173 |
# if(class(A)[1]=="ff_matrix") { |
713 | 1174 |
close(A) |
714 | 1175 |
close(B) |
... | ... |
@@ -774,6 +1235,61 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
774 | 1235 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
775 | 1236 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, |
776 | 1237 |
save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
1238 |
+ }else{ |
|
1239 |
+ if(verbose) message("Loading ", snpFile, ".") |
|
1240 |
+ obj <- load(snpFile) |
|
1241 |
+ if(verbose) message("Done.") |
|
1242 |
+ if(!any(obj == "res")) |
|
1243 |
+ stop("Object in ", snpFile, " seems to be invalid.") |
|
1244 |
+ } |
|
1245 |
+ if(row.names) row.names=res[["gns"]] else row.names=NULL |
|
1246 |
+ if(col.names) col.names=res[["sns"]] else col.names=NULL |
|
1247 |
+ |
|
1248 |
+ res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]], |
|
1249 |
+ res[["mixtureParams"]], res[["cdfName"]], |
|
1250 |
+ gender=gender, row.names=row.names, |
|
1251 |
+ col.names=col.names, recallMin=recallMin, |
|
1252 |
+ recallRegMin=1000, SNRMin=SNRMin, |
|
1253 |
+ returnParams=returnParams, badSNP=badSNP, |
|
1254 |
+ verbose=verbose) |
|
1255 |
+ |
|
1256 |
+ res2[["SNR"]] <- res[["SNR"]] |
|
1257 |
+ res2[["SKW"]] <- res[["SKW"]] |
|
1258 |
+ rm(res) |
|
1259 |
+ gc() |
|
1260 |
+ return(list2SnpSet(res2, returnParams=returnParams)) # return(res2) |
|
1261 |
+} |
|
1262 |
+ |
|
1263 |
+ |
|
1264 |
+crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
|
1265 |
+ row.names=TRUE, col.names=TRUE, |
|
1266 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
|
1267 |
+ seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile, |
|
1268 |
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE, |
|
1269 |
+ cdfName, sns, recallMin=10, recallRegMin=1000, |
|
1270 |
+ returnParams=FALSE, badSNP=.7) { |
|
1271 |
+ if (save.it & (missing(snpFile) | missing(cnFile))) |
|
1272 |
+ stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it") |
|
1273 |
+ if (load.it & missing(snpFile)) |
|
1274 |
+ stop("'snpFile' is missing and you chose to load.it") |
|
1275 |
+ if (!missing(snpFile)) |
|
1276 |
+ if (load.it & !file.exists(snpFile)){ |
|
1277 |
+ load.it <- FALSE |
|
1278 |
+ message("File ", snpFile, " does not exist.") |
|
1279 |
+ stop("Cannot load SNP data.") |
|
1280 |
+ } |
|
1281 |
+ if (!load.it){ |
|
1282 |
+ if(!missing(RG)) { |
|
1283 |
+ if(missing(XY)) |
|
1284 |
+ XY = RGtoXY2(RG, chipType=cdfName) |
|
1285 |
+ else |
|
1286 |
+ stop("Both RG and XY specified - please use one or the other") |
|
1287 |
+ } |
|
1288 |
+ if (missing(sns)) sns <- sampleNames(XY) #$X |
|
1289 |
+ |
|
1290 |
+ res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
|
1291 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, |
|
1292 |
+ # save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
|
777 | 1293 |
|
778 | 1294 |
# fD = featureData(XY) |
779 | 1295 |
# phenD = XY@phenoData |
... | ... |
@@ -801,12 +1317,7 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
801 | 1317 |
stop("Object in ", snpFile, " seems to be invalid.") |
802 | 1318 |
} |
803 | 1319 |
|
804 |
- open(res[["A"]]) |
|
805 |
- open(res[["B"]]) |
|
806 |
- open(res[["SNR"]]) |
|
807 |
- open(res[["mixtureParams"]]) |
|
808 |
- |
|
809 |
-# rm(phenD, protD , fD) |
|
1320 |
+ # rm(phenD, protD , fD) |
|
810 | 1321 |
|
811 | 1322 |
# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet)) |
812 | 1323 |
# suppressWarnings(A(callSet) <- res[["A"]]) |
... | ... |
@@ -867,7 +1378,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
867 | 1378 |
# rgFile, |
868 | 1379 |
stripNorm=TRUE, |
869 | 1380 |
useTarget=TRUE, |
870 |
- row.names=TRUE, |
|
1381 |
+ row.names=TRUE, |
|
871 | 1382 |
col.names=TRUE, |
872 | 1383 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL, |
873 | 1384 |
seed=1, save.it=FALSE, snpFile, cnFile, |
... | ... |
@@ -1,11 +1,15 @@ |
1 | 1 |
setMethod("Ns", signature(object="AssayData"), |
2 |
- function(object, i, j, ...){ |
|
3 |
- if(!missing(j)){ |
|
4 |
- batchnames <- unique(as.character(batch(object)[j])) |
|
5 |
- } else batchnames <- batchNames(object) |
|
6 |
- nc <- length(batchnames) |
|
7 |
- if(!missing(i)) nr <- length(i) else nr <- nrow(object) |
|
8 |
- res <- array(NA, dim=c(nr, 3, nc)) |
|
2 |
+ function(object, ...){ |
|
3 |
+ dotArgs <- names(list(...)) |
|
4 |
+ missing.i <- !("i" %in% dotArgs) |
|
5 |
+ missing.j <- !("j" %in% dotArgs) |
|
6 |
+ batchnames <- batchNames(object) |
|
7 |
+ if(!missing.j) batchnames <- batchnames[j] |
|
8 |
+ if(missing.i & missing.j) stop("Must specify either the rows i or batches j") |
|
9 |
+ N.AA <- as.matrix(assayDataElement(object, "N.AA")[...]) |
|
10 |
+ N.AB <- as.matrix(assayDataElement(object, "N.AB")[...]) |
|
11 |
+ N.BB <- as.matrix(assayDataElement(object, "N.BB")[...]) |
|
12 |
+ res <- array(NA, dim=c(nrow(N.AA), 3, ncol(N.AA))) |
|
9 | 13 |
dimnames(res)[[2]] <- c("AA", "AB", "BB") |
10 | 14 |
dimnames(res)[[3]] <- batchnames |
11 | 15 |
if(missing(i) & missing(j)){ |
... | ... |
@@ -39,12 +43,16 @@ setMethod("Ns", signature(object="AssayData"), |
39 | 43 |
}) |
40 | 44 |
setMethod("corr", signature(object="AssayData"), |
41 | 45 |
function(object, i, j, ...){ |
42 |
- if(!missing(j)){ |
|
43 |
- batchnames <- unique(as.character(batch(object)[j])) |
|
44 |
- } else batchnames <- batchNames(object) |
|
45 |
- nc <- length(batchnames) |
|
46 |
- if(!missing(i)) nr <- length(i) else nr <- nrow(object) |
|
47 |
- res <- array(NA, dim=c(nr, 3, nc)) |
|
46 |
+ dotArgs <- names(list(...)) |
|
47 |
+ missing.i <- !("i" %in% dotArgs) |
|
48 |
+ missing.j <- !("j" %in% dotArgs) |
|
49 |
+ batchnames <- batchNames(object) |
|
50 |
+ if(!missing.j) batchnames <- batchnames[j] |
|
51 |
+ if(missing.i & missing.j) stop("Must specify either the rows i or batches j") |
|
52 |
+ corrAA <- as.matrix(assayDataElement(object, "corrAA")[...]) |
|
53 |
+ corrAB <- as.matrix(assayDataElement(object, "corrAB")[...]) |
|
54 |
+ corrBB <- as.matrix(assayDataElement(object, "corrBB")[...]) |
|
55 |
+ res <- array(NA, dim=c(nrow(corrAA), 3, ncol(corrAA))) |
|
48 | 56 |
dimnames(res)[[2]] <- c("AA", "AB", "BB") |
49 | 57 |
dimnames(res)[[3]] <- batchnames |
50 | 58 |
if(missing(i) & missing(j)){ |
... | ... |
@@ -78,13 +86,20 @@ setMethod("corr", signature(object="AssayData"), |
78 | 86 |
}) |
79 | 87 |
|
80 | 88 |
setMethod("medians", signature(object="AssayData"), |
81 |
- function(object, i, j, ...){ |
|
82 |
- if(!missing(j)){ |
|
83 |
- batchnames <- unique(as.character(batch(object)[j])) |
|
84 |
- } else batchnames <- batchNames(object) |
|
85 |
- nc <- length(batchnames) |
|
86 |
- if(!missing(i)) nr <- length(i) else nr <- nrow(object) |
|
87 |
- res <- array(NA, dim=c(nr, 2, 3, nc)) |
|
89 |
+ function(object, ...){ |
|
90 |
+ dotArgs <- names(list(...)) |
|
91 |
+ missing.i <- !("i" %in% dotArgs) |
|
92 |
+ missing.j <- !("j" %in% dotArgs) |
|
93 |
+ batchnames <- batchNames(object) |
|
94 |
+ if(!missing.j) batchnames <- batchnames[j] |
|
95 |
+ if(missing.i & missing.j) stop("Must specify either the rows i or batches j") |
|
96 |
+ medianA.AA <- as.matrix(assayDataElement(object, "medianA.AA")[...]) |
|
97 |
+ medianA.AB <- as.matrix(assayDataElement(object, "medianA.AB")[...]) |
|
98 |
+ medianA.BB <- as.matrix(assayDataElement(object, "medianA.BB")[...]) |
|
99 |
+ medianB.AA <- as.matrix(assayDataElement(object, "medianB.AA")[...]) |
|
100 |
+ medianB.AB <- as.matrix(assayDataElement(object, "medianB.AB")[...]) |
|
101 |
+ medianB.BB <- as.matrix(assayDataElement(object, "medianB.BB")[...]) |
|
102 |
+ res <- array(NA, dim=c(nrow(medianA.AA), 2, 3, ncol(medianA.AA))) |
|
88 | 103 |
dimnames(res)[[2]] <- c("A", "B") |
89 | 104 |
dimnames(res)[[3]] <- c("AA", "AB", "BB") |
90 | 105 |
dimnames(res)[[4]] <- batchnames |
... | ... |
@@ -134,14 +149,21 @@ setMethod("medians", signature(object="AssayData"), |
134 | 149 |
return(res) |
135 | 150 |
}) |
136 | 151 |
|
137 |
-setMethod("medians", signature(object="AssayData"), |
|
138 |
- function(object, i, j, ...){ |
|
139 |
- if(!missing(j)){ |
|
140 |
- batchnames <- unique(as.character(batch(object)[j])) |
|
141 |
- } else batchnames <- batchNames(object) |
|
142 |
- nc <- length(batchnames) |
|
143 |
- if(!missing(i)) nr <- length(i) else nr <- nrow(object) |
|
144 |
- res <- array(NA, dim=c(nr, 2, 3, nc)) |
|
152 |
+setMethod("mads", signature(object="AssayData"), |
|
153 |
+ function(object, ...){ |
|
154 |
+ dotArgs <- names(list(...)) |
|
155 |
+ missing.i <- !("i" %in% dotArgs) |
|
156 |
+ missing.j <- !("j" %in% dotArgs) |
|
157 |
+ batchnames <- batchNames(object) |
|
158 |
+ if(!missing.j) batchnames <- batchnames[j] |
|
159 |
+ if(missing.i & missing.j) stop("Must specify either the rows i or batches j") |
|
160 |
+ madA.AA <- as.matrix(assayDataElement(object, "madA.AA")[...]) |
|
161 |
+ madA.AB <- as.matrix(assayDataElement(object, "madA.AB")[...]) |
|
162 |
+ madA.BB <- as.matrix(assayDataElement(object, "madA.BB")[...]) |
|
163 |
+ madB.AA <- as.matrix(assayDataElement(object, "madB.AA")[...]) |
|
164 |
+ madB.AB <- as.matrix(assayDataElement(object, "madB.AB")[...]) |
|
165 |
+ madB.BB <- as.matrix(assayDataElement(object, "madB.BB")[...]) |
|
166 |
+ res <- array(NA, dim=c(nrow(madA.AA), 2, 3, ncol(madA.AA))) |
|
145 | 167 |
dimnames(res)[[2]] <- c("A", "B") |
146 | 168 |
dimnames(res)[[3]] <- c("AA", "AB", "BB") |
147 | 169 |
dimnames(res)[[4]] <- batchnames |
... | ... |
@@ -184,7 +184,7 @@ if(!file.exists(file.path(outdir, "cnSet.rda"))){ |
184 | 184 |
} |
185 | 185 |
@ |
186 | 186 |
|
187 |
-\subsection{Copy number algorithm.} |
|
187 |
+\subsection{Copy number estimation.} |
|
188 | 188 |
|
189 | 189 |
The \Rfunction{crlmmCopynumber} computes summary statistics for each |
190 | 190 |
genotype, imputes unobserved genotype centers, shrinks the |
... | ... |
@@ -193,10 +193,14 @@ allele-specific copy number. With \texttt{verbose=TRUE}, the above |
193 | 193 |
steps for CN estimation are displayed. |
194 | 194 |
|
195 | 195 |
<<LDS_copynumber>>= |
196 |
-cnSet <- checkExists("cnSet", |
|
197 |
- .path=outdir, |
|
198 |
- .FUN=crlmmCopynumber, |
|
199 |
- object=gtSet) |
|
196 |
+##trace(fit.lm4, browser) |
|
197 |
+##estimateCnParameters(gtSet,type="X.NP") |
|
198 |
+##estimateCnParameters(gtSet,type="X.SNP") |
|
199 |
+##trace(fit.lm2, browser) |
|
200 |
+##estimateCnParameters(gtSet,type="NP") |
|
201 |
+##estimateCnParameters(gtSet,type="SNP") |
|
202 |
+cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=gtSet) |
|
203 |
+##cnSet <- crlmmCopynumber(gtSet) |
|
200 | 204 |
@ |
201 | 205 |
|
202 | 206 |
The \Rpackage{crlmmCopynumber} function no longer stores the |
... | ... |
@@ -221,7 +225,7 @@ scaling by the slope coefficient. Specifically, |
221 | 225 |
\noindent See \cite{Scharpf2010} for details. |
222 | 226 |
|
223 | 227 |
The functions \Rfunction{CA}, \Rfunction{CB}, and |
224 |
-\Rfunction{totalCopyNumber} can be used to extract CN estimates from |
|
228 |
+\Rfunction{totalCopynumber} can be used to extract CN estimates from |
|
225 | 229 |
the \Robject{CNSet} container. |
226 | 230 |
|
227 | 231 |
<<ca>>= |
... | ... |
@@ -235,7 +239,7 @@ ct <- ca+cb |
235 | 239 |
|
236 | 240 |
Alternatively, total copy number can be obtained by |
237 | 241 |
<<totalCopynumber.snps>>= |
238 |
-ct2 <- totalCopyNumber(cnSet, i=snp.index, j=1:5) |
|
242 |
+ct2 <- totalCopynumber(cnSet, i=snp.index, j=1:5) |
|
239 | 243 |
all.equal(ct, ct2) |
240 | 244 |
@ |
241 | 245 |
|
... | ... |
@@ -243,13 +247,13 @@ At nonpolymorphic loci, either the 'CA' or totalCopynumber method can |
243 | 247 |
be used to obtain total copy number. |
244 | 248 |
|
245 | 249 |
<<ca>>= |
246 |
-cn.nonpolymorphic <- CA(cnSet, i=which(!isSnp(obj))) |
|
250 |
+cn.nonpolymorphic <- CA(cnSet, i=which(!isSnp(cnSet))) |
|
247 | 251 |
@ |
248 | 252 |
|
249 | 253 |
Total copy number at both polymorphic and nonpolymorphic loci: |
250 | 254 |
<<totalCopynumber>>= |
251 | 255 |
##cn <- copyNumber(x) |
252 |
-cn <- totalCopyNumber(cnSet, sample(1:nrow(cnSet), 1e4), 1:5) |
|
256 |
+cn <- totalCopynumber(cnSet, sample(1:nrow(cnSet), 1e4), 1:5) |
|
253 | 257 |
apply(cn, 2, median, na.rm=TRUE) |
254 | 258 |
@ |
255 | 259 |
|
... | ... |
@@ -57,6 +57,15 @@ outdir <- paste("/thumper/ctsa/snpmicroarray/rs/ProcessedData/crlmm/", rpath, "/ |
57 | 57 |
datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k" |
58 | 58 |
@ |
59 | 59 |
|
60 |
+Options for controlling RAM with the \Rpackage{ff} package. |
|
61 |
+ |
|
62 |
+<<ldOptions>>= |
|
63 |
+ldPath(outdir) |
|
64 |
+ocProbesets(150e3) |
|
65 |
+ocSamples(200) |
|
66 |
+@ |
|
67 |
+ |
|
68 |
+ |
|
60 | 69 |
\paragraph{About this vignette.} This vignette was created using |
61 | 70 |
Illumina IDAT files that are located in a specific directory on my |
62 | 71 |
computer (\Robject{pathToCels}). Long computations are saved in the |
... | ... |
@@ -105,41 +114,19 @@ grnfiles = all(file.exists(paste(arrayNames, "_Grn.idat", sep=""))) |
105 | 114 |
redfiles = all(file.exists(paste(arrayNames, "_Red.idat", sep=""))) |
106 | 115 |
@ |
107 | 116 |
|
108 |
-<<samplesToProcess2, echo=FALSE>>= |
|
109 |
-## To speed up repeated calls to Sweave |
|
110 |
-RG <- checkExists("RG", .path=outdir, |
|
111 |
- .FUN=readIdatFiles2, |
|
112 |
- sampleSheet=samplesheet, |
|
113 |
- path=dirname(arrayNames[1]), |
|
114 |
- arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
115 |
- saveDate=TRUE) |
|
116 |
-annotation(RG) <- "human370v1c" |
|
117 |
+<<samplesToProcess2>>= |
|
117 | 118 |
crlmmResult <- checkExists("crlmmResult", |
118 | 119 |
.path=outdir, |
119 |
- .FUN=crlmmIllumina2, |
|
120 |
- RG=RG, |
|
121 |
- sns=pData(RG)$ID, |
|
120 |
+ .FUN=crlmm:::crlmmIlluminaV2, |
|
121 |
+ sampleSheet=samplesheet, |
|
122 |
+ path=dirname(arrayNames[1]), |
|
123 |
+ arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
122 | 124 |
returnParams=TRUE, |
123 | 125 |
cnFile=file.path(outdir, "cnFile.rda"), |
124 | 126 |
snpFile=file.path(outdir, "snpFile.rda"), |
125 |
- save.it=TRUE) |
|
126 |
-protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate |
|
127 |
-@ |
|
128 |
- |
|
129 |
-<<samplesToProcess3, eval=FALSE>>= |
|
130 |
-RG <- readIdatFiles2(samplesheet[1:10, ], |
|
131 |
- path=dirname(arrayNames[1]), |
|
132 |
- arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), |
|
133 |
- saveDate=TRUE) |
|
134 |
-annotation(RG) <- "human370v1c" |
|
135 |
-crlmmResult <- crlmmIllumina2(RG=RG, |
|
136 |
- cdfName="human370v1c", |
|
137 |
- sns=pData(RG)$ID, |
|
138 |
- returnParams=TRUE, |
|
139 |
- cnFile=file.path(outdir, "cnFile.rda"), |
|
140 |
- snpFile=file.path(outdir, "snpFile.rda"), |
|
141 |
- save.it=TRUE) |
|
142 |
-protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate |
|
127 |
+ save.ab=TRUE, |
|
128 |
+ cdfName="human370v1c") |
|
129 |
+##protocolData(crlmmResult)$ScanDate <- protocolData(RG)$ScanDate |
|
143 | 130 |
@ |
144 | 131 |
|
145 | 132 |
%\noindent Finally, we load a few of the intermediate files that were |
... | ... |
@@ -163,21 +150,11 @@ allele-specific copy number estimates. A few helper functions for |
163 | 150 |
facilitating the construction of this container have been added to the |
164 | 151 |
inst/scripts directory of this package and can be sourced as follows. |
165 | 152 |
|
166 |
- |
|
167 |
-<<constructContainer1, echo=FALSE>>= |
|
168 |
-##only load if cnSet object has not been created |
|
169 |
-if(!file.exists(file.path(outdir, "cnSet.rda"))){ |
|
170 |
- container <- checkExists("container", |
|
171 |
- .path=outdir, |
|
172 |
- .FUN=constructIlluminaCNSet, |
|
173 |
- crlmmResult=crlmmResult, |
|
174 |
- snpFile=snpFile, |
|
175 |
- cnFile=cnFile) |
|
176 |
-} |
|
177 |
-@ |
|
178 |
- |
|
179 |
-<<constructContainer2, eval=FALSE>>= |
|
180 |
-container <- constructIlluminaCNSet(crlmmResult=crlmmResult, snpFile=snpFile, cnFile=cnFile) |
|
153 |
+<<constructContainer2>>= |
|
154 |
+batch <- as.factor(rep("1", ncol(crlmmResult))) |
|
155 |
+##trace(constructIlluminaCNSet |
|
156 |
+##trace(constructIlluminaAssayData, browser) |
|
157 |
+container <- constructIlluminaCNSet(crlmmResult=crlmmResult, snpFile=file.path(outdir, "snpFile.rda"), cnFile=file.path(outdir, "cnFile.rda"), batch=batch) |
|
181 | 158 |
@ |
182 | 159 |
|
183 | 160 |
As described in the \texttt{copynumber} vignette, two \R{} functions |
... | ... |
@@ -192,10 +169,11 @@ decided at the beginning of the analysis and then propogated to both |
192 | 169 |
the genotyping and copy number estimation steps. Here, we use the |
193 | 170 |
\Rfunction{crlmmCopynumber} to estimate copy number. |
194 | 171 |
|
195 |
-<<estimateCopynumber1, echo=FALSE>>= |
|
196 |
-cnSet <- checkExists("cnSet", .path=outdir, |
|
197 |
- .FUN=crlmmCopynumber, |
|
198 |
- object=container) |
|
172 |
+<<crlmmCopynumber>>= |
|
173 |
+stop() |
|
174 |
+trace(shrinkGenotypeSummaries, browser) |
|
175 |
+options(error=recover) |
|
176 |
+cnSet <- checkExists("cnSet", .path=outdir, .FUN=crlmmCopynumber, object=container) |
|
199 | 177 |
@ |
200 | 178 |
|
201 | 179 |
<<estimateCopynumber, eval=FALSE>>= |
... | ... |
@@ -123,6 +123,10 @@ crlmmCopynumber(object, MIN.SAMPLES=10, SNRMin = 5, MIN.OBS = 1, |
123 | 123 |
|
124 | 124 |
\details{ |
125 | 125 |
|
126 |
+ We suggest a minimum of 10 samples per batch for using |
|
127 |
+ \code{crlmmCopynumber}. 50 or more samples per batch is preferred |
|
128 |
+ and will improve the estimates. |
|
129 |
+ |
|
126 | 130 |
The function crlmmCopynumber uses matrices instead of ff objects |
127 | 131 |
if the ff library is not loaded. When the ff package is loaded, |
128 | 132 |
large data support is enabled. Normalized intensities |