git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48961 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -29,7 +29,7 @@ importClassesFrom(oligoClasses, SnpSuperSet, AlleleSet, CNSet) |
29 | 29 |
importMethodsFrom(oligoClasses, allele, calls, "calls<-", confs, |
30 | 30 |
"confs<-", cnConfidence, "cnConfidence<-", isSnp, |
31 | 31 |
chromosome, position, A, B, |
32 |
- "A<-", "B<-", open, close, lM, flags, |
|
32 |
+ "A<-", "B<-", open, close, flags, |
|
33 | 33 |
batchStatistics, "batchStatistics<-", updateObject) |
34 | 34 |
|
35 | 35 |
|
... | ... |
@@ -190,7 +190,6 @@ genotype <- function(filenames, |
190 | 190 |
verbose=TRUE, |
191 | 191 |
seed=1, |
192 | 192 |
sns, |
193 |
- copynumber=TRUE, |
|
194 | 193 |
probs=rep(1/3, 3), |
195 | 194 |
DF=6, |
196 | 195 |
SNRMin=5, |
... | ... |
@@ -200,24 +199,6 @@ genotype <- function(filenames, |
200 | 199 |
returnParams=TRUE, |
201 | 200 |
badSNP=0.7){ |
202 | 201 |
is.lds <- ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
203 |
- if(!copynumber){ |
|
204 |
- FUN <- ifelse(is.lds, "crlmm2", "crlmm") |
|
205 |
- callSet <- FUN(filenames=filenames, |
|
206 |
- cdfName=cdfName, |
|
207 |
- mixtureSampleSize=mixtureSampleSize, |
|
208 |
- eps=eps, |
|
209 |
- verbose=verbose, |
|
210 |
- sns=sns, |
|
211 |
- probs=probs, |
|
212 |
- DF=DF, |
|
213 |
- SNRMin=SNRMin, |
|
214 |
- recallMin=recallMin, |
|
215 |
- recallRegMin=recallRegMin, |
|
216 |
- gender=gender, |
|
217 |
- returnParams=returnParams, |
|
218 |
- badSNP=badSNP) |
|
219 |
- return(callSet) |
|
220 |
- } |
|
221 | 202 |
if(missing(cdfName)) stop("must specify cdfName") |
222 | 203 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
223 | 204 |
if(missing(sns)) sns <- basename(filenames) |
... | ... |
@@ -308,6 +289,8 @@ genotype <- function(filenames, |
308 | 289 |
open(tmp[["confs"]]) |
309 | 290 |
ffrowapply(snpCall(callSet)[i1:i2, ] <- tmp[["calls"]][i1:i2, ], X=tmp[["calls"]], BATCHBYTES=bb) |
310 | 291 |
ffrowapply(snpCallProbability(callSet)[i1:i2, ] <- tmp[["confs"]][i1:i2, ], X=tmp[["confs"]], BATCHBYTES=bb) |
292 |
+ close(tmp[["calls"]]) |
|
293 |
+ close(tmp[["confs"]]) |
|
311 | 294 |
} else { |
312 | 295 |
calls(callSet)[snp.index, ] <- tmp[["calls"]] |
313 | 296 |
snpCallProbability(callSet)[snp.index, ] <- tmp[["confs"]] |
... | ... |
@@ -859,12 +842,12 @@ shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAM |
859 | 842 |
shrink.corrAA[, k] <- shrink(cor.AA, NN[, 1], DF.PRIOR) |
860 | 843 |
shrink.corrAB[, k] <- shrink(cor.AB, NN[, 2], DF.PRIOR) |
861 | 844 |
shrink.corrBB[, k] <- shrink(cor.BB, NN[, 3], DF.PRIOR) |
862 |
- |
|
845 |
+ ## |
|
863 | 846 |
##--------------------------------------------------------------------------- |
864 | 847 |
## SNPs that we'll use for imputing location/scale of unobserved genotypes |
865 | 848 |
##--------------------------------------------------------------------------- |
866 | 849 |
index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS) |
867 |
- |
|
850 |
+ ## |
|
868 | 851 |
##--------------------------------------------------------------------------- |
869 | 852 |
## Impute sufficient statistics for unobserved genotypes (plate-specific) |
870 | 853 |
##--------------------------------------------------------------------------- |
... | ... |
@@ -880,7 +863,7 @@ shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAM |
880 | 863 |
medianB[[k]] <- res[[2]] |
881 | 864 |
rm(res) |
882 | 865 |
##the NA's in 'medianA' and 'medianB' are monomorphic if MIN.OBS = 1 |
883 |
- |
|
866 |
+ ## |
|
884 | 867 |
## RS: For Monomorphic SNPs a mixture model may be better |
885 | 868 |
## RS: Further, we can improve estimation by borrowing strength across batch |
886 | 869 |
unobserved.index[[1]] <- which(unobservedAA & unobservedAB) |
... | ... |
@@ -902,14 +885,14 @@ shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAM |
902 | 885 |
medianB.AA(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 1])) |
903 | 886 |
medianB.AB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 2])) |
904 | 887 |
medianB.BB(object)[marker.index, ] <- do.call("cbind", lapply(medianB, function(x) x[, 3])) |
905 |
- |
|
888 |
+ ## |
|
906 | 889 |
madA.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1])) |
907 | 890 |
madA.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2])) |
908 | 891 |
madA.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3])) |
909 | 892 |
madB.AA(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1])) |
910 | 893 |
madB.AB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2])) |
911 | 894 |
madB.BB(object)[marker.index, ] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3])) |
912 |
- |
|
895 |
+ ## |
|
913 | 896 |
corrAA(object)[marker.index, ] <- shrink.corrAA |
914 | 897 |
corrAB(object)[marker.index, ] <- shrink.corrAB |
915 | 898 |
corrBB(object)[marker.index, ] <- shrink.corrBB |
... | ... |
@@ -917,7 +900,7 @@ shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAM |
917 | 900 |
tau2A.BB(object)[marker.index,] <- shrink.tau2A.BB |
918 | 901 |
tau2B.AA(object)[marker.index,] <- shrink.tau2B.AA |
919 | 902 |
tau2B.BB(object)[marker.index,] <- shrink.tau2B.BB |
920 |
- if(is.lds) return(TRUE) else retrun(object) |
|
903 |
+ if(is.lds) return(TRUE) else return(object) |
|
921 | 904 |
} |
922 | 905 |
|
923 | 906 |
|
... | ... |
@@ -925,11 +908,7 @@ shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAM |
925 | 908 |
fit.lm1 <- function(strata, |
926 | 909 |
index.list, |
927 | 910 |
object, |
928 |
- SNRMin, |
|
929 | 911 |
MIN.SAMPLES, |
930 |
- MIN.OBS, |
|
931 |
- DF.PRIOR, |
|
932 |
- GT.CONF.THR, |
|
933 | 912 |
THR.NU.PHI, |
934 | 913 |
MIN.NU, |
935 | 914 |
MIN.PHI, |
... | ... |
@@ -970,6 +949,7 @@ fit.lm1 <- function(strata, |
970 | 949 |
flags <- as.matrix(flags(object)[snps, ]) |
971 | 950 |
for(k in seq(along=batches)){ |
972 | 951 |
B <- batches[[k]] |
952 |
+ if(length(B) < MIN.SAMPLES) next() |
|
973 | 953 |
this.batch <- unique(as.character(batch(object)[B])) |
974 | 954 |
medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k]) |
975 | 955 |
medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k]) |
... | ... |
@@ -981,9 +961,6 @@ fit.lm1 <- function(strata, |
981 | 961 |
nuA[, k] <- res[1, ] |
982 | 962 |
phiA[, k] <- res[2, ] |
983 | 963 |
rm(res) |
984 |
- ##res <- fit.wls(allele="A", Ystar=YA, W=wA, Ns=Ns) |
|
985 |
- ##nuA[, J] <- res[[1]] |
|
986 |
- ##phiA[, J] <- res[[2]] |
|
987 | 964 |
res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns) |
988 | 965 |
nuB[, k] <- res[1, ] |
989 | 966 |
phiB[, k] <- res[2, ] |
... | ... |
@@ -996,14 +973,6 @@ fit.lm1 <- function(strata, |
996 | 973 |
phiA[phiA < MIN.PHI] <- MIN.PHI |
997 | 974 |
phiB[phiB < MIN.PHI] <- MIN.PHI |
998 | 975 |
} |
999 |
-## cA[cA < 0.05] <- 0.05 |
|
1000 |
-## cB[cB < 0.05] <- 0.05 |
|
1001 |
-## cA[cA > 5] <- 5 |
|
1002 |
-## cB[cB > 5] <- 5 |
|
1003 |
-## cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA)) |
|
1004 |
-## cB <- matrix(as.integer(cB*100), nrow(cB), ncol(cB)) |
|
1005 |
-## CA(object)[snps, ] <- cA |
|
1006 |
-## CB(object)[snps, ] <- cB |
|
1007 | 976 |
nuA(object)[snps, ] <- nuA |
1008 | 977 |
nuB(object)[snps, ] <- nuB |
1009 | 978 |
phiA(object)[snps, ] <- phiA |
... | ... |
@@ -1019,11 +988,7 @@ fit.lm1 <- function(strata, |
1019 | 988 |
fit.lm2 <- function(strata, |
1020 | 989 |
index.list, |
1021 | 990 |
object, |
1022 |
- SNRMin, |
|
1023 | 991 |
MIN.SAMPLES, |
1024 |
- MIN.OBS, |
|
1025 |
- DF.PRIOR, |
|
1026 |
- GT.CONF.THR, |
|
1027 | 992 |
THR.NU.PHI, |
1028 | 993 |
MIN.NU, |
1029 | 994 |
MIN.PHI, |
... | ... |
@@ -1034,24 +999,16 @@ fit.lm2 <- function(strata, |
1034 | 999 |
batches <- split(seq_along(batch(object)), as.character(batch(object))) |
1035 | 1000 |
batches <- batches[sapply(batches, length) >= MIN.SAMPLES] |
1036 | 1001 |
|
1037 |
- |
|
1038 | 1002 |
ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object)) |
1039 | 1003 |
flags <- as.matrix(flags(object)[ii, ]) |
1040 | 1004 |
fns <- featureNames(object)[ii] |
1041 | 1005 |
fns.noflags <- fns[rowSums(flags, na.rm=T) == 0] |
1042 | 1006 |
snp.index <- sample(match(fns.noflags, featureNames(object)), 5000) |
1043 | 1007 |
|
1044 |
- ##flags <- as.matrix(snpflags[,]) |
|
1045 |
- ##noflags <- rowSums(flags, na.rm=TRUE) == 0 ##NA's for unevaluated batches |
|
1046 |
- |
|
1047 | 1008 |
nuA.np <- as.matrix(nuA(object)[marker.index, ]) |
1048 | 1009 |
phiA.np <- as.matrix(phiA(object)[marker.index, ]) |
1049 | 1010 |
tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, ]) |
1050 | 1011 |
|
1051 |
- ##nuA.np <- phiA.np <- sig2A.np <- matrix(NA, length(marker.index), length(unique(batch(object)))) |
|
1052 |
- ## for imputation, we need the corresponding parameters of the snps |
|
1053 |
- ##NN <- min(10e3, length(which(ii & noflags))) |
|
1054 |
- ##snp.ind <- sample(which(ii & noflags), NN) |
|
1055 | 1012 |
nuA.snp <- as.matrix(nuA(object)[snp.index, ]) |
1056 | 1013 |
nuB.snp <- as.matrix(nuB(object)[snp.index, ]) |
1057 | 1014 |
phiA.snp <- as.matrix(phiA(object)[snp.index, ]) |
... | ... |
@@ -1059,70 +1016,24 @@ fit.lm2 <- function(strata, |
1059 | 1016 |
medianA.AA <- as.matrix(medianA.AA(object)[snp.index,]) |
1060 | 1017 |
medianB.BB <- as.matrix(medianB.BB(object)[snp.index,]) |
1061 | 1018 |
|
1062 |
- |
|
1063 |
- |
|
1064 | 1019 |
medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index,]) |
1065 |
- |
|
1066 |
-## nnuA.snp <- as.matrix(physical(lM(object))$nuA[snp.ind,]) |
|
1067 |
-## pphiA.snp <- as.matrix(physical(lM(object))$phiA[snp.ind,]) |
|
1068 |
-## nnuB.snp <- as.matrix(physical(lM(object))$nuB[snp.ind,]) |
|
1069 |
-## pphiB.snp <- as.matrix(physical(lM(object))$phiB[snp.ind,]) |
|
1070 |
- |
|
1071 |
-## AA.snp <- as.matrix(A(object)[snp.ind, ]) |
|
1072 |
-## BB.snp <- as.matrix(B(object)[snp.ind, ]) |
|
1073 |
-## NNORM.snp <- as.matrix(normal[snp.ind, ]) |
|
1074 |
-## NORM.np <- as.matrix(normal[snps, ]) |
|
1075 |
-## AA.np <- as.matrix(A(object)[marker.index, ]) |
|
1076 |
-## GG <- as.matrix(calls(object)[snp.ind, ]) |
|
1077 |
-## CP <- as.matrix(snpCallProbability(object)[snp.ind, ]) |
|
1078 | 1020 |
for(k in seq_along(batches)){ |
1079 | 1021 |
B <- batches[[k]] |
1080 | 1022 |
this.batch <- unique(as.character(batch(object)[B])) |
1081 |
-## phiA.snp <- pphiA.snp[, J] |
|
1082 |
-## phiB.snp <- pphiB.snp[, J] |
|
1083 |
-## A.snp <- AA.snp[, k] |
|
1084 |
-## B.snp <- BB.snp[, k] |
|
1085 |
-## NORM.snp <- NNORM.snp[, k] |
|
1086 |
-## G <- GG[, k] |
|
1087 |
-## xx <- CP[, k] |
|
1088 |
-## highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1089 |
-## G <- G*highConf*NORM.snp |
|
1090 |
-## G[G==0] <- NA |
|
1091 |
- ##nonpolymorphic |
|
1092 |
-## A.np <- AA.np[, k] |
|
1093 |
-## Ns <- applyByGenotype(matrix(1, nrow(G), ncol(G)), rowSums, G) |
|
1094 |
-## muA <- applyByGenotype(A.snp, rowMedians, G) |
|
1095 |
-## muB <- applyByGenotype(B.snp, rowMedians, G) |
|
1096 |
-## muA <- muA[, 1] |
|
1097 |
-## muB <- muB[, 3] |
|
1098 |
-## X <- cbind(1, log2(c(muA, muB))) |
|
1099 | 1023 |
X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k]))) |
1100 | 1024 |
Y <- log2(c(phiA.snp, phiB.snp)) |
1101 | 1025 |
betahat <- solve(crossprod(X), crossprod(X, Y)) |
1102 |
- ## |
|
1103 |
-## mus <- rowMedians(A.np * NORM.np[, k], na.rm=TRUE) |
|
1104 |
-## averaging across markers, is there a difference in the |
|
1105 |
-## typical AA intensity for SNPs and the AA intensity for |
|
1106 |
-## nonpolymorphic loci |
|
1107 |
-## crosshyb <- max(median(muA) - median(mus), 0) |
|
1108 | 1026 |
crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0) |
1109 |
-## X <- cbind(1, log2(mus+crosshyb)) |
|
1110 | 1027 |
X <- cbind(1, log2(medianA.AA.np[, k] + crosshyb)) |
1111 | 1028 |
logPhiT <- X %*% betahat |
1112 | 1029 |
phiA.np[, k] <- 2^(logPhiT) |
1113 | 1030 |
nuA.np[, k] <- medianA.AA.np[,k]-2*phiA.np[, k] |
1114 | 1031 |
## cA[, k] <- 1/phiA.np[, J] * (A.np - nuA.np[, J]) |
1115 |
-## sig2A.np[, J] <- rowMAD(log2(A.np*NORM.np[, k]), na.rm=TRUE) |
|
1116 |
-## rm(NORM.snp, highConf, xx, G, Ns, A.np, X, Y, betahat, mus, logPhiT) |
|
1117 |
-## gc() |
|
1118 | 1032 |
} |
1119 | 1033 |
if(THR.NU.PHI){ |
1120 | 1034 |
nuA.np[nuA.np < MIN.NU] <- MIN.NU |
1121 | 1035 |
phiA.np[phiA.np < MIN.PHI] <- MIN.PHI |
1122 | 1036 |
} |
1123 |
-## cA[cA < 0.05] <- 0.05 |
|
1124 |
-## cA[cA > 5] <- 5 |
|
1125 |
-## cA <- matrix(as.integer(cA*100), nrow(cA), ncol(cA)) |
|
1126 | 1037 |
nuA(object)[marker.index, ] <- nuA.np |
1127 | 1038 |
phiA(object)[marker.index, ] <- phiA.np |
1128 | 1039 |
if(is.lds) { close(object); return(TRUE)} |
... | ... |
@@ -1409,17 +1320,13 @@ fit.lm3 <- function(strata, |
1409 | 1320 |
phiB(object)[marker.index, ] <- phiB |
1410 | 1321 |
phiPrimeA(object)[marker.index, ] <- phiA2 |
1411 | 1322 |
phiPrimeB(object)[marker.index, ] <- phiB2 |
1412 |
- TRUE |
|
1323 |
+ if(is.lds) {close(object); return(TRUE)} else return(object) |
|
1413 | 1324 |
} |
1414 | 1325 |
|
1415 | 1326 |
fit.lm4 <- function(strata, |
1416 | 1327 |
index.list, |
1417 | 1328 |
object, |
1418 |
- SNRMin, |
|
1419 | 1329 |
MIN.SAMPLES, |
1420 |
- MIN.OBS, |
|
1421 |
- DF.PRIOR, |
|
1422 |
- GT.CONF.THR, |
|
1423 | 1330 |
THR.NU.PHI, |
1424 | 1331 |
MIN.NU, |
1425 | 1332 |
MIN.PHI, |
... | ... |
@@ -1606,379 +1513,6 @@ processCEL2 <- function(i, filenames, row.names, A, seed, cdfName, pkgname){ |
1606 | 1513 |
} |
1607 | 1514 |
|
1608 | 1515 |
|
1609 |
-# steps: quantile normalize hapmap: create 1m_reference_cn.rda object |
|
1610 |
-##cnrma <- function(filenames, cdfName, row.names, sns, seed=1, verbose=FALSE){ |
|
1611 |
-## if(missing(cdfName)) stop("must specify cdfName") |
|
1612 |
-## pkgname <- getCrlmmAnnotationName(cdfName) |
|
1613 |
-## require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available") |
|
1614 |
-## if (missing(sns)) sns <- basename(filenames) |
|
1615 |
-## loader("npProbesFid.rda", .crlmmPkgEnv, pkgname) |
|
1616 |
-## fid <- getVarInEnv("npProbesFid") |
|
1617 |
-## fid <- fid[match(row.names, names(fid))] |
|
1618 |
-## set.seed(seed) |
|
1619 |
-## idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
1620 |
-## SKW <- vector("numeric", length(filenames)) |
|
1621 |
-## NP <- matrix(NA, length(fid), length(filenames)) |
|
1622 |
-## verbose <- TRUE |
|
1623 |
-## if(verbose){ |
|
1624 |
-## message("Processing ", length(filenames), " files.") |
|
1625 |
-## if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
1626 |
-## } |
|
1627 |
-## if(cdfName=="genomewidesnp6"){ |
|
1628 |
-## loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
1629 |
-## } |
|
1630 |
-## if(cdfName=="genomewidesnp5"){ |
|
1631 |
-## loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname) |
|
1632 |
-## } |
|
1633 |
-## reference <- getVarInEnv("reference") |
|
1634 |
-## ##if(!is.matrix(reference)) stop("target distribution for quantile normalization not available.") |
|
1635 |
-## for(i in seq(along=filenames)){ |
|
1636 |
-## y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
1637 |
-## x <- log2(y[idx2]) |
|
1638 |
-## SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
1639 |
-## rm(x) |
|
1640 |
-## NP[, i] <- as.integer(normalize.quantiles.use.target(y, target=reference)) |
|
1641 |
-## if (verbose) |
|
1642 |
-## if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
1643 |
-## else cat(".") |
|
1644 |
-## } |
|
1645 |
-## dimnames(NP) <- list(names(fid), sns) |
|
1646 |
-## ##dimnames(NP) <- list(map[, "man_fsetid"], sns) |
|
1647 |
-## ##res3 <- list(NP=NP, SKW=SKW) |
|
1648 |
-## cat("\n") |
|
1649 |
-## return(NP) |
|
1650 |
-##} |
|
1651 |
- |
|
1652 |
-##getFlags <- function(object, PHI.THR){ |
|
1653 |
-## batch <- unique(object$batch) |
|
1654 |
-## nuA <- getParam(object, "nuA", batch) |
|
1655 |
-## nuB <- getParam(object, "nuB", batch) |
|
1656 |
-## phiA <- getParam(object, "phiA", batch) |
|
1657 |
-## phiB <- getParam(object, "phiB", batch) |
|
1658 |
-## negativeNus <- nuA < 1 | nuB < 1 |
|
1659 |
-## negativePhis <- phiA < PHI.THR | phiB < PHI.THR |
|
1660 |
-## negativeCoef <- negativeNus | negativePhis |
|
1661 |
-## notfinitePhi <- !is.finite(phiA) | !is.finite(phiB) |
|
1662 |
-## flags <- negativeCoef | notfinitePhi |
|
1663 |
-## return(flags) |
|
1664 |
-##} |
|
1665 |
- |
|
1666 |
- |
|
1667 |
-##instantiateObjects <- function(object, cnOptions){ |
|
1668 |
-## Ns <- matrix(NA, nrow(object), 5) |
|
1669 |
-## colnames(Ns) <- c("A", "B", "AA", "AB", "BB") |
|
1670 |
-## vA <- vB <- muB <- muA <- Ns |
|
1671 |
-## normal <- matrix(TRUE, nrow(object), ncol(object)) |
|
1672 |
-## dimnames(normal) <- list(featureNames(object), sampleNames(object)) |
|
1673 |
-## tmp.objects <- list(vA=vA, |
|
1674 |
-## vB=vB, |
|
1675 |
-## muB=muB, |
|
1676 |
-## muA=muA, |
|
1677 |
-## Ns=Ns, |
|
1678 |
-## normal=normal) |
|
1679 |
-## return(tmp.objects) |
|
1680 |
-##} |
|
1681 |
-## |
|
1682 |
-##thresholdCopynumber <- function(object){ |
|
1683 |
-## ca <- CA(object) |
|
1684 |
-## cb <- CB(object) |
|
1685 |
-## ca[ca < 0.05] <- 0.05 |
|
1686 |
-## ca[ca > 5] <- 5 |
|
1687 |
-## cb[cb < 0.05] <- 0.05 |
|
1688 |
-## cb[cb > 5] <- 5 |
|
1689 |
-## CA(object) <- ca |
|
1690 |
-## CB(object) <- cb |
|
1691 |
-## return(object) |
|
1692 |
-##} |
|
1693 |
-## |
|
1694 |
-####linear model parameters |
|
1695 |
-##lm.parameters <- function(object, batch){##cnOptions){ |
|
1696 |
-## fD <- fData(object) |
|
1697 |
-## ##batch <- object$batch |
|
1698 |
-## uplate <- unique(batch) |
|
1699 |
-## parameterNames <- c(paste("tau2A", uplate, sep="_"), |
|
1700 |
-## paste("tau2B", uplate, sep="_"), |
|
1701 |
-## paste("sig2A", uplate, sep="_"), |
|
1702 |
-## paste("sig2B", uplate, sep="_"), |
|
1703 |
-## paste("nuA", uplate, sep="_"), |
|
1704 |
-## paste("nuA.se", uplate, sep="_"), |
|
1705 |
-## paste("nuB", uplate, sep="_"), |
|
1706 |
-## paste("nuB.se", uplate, sep="_"), |
|
1707 |
-## paste("phiA", uplate, sep="_"), |
|
1708 |
-## paste("phiA.se", uplate, sep="_"), |
|
1709 |
-## paste("phiB", uplate, sep="_"), |
|
1710 |
-## paste("phiB.se", uplate, sep="_"), |
|
1711 |
-## paste("phiAX", uplate, sep="_"), |
|
1712 |
-## paste("phiBX", uplate, sep="_"), |
|
1713 |
-## paste("corr", uplate, sep="_"), |
|
1714 |
-## paste("corrA.BB", uplate, sep="_"), |
|
1715 |
-## paste("corrB.AA", uplate, sep="_")) |
|
1716 |
-## pMatrix <- data.frame(matrix(numeric(0), |
|
1717 |
-## nrow(object), |
|
1718 |
-## length(parameterNames)), |
|
1719 |
-## row.names=featureNames(object)) |
|
1720 |
-## colnames(pMatrix) <- parameterNames |
|
1721 |
-## fD2 <- cbind(fD, pMatrix) |
|
1722 |
-## new("AnnotatedDataFrame", data=fD2, |
|
1723 |
-## varMetadata=data.frame(labelDescription=colnames(fD2), |
|
1724 |
-## row.names=colnames(fD2))) |
|
1725 |
-##} |
|
1726 |
-## |
|
1727 |
-##nonpolymorphic <- function(object, cnOptions, tmp.objects){ |
|
1728 |
-## batch <- unique(object$batch) |
|
1729 |
-## CHR <- unique(chromosome(object)) |
|
1730 |
-## goodSnps <- function(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR){ |
|
1731 |
-## Ns <- tmp.objects[["Ns"]] |
|
1732 |
-## ##Ns <- get("Ns", envir) |
|
1733 |
-## flags <- getFlags(object, PHI.THR) |
|
1734 |
-## fewAA <- Ns[, "AA"] < nAA.THR |
|
1735 |
-## fewBB <- Ns[, "BB"] < nBB.THR |
|
1736 |
-## flagsA <- flags | fewAA |
|
1737 |
-## flagsB <- flags | fewBB |
|
1738 |
-## flags <- list(A=flagsA, B=flagsB) |
|
1739 |
-## return(flags) |
|
1740 |
-## } |
|
1741 |
-## nAA.THR <- cnOptions$nHOM.THR |
|
1742 |
-## nBB.THR <- cnOptions$nHOM.THR |
|
1743 |
-## PHI.THR <- cnOptions$PHI.THR |
|
1744 |
-## snpflags <- goodSnps(object, PHI.THR, tmp.objects, nAA.THR, nBB.THR) |
|
1745 |
-## flagsA <- snpflags$A |
|
1746 |
-## flagsB <- snpflags$B |
|
1747 |
-#### if(all(flagsA) | all(flagsB)) stop("all snps are flagged") |
|
1748 |
-## nuA <- getParam(object, "nuA", batch) |
|
1749 |
-## nuB <- getParam(object, "nuB", batch) |
|
1750 |
-## phiA <- getParam(object, "phiA", batch) |
|
1751 |
-## phiB <- getParam(object, "phiB", batch) |
|
1752 |
-## sns <- sampleNames(object) |
|
1753 |
-## muA <- tmp.objects[["muA"]] |
|
1754 |
-## muB <- tmp.objects[["muB"]] |
|
1755 |
-## A <- A(object) |
|
1756 |
-## B <- B(object) |
|
1757 |
-#### CA <- CA(object) |
|
1758 |
-#### CB <- CB(object) |
|
1759 |
-## if(CHR == 23){ |
|
1760 |
-## phiAX <- getParam(object, "phiAX", batch) |
|
1761 |
-## phiBX <- getParam(object, "phiBX", batch) |
|
1762 |
-## } |
|
1763 |
-## ##--------------------------------------------------------------------------- |
|
1764 |
-## ## Train on unflagged SNPs |
|
1765 |
-## ##--------------------------------------------------------------------------- |
|
1766 |
-## ##Might be best to train using the X chromosome, since for the |
|
1767 |
-## ##X phi and nu have been adjusted for cross-hybridization |
|
1768 |
-## ##plateInd <- plate == uplate[p] |
|
1769 |
-## ##muA <- muA[!flagsA, p, c("A", "AA")] |
|
1770 |
-## ##muB <- muB[!flagsB, p, c("B", "BB")] |
|
1771 |
-## muA <- muA[!flagsA, "AA"] |
|
1772 |
-## muB <- muB[!flagsB, "BB"] |
|
1773 |
-## X <- cbind(1, log2(c(muA, muB))) |
|
1774 |
-## Y <- log2(c(phiA[!flagsA], phiB[!flagsB])) |
|
1775 |
-## if(nrow(X) > 5000){ |
|
1776 |
-## ix <- sample(1:nrow(X), 5000) |
|
1777 |
-## } else { |
|
1778 |
-## ix <- 1:nrow(X) |
|
1779 |
-## } |
|
1780 |
-## betahat <- solve(crossprod(X[ix, ]), crossprod(X[ix, ], Y[ix])) |
|
1781 |
-## normal <- tmp.objects[["normal"]][!isSnp(object), ] |
|
1782 |
-## if(CHR == 23){ |
|
1783 |
-## ##normalNP <- envir[["normalNP"]] |
|
1784 |
-## ##normalNP <- normalNP[, plate==uplate[p]] |
|
1785 |
-## ##nuT <- envir[["nuT"]] |
|
1786 |
-## ##phiT <- envir[["phiT"]] |
|
1787 |
-## |
|
1788 |
-## ##cnvs <- envir[["cnvs"]] |
|
1789 |
-## ##loader("cnProbes.rda", pkgname=pkgname, envir=.crlmmPkgEnv) |
|
1790 |
-## ##cnProbes <- get("cnProbes", envir=.crlmmPkgEnv) |
|
1791 |
-## ##cnProbes <- cnProbes[match(cnvs, rownames(cnProbes)), ] |
|
1792 |
-## |
|
1793 |
-## ##For build Hg18 |
|
1794 |
-## ##http://genome.ucsc.edu/cgi-bin/hgGateway |
|
1795 |
-## ##pseudo-autosomal regions on X |
|
1796 |
-## ##chrX:1-2,709,520 and chrX:154584237-154913754, respectively |
|
1797 |
-## ##par:pseudo-autosomal regions |
|
1798 |
-## pseudoAR <- position(object) < 2709520 | (position(object) > 154584237 & position(object) < 154913754) |
|
1799 |
-## ##pseudoAR <- cnProbes[, "position"] < 2709520 | (cnProbes[, "position"] > 154584237 & cnProbes[, "position"] < 154913754) |
|
1800 |
-## ##in case some of the cnProbes are not annotated |
|
1801 |
-## pseudoAR[is.na(pseudoAR)] <- FALSE |
|
1802 |
-## pseudoAR <- pseudoAR[!isSnp(object)] |
|
1803 |
-## ##gender <- envir[["gender"]] |
|
1804 |
-## gender <- object$gender |
|
1805 |
-## obj1 <- object[!isSnp(object), ] |
|
1806 |
-## A.male <- A(obj1[, gender==1]) |
|
1807 |
-## mu1 <- rowMedians(A.male, na.rm=TRUE) |
|
1808 |
-## ##mu1 <- rowMedians(NP[, gender=="male"], na.rm=TRUE) |
|
1809 |
-## ##mu2 <- rowMedians(NP[, gender=="female"], na.rm=TRUE) |
|
1810 |
-## A.female <- A(obj1[, gender==2]) |
|
1811 |
-## mu2 <- rowMedians(A.female, na.rm=TRUE) |
|
1812 |
-## mus <- log2(cbind(mu1, mu2)) |
|
1813 |
-## X.men <- cbind(1, mus[, 1]) |
|
1814 |
-## X.fem <- cbind(1, mus[, 2]) |
|
1815 |
-## |
|
1816 |
-## Yhat1 <- as.numeric(X.men %*% betahat) |
|
1817 |
-## Yhat2 <- as.numeric(X.fem %*% betahat) |
|
1818 |
-## phi1 <- 2^(Yhat1) |
|
1819 |
-## phi2 <- 2^(Yhat2) |
|
1820 |
-## nu1 <- 2^(mus[, 1]) - phi1 |
|
1821 |
-## nu2 <- 2^(mus[, 2]) - 2*phi2 |
|
1822 |
-## |
|
1823 |
-## if(any(pseudoAR)){ |
|
1824 |
-## nu1[pseudoAR] <- 2^(mus[pseudoAR, 1]) - 2*phi1[pseudoAR] |
|
1825 |
-## } |
|
1826 |
-#### CT1 <- 1/phi1*(A.male-nu1) |
|
1827 |
-#### CT2 <- 1/phi2*(A.female-nu2) |
|
1828 |
-#### CA <- CA(obj1) |
|
1829 |
-#### CA[, gender==1] <- CT1 |
|
1830 |
-#### CA[, gender==2] <- CT2 |
|
1831 |
-#### CA(object)[!isSnp(object), ] <- CA |
|
1832 |
-## ##only using females to compute the variance |
|
1833 |
-## ##normalNP[, gender=="male"] <- NA |
|
1834 |
-## normal[, gender==1] <- NA |
|
1835 |
-## sig2A <- getParam(object, "sig2A", batch) |
|
1836 |
-## normal.f <- normal[, object$gender==2] |
|
1837 |
-## sig2A[!isSnp(object)] <- rowMAD(log2(A.female*normal.f), na.rm=TRUE)^2 |
|
1838 |
-## sig2A[!isSnp(object) & is.na(sig2A)] <- median(sig2A[!isSnp(object)], na.rm=TRUE) |
|
1839 |
-## ##sig2T[, p] <- rowMAD(log2(NP*normalNP), na.rm=TRUE)^2 |
|
1840 |
-## object <- pr(object, "sig2A", batch, sig2A) |
|
1841 |
-## |
|
1842 |
-## nuA[!isSnp(object)] <- nu2 |
|
1843 |
-## phiA[!isSnp(object)] <- phi2 |
|
1844 |
-## |
|
1845 |
-## THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
1846 |
-## if(THR.NU.PHI){ |
|
1847 |
-## verbose <- cnOptions$verbose |
|
1848 |
-## ##Assign values to object |
|
1849 |
-## object <- pr(object, "nuA", batch, nuA) |
|
1850 |
-## object <- pr(object, "phiA", batch, phiA) |
|
1851 |
-## ##if(verbose) message("Thresholding nu and phi") |
|
1852 |
-## object <- thresholdModelParams(object, cnOptions) |
|
1853 |
-## } else { |
|
1854 |
-## object <- pr(object, "nuA", batch, nuA) |
|
1855 |
-## object <- pr(object, "phiA", batch, phiA) |
|
1856 |
-## } |
|
1857 |
-## } else { |
|
1858 |
-## A <- A(object)[!isSnp(object), ] |
|
1859 |
-## mus <- rowMedians(A * normal, na.rm=TRUE) |
|
1860 |
-## crosshyb <- max(median(muA) - median(mus), 0) |
|
1861 |
-## X <- cbind(1, log2(mus+crosshyb)) |
|
1862 |
-## logPhiT <- X %*% betahat |
|
1863 |
-## phiA[!isSnp(object)] <- 2^(logPhiT) |
|
1864 |
-## nuA[!isSnp(object)] <- mus-2*phiA[!isSnp(object)] |
|
1865 |
-## |
|
1866 |
-## THR.NU.PHI <- cnOptions$THR.NU.PHI |
|
1867 |
-## if(THR.NU.PHI){ |
|
1868 |
-## verbose <- cnOptions$verbose |
|
1869 |
-## ##Assign values to object |
|
1870 |
-## object <- pr(object, "nuA", batch, nuA) |
|
1871 |
-## object <- pr(object, "phiA", batch, phiA) |
|
1872 |
-## ##if(verbose) message("Thresholding nu and phi") |
|
1873 |
-## object <- thresholdModelParams(object, cnOptions) |
|
1874 |
-## ##reassign values (now thresholded at MIN.NU and MIN.PHI |
|
1875 |
-## nuA <- getParam(object, "nuA", batch) |
|
1876 |
-## phiA <- getParam(object, "phiA", batch) |
|
1877 |
-## } |
|
1878 |
-## ##CA(object)[!isSnp(object), ] <- 1/phiA[!isSnp(object)]*(A - nuA[!isSnp(object)]) |
|
1879 |
-## sig2A <- getParam(object, "sig2A", batch) |
|
1880 |
-## sig2A[!isSnp(object)] <- rowMAD(log2(A*normal), na.rm=TRUE)^2 |
|
1881 |
-## object <- pr(object, "sig2A", batch, sig2A) |
|
1882 |
-## ##added |
|
1883 |
-## object <- pr(object, "nuA", batch, nuA) |
|
1884 |
-## object <- pr(object, "phiA", batch, phiA) |
|
1885 |
-## } |
|
1886 |
-## return(object) |
|
1887 |
-##} |
|
1888 |
-## |
|
1889 |
-##sufficient statistics on the intensity scale |
|
1890 |
-##withinGenotypeMoments <- function(object, cnOptions, tmp.objects){ |
|
1891 |
-## normal <- tmp.objects[["normal"]] |
|
1892 |
-## ## muA, muB: robust estimates of the within-genotype center (intensity scale) |
|
1893 |
-## muA <- tmp.objects[["muA"]] |
|
1894 |
-## muB <- tmp.objects[["muB"]] |
|
1895 |
-## ## vA, vB: robust estimates of the within-genotype variance (intensity scale) |
|
1896 |
-## vA <- tmp.objects[["vA"]] |
|
1897 |
-## vB <- tmp.objects[["vB"]] |
|
1898 |
-## Ns <- tmp.objects[["Ns"]] |
|
1899 |
-## G <- snpCall(object) |
|
1900 |
-## GT.CONF.THR <- cnOptions$GT.CONF.THR |
|
1901 |
-## CHR <- unique(chromosome(object)) |
|
1902 |
-## A <- A(object) |
|
1903 |
-## B <- B(object) |
|
1904 |
-#### highConf <- (1-exp(-confs(object)/1000)) > GT.CONF.THR |
|
1905 |
-## xx <- snpCallProbability(object) |
|
1906 |
-## highConf <- (1-exp(-xx/1000)) > GT.CONF.THR |
|
1907 |
-## ##highConf <- confs(object) > GT.CONF.THR |
|
1908 |
-## ##highConf <- highConf > GT.CONF.THR |
|
1909 |
-## if(CHR == 23){ |
|
1910 |
-## gender <- object$gender |
|
1911 |
-#### gender <- envir[["gender"]] |
|
1912 |
-## IX <- matrix(gender, nrow(G), ncol(G), byrow=TRUE) |
|
1913 |
-#### IX <- IX == "female" |
|
1914 |
-## IX <- IX == 2 ##2=female, 1=male |
|
1915 |
-## } else IX <- matrix(TRUE, nrow(G), ncol(G)) |
|
1916 |
-## index <- GT.B <- GT.A <- vector("list", 3) |
|
1917 |
-## names(index) <- names(GT.B) <- names(GT.A) <- c("AA", "AB", "BB") |
|
1918 |
-## ##-------------------------------------------------- |
|
1919 |
-## ##within-genotype sufficient statistics |
|
1920 |
-## ##-------------------------------------------------- |
|
1921 |
-## ##GT.B <- GT.A <- list() |
|
1922 |
-## snpIndicator <- matrix(isSnp(object), nrow(object), ncol(object)) ##RS: added |
|
1923 |
-## for(j in 1:3){ |
|
1924 |
-## GT <- G==j & highConf & IX & snpIndicator |
|
1925 |
-## GT <- GT * normal |
|
1926 |
-## Ns[, j+2] <- rowSums(GT, na.rm=TRUE) |
|
1927 |
-## GT[GT == FALSE] <- NA |
|
1928 |
-## GT.A[[j]] <- GT*A |
|
1929 |
-## GT.B[[j]] <- GT*B |
|
1930 |
-## index[[j]] <- which(Ns[, j+2] > 0 & isSnp(object)) ##RS: added |
|
1931 |
-## muA[, j+2] <- rowMedians(GT.A[[j]], na.rm=TRUE) |
|
1932 |
-## muB[, j+2] <- rowMedians(GT.B[[j]], na.rm=TRUE) |
|
1933 |
-## vA[, j+2] <- rowMAD(GT.A[[j]], na.rm=TRUE) |
|
1934 |
-## vB[, j+2] <- rowMAD(GT.B[[j]], na.rm=TRUE) |
|
1935 |
-## |
|
1936 |
-## ##Shrink towards the typical variance |
|
1937 |
-## DF <- Ns[, j+2]-1 |
|
1938 |
-## DF[DF < 1] <- 1 |
|
1939 |
-## v0A <- median(vA[, j+2], na.rm=TRUE) |
|
1940 |
-## v0B <- median(vB[, j+2], na.rm=TRUE) |
|
1941 |
-## if(v0A == 0) v0A <- NA |
|
1942 |
-## if(v0B == 0) v0B <- NA |
|
1943 |
-## DF.PRIOR <- cnOptions$DF.PRIOR |
|
1944 |
-## vA[, j+2] <- (vA[, j+2]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1945 |
-## vA[is.na(vA[, j+2]), j+2] <- v0A |
|
1946 |
-## vB[, j+2] <- (vB[, j+2]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1947 |
-## vB[is.na(vB[, j+2]), j+2] <- v0B |
|
1948 |
-## } |
|
1949 |
-## if(CHR == 23){ |
|
1950 |
-## k <- 1 |
|
1951 |
-## for(j in c(1,3)){ |
|
1952 |
-## GT <- G==j & highConf & !IX |
|
1953 |
-## Ns[, k] <- rowSums(GT) |
|
1954 |
-## GT[GT == FALSE] <- NA |
|
1955 |
-## muA[, k] <- rowMedians(GT*A, na.rm=TRUE) |
|
1956 |
-## muB[, k] <- rowMedians(GT*B, na.rm=TRUE) |
|
1957 |
-## vA[, k] <- rowMAD(GT*A, na.rm=TRUE) |
|
1958 |
-## vB[, k] <- rowMAD(GT*B, na.rm=TRUE) |
|
1959 |
-## |
|
1960 |
-## DF <- Ns[, k]-1 |
|
1961 |
-## DF[DF < 1] <- 1 |
|
1962 |
-## v0A <- median(vA[, k], na.rm=TRUE) |
|
1963 |
-## v0B <- median(vB[, k], na.rm=TRUE) |
|
1964 |
-## vA[, k] <- (vA[, k]*DF + v0A*DF.PRIOR)/(DF.PRIOR+DF) |
|
1965 |
-## vA[is.na(vA[, k]), k] <- v0A |
|
1966 |
-## vB[, k] <- (vB[, k]*DF + v0B*DF.PRIOR)/(DF.PRIOR+DF) |
|
1967 |
-## vB[is.na(vB[, k]), k] <- v0B |
|
1968 |
-## k <- k+1 |
|
1969 |
-## } |
|
1970 |
-## } |
|
1971 |
-## tmp.objects[["Ns"]] <- Ns |
|
1972 |
-## tmp.objects[["vA"]] <- vA |
|
1973 |
-## tmp.objects[["vB"]] <- vB |
|
1974 |
-## tmp.objects[["muA"]] <- muA |
|
1975 |
-## tmp.objects[["muB"]] <- muB |
|
1976 |
-## tmp.objects$index <- index |
|
1977 |
-## tmp.objects$GT.A <- GT.A |
|
1978 |
-## tmp.objects$GT.B <- GT.B |
|
1979 |
-## return(tmp.objects) |
|
1980 |
-##} |
|
1981 |
- |
|
1982 | 1516 |
imputeCenter <- function(muA, muB, index.complete, index.missing){ |
1983 | 1517 |
index <- index.missing |
1984 | 1518 |
mnA <- muA |
... | ... |
@@ -2011,7 +1545,7 @@ indexComplete <- function(NN, medianA, medianB, MIN.OBS){ |
2011 | 1545 |
imputeCentersForMonomorphicSnps <- function(medianA, medianB, index.complete, unobserved.index){ |
2012 | 1546 |
cols <- c(3, 1, 2) |
2013 | 1547 |
for(j in 1:3){ |
2014 |
- if(sum(unobserved.index[[j]]) == 0) next() |
|
1548 |
+ if(length(unobserved.index[[j]]) == 0) next() |
|
2015 | 1549 |
kk <- cols[j] |
2016 | 1550 |
X <- cbind(1, medianA[index.complete, kk], medianB[index.complete, kk]) |
2017 | 1551 |
Y <- cbind(medianA[index.complete, -kk], |
... | ... |
@@ -2872,42 +2406,35 @@ ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){ |
2872 | 2406 |
|
2873 | 2407 |
|
2874 | 2408 |
shrinkSummary <- function(object, |
2875 |
- type=c("SNP", "NP", "X.SNP", "X.NP"), ##"X.snps", "X.nps"), |
|
2409 |
+ type=c("SNP", "X.SNP"), ##"X.snps", "X.nps"), |
|
2876 | 2410 |
MIN.OBS=1, |
2877 | 2411 |
MIN.SAMPLES=10, |
2878 | 2412 |
DF.PRIOR=50, |
2879 |
- verbose=TRUE){ |
|
2880 |
- if(type == "X.SNP" | type=="X.NP"){ |
|
2413 |
+ verbose=TRUE, |
|
2414 |
+ marker.index, |
|
2415 |
+ is.lds){ |
|
2416 |
+ stopifnot(type %in% c("SNP", "X.SNP")) |
|
2417 |
+ if(type == "X.SNP"){ |
|
2881 | 2418 |
gender <- object$gender |
2882 | 2419 |
if(sum(gender == 2) < 3) { |
2883 | 2420 |
return("too few females to estimate within genotype summary statistics on CHR X") |
2884 | 2421 |
} |
2885 | 2422 |
CHR.X <- TRUE |
2886 | 2423 |
} else CHR.X <- FALSE |
2887 |
- batch <- batch(object) |
|
2888 |
- is.snp <- isSnp(object) |
|
2889 |
- is.autosome <- chromosome(object) < 23 |
|
2890 |
- is.annotated <- !is.na(chromosome(object)) |
|
2891 |
- is.X <- chromosome(object) == 23 |
|
2892 |
- is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix") |
|
2893 |
- if(is.lds) require(ff) |
|
2894 |
- whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){ |
|
2895 |
- switch(type, |
|
2896 |
- SNP=which(is.snp & is.autosome & is.annotated), |
|
2897 |
- NP=which(!is.snp & is.autosome), |
|
2898 |
- X.SNP=which(is.snp & is.X), |
|
2899 |
- X.NP=which(!is.snp & is.X), |
|
2900 |
- stop("'type' must be one of 'SNP', 'NP', 'X.SNP', or 'X.NP'") |
|
2901 |
- ) |
|
2424 |
+ if(missing(marker.index)){ |
|
2425 |
+ batch <- batch(object) |
|
2426 |
+ is.snp <- isSnp(object) |
|
2427 |
+ is.autosome <- chromosome(object) < 23 |
|
2428 |
+ is.annotated <- !is.na(chromosome(object)) |
|
2429 |
+ is.X <- chromosome(object) == 23 |
|
2430 |
+ is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix") |
|
2431 |
+ if(is.lds) require(ff) |
|
2432 |
+ marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X) |
|
2902 | 2433 |
} |
2903 |
- marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X) |
|
2904 | 2434 |
summaryFxn <- function(type){ |
2905 | 2435 |
switch(type, |
2906 | 2436 |
SNP="shrinkGenotypeSummaries", |
2907 | 2437 |
X.SNP="shrinkGenotypeSummaries", ## this shrinks for the females only |
2908 |
-## NP="summarizeNps", |
|
2909 |
-## X.SNP="summarizeSnps", |
|
2910 |
-## X.NP="summarizeNps") |
|
2911 | 2438 |
stop()) |
2912 | 2439 |
} |
2913 | 2440 |
FUN <- summaryFxn(type[[1]]) |
... | ... |
@@ -2925,13 +2452,13 @@ shrinkSummary <- function(object, |
2925 | 2452 |
neededPkgs="crlmm") |
2926 | 2453 |
} else { |
2927 | 2454 |
FUN <- match.fun(FUN) |
2928 |
- object <- FUN(strata.index=1, |
|
2455 |
+ object <- FUN(strata=1, |
|
2929 | 2456 |
index.list=list(marker.index), |
2930 | 2457 |
object=object, |
2458 |
+ verbose=verbose, |
|
2931 | 2459 |
MIN.OBS=MIN.OBS, |
2932 | 2460 |
MIN.SAMPLES=MIN.SAMPLES, |
2933 | 2461 |
DF.PRIOR=DF.PRIOR, |
2934 |
- verbose=verbose, |
|
2935 | 2462 |
is.lds=is.lds) |
2936 | 2463 |
} |
2937 | 2464 |
return(object) |
... | ... |
@@ -2940,7 +2467,9 @@ shrinkSummary <- function(object, |
2940 | 2467 |
genotypeSummary <- function(object, |
2941 | 2468 |
GT.CONF.THR=0.95, |
2942 | 2469 |
type=c("SNP", "NP", "X.SNP", "X.NP"), ##"X.snps", "X.nps"), |
2943 |
- verbose=TRUE){ |
|
2470 |
+ verbose=TRUE, |
|
2471 |
+ marker.index, |
|
2472 |
+ is.lds){ |
|
2944 | 2473 |
if(type == "X.SNP" | type=="X.NP"){ |
2945 | 2474 |
gender <- object$gender |
2946 | 2475 |
if(sum(gender == 2) < 3) { |
... | ... |
@@ -2948,23 +2477,16 @@ genotypeSummary <- function(object, |
2948 | 2477 |
} |
2949 | 2478 |
CHR.X <- TRUE |
2950 | 2479 |
} else CHR.X <- FALSE |
2951 |
- batch <- batch(object) |
|
2952 |
- is.snp <- isSnp(object) |
|
2953 |
- is.autosome <- chromosome(object) < 23 |
|
2954 |
- is.annotated <- !is.na(chromosome(object)) |
|
2955 |
- is.X <- chromosome(object) == 23 |
|
2956 |
- is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix") |
|
2957 |
- if(is.lds) require(ff) |
|
2958 |
- whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){ |
|
2959 |
- switch(type, |
|
2960 |
- SNP=which(is.snp & is.autosome & is.annotated), |
|
2961 |
- NP=which(!is.snp & is.autosome), |
|
2962 |
- X.SNP=which(is.snp & is.X), |
|
2963 |
- X.NP=which(!is.snp & is.X), |
|
2964 |
- stop("'type' must be one of 'SNP', 'NP', 'X.SNP', or 'X.NP'") |
|
2965 |
- ) |
|
2480 |
+ if(missing(marker.index)){ |
|
2481 |
+ batch <- batch(object) |
|
2482 |
+ is.snp <- isSnp(object) |
|
2483 |
+ is.autosome <- chromosome(object) < 23 |
|
2484 |
+ is.annotated <- !is.na(chromosome(object)) |
|
2485 |
+ is.X <- chromosome(object) == 23 |
|
2486 |
+ is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix") |
|
2487 |
+ if(is.lds) stopifnot(isPackagedLoaded("ff")) |
|
2488 |
+ marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X) |
|
2966 | 2489 |
} |
2967 |
- marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X) |
|
2968 | 2490 |
summaryFxn <- function(type){ |
2969 | 2491 |
switch(type, |
2970 | 2492 |
SNP="summarizeSnps", |
... | ... |
@@ -2978,7 +2500,6 @@ genotypeSummary <- function(object, |
2978 | 2500 |
ocLapply(seq(along=index.list), |
2979 | 2501 |
FUN, |
2980 | 2502 |
index.list=index.list, |
2981 |
-## marker.index=marker.index, |
|
2982 | 2503 |
object=object, |
2983 | 2504 |
batchSize=ocProbesets(), |
2984 | 2505 |
GT.CONF.THR=GT.CONF.THR, |
... | ... |
@@ -2988,19 +2509,28 @@ genotypeSummary <- function(object, |
2988 | 2509 |
neededPkgs="crlmm") |
2989 | 2510 |
} else { |
2990 | 2511 |
FUN <- match.fun(FUN) |
2991 |
- object <- FUN(strata.index=1, |
|
2512 |
+ object <- FUN(strata=1, |
|
2992 | 2513 |
index.list=list(marker.index), |
2993 |
-## marker.index=marker.index, |
|
2994 | 2514 |
object=object, |
2995 | 2515 |
batchSize=ocProbesets(), |
2996 | 2516 |
GT.CONF.THR=GT.CONF.THR, |
2997 | 2517 |
verbose=verbose, |
2998 |
- CHR.X=CHR.X, |
|
2999 |
- is.lds=is.lds) |
|
2518 |
+ is.lds=is.lds, |
|
2519 |
+ CHR.X=CHR.X) |
|
3000 | 2520 |
} |
3001 | 2521 |
return(object) |
3002 | 2522 |
} |
3003 | 2523 |
|
2524 |
+whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){ |
|
2525 |
+ switch(type, |
|
2526 |
+ SNP=which(is.snp & is.autosome & is.annotated), |
|
2527 |
+ NP=which(!is.snp & is.autosome), |
|
2528 |
+ X.SNP=which(is.snp & is.X), |
|
2529 |
+ X.NP=which(!is.snp & is.X), |
|
2530 |
+ stop("'type' must be one of 'SNP', 'NP', 'X.SNP', or 'X.NP'") |
|
2531 |
+ ) |
|
2532 |
+} |
|
2533 |
+ |
|
3004 | 2534 |
summarizeNps <- function(strata, index.list, object, batchSize, |
3005 | 2535 |
GT.CONF.THR, verbose, is.lds, CHR.X, ...){ |
3006 | 2536 |
if(is.lds) {physical <- get("physical"); open(object)} |
... | ... |
@@ -3151,44 +2681,115 @@ crlmmCopynumber <- function(object, |
3151 | 2681 |
prior.prob=rep(1/4,4), |
3152 | 2682 |
seed=1, |
3153 | 2683 |
verbose=TRUE, |
3154 |
- GT.CONF.THR=0.99, |
|
2684 |
+ GT.CONF.THR=0.95, |
|
3155 | 2685 |
PHI.THR=2^6, |
3156 | 2686 |
nHOM.THR=5, |
3157 | 2687 |
MIN.NU=2^3, |
3158 | 2688 |
MIN.PHI=2^3, |
3159 | 2689 |
THR.NU.PHI=TRUE, |
3160 | 2690 |
type=c("SNP", "NP", "X.SNP", "X.NP")){ |
3161 |
- if(type == "X.SNP" | type=="X.NP"){ |
|
3162 |
- gender <- object$gender |
|
3163 |
- if(sum(gender == 2) < 3) { |
|
3164 |
- warning("too few females to estimate within genotype summary statistics on CHR X") |
|
3165 |
- return(object) |
|
3166 |
- } |
|
3167 |
- CHR.X <- TRUE |
|
3168 |
- } else CHR.X <- FALSE |
|
2691 |
+ stopifnot(type %in% c("SNP", "NP", "X.SNP", "X.NP")) |
|
3169 | 2692 |
batch <- batch(object) |
3170 | 2693 |
is.snp <- isSnp(object) |
3171 | 2694 |
is.autosome <- chromosome(object) < 23 |
3172 | 2695 |
is.annotated <- !is.na(chromosome(object)) |
3173 | 2696 |
is.X <- chromosome(object) == 23 |
3174 | 2697 |
is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix") |
3175 |
- if(is.lds) require(ff) |
|
2698 |
+ if(is.lds) stopifnot(isPackageLoaded("ff")) |
|
3176 | 2699 |
samplesPerBatch <- table(as.character(batch(object))) |
3177 | 2700 |
if(any(samplesPerBatch < MIN.SAMPLES)){ |
3178 | 2701 |
warning("The following batches have fewer than ", MIN.SAMPLES, ":") |
3179 | 2702 |
message(paste(samplesPerBatch[samplesPerBatch < MIN.SAMPLES], collapse=", ")) |
3180 | 2703 |
message("Not estimating copy number for the above batches") |
3181 | 2704 |
} |
3182 |
- whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){ |
|
3183 |
- switch(type, |
|
3184 |
- SNP=which(is.snp & is.autosome & is.annotated), |
|
3185 |
- NP=which(!is.snp & is.autosome), |
|
3186 |
- X.SNP=which(is.snp & is.X), |
|
3187 |
- X.NP=which(!is.snp & is.X), |
|
3188 |
- stop("'type' must be one of 'SNP', 'NP', 'X.SNP', or 'X.NP'") |
|
3189 |
- ) |
|
2705 |
+ mylabel <- function(marker.type){ |
|
2706 |
+ switch(marker.type, |
|
2707 |
+ SNP="autosomal SNPs", |
|
2708 |
+ NP="autosomal nonpolymorphic markers", |
|
2709 |
+ X.SNP="chromosome X SNPs", |
|
2710 |
+ X.NP="chromosome X nonpolymorphic markers") |
|
2711 |
+ } |
|
2712 |
+ if(verbose) message("Computing summary statistics for genotype clusters for each batch") |
|
2713 |
+ for(i in seq_along(type)){ |
|
2714 |
+ ## do all types |
|
2715 |
+ marker.type <- type[i] |
|
2716 |
+ if(verbose) message(paste("...", mylabel(marker.type))) |
|
2717 |
+ ##if(verbose) message(paste("Computing summary statistics for ", mylabel(marker.type), " genotype clusters for each batch") |
|
2718 |
+ marker.index <- whichMarkers(marker.type, is.snp, |
|
2719 |
+ is.autosome, is.annotated, is.X) |
|
2720 |
+ object <- genotypeSummary(object=object, |
|
2721 |
+ GT.CONF.THR=GT.CONF.THR, |
|
2722 |
+ type=marker.type, |
|
2723 |
+ verbose=verbose, |
|
2724 |
+ marker.index=marker.index, |
|
2725 |
+ is.lds=is.lds) |
|
2726 |
+ } |
|
2727 |
+ if(verbose) message("Imputing unobserved cluster medians and shrinking the variances at polymorphic loci")##SNPs only |
|
2728 |
+ for(i in seq_along(type)){ |
|
2729 |
+ marker.type <- type[i] |
|
2730 |
+ if(!marker.type %in% c("SNP", "X.SNP")) next() |
|
2731 |
+ message(paste("...", mylabel(marker.type))) |
|
2732 |
+ marker.index <- whichMarkers(marker.type, is.snp, |
|
2733 |
+ is.autosome, is.annotated, is.X) |
|
2734 |
+ object <- shrinkSummary(object=object, |
|
2735 |
+ MIN.OBS=MIN.OBS, |
|
2736 |
+ MIN.SAMPLES=MIN.SAMPLES, |
|
2737 |
+ DF.PRIOR=DF.PRIOR, |
|
2738 |
+ type=marker.type, |
|
2739 |
+ verbose=verbose, |
|
2740 |
+ marker.index=marker.index, |
|
2741 |
+ is.lds=is.lds) |
|
2742 |
+ } |
|
2743 |
+ if(verbose) message("Estimating parameters for copy number")##SNPs only |
|
2744 |
+ for(i in seq_along(type)){ |
|
2745 |
+ marker.type <- type[i] |
|
2746 |
+ message(paste("...", mylabel(marker.type))) |
|
2747 |
+ CHR.X <- ifelse(marker.type %in% c("X.SNP", "X.NP"), TRUE, FALSE) |
|
2748 |
+ marker.index <- whichMarkers(marker.type, is.snp, |
|
2749 |
+ is.autosome, is.annotated, is.X) |
|
2750 |
+ object <- estimateCnParameters(object=object, |
|
2751 |
+ type=marker.type, |
|
2752 |
+ SNRMin=SNRMin, |
|
2753 |
+ DF.PRIOR=DF.PRIOR, |
|
2754 |
+ GT.CONF.THR=GT.CONF.THR, |
|
2755 |
+ MIN.SAMPLES=MIN.SAMPLES, |
|
2756 |
+ MIN.OBS=MIN.OBS, |
|
2757 |
+ MIN.NU=MIN.NU, |
|
2758 |
+ MIN.PHI=MIN.PHI, |
|
2759 |
+ THR.NU.PHI=THR.NU.PHI, |
|
2760 |
+ verbose=verbose, |
|
2761 |
+ marker.index=marker.index, |
|
2762 |
+ is.lds=is.lds, |
|
2763 |
+ CHR.X=CHR.X) |
|
2764 |
+ } |
|
2765 |
+ return(object) |
|
2766 |
+} |
|
2767 |
+ |
|
2768 |
+estimateCnParameters <- function(object, |
|
2769 |
+ type=c("SNP", "NP", "X.SNP", "X.NP"), |
|
2770 |
+ verbose=TRUE, |
|
2771 |
+ SNRMin=5, |
|
2772 |
+ DF.PRIOR=50, |
|
2773 |
+ GT.CONF.THR=0.95, |
|
2774 |
+ MIN.SAMPLES=10, |
|
2775 |
+ MIN.OBS=1, |
|
2776 |
+ MIN.NU=8, |
|
2777 |
+ MIN.PHI=8, |
|
2778 |
+ THR.NU.PHI=TRUE, |
|
2779 |
+ marker.index, |
|
2780 |
+ CHR.X, |
|
2781 |
+ is.lds){ |
|
2782 |
+ if(missing(marker.index)){ |
|
2783 |
+ batch <- batch(object) |
|
2784 |
+ is.snp <- isSnp(object) |
|
2785 |
+ is.autosome <- chromosome(object) < 23 |
|
2786 |
+ is.annotated <- !is.na(chromosome(object)) |
|
2787 |
+ is.X <- chromosome(object) == 23 |
|
2788 |
+ is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix") |
|
2789 |
+ if(is.lds) require(ff) |
|
2790 |
+ CHR.X <- ifelse(type[[1]] %in% c("X.SNP", "X.NP"), TRUE, FALSE) |
|
2791 |
+ marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X) |
|
3190 | 2792 |
} |
3191 |
- marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X) |
|
3192 | 2793 |
lmFxn <- function(type){ |
3193 | 2794 |
switch(type, |
3194 | 2795 |
SNP="fit.lm1", |
... | ... |
@@ -359,12 +359,14 @@ ACN <- function(object, allele, i , j){ |
359 | 359 |
ii <- which(chromosome(object) < 23) |
360 | 360 |
if(length(ii) > 0){ |
361 | 361 |
## calculate ca only for batches indexed by j |
362 |
- batches <- unique(batch(object)[j]) |
|
362 |
+ batches <- unique(as.character(batch(object))[j]) |
|
363 | 363 |
for(k in seq_along(batches)){ |
364 |
- l <- match(batches[k], bns) |
|
364 |
+ this.batch <- batches[k] |
|
365 |
+ jj <- j[batch(object)[j] %in% this.batch] |
|
366 |
+ l <- match(this.batch, bns) |
|
365 | 367 |
bg <- nu(object, allele)[ii, l] |
366 | 368 |
slope <- phi(object, allele)[ii, l] |
367 |
- I <- allele(object, allele)[ii, j] |
|
369 |
+ I <- allele(object, allele)[ii, jj] |
|
368 | 370 |
acn[[k]] <- 1/slope*(I - bg) |
369 | 371 |
} |
370 | 372 |
} |
... | ... |
@@ -375,24 +377,27 @@ ACN <- function(object, allele, i , j){ |
375 | 377 |
} |
376 | 378 |
if(!missing(i) & missing(j)){ |
377 | 379 |
## calculate ca, cb for all batches |
378 |
- batches <- unique(batch(object)) |
|
380 |
+ batches <- batchNames(object) |
|
379 | 381 |
for(k in seq_along(batches)){ |
382 |
+ this.batch <- batches[k] |
|
383 |
+ l <- match(this.batch, bns) |
|
380 | 384 |
##bb <- batches[k] |
381 |
- bg <- nu(object, allele)[i, k] |
|
382 |
- slope <- phi(object, allele)[i, k] |
|
383 |
- I <- allele(object, allele)[i, j] |
|
385 |
+ bg <- nu(object, allele)[i, l] |
|
386 |
+ slope <- phi(object, allele)[i, l] |
|
387 |
+ I <- allele(object, allele)[i, batch(object) == this.batch] |
|
384 | 388 |
acn[[k]] <- 1/slope*(I - bg) |
385 | 389 |
} |
386 | 390 |
} |
387 | 391 |
if(!missing(i) & !missing(j)){ |
388 |
- ubatch <- unique(batch(object)) |
|
389 |
- batches <- unique(batch(object)[j]) |
|
392 |
+ batches <- unique(as.character(batch(object)[j])) |
|
390 | 393 |
acn <- list() |
391 | 394 |
for(k in seq_along(batches)){ |
392 |
- l <- match(batches[k], ubatch) |
|
395 |
+ this.batch <- batches[k] |
|
396 |
+ jj <- j[batch(object)[j] %in% this.batch] |
|
397 |
+ l <- match(this.batch, bns) |
|
393 | 398 |
bg <- nu(object, allele)[i, l] |
394 | 399 |
slope <- phi(object, allele)[i, l] |
395 |
- I <- allele(object, allele)[i, j] |
|
400 |
+ I <- allele(object, allele)[i, jj] |
|
396 | 401 |
acn[[k]] <- 1/slope*(I - bg) |
397 | 402 |
} |
398 | 403 |
} |
... | ... |
@@ -404,15 +409,20 @@ ACN <- function(object, allele, i , j){ |
404 | 409 |
setMethod("CA", signature=signature(object="CNSet"), |
405 | 410 |
function(object, ...){ |
406 | 411 |
ca <- ACN(object, allele="A", ...) |
412 |
+ ca[ca < 0.05] <- 0.05 |
|
413 |
+ ca[ca > 5] <- 5 |
|
407 | 414 |
return(ca) |
408 | 415 |
}) |
409 | 416 |
setMethod("CB", signature=signature(object="CNSet"), |
410 | 417 |
function(object, ...) { |
411 | 418 |
cb <- ACN(object, allele="B", ...) |
419 |
+ cb[cb < 0.05] <- 0.05 |
|
420 |
+ cb[cb > 5] <- 5 |
|
412 | 421 |
return(cb) |
413 | 422 |
}) |
414 | 423 |
|
415 | 424 |
totalCopyNumber <- function(object, ...){ |
425 |
+ message("copy number at nonpolymorphic probes is not currently available") |
|
416 | 426 |
ca <- CA(object, ...) |
417 | 427 |
cb <- CB(object, ...) |
418 | 428 |
is.snp <- isSnp(object) |
... | ... |
@@ -432,7 +442,7 @@ setReplaceMethod("snpCall", c("CNSet", "ff_or_matrix"), |
432 | 442 |
function(object, ..., value){ |
433 | 443 |
assayDataElementReplace(object, "call", value) |
434 | 444 |
}) |
435 |
-##setReplaceMethod("snpCallProbability", c("CNSet", "ff_or_matrix"), |
|
436 |
-## function(object, ..., value){ |
|
437 |
-## assayDataElementReplace(object, "callProbability", value) |
|
438 |
-## }) |
|
445 |
+setReplaceMethod("snpCallProbability", c("CNSet", "ff_or_matrix"), |
|
446 |
+ function(object, ..., value){ |
|
447 |
+ assayDataElementReplace(object, "callProbability", value) |
|
448 |
+ }) |
... | ... |
@@ -38,9 +38,9 @@ options(continue=" ", width=70) |
38 | 38 |
|
39 | 39 |
\end{abstract} |
40 | 40 |
|
41 |
-\section{Simple usage} |
|
42 |
- |
|
41 |
+\section{Copy number estimation} |
|
43 | 42 |
|
43 |
+\subsection{Set up} |
|
44 | 44 |
|
45 | 45 |
<<cdfname>>= |
46 | 46 |
library(crlmm) |
... | ... |
@@ -91,7 +91,7 @@ calls to \verb@Sweave@ will load the saved computations from disk. For |
91 | 91 |
additional information, see the examples in the help file for the |
92 | 92 |
\Rfunction{checkExists} function. |
93 | 93 |
|
94 |
-\paragraph{Preprocessing and genotyping.} |
|
94 |
+\subsection{Preprocessing and genotyping.} |
|
95 | 95 |
|
96 | 96 |
In the following code chunk, we provide the complete path to the |
97 | 97 |
Affymetrix CEL files and define a 'batch' variable. The |
... | ... |
@@ -132,169 +132,34 @@ The function \Rfunction{genotype} checks to see whether the |
132 | 132 |
genotype are stored as \Robject{ff} objects on disk. Otherwise, the |
133 | 133 |
genotypes and normalized intensities are stored in matrices. A word |
134 | 134 |
of caution: the \Rfunction{genotype} function without \Rpackage{ff} |
135 |
-requires a potentially large amount of RAM. Therefore, we illustrate |
|
136 |
-the \Robject{genotype} function using only the first 20 CEL files. We |
|
137 |
-wrap the function \Rfunction{genotype} in the \Rfunction{checkExists} |
|
138 |
-so that a user can load and inspect the saved object. We first check |
|
139 |
-that the file \Robject{cnSet.rda} does not exist. Existence of this |
|
140 |
-file implies that we have already run the copy number estimation and, |
|
141 |
-therefore, we do not need to preprocess and genotype the files a |
|
142 |
-second time. |
|
135 |
+requires a potentially large amount of RAM. We illustrate the |
|
136 |
+\Robject{genotype} function using only the first 5 CEL files (a number |
|
137 |
+to small to be used for copy number estimation). We wrap the function |
|
138 |
+\Rfunction{genotype} in the \Rfunction{checkExists} to speed up |
|
139 |
+Sweaving after the initial batch job. |
|
143 | 140 |
|
144 | 141 |
<<genotype>>= |
145 |
-tmpdir <- "~/tmp3" |
|
146 |
-##stop() |
|
147 |
-##if(FALSE){ |
|
148 |
-##if(file.exists(tmpdir)) unlink(tmpdir, recursive=TRUE) |
|
149 |
-dir.create(tmpdir) |
|
150 |
-##trace(genotype, browser) |
|
151 |
-system.time(gtSet2 <- genotype(filenames=celFiles[1:75], |
|
152 |
- cdfName=cdfName, |
|
153 |
- batch=batch[1:75])) |
|
154 |
-save(gtSet2, file=file.path(tmpdir, "gtSet2.rda")) |
|
155 |
-library(ff) |
|
156 |
-ldPath(tmpdir) |
|
157 |
-system.time(gtSet <- genotype(filenames=celFiles[1:75], |
|
158 |
- cdfName=cdfName, |
|
159 |
- batch=batch[1:75])) |
|
160 |
-save(gtSet, file=file.path(tmpdir, "gtSet.rda")) |
|
161 |
-q("no") |
|
162 |
-if(FALSE){ |
|
163 |
- library(ff) |
|
164 |
- tmpdir <- "~/tmp" |
|
165 |
- dir.create("~/tmp2") |
|
166 |
- load(file.path(tmpdir, "gtSet.rda")) |
|
167 |
- ldPath("~/tmp2") |
|
168 |
- gtSet <- updateObject(gtSet) ## neeed to remove the old ff objects |
|
169 |
- save(gtSet, file=file.path(ldPath(), "gtSet.rda")) |
|
170 |
-} |
|
171 |
- |
|
172 |
-##trace(summarizeSnps, browser) |
|
173 |
-ocProbesets(100e3) |
|
174 |
-system.time(gtSet <- genotypeSummary(gtSet, GT.CONF.THR=0.95, type="SNP")) |
|
175 |
-system.time(gtSet <- genotypeSummary(gtSet, GT.CONF.THR=0.95, type="NP")) |
|
176 |
-system.time(gtSet <- genotypeSummary(gtSet, GT.CONF.THR=0.95, type="X.SNP")) |
|
177 |
-system.time(gtSet <- genotypeSummary(gtSet, GT.CONF.THR=0.95, type="X.NP")) |
|
178 |
-library(ff) |
|
179 |
-load("~/tmp2/gtSet.rda") |
|
180 |
-invisible(open(gtSet)) |
|
181 |
-ocProbesets(100e3) |
|
182 |
- |
|
183 |
-system.time(gtSet <- shrinkSummary(gtSet, type="SNP")) |
|
184 |
-##system.time(gtSet <- shrinkSummary(gtSet, type="NP")) |
|
185 |
-system.time(gtSet <- shrinkSummary(gtSet, type="X.SNP")) |
|
186 |
-##system.time(gtSet <- shrinkSummary(gtSet, type="X.NP")) |
|
187 |
- |
|
188 |
-## test: try females only. try males only |
|
189 |
- |
|
190 |
-## fit linear model |
|
191 |
-##trace(crlmmCopynumber, browser) |
|
192 |
-##trace(fit.wls, browser) |
|
193 |
-trace(fit.lm1, browser) |
|
194 |
-system.time(cnSet <- crlmmCopynumber(gtSet, type="SNP")) |
|
195 |
-CA(cnSet)[1:5, 1:5] |
|
196 |
-totalCopyNumber(cnSet, i=1:5, j=1:5) |
|
197 |
- |
|
198 |
-trace(fit.lm2, browser) |
|
199 |
-system.time(cnSet <- crlmmCopynumber(gtSet, type="NP")) |
|
200 |
-index <- which(!isSnp(cnSet))[1:10] |
|
201 |
-totalCopyNumber(cnSet, i=index, j=1:5) |
|
202 |
- |
|
203 |
-##trace(fit.lm3, browser) |
|
204 |
-##trace(summarizeMaleXGenotypes, browser) |
|
205 |
-##trace(fit.wls, browser) |
|
206 |
-system.time(cnSet <- crlmmCopynumber(gtSet, type="X.SNP")) |
|
207 |
- |
|
208 |
-trace(fit.lm4, browser) |
|
209 |
-system.time(cnSet <- crlmmCopynumber(gtSet, type="X.NP")) |
|
210 |
- |
|
211 |
- |
|
212 |
-Ns(gtSet, batchname="C")[1:5, ] |
|
213 |
-medians(gtSet, "A")[1:5, ] |
|
214 |
-mads(gtSet, "A")[1:5, ] |
|
215 |
-##log scale |
|
216 |
-tau2(gtSet, allele="A")[1:5, ] |
|
217 |
-tau2(gtSet, allele="B")[1:5, ] |
|
218 |
-corr(gtSet)[1:5, ] |
|
219 |
- |
|
220 |
- |
|
221 |
- |
|
222 |
-cnSet <- crlmmCopynumber(gtSet, type="autosome.snps") |
|
223 |
- |
|
224 |
- |
|
225 |
- |
|
226 |
- |
|
227 |
- |
|
228 |
- |
|
229 |
-gtSet |
|
230 |
-tmp=Ns(gtSet) |
|
231 |
- |
|
232 |
- |
|
233 |
- |
|
234 |
-NN <- N.AA(gtSet)[1:1000,] |
|
235 |
-N.AA(gtSet)[1:1000,] <- NN##do.call(cbind, lapply(NN, function(x) x[, 1])) |
|
236 |
- |
|
237 |
- |
|
238 |
-##tmp=Ns(gtSet) |
|
239 |
-##tmp <- Ns(gtSet, genotype="AA") |
|
240 |
-##tmp <- Ns(gtSet, genotype="AA", batchname="C") |
|
241 |
-##tmp <- Ns(gtSet, batchname="C") |
|
242 |
-##Ns(gtSet, batchname="C") <- tmp |
|
243 |
- |
|
244 |
-load(file.path(tmpdir, "gtSet2.rda")) |
|
245 |
-gtSet2.update <- updateObject(gtSet2) |
|
246 |
-isCurrent(gtSet2) |
|
247 |
- |
|
248 |
- |
|
249 |
- |
|
250 |
- |
|
251 |
- |
|
252 |
- |
|
253 |
- |
|
254 |
- |
|
255 |
-##using matrices |
|
256 |
-if(!file.exists(file.path(outdir, "cnSet.assayData_matrix.rda"))){ |
|
257 |
- gtSet <- checkExists("gtSet.assayData_matrix", |
|
258 |
- .path=outdir, |
|
259 |
- .FUN=genotype, |
|
260 |
- filenames=celFiles[1:20], |
|
261 |
- cdfName=cdfName, |
|
262 |
- batch=batch[1:20]) |
|
263 |
- class(calls(gtSet.assayData_matrix)) |
|
142 |
+gtSet <- checkExists("gtSet.assayData_matrix", |
|
143 |
+ .path=outdir, |
|
144 |
+ .FUN=genotype, |
|
145 |
+ filenames=celFiles[1:5], |
|
146 |
+ cdfName=cdfName, |
|
147 |
+ batch=batch[1:5]) |
|
264 | 148 |
} |
265 |
-##q("no") |
|
266 | 149 |
@ |
267 | 150 |
|
268 |
-Next, we estimate copy number for the 20 CEL files. The copy number |
|
269 |
-estimation step requirees a minimum number of CEL files in each batch |
|
270 |
-(we suggest 10). While estimation can be performed using only the 20 |
|
271 |
-samples as we do here, it would be far preferable to process all of |
|
272 |
-the files that were on the same chemistry plate. The code chunk below |
|
273 |
-will load \Robject{cnSet.assayData_matrix} from disk if this |
|
274 |
- |
|
275 |
-computation had already been performed as part of the batch job. |
|
276 |
- |
|
277 |
-<<copynumber, eval=FALSE>>= |
|
278 |
-cnSet.assayData_matrix <- checkExists("cnSet.assayData_matrix", |
|
279 |
- .path=outdir, |
|
280 |
- .FUN=crlmmCopynumber, |
|
281 |
- object=gtSet.assayData_matrix, |
|
282 |
- chromosome=22) |
|
283 |
-@ |
|
284 |
- |
|
285 |
- |
|
286 | 151 |
A more memory efficient approach to preprocessing and genotyping is |
287 |
-implemented in the \R{} function \Rfunction{genotypeLD} and requires |
|
288 |
-the \Rpackage{ff} package. The functions \Rfunction{ocProbesets} and |
|
289 |
-\Rfunction{ocSamples} can be used to manage how many probesets and |
|
290 |
-samples are to processed at a time and can therefore be used to fine |
|
291 |
-tune the required RAM. The function \Rfunction{ldPath} indicates that |
|
292 |
-\Robject{ff} objects will be stored in the directory \Robject{outdir}. |
|
152 |
+requires the \Rpackage{ff} package. In particular, the functions |
|
153 |
+\Rfunction{ocProbesets} and \Rfunction{ocSamples} can be used to |
|
154 |
+manage how many probesets and samples are to processed at a time and |
|
155 |
+can therefore be used to fine tune the required RAM. The function |
|
156 |
+\Rfunction{ldPath} indicates that \Robject{ff} objects will be stored |
|
157 |
+in the directory \Robject{outdir}. |
|
293 | 158 |
|
294 | 159 |
<<ff>>= |
295 | 160 |
library(ff) |
296 | 161 |
ldPath(outdir) |
297 |
-ocProbesets(50000) |
|
162 |
+ocProbesets(100000) |
|
298 | 163 |
ocSamples(200) |
299 | 164 |
@ |
300 | 165 |
|
... | ... |
@@ -308,68 +173,130 @@ so that subsequent calls to \verb@Sweave@ can be run interactively. |
308 | 173 |
|
309 | 174 |
|
310 | 175 |
<<LDS_genotype>>= |
311 |
-if(!file.exists(file.path(outdir, "cnSet.assayData_ff.rda"))){ |
|
312 |
- Rprof(filename="Rprof_genotypeff.out", interval=0.1) |
|
313 |
- gtSet.assayData_ff <- checkExists("gtSet.assayData_ff", |
|
314 |
- .path=outdir, |
|
315 |
- .FUN=genotypeLD, |
|
316 |
- filenames=celFiles, |
|
317 |
- cdfName=cdfName, |
|
318 |
- batch=batch) |
|
319 |
- Rprof(NULL) |
|
320 |
- class(calls(gtSet.assayData_ff)) |
|
176 |
+if(!file.exists(file.path(outdir, "cnSet.rda"))){ |
|
177 |
+ gtSet <- checkExists("gtSet", |
|
178 |
+ .path=outdir, |
|
179 |
+ .FUN=genotypeLD, |
|
180 |
+ filenames=celFiles, |
|
181 |
+ cdfName=cdfName, |
|
182 |
+ batch=batch) |
|
183 |
+ class(calls(gtSet)) |
|
321 | 184 |
} |
322 | 185 |
@ |
323 | 186 |
|
324 |
-The analogous function for copy number estimation follows. In |
|
325 |
-practice, once the object \Robject{cnSet.assayData_ff} is created the |
|
326 |
-\Robject{gtSet.assayData_ff} is no longer needed and can be removed |
|
327 |
-from the \Robject{outdir}. |
|
187 |
+\subsection{Copy number algorithm.} |
|
188 |
+ |
|
189 |
+The \Rfunction{crlmmCopynumber} computes summary statistics for each |
|
190 |
+genotype, imputes unobserved genotype centers, shrinks the |
|
191 |
+within-genotype variances, and estimates parameters for |
|
192 |
+allele-specific copy number. With \texttt{verbose=TRUE}, the above |
|
193 |
+steps for CN estimation are displayed. |
|
328 | 194 |
|
329 | 195 |
<<LDS_copynumber>>= |
330 |
-Rprof(filename="Rprof_cnff.out", interval=0.1) |
|
331 |
-##trace(fit.wls, browser) |
|
332 |
-cnSet.assayData_ff <- checkExists("cnSet.assayData_ff", |
|
333 |
- .path=outdir, |
|
334 |
- .FUN=crlmmCopynumberLD, |
|
335 |
- object=gtSet.assayData_ff) |
|
336 |
-Rprof(NULL) |
|
337 |
-##if(file.exists(file.path(outdir, "gtSet.assayData_ff.rda"))) |
|
338 |
-## unlink(file.path(outdir, "gtSet.assayData_ff.rda")) |
|
339 |
-gc() |
|
340 |
-q("no") |
|
341 |
-@ |
|
342 |
- |
|
343 |
-The objects returned by the \Rfunction{genotypeLD} and |
|
344 |
-\Rfunction{crlmmCopynumberLD} have assay data elements that are |
|
345 |
-pointers to \Robject{ff} objects stored in the directory |
|
346 |
-\Robject{outdir}. The functions \Rfunction{open} and |
|
347 |
-\Rfunction{close} open and close the connections to the |
|
348 |
-\Robject{assayData} elements. Subsetting an \Robject{ff} object pulls |
|
349 |
-the data from disk into memory and should therefore be used with |
|
350 |
-caution. In particular, subsetting the \Robject{gt.assayData_ff} would |
|
351 |
-subset each elements in the \Robject{assayData} slot, returning an |
|
352 |
-object of the same class but with \Robject{assayData} elements that |
|
353 |
-are matrices. Such an operation can be exceedingly slow and require |
|
196 |
+cnSet <- checkExists("cnSet", |
|
197 |
+ .path=outdir, |
|
198 |
+ .FUN=crlmmCopynumber, |
|
199 |
+ object=gtSet) |
|
200 |
+@ |
|
201 |
+ |
|
202 |
+The \Rpackage{crlmmCopynumber} function no longer stores the |
|
203 |
+allele-specific estimates of copy number. Rather, several functions |
|
204 |
+are available that will compute relatively quickly the allele-specific |
|
205 |
+estimates from the stored normalized intensities and the linear model |
|
206 |
+parameters. For instance, for allele $k$, marker $i$, sample $j$, and |
|
207 |
+batch $p$, the estimate of allele-specific copy number are updated by |
|
208 |
+subtracting the estimated background from the observed intensity and |
|
209 |
+scaling by the slope coefficient. Specifically, |
|
210 |
+ |
|
211 |
+\newcommand{\A}{A} |
|
212 |
+\newcommand{\B}{B} |
|
213 |
+\begin{eqnarray} |
|
214 |
+ \label{eq:cnK} |
|
215 |
+{\hat c}_{k,ijp} &=& \mbox{max}\left\{\frac{1}{{\hat |
|
216 |
+ \phi}_{k,ip}}\left(I_{k,ijp}-{\hat \nu}_{k,ip}\right), ~0\right\} |
|
217 |
+\mbox{~for~} k \in \{\A, \B\}. |
|
218 |
+% \label{eq:cnB} {\hat c}_{\B,ijp} &=& \mbox{max}\left\{\frac{1}{{\hat |
|
219 |
+% \phi}_{\B,ip}}\left(I_{\B,ijp}-{\hat \nu}_{\B,ip}\right), ~0\right\}. |
|
220 |
+\end{eqnarray}. |
|
221 |
+\noindent See \cite{Scharpf2010} for details. |
|
222 |
+ |
|
223 |
+The functions \Rfunction{CA}, \Rfunction{CB}, and |
|
224 |
+\Rfunction{totalCopyNumber} can be used to extract CN estimates from |
|
225 |
+the \Robject{CNSet} container. |
|
226 |
+ |
|
227 |
+<<ca>>= |
|
228 |
+##ca <- CA(x[snp.index, ])/100 |
|
229 |
+snp.index <- which(isSnp(cnSet)) |
|
230 |
+ca <- CA(cnSet, i=snp.index, j=1:5) |
|
231 |
+cb <- CB(cnSet, i=snp.index, j=1:5) |
|
232 |
+ct <- ca+cb |
|
233 |
+##boxplot(data.frame(ct[, 1:5]), pch=".") |
|
234 |
+@ |
|
235 |
+ |
|
236 |
+Alternatively, total copy number can be obtained by |
|
237 |
+<<totalCopynumber.snps>>= |
|
238 |
+ct2 <- totalCopyNumber(cnSet, i=snp.index, j=1:5) |
|
239 |
+all.equal(ct, ct2) |
|
240 |
+@ |
|
241 |
+ |
|
242 |
+At nonpolymorphic loci, either the 'CA' or totalCopynumber method can |
|
243 |
+be used to obtain total copy number. |
|
244 |
+ |
|
245 |
+<<ca>>= |
|
246 |
+cn.nonpolymorphic <- CA(obj, i=which(!isSnp(obj))) |
|
247 |
+@ |
|
248 |
+ |
|
249 |
+Total copy number at both polymorphic and nonpolymorphic loci: |
|
250 |
+<<totalCopynumber>>= |
|
251 |
+##cn <- copyNumber(x) |
|
252 |
+cn <- totalCopyNumber(x, sample(1:nrow(x), 1e4), 1:5) |
|
253 |
+apply(cn, 2, median, na.rm=TRUE) |
|
254 |
+@ |
|
255 |
+ |
|
256 |
+The above estimates can be plotted versus physical position. |
|
257 |
+ |
|
258 |
+ |
|
259 |
+\section{The CNSet container} |
|
260 |
+ |
|
261 |
+The objects returned by the \Rfunction{genotype} and |
|
262 |
+\Rfunction{crlmmCopynumber} have assay data elements that are pointers |
|
263 |
+to \Robject{ff} objects stored in the directory \Robject{outdir}. The |
|
264 |
+functions \Rfunction{open} and \Rfunction{close} open and close the |
|
265 |
+connections to the \Robject{assayData} elements. Subsetting an |
|
266 |
+\Robject{ff} object pulls the data from disk into memory and should |
|
267 |
+therefore be used with caution. In particular, subsetting the |
|
268 |
+\Robject{gtSet} would subset each element in the \Robject{assayData} |
|
269 |
+slot, returning an object of the same class but with |
|
270 |
+\Robject{assayData} elements that are matrices. Such an operation can |
|
271 |
+be exceedingly slow when performed over a network and require |
|
354 | 272 |
subsantial RAM. The preferred approach is to extract only the assay |
355 | 273 |
data element that is needed. In the example below, we extract the |
356 | 274 |
genotype calls for the the first 50 samples. |
357 | 275 |
|
358 | 276 |
<<check>>= |
359 |
-dims(cnSet.assayData_ff) |
|
360 |
-dims(cnSet.assayData_matrix) |
|
361 |
-print(object.size(cnSet.assayData_matrix), units="Mb") |
|
362 |
-print(object.size(cnSet.assayData_ff), units="Mb") |
|
363 |
-if(FALSE){ |
|
364 |
- replicate(5, system.time(gt1 <- calls(cnSet.assayData_matrix)[, 1:50])[[1]]) |
|
365 |
- open(calls(cnSet2)) |
|
366 |
- replicate(5, system.time(gt2 <- calls(cnSet.assayData_ff)[, 1:50])[[1]]) |
|
367 |
-} |
|
368 |
-gt1 <- calls(cnSet.assayData_matrix)[, 1:50] |
|
369 |
-gt2 <- calls(cnSet.assayData_ff)[, 1:50] |
|
370 |
-all.equal(gt1, gt2) |
|
277 |
+dims(cnSet) |
|
278 |
+dims(cnSet) |
|
279 |
+print(object.size(cnSet), units="Mb") |
|
280 |
+gt <- calls(cnSet)[, 1:50] |
|
281 |
+@ |
|
282 |
+ |
|
283 |
+The \Robject{CNSet} class also contains the slot |
|
284 |
+\Robject{batchStatistics} that contains batch-specific summaries |
|
285 |
+needed for copy number estimation. In particular, each element is a |
|
286 |
+matrix (or an ff object) with R rows and C columns, correspoinding to |
|
287 |
+R markers and C batches. The summaries includes the within genotype |
|
288 |
+cluster medians and median absolute deviations (mads), but also |
|
289 |
+parameters estimated from the linear model. (For unobserved |
|
290 |
+genotypes, the medians are imputed and the variance is obtained the |
|
291 |
+median variance (across markers) within a batch. ) The elements of the |
|
292 |
+slot can be listed as follows. |
|
293 |
+ |
|
294 |
+<<batchStatistics>>= |
|
295 |
+assayDataElementNames(batchStatistics(cnSet)) |
|
371 | 296 |
@ |
372 | 297 |
|
298 |
+ |
|
299 |
+ |
|
373 | 300 |
Note that for the Affymetrix 6.0 platform the assay data elements each |
374 | 301 |
have a row dimension corresponding to the total number of polymorphic |
375 | 302 |
and nonpolymorphic markers interrogated by the Affymetrix 6.0 |
... | ... |
@@ -383,39 +310,24 @@ the CRLMM genotype calls are represented as integers. The accessor |
383 | 310 |
scores. When stored as matrices, converting the integer |
384 | 311 |
representation back to the probability scale is straightforward as |
385 | 312 |
shown below. However, for the \Robject{ff} objects we must first |
386 |
-bring the data into memory. To bring all the scores into memory, one |
|
387 |
-could use the function \Rfunction{[,]} but this could be slow and |
|
388 |
-require a lot of RAM depending on the size of the dataset. Instead, |
|
389 |
-we suggest pulling only the needed rows and columns from memory. In |
|
390 |
-the following example, we convert the integer scores to probabilities |
|
391 |
-for the CEPH samples. As genotype confidence scores are not |
|
392 |
-applicable to the nonpolymorphic markers, we extract only the |
|
393 |
-polymorphic markers using the \Rfunction{isSnp} function. |
|
313 |
+convert the ff object to a matrix. One could use the function |
|
314 |
+\Rfunction{[,]} but this could be slow and require a lot of RAM |
|
315 |
+depending on the size of the dataset. We suggest pulling only the |
|
316 |
+needed rows and columns from memory. In the following example, we |
|
317 |
+convert the integer scores to probabilities for the CEPH samples. As |
|
318 |
+genotype confidence scores are not applicable to the nonpolymorphic |
|
319 |
+markers, we extract only the polymorphic markers using the |
|
320 |
+\Rfunction{isSnp} function. |
|
394 | 321 |
|
395 | 322 |
<<assayData>>= |
396 | 323 |
rows <- which(isSnp(cnSet)) |
397 | 324 |
cols <- which(batch == "C") |
398 |
-posterior.prob <- tryCatch(i2p(snpCallProbability(cnSet.assayData_ff)), |
|
399 |
- error=function(e) print("This will not work for an ff object.")) |
|
400 |
-posterior.prob1 <- i2p(snpCallProbability(cnSet.assayData_matrix)[rows, cols]) |
|
401 |
-posterior.prob2 <- i2p(snpCallProbability(cnSet.assayData_ff)[rows, cols]) |
|
402 |
-all.equal(posterior.prob1, posterior.prob2) |
|
403 |
-@ |
|
404 |
- |
|
405 |
-Next, we obtain locus-level estimates of copy number by fitting a linear |
|
406 |
-model to each SNP. A variable named 'batch' must be indicated in the |
|
407 |
-\Robject{protocolData} of the \Robject{cnSet} object. As the inverse |
|
408 |
-variance of the within-genotype normalized intensities are used as |
|
409 |
-weights in the linear model (and hence the design matrix in the |
|
410 |
-regression changes for each SNP), the time is linear with the number of |
|
411 |
-markers on the array. Copy number estimation at nonpolymorphic markers |
|
412 |
-and polymorphic markers with unobserved genotypes is more difficult. We |
|
413 |
-refer the interested reader to the technical report \citep{Scharpf2009}. |
|
414 |
-Again, we peform the copy number estimation using the matrix version and |
|
415 |
-the ff version in parallel and encourage users with large datasets to |
|
416 |
-explore the latter. As with the preprocessing and genotyping, the |
|
417 |
-following code should be submitted as part of the batch job as it is too |
|
418 |
-slow for interactive analysis. |
|
325 |
+posterior.prob <- tryCatch(i2p(snpCallProbability(cnSet)), |
|
326 |
+ error=function(e) print("This will not work for an ff object.")) |
|
327 |
+posterior.prob2 <- p2i(snpCallProbability(cnSet)[rows, cols]) |
|
328 |
+@ |
|
329 |
+ |
|
330 |
+Next, we obtain locus-level estimates of copy number. |
|
419 | 331 |
|
420 | 332 |
<<cn.ff, echo=FALSE, eval=FALSE>>= |
421 | 333 |
rm(cnSet); gc() |
... | ... |
@@ -502,28 +414,7 @@ genotypeConf <- integerScoreToProbability(snpCallProbability(x)[snp.index[1:10], |
502 | 414 |
|
503 | 415 |
\paragraph{\Robject{CopyNumberSet}: allele-specific copy number} |
504 | 416 |
|
505 |
-Allele-specific copy number at polymorphic loci: |
|
506 |
-<<ca>>= |
|
507 |
-##ca <- CA(x[snp.index, ])/100 |
|
508 |
-snp.index <- which(isSnp(x)) |
|
509 |
-ca <- CA(x, i=snp.index) |
|
510 |
-##or |
|
511 |
-ca <- ACN(x, "A", i=snp.index) |
|
512 |
-cb <- CB(x, i=snp.index) |
|
513 |
-ct <- ca+cb |
|
514 |
-@ |
|
515 |
- |
|
516 |
-Total copy number at nonpolymorphic loci: |
|
517 |
-<<ca>>= |
|
518 |
-cn.nonpolymorphic <- CA(obj, i=which(!isSnp(obj))) |
|
519 |
-@ |
|
520 | 417 |
|
521 |
-Total copy number at both polymorphic and nonpolymorphic loci: |
|
522 |
-<<totalCopynumber>>= |
|
523 |
-##cn <- copyNumber(x) |
|
524 |
-cn <- totalCopyNumber(x, sample(1:nrow(x), 1e4), 1:5) |
|
525 |
-apply(cn, 2, median, na.rm=TRUE) |
|
526 |
-@ |
|
527 | 418 |
|
528 | 419 |
\subsection{Other accessors} |
529 | 420 |
|
... | ... |
@@ -1,11 +1,10 @@ |
1 |
-@UNPUBLISHED{Scharpf2009, |
|
1 |
+@ARTICLE{Scharpf2010, |
|
2 | 2 |
author = {Robert B Scharpf and Ingo Ruczinski and Benilton Carvalho and Betty |
3 | 3 |
Doan and Aravinda Chakravarti and Rafael Irizarry}, |
4 | 4 |
title = {A multilevel model to address batch effects in copy number estimation |
5 | 5 |
using SNP arrays}, |
6 |
- month = {May}, |
|
7 |
- year = {2009}, |
|
8 |
- url={http://www.bepress.com/cgi/viewcontent.cgi?article=1193&context=jhubiostat} |
|
6 |
+ journal={Biostatistics}, |
|
7 |
+ year = {2010}, |
|
9 | 8 |
} |
10 | 9 |
|
11 | 10 |
@ARTICLE{Carvalho2007a, |