git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52859 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -188,7 +188,6 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
188 | 188 |
processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
189 | 189 |
mixtureParams, eps, seed, mixtureSampleSize, |
190 | 190 |
pkgname){ |
191 |
- |
|
192 | 191 |
obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
193 | 192 |
obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
194 | 193 |
obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
Goal is to initialize container for A and B of the right dimension and avoid unnecessary read / writes
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52852 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -15,7 +15,7 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
15 | 15 |
message(strwrap(msg)) |
16 | 16 |
stop("Package ", pkgname, " could not be found.") |
17 | 17 |
} |
18 |
- |
|
18 |
+ |
|
19 | 19 |
if(verbose) message("Loading annotations and mixture model parameters.") |
20 | 20 |
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
21 | 21 |
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
... | ... |
@@ -30,7 +30,7 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
30 | 30 |
SMEDIAN <- getVarInEnv("SMEDIAN") |
31 | 31 |
theKnots <- getVarInEnv("theKnots") |
32 | 32 |
gns <- getVarInEnv("gns") |
33 |
- |
|
33 |
+ |
|
34 | 34 |
##We will read each cel file, summarize, and run EM one by one |
35 | 35 |
##We will save parameters of EM to use later |
36 | 36 |
mixtureParams <- matrix(0, 4, length(filenames)) |
... | ... |
@@ -39,7 +39,7 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
39 | 39 |
## mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames)) |
40 | 40 |
## SNR <- initializeBigVector("crlmmSNR-", length(filenames), "numeric") |
41 | 41 |
## SKW <- initializeBigVector("crlmmSKW-", length(filenames), "numeric") |
42 |
- |
|
42 |
+ |
|
43 | 43 |
## This is the sample for the fitting of splines |
44 | 44 |
## BC: I like better the idea of the user passing the seed, |
45 | 45 |
## because this might intefere with other analyses |
... | ... |
@@ -51,15 +51,15 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
51 | 51 |
##f is the correction. we save to avoid recomputing |
52 | 52 |
A <- matrix(as.integer(0), length(pnsa), length(filenames)) |
53 | 53 |
B <- matrix(as.integer(0), length(pnsb), length(filenames)) |
54 |
- |
|
54 |
+ |
|
55 | 55 |
if(verbose){ |
56 | 56 |
message("Processing ", length(filenames), " files.") |
57 | 57 |
pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
58 | 58 |
} |
59 |
- |
|
59 |
+ |
|
60 | 60 |
##for skewness. no need to do everything |
61 | 61 |
idx2 <- sample(length(fid), 10^5) |
62 |
- |
|
62 |
+ |
|
63 | 63 |
##We start looping throug cel files |
64 | 64 |
for(i in seq(along=filenames)){ |
65 | 65 |
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
... | ... |
@@ -73,10 +73,10 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
73 | 73 |
if(fitMixture){ |
74 | 74 |
S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN |
75 | 75 |
M <- log2(A[idx, i])-log2(B[idx, i]) |
76 |
- |
|
76 |
+ |
|
77 | 77 |
##we need to test the choice of eps.. it is not the max diff between funcs |
78 | 78 |
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps) |
79 |
- |
|
79 |
+ |
|
80 | 80 |
mixtureParams[, i] <- tmp[["coef"]] |
81 | 81 |
SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
82 | 82 |
} |
... | ... |
@@ -97,12 +97,12 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1 |
97 | 97 |
mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1) |
98 | 98 |
sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3) |
99 | 99 |
sigmas[2] <- sigmas[2]/2 |
100 |
- |
|
100 |
+ |
|
101 | 101 |
weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2])) |
102 | 102 |
previousF1 <- -Inf |
103 | 103 |
change <- eps+1 |
104 | 104 |
it <- 0 |
105 |
- |
|
105 |
+ |
|
106 | 106 |
if(verbose) message("Max change must be under ", eps, ".") |
107 | 107 |
matS <- stupidSplineBasis(S, knots) |
108 | 108 |
while (change > eps & it < maxit){ |
... | ... |
@@ -112,19 +112,19 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1 |
112 | 112 |
LogLik <- rowSums(z) |
113 | 113 |
z <- sweep(z, 1, LogLik, "/") |
114 | 114 |
probs <- colMeans(z) |
115 |
- |
|
115 |
+ |
|
116 | 116 |
## M |
117 | 117 |
fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M)) |
118 |
- |
|
118 |
+ |
|
119 | 119 |
fit2 <- sum(z[, 2]*M)/sum(z[, 2]) |
120 | 120 |
F1 <- matS%*%fit1 |
121 | 121 |
sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1])) |
122 | 122 |
sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2])) |
123 |
- |
|
123 |
+ |
|
124 | 124 |
weights[, 1] <- dnorm(M, F1, sigmas[1]) |
125 | 125 |
weights[, 2] <- dnorm(M, fit2, sigmas[2]) |
126 | 126 |
weights[, 3] <- dnorm(M, -F1, sigmas[3]) |
127 |
- |
|
127 |
+ |
|
128 | 128 |
change <- max(abs(F1-previousF1)) |
129 | 129 |
previousF1 <- F1 |
130 | 130 |
if(verbose) message("Iter ", it, ": ", change, ".") |
... | ... |
@@ -140,7 +140,7 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
140 | 140 |
cdfName <- read.celfile.header(filenames[1])[["cdfName"]] |
141 | 141 |
pkgname <- getCrlmmAnnotationName(cdfName) |
142 | 142 |
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
143 |
- |
|
143 |
+ |
|
144 | 144 |
if(verbose) message("Loading annotations and mixture model parameters.") |
145 | 145 |
obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
146 | 146 |
pnsa <- getVarInEnv("pnsa") |
... | ... |
@@ -148,14 +148,14 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
148 | 148 |
gns <- getVarInEnv("gns") |
149 | 149 |
rm(list=obj, envir=.crlmmPkgEnv) |
150 | 150 |
rm(obj) |
151 |
- |
|
151 |
+ |
|
152 | 152 |
##We will read each cel file, summarize, and run EM one by one |
153 | 153 |
##We will save parameters of EM to use later |
154 | 154 |
if(verbose) message("Initializing objects.") |
155 | 155 |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double") |
156 | 156 |
SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double") |
157 | 157 |
SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double") |
158 |
- |
|
158 |
+ |
|
159 | 159 |
## This is the sample for the fitting of splines |
160 | 160 |
## BC: I like better the idea of the user passing the seed, |
161 | 161 |
## because this might intefere with other analyses |
... | ... |
@@ -180,7 +180,7 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
180 | 180 |
close(SKW) |
181 | 181 |
close(A) |
182 | 182 |
close(B) |
183 |
- |
|
183 |
+ |
|
184 | 184 |
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
185 | 185 |
} |
186 | 186 |
|
... | ... |
@@ -188,7 +188,7 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
188 | 188 |
processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
189 | 189 |
mixtureParams, eps, seed, mixtureSampleSize, |
190 | 190 |
pkgname){ |
191 |
- |
|
191 |
+ |
|
192 | 192 |
obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
193 | 193 |
obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
194 | 194 |
obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
... | ... |
@@ -226,7 +226,7 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
226 | 226 |
A[, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa) |
227 | 227 |
B[, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb) |
228 | 228 |
rm(y) |
229 |
- |
|
229 |
+ |
|
230 | 230 |
if(fitMixture){ |
231 | 231 |
S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN |
232 | 232 |
M <- log2(A[idx, k])-log2(B[idx, k]) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@46584 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -142,10 +142,12 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
142 | 142 |
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
143 | 143 |
|
144 | 144 |
if(verbose) message("Loading annotations and mixture model parameters.") |
145 |
- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
145 |
+ obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
146 | 146 |
pnsa <- getVarInEnv("pnsa") |
147 | 147 |
pnsb <- getVarInEnv("pnsb") |
148 | 148 |
gns <- getVarInEnv("gns") |
149 |
+ rm(list=obj, envir=.crlmmPkgEnv) |
|
150 |
+ rm(obj) |
|
149 | 151 |
|
150 | 152 |
##We will read each cel file, summarize, and run EM one by one |
151 | 153 |
##We will save parameters of EM to use later |
... | ... |
@@ -173,6 +175,11 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
173 | 175 |
mixtureParams=mixtureParams, eps=eps, seed=seed, |
174 | 176 |
mixtureSampleSize=mixtureSampleSize, pkgname=pkgname, |
175 | 177 |
neededPkgs=c("crlmm", pkgname)) |
178 |
+ close(mixtureParams) |
|
179 |
+ close(SNR) |
|
180 |
+ close(SKW) |
|
181 |
+ close(A) |
|
182 |
+ close(B) |
|
176 | 183 |
|
177 | 184 |
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
178 | 185 |
} |
... | ... |
@@ -182,9 +189,9 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
182 | 189 |
mixtureParams, eps, seed, mixtureSampleSize, |
183 | 190 |
pkgname){ |
184 | 191 |
|
185 |
- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
186 |
- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
187 |
- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
192 |
+ obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
193 |
+ obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
194 |
+ obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
188 | 195 |
autosomeIndex <- getVarInEnv("autosomeIndex") |
189 | 196 |
pnsa <- getVarInEnv("pnsa") |
190 | 197 |
pnsb <- getVarInEnv("pnsb") |
... | ... |
@@ -195,6 +202,8 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
195 | 202 |
SMEDIAN <- getVarInEnv("SMEDIAN") |
196 | 203 |
theKnots <- getVarInEnv("theKnots") |
197 | 204 |
gns <- getVarInEnv("gns") |
205 |
+ rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv) |
|
206 |
+ rm(obj1, obj2, obj3) |
|
198 | 207 |
|
199 | 208 |
## for mixture |
200 | 209 |
set.seed(seed) |
... | ... |
@@ -222,8 +231,10 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
222 | 231 |
S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN |
223 | 232 |
M <- log2(A[idx, k])-log2(B[idx, k]) |
224 | 233 |
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps) |
234 |
+ rm(S, M) |
|
225 | 235 |
mixtureParams[, k] <- tmp[["coef"]] |
226 | 236 |
SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
237 |
+ rm(tmp) |
|
227 | 238 |
} else { |
228 | 239 |
mixtureParams[, k] <- NA |
229 | 240 |
SNR[k] <- NA |
... | ... |
@@ -234,5 +245,7 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
234 | 245 |
close(SKW) |
235 | 246 |
close(mixtureParams) |
236 | 247 |
close(SNR) |
248 |
+ rm(list=ls()) |
|
249 |
+ gc(verbose=FALSE) |
|
237 | 250 |
TRUE |
238 | 251 |
} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45498 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,17 +1,21 @@ |
1 |
-snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){ |
|
1 |
+snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
|
2 |
+ eps=0.1, verbose=TRUE, seed=1, cdfName, sns){ |
|
2 | 3 |
if (missing(sns)) sns <- basename(filenames) |
3 |
- ##ADD CHECK TO SEE IF LOADED |
|
4 | 4 |
if (missing(cdfName)) |
5 | 5 |
cdfName <- read.celfile.header(filenames[1])$cdfName |
6 |
- ## stuffDir <- changeToCrlmmAnnotationName(cdfName) |
|
7 | 6 |
pkgname <- getCrlmmAnnotationName(cdfName) |
7 |
+ |
|
8 | 8 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
9 |
- suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
10 |
- msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
9 |
+ suggCall <- paste("library(", pkgname, |
|
10 |
+ ", lib.loc='/Altern/Lib/Loc')", |
|
11 |
+ sep="") |
|
12 |
+ msg <- paste("If", pkgname, |
|
13 |
+ "is installed on an alternative location,", |
|
14 |
+ "please load it manually by using", suggCall) |
|
11 | 15 |
message(strwrap(msg)) |
12 | 16 |
stop("Package ", pkgname, " could not be found.") |
13 |
- rm(suggCall, msg) |
|
14 | 17 |
} |
18 |
+ |
|
15 | 19 |
if(verbose) message("Loading annotations and mixture model parameters.") |
16 | 20 |
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
17 | 21 |
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
... | ... |
@@ -32,6 +36,9 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, |
32 | 36 |
mixtureParams <- matrix(0, 4, length(filenames)) |
33 | 37 |
SNR <- vector("numeric", length(filenames)) |
34 | 38 |
SKW <- vector("numeric", length(filenames)) |
39 |
+## mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames)) |
|
40 |
+## SNR <- initializeBigVector("crlmmSNR-", length(filenames), "numeric") |
|
41 |
+## SKW <- initializeBigVector("crlmmSKW-", length(filenames), "numeric") |
|
35 | 42 |
|
36 | 43 |
## This is the sample for the fitting of splines |
37 | 44 |
## BC: I like better the idea of the user passing the seed, |
... | ... |
@@ -47,10 +54,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, |
47 | 54 |
|
48 | 55 |
if(verbose){ |
49 | 56 |
message("Processing ", length(filenames), " files.") |
50 |
- if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
57 |
+ pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
51 | 58 |
} |
59 |
+ |
|
60 |
+ ##for skewness. no need to do everything |
|
61 |
+ idx2 <- sample(length(fid), 10^5) |
|
62 |
+ |
|
52 | 63 |
##We start looping throug cel files |
53 |
- idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
54 | 64 |
for(i in seq(along=filenames)){ |
55 | 65 |
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
56 | 66 |
x <- log2(y[idx2]) |
... | ... |
@@ -70,13 +80,9 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, |
70 | 80 |
mixtureParams[, i] <- tmp[["coef"]] |
71 | 81 |
SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
72 | 82 |
} |
73 |
- if (verbose) |
|
74 |
- if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
75 |
- else cat(".") |
|
83 |
+ if (verbose) setTxtProgressBar(pb, i) |
|
76 | 84 |
} |
77 |
- if (verbose) |
|
78 |
- if (getRversion() > '2.7.0') close(pb) |
|
79 |
- else cat("\n") |
|
85 |
+ if (verbose) close(pb) |
|
80 | 86 |
if (!fitMixture) SNR <- mixtureParams <- NA |
81 | 87 |
## gns comes from preprocStuff.rda |
82 | 88 |
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
... | ... |
@@ -127,5 +133,106 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1 |
127 | 133 |
return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2])) |
128 | 134 |
} |
129 | 135 |
|
136 |
+snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, |
|
137 |
+ eps=0.1, verbose=TRUE, seed=1, cdfName, sns){ |
|
138 |
+ if (missing(sns)) sns <- basename(filenames) |
|
139 |
+ if (missing(cdfName)) |
|
140 |
+ cdfName <- read.celfile.header(filenames[1])[["cdfName"]] |
|
141 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
142 |
+ stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose)) |
|
143 |
+ |
|
144 |
+ if(verbose) message("Loading annotations and mixture model parameters.") |
|
145 |
+ loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
146 |
+ pnsa <- getVarInEnv("pnsa") |
|
147 |
+ pnsb <- getVarInEnv("pnsb") |
|
148 |
+ gns <- getVarInEnv("gns") |
|
149 |
+ |
|
150 |
+ ##We will read each cel file, summarize, and run EM one by one |
|
151 |
+ ##We will save parameters of EM to use later |
|
152 |
+ if(verbose) message("Initializing objects.") |
|
153 |
+ mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double") |
|
154 |
+ SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double") |
|
155 |
+ SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double") |
|
156 |
+ |
|
157 |
+ ## This is the sample for the fitting of splines |
|
158 |
+ ## BC: I like better the idea of the user passing the seed, |
|
159 |
+ ## because this might intefere with other analyses |
|
160 |
+ ## (like what happened to GCRMA) |
|
161 |
+ ##S will hold (A+B)/2 and M will hold A-B |
|
162 |
+ ##NOTE: We actually dont need to save S. Only for pics etc... |
|
163 |
+ ##f is the correction. we save to avoid recomputing |
|
164 |
+ A <- initializeBigMatrix("crlmmA-", length(pnsa), length(filenames), "integer") |
|
165 |
+ B <- initializeBigMatrix("crlmmB-", length(pnsb), length(filenames), "integer") |
|
166 |
+ |
|
167 |
+ sampleBatches <- splitIndicesByNode(seq(along=filenames)) |
|
168 |
+ |
|
169 |
+ if(verbose) message("Processing ", length(filenames), " files.") |
|
170 |
+ |
|
171 |
+ ocLapply(sampleBatches, processCEL, filenames=filenames, |
|
172 |
+ fitMixture=fitMixture, A=A, B=B, SKW=SKW, SNR=SNR, |
|
173 |
+ mixtureParams=mixtureParams, eps=eps, seed=seed, |
|
174 |
+ mixtureSampleSize=mixtureSampleSize, pkgname=pkgname, |
|
175 |
+ neededPkgs=c("crlmm", pkgname)) |
|
176 |
+ |
|
177 |
+ list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
|
178 |
+} |
|
179 |
+ |
|
130 | 180 |
|
181 |
+processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR, |
|
182 |
+ mixtureParams, eps, seed, mixtureSampleSize, |
|
183 |
+ pkgname){ |
|
184 |
+ |
|
185 |
+ loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
186 |
+ loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
187 |
+ loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
188 |
+ autosomeIndex <- getVarInEnv("autosomeIndex") |
|
189 |
+ pnsa <- getVarInEnv("pnsa") |
|
190 |
+ pnsb <- getVarInEnv("pnsb") |
|
191 |
+ fid <- getVarInEnv("fid") |
|
192 |
+ reference <- getVarInEnv("reference") |
|
193 |
+ aIndex <- getVarInEnv("aIndex") |
|
194 |
+ bIndex <- getVarInEnv("bIndex") |
|
195 |
+ SMEDIAN <- getVarInEnv("SMEDIAN") |
|
196 |
+ theKnots <- getVarInEnv("theKnots") |
|
197 |
+ gns <- getVarInEnv("gns") |
|
198 |
+ |
|
199 |
+ ## for mixture |
|
200 |
+ set.seed(seed) |
|
201 |
+ idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
|
202 |
+ ##for skewness. no need to do everything |
|
203 |
+ idx2 <- sample(length(fid), 10^5) |
|
131 | 204 |
|
205 |
+ open(A) |
|
206 |
+ open(B) |
|
207 |
+ open(SKW) |
|
208 |
+ open(mixtureParams) |
|
209 |
+ open(SNR) |
|
210 |
+ |
|
211 |
+ for (k in i){ |
|
212 |
+ y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
213 |
+ x <- log2(y[idx2]) |
|
214 |
+ SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
215 |
+ rm(x) |
|
216 |
+ y <- normalize.quantiles.use.target(y, target=reference) |
|
217 |
+ A[, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa) |
|
218 |
+ B[, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb) |
|
219 |
+ rm(y) |
|
220 |
+ |
|
221 |
+ if(fitMixture){ |
|
222 |
+ S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN |
|
223 |
+ M <- log2(A[idx, k])-log2(B[idx, k]) |
|
224 |
+ tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps) |
|
225 |
+ mixtureParams[, k] <- tmp[["coef"]] |
|
226 |
+ SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
|
227 |
+ } else { |
|
228 |
+ mixtureParams[, k] <- NA |
|
229 |
+ SNR[k] <- NA |
|
230 |
+ } |
|
231 |
+ } |
|
232 |
+ close(A) |
|
233 |
+ close(B) |
|
234 |
+ close(SKW) |
|
235 |
+ close(mixtureParams) |
|
236 |
+ close(SNR) |
|
237 |
+ TRUE |
|
238 |
+} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45221 bc3139a8-67e5-0310-9ffc-ced21a209358
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45171 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -42,11 +42,8 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, |
42 | 42 |
##S will hold (A+B)/2 and M will hold A-B |
43 | 43 |
##NOTE: We actually dont need to save S. Only for pics etc... |
44 | 44 |
##f is the correction. we save to avoid recomputing |
45 |
- ##**RS** |
|
46 |
- ##A <- matrix(as.integer(0), length(pnsa), length(filenames)) |
|
47 |
- ##B <- matrix(as.integer(0), length(pnsb), length(filenames)) |
|
48 |
- A <- initializeBigMatrix(name="tmpA", length(pnsa), length(filenames)) |
|
49 |
- B <- initializeBigMatrix(name="tmpB", length(pnsa), length(filenames)) |
|
45 |
+ A <- matrix(as.integer(0), length(pnsa), length(filenames)) |
|
46 |
+ B <- matrix(as.integer(0), length(pnsb), length(filenames)) |
|
50 | 47 |
|
51 | 48 |
if(verbose){ |
52 | 49 |
message("Processing ", length(filenames), " files.") |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45165 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -42,8 +42,11 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, |
42 | 42 |
##S will hold (A+B)/2 and M will hold A-B |
43 | 43 |
##NOTE: We actually dont need to save S. Only for pics etc... |
44 | 44 |
##f is the correction. we save to avoid recomputing |
45 |
- A <- matrix(as.integer(0), length(pnsa), length(filenames)) |
|
46 |
- B <- matrix(as.integer(0), length(pnsb), length(filenames)) |
|
45 |
+ ##**RS** |
|
46 |
+ ##A <- matrix(as.integer(0), length(pnsa), length(filenames)) |
|
47 |
+ ##B <- matrix(as.integer(0), length(pnsb), length(filenames)) |
|
48 |
+ A <- initializeBigMatrix(name="tmpA", length(pnsa), length(filenames)) |
|
49 |
+ B <- initializeBigMatrix(name="tmpB", length(pnsa), length(filenames)) |
|
47 | 50 |
|
48 | 51 |
if(verbose){ |
49 | 52 |
message("Processing ", length(filenames), " files.") |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45126 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,129 +1,85 @@ |
1 |
-setMethod("snprma", "AffymetrixAlleleSet", function(object, filenames){ |
|
2 |
- snprma.AffymetrixAlleleSet(object, filenames) |
|
3 |
-}) |
|
4 |
-setMethod("snprma", "NChannelSet", function(object, ops, ...){ |
|
5 |
- object <- preprocessInfinium2(object, ops) |
|
6 |
-}) |
|
7 |
- |
|
8 |
-snprma.AffymetrixAlleleSet <- function(object, filenames){ |
|
9 |
-## filenames, |
|
10 |
-## mixtureSampleSize=10^5, |
|
11 |
-## fitMixture=FALSE, |
|
12 |
-## eps=0.1, |
|
13 |
-## verbose=TRUE, |
|
14 |
-## seed=1, |
|
15 |
-## cdfName, |
|
16 |
-## AFile="A.rda", |
|
17 |
-## BFile="B.rda"){ |
|
18 |
- ops <- crlmmOptions(object) |
|
19 |
- rmaOps <- ops$snprmaOpts |
|
20 |
- mixtureSampleSize <- rmaOps$mixtureSampleSize |
|
21 |
- fitMixture <- rmaOps$fitMixture |
|
22 |
- eps <- rmaOps$eps |
|
23 |
- verbose <- ops$verbose |
|
24 |
- seed <- ops$seed |
|
25 |
- cdfName <- annotation(object) |
|
26 |
- A <- A(object) |
|
27 |
- B <- B(object) |
|
28 |
- sns <- basename(filenames) |
|
29 |
- ##ADD CHECK TO SEE IF LOADED |
|
30 |
- if (missing(cdfName)) |
|
31 |
- cdfName <- read.celfile.header(filenames[1])$cdfName |
|
32 |
- ## stuffDir <- changeToCrlmmAnnotationName(cdfName) |
|
33 |
- pkgname <- getCrlmmAnnotationName(cdfName) |
|
34 |
- if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
35 |
- suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
36 |
- msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
37 |
- message(strwrap(msg)) |
|
38 |
- stop("Package ", pkgname, " could not be found.") |
|
39 |
- rm(suggCall, msg) |
|
40 |
- } |
|
41 |
- if(verbose) message("Loading annotations and mixture model parameters.") |
|
42 |
- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
43 |
- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
44 |
- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
45 |
- autosomeIndex <- getVarInEnv("autosomeIndex") |
|
46 |
- pnsa <- getVarInEnv("pnsa") |
|
47 |
- pnsb <- getVarInEnv("pnsb") |
|
48 |
- fid <- getVarInEnv("fid") |
|
49 |
- reference <- getVarInEnv("reference") |
|
50 |
- aIndex <- getVarInEnv("aIndex") |
|
51 |
- bIndex <- getVarInEnv("bIndex") |
|
52 |
- SMEDIAN <- getVarInEnv("SMEDIAN") |
|
53 |
- theKnots <- getVarInEnv("theKnots") |
|
54 |
- gns <- getVarInEnv("gns") |
|
55 |
- snpIndex <- which(isSnp(object)==1) |
|
56 |
- stopifnot(identical(featureNames(object)[snpIndex], gns)) |
|
1 |
+snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){ |
|
2 |
+ if (missing(sns)) sns <- basename(filenames) |
|
3 |
+ ##ADD CHECK TO SEE IF LOADED |
|
4 |
+ if (missing(cdfName)) |
|
5 |
+ cdfName <- read.celfile.header(filenames[1])$cdfName |
|
6 |
+ ## stuffDir <- changeToCrlmmAnnotationName(cdfName) |
|
7 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
8 |
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
9 |
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
10 |
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
11 |
+ message(strwrap(msg)) |
|
12 |
+ stop("Package ", pkgname, " could not be found.") |
|
13 |
+ rm(suggCall, msg) |
|
14 |
+ } |
|
15 |
+ if(verbose) message("Loading annotations and mixture model parameters.") |
|
16 |
+ loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
17 |
+ loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
18 |
+ loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
19 |
+ autosomeIndex <- getVarInEnv("autosomeIndex") |
|
20 |
+ pnsa <- getVarInEnv("pnsa") |
|
21 |
+ pnsb <- getVarInEnv("pnsb") |
|
22 |
+ fid <- getVarInEnv("fid") |
|
23 |
+ reference <- getVarInEnv("reference") |
|
24 |
+ aIndex <- getVarInEnv("aIndex") |
|
25 |
+ bIndex <- getVarInEnv("bIndex") |
|
26 |
+ SMEDIAN <- getVarInEnv("SMEDIAN") |
|
27 |
+ theKnots <- getVarInEnv("theKnots") |
|
28 |
+ gns <- getVarInEnv("gns") |
|
29 |
+ |
|
30 |
+ ##We will read each cel file, summarize, and run EM one by one |
|
31 |
+ ##We will save parameters of EM to use later |
|
32 |
+ mixtureParams <- matrix(0, 4, length(filenames)) |
|
33 |
+ SNR <- vector("numeric", length(filenames)) |
|
34 |
+ SKW <- vector("numeric", length(filenames)) |
|
57 | 35 |
|
58 |
- ##We will read each cel file, summarize, and run EM one by one |
|
59 |
- ##We will save parameters of EM to use later |
|
60 |
- mixtureParams <- matrix(0, 4, length(filenames)) |
|
61 |
- colnames(mixtureParams) <- basename(filenames) |
|
62 |
- SNR <- vector("numeric", length(filenames)) |
|
63 |
- SKW <- vector("numeric", length(filenames)) |
|
36 |
+ ## This is the sample for the fitting of splines |
|
37 |
+ ## BC: I like better the idea of the user passing the seed, |
|
38 |
+ ## because this might intefere with other analyses |
|
39 |
+ ## (like what happened to GCRMA) |
|
40 |
+ set.seed(seed) |
|
41 |
+ idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
|
42 |
+ ##S will hold (A+B)/2 and M will hold A-B |
|
43 |
+ ##NOTE: We actually dont need to save S. Only for pics etc... |
|
44 |
+ ##f is the correction. we save to avoid recomputing |
|
45 |
+ A <- matrix(as.integer(0), length(pnsa), length(filenames)) |
|
46 |
+ B <- matrix(as.integer(0), length(pnsb), length(filenames)) |
|
64 | 47 |
|
65 |
- ## This is the sample for the fitting of splines |
|
66 |
- ## BC: I like better the idea of the user passing the seed, |
|
67 |
- ## because this might intefere with other analyses |
|
68 |
- ## (like what happened to GCRMA) |
|
69 |
- set.seed(seed) |
|
70 |
- idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
|
71 |
- ##S will hold (A+B)/2 and M will hold A-B |
|
72 |
- ##NOTE: We actually dont need to save S. Only for pics etc... |
|
73 |
- ##f is the correction. we save to avoid recomputing |
|
74 |
- if(verbose){ |
|
75 |
- message("Processing ", length(filenames), " files.") |
|
76 |
- if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
77 |
- } |
|
78 |
- ##We start looping through cel files |
|
79 |
- idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
80 |
- message("Quantile normalizing to HapMap target distribution") |
|
81 |
- for(i in seq(along=filenames)){ |
|
82 |
- y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
83 |
- x <- log2(y[idx2]) |
|
84 |
- SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
85 |
- rm(x) |
|
86 |
- y <- normalize.quantiles.use.target(y, target=reference) |
|
87 |
- A[1:length(pnsa), i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa) |
|
88 |
- B[1:length(pnsa), i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb) |
|
89 |
- ##Now to fit the EM |
|
90 |
- if(fitMixture){ |
|
91 |
- S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN |
|
92 |
- M <- log2(A[idx, i])-log2(B[idx, i]) |
|
93 |
- |
|
94 |
- ##we need to test the choice of eps.. it is not the max diff between funcs |
|
95 |
- tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps) |
|
96 |
- |
|
97 |
- mixtureParams[, i] <- tmp[["coef"]] |
|
98 |
- SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
|
99 |
- } |
|
100 |
- if (verbose) |
|
101 |
- if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
102 |
- else cat(".") |
|
103 |
- } |
|
104 |
-## A(object) <- A |
|
105 |
-## B(object) <- B |
|
106 |
- object@assayData[["alleleA"]] <- A |
|
107 |
- object@assayData[["alleleB"]] <- B |
|
108 |
- ##closeObject(object, A) |
|
109 |
- ##closeObject(object, B) |
|
110 |
- ##if(isPackageLoaded("ff")) { close(A); close(B)} else {save(A, file=AFile); save(B, file=BFile)} |
|
111 |
- if (verbose) |
|
112 |
- if (getRversion() > '2.7.0') close(pb) else cat("\n") |
|
113 |
- ##else cat("\n") |
|
114 |
- if (!fitMixture) SNR <- mixtureParams <- NA |
|
115 |
- sampleStats <- data.frame(SKW=SKW, |
|
116 |
- SNR=SNR) |
|
117 |
-## batch=ops$cnOpts$batch) |
|
118 |
- crlmmOpts <- crlmmOptions(object)$crlmmOpts |
|
119 |
- crlmmOpts$mixtureParams <- mixtureParams |
|
120 |
- crlmmOptions(object)$crlmmOpts <- crlmmOpts |
|
121 |
- pD <- new("AnnotatedDataFrame", |
|
122 |
- data=sampleStats, |
|
123 |
- varMetadata=data.frame(labelDescription=colnames(sampleStats))) |
|
124 |
- sampleNames(pD) <- sampleNames(object) |
|
125 |
- phenoData(object) <- pD |
|
126 |
- return(object) |
|
48 |
+ if(verbose){ |
|
49 |
+ message("Processing ", length(filenames), " files.") |
|
50 |
+ if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
51 |
+ } |
|
52 |
+ ##We start looping throug cel files |
|
53 |
+ idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
54 |
+ for(i in seq(along=filenames)){ |
|
55 |
+ y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
56 |
+ x <- log2(y[idx2]) |
|
57 |
+ SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
58 |
+ rm(x) |
|
59 |
+ y <- normalize.quantiles.use.target(y, target=reference) |
|
60 |
+ A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa) |
|
61 |
+ B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb) |
|
62 |
+ ##Now to fit the EM |
|
63 |
+ if(fitMixture){ |
|
64 |
+ S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN |
|
65 |
+ M <- log2(A[idx, i])-log2(B[idx, i]) |
|
66 |
+ |
|
67 |
+ ##we need to test the choice of eps.. it is not the max diff between funcs |
|
68 |
+ tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps) |
|
69 |
+ |
|
70 |
+ mixtureParams[, i] <- tmp[["coef"]] |
|
71 |
+ SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2) |
|
72 |
+ } |
|
73 |
+ if (verbose) |
|
74 |
+ if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
|
75 |
+ else cat(".") |
|
76 |
+ } |
|
77 |
+ if (verbose) |
|
78 |
+ if (getRversion() > '2.7.0') close(pb) |
|
79 |
+ else cat("\n") |
|
80 |
+ if (!fitMixture) SNR <- mixtureParams <- NA |
|
81 |
+ ## gns comes from preprocStuff.rda |
|
82 |
+ list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName) |
|
127 | 83 |
} |
128 | 84 |
|
129 | 85 |
fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45083 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,7 +1,31 @@ |
1 |
-snprma <- function(filenames, mixtureSampleSize=10^5, |
|
2 |
- fitMixture=FALSE, eps=0.1, verbose=TRUE, |
|
3 |
- seed=1, cdfName, sns, AFile="A.rda", BFile="B.rda"){ |
|
4 |
- if (missing(sns)) sns <- basename(filenames) |
|
1 |
+setMethod("snprma", "AffymetrixAlleleSet", function(object, filenames){ |
|
2 |
+ snprma.AffymetrixAlleleSet(object, filenames) |
|
3 |
+}) |
|
4 |
+setMethod("snprma", "NChannelSet", function(object, ops, ...){ |
|
5 |
+ object <- preprocessInfinium2(object, ops) |
|
6 |
+}) |
|
7 |
+ |
|
8 |
+snprma.AffymetrixAlleleSet <- function(object, filenames){ |
|
9 |
+## filenames, |
|
10 |
+## mixtureSampleSize=10^5, |
|
11 |
+## fitMixture=FALSE, |
|
12 |
+## eps=0.1, |
|
13 |
+## verbose=TRUE, |
|
14 |
+## seed=1, |
|
15 |
+## cdfName, |
|
16 |
+## AFile="A.rda", |
|
17 |
+## BFile="B.rda"){ |
|
18 |
+ ops <- crlmmOptions(object) |
|
19 |
+ rmaOps <- ops$snprmaOpts |
|
20 |
+ mixtureSampleSize <- rmaOps$mixtureSampleSize |
|
21 |
+ fitMixture <- rmaOps$fitMixture |
|
22 |
+ eps <- rmaOps$eps |
|
23 |
+ verbose <- ops$verbose |
|
24 |
+ seed <- ops$seed |
|
25 |
+ cdfName <- annotation(object) |
|
26 |
+ A <- A(object) |
|
27 |
+ B <- B(object) |
|
28 |
+ sns <- basename(filenames) |
|
5 | 29 |
##ADD CHECK TO SEE IF LOADED |
6 | 30 |
if (missing(cdfName)) |
7 | 31 |
cdfName <- read.celfile.header(filenames[1])$cdfName |
... | ... |
@@ -28,10 +52,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5, |
28 | 52 |
SMEDIAN <- getVarInEnv("SMEDIAN") |
29 | 53 |
theKnots <- getVarInEnv("theKnots") |
30 | 54 |
gns <- getVarInEnv("gns") |
55 |
+ snpIndex <- which(isSnp(object)==1) |
|
56 |
+ stopifnot(identical(featureNames(object)[snpIndex], gns)) |
|
31 | 57 |
|
32 | 58 |
##We will read each cel file, summarize, and run EM one by one |
33 | 59 |
##We will save parameters of EM to use later |
34 | 60 |
mixtureParams <- matrix(0, 4, length(filenames)) |
61 |
+ colnames(mixtureParams) <- basename(filenames) |
|
35 | 62 |
SNR <- vector("numeric", length(filenames)) |
36 | 63 |
SKW <- vector("numeric", length(filenames)) |
37 | 64 |
|
... | ... |
@@ -44,23 +71,6 @@ snprma <- function(filenames, mixtureSampleSize=10^5, |
44 | 71 |
##S will hold (A+B)/2 and M will hold A-B |
45 | 72 |
##NOTE: We actually dont need to save S. Only for pics etc... |
46 | 73 |
##f is the correction. we save to avoid recomputing |
47 |
- |
|
48 |
- path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep="")) |
|
49 |
- load(file.path(path, "cnProbes.rda")) |
|
50 |
- cnProbes <- get("cnProbes") |
|
51 |
- nr <- length(pnsa) + nrow(cnProbes) |
|
52 |
- nc <- length(filenames) |
|
53 |
- dns <- list(c(gns, rownames(cnProbes)), sns) |
|
54 |
- if(isPackageLoaded("ff")){ |
|
55 |
- message("ff package is loaded. Creating ff objects for storing normalized and summarized intensities") |
|
56 |
- A <- initializeBigMatrix(nr, nc) |
|
57 |
- B <- initializeBigMatrix(nr, nc) |
|
58 |
- open(A) |
|
59 |
- open(B) |
|
60 |
- } else { |
|
61 |
- A <- matrix(0L, nr, nc, dimnames=dns) |
|
62 |
- B <- matrix(0L, nr, nc, dimnames=dns) |
|
63 |
- } |
|
64 | 74 |
if(verbose){ |
65 | 75 |
message("Processing ", length(filenames), " files.") |
66 | 76 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
... | ... |
@@ -91,15 +101,29 @@ snprma <- function(filenames, mixtureSampleSize=10^5, |
91 | 101 |
if (getRversion() > '2.7.0') setTxtProgressBar(pb, i) |
92 | 102 |
else cat(".") |
93 | 103 |
} |
94 |
- if(isPackageLoaded("ff")) { close(A); close(B)} |
|
104 |
+## A(object) <- A |
|
105 |
+## B(object) <- B |
|
106 |
+ object@assayData[["alleleA"]] <- A |
|
107 |
+ object@assayData[["alleleB"]] <- B |
|
108 |
+ ##closeObject(object, A) |
|
109 |
+ ##closeObject(object, B) |
|
110 |
+ ##if(isPackageLoaded("ff")) { close(A); close(B)} else {save(A, file=AFile); save(B, file=BFile)} |
|
95 | 111 |
if (verbose) |
96 |
- if (getRversion() > '2.7.0') close(pb) |
|
97 |
- else cat("\n") |
|
112 |
+ if (getRversion() > '2.7.0') close(pb) else cat("\n") |
|
113 |
+ ##else cat("\n") |
|
98 | 114 |
if (!fitMixture) SNR <- mixtureParams <- NA |
99 |
- save(A, file=AFile) |
|
100 |
- save(B, file=BFile) |
|
101 |
- return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName, |
|
102 |
- cnnames=rownames(cnProbes))) |
|
115 |
+ sampleStats <- data.frame(SKW=SKW, |
|
116 |
+ SNR=SNR) |
|
117 |
+## batch=ops$cnOpts$batch) |
|
118 |
+ crlmmOpts <- crlmmOptions(object)$crlmmOpts |
|
119 |
+ crlmmOpts$mixtureParams <- mixtureParams |
|
120 |
+ crlmmOptions(object)$crlmmOpts <- crlmmOpts |
|
121 |
+ pD <- new("AnnotatedDataFrame", |
|
122 |
+ data=sampleStats, |
|
123 |
+ varMetadata=data.frame(labelDescription=colnames(sampleStats))) |
|
124 |
+ sampleNames(pD) <- sampleNames(object) |
|
125 |
+ phenoData(object) <- pD |
|
126 |
+ return(object) |
|
103 | 127 |
} |
104 | 128 |
|
105 | 129 |
fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44783 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -50,13 +50,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5, |
50 | 50 |
cnProbes <- get("cnProbes") |
51 | 51 |
nr <- length(pnsa) + nrow(cnProbes) |
52 | 52 |
nc <- length(filenames) |
53 |
- dns <- list(c(gns, rownames(cnProbes)), sns) |
|
53 |
+ dns <- list(c(gns, rownames(cnProbes)), sns) |
|
54 | 54 |
if(isPackageLoaded("ff")){ |
55 | 55 |
message("ff package is loaded. Creating ff objects for storing normalized and summarized intensities") |
56 |
- A <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns, |
|
57 |
- overwrite=TRUE) |
|
58 |
- B <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns, |
|
59 |
- overwrite=TRUE) |
|
56 |
+ A <- initializeBigMatrix(nr, nc) |
|
57 |
+ B <- initializeBigMatrix(nr, nc) |
|
58 |
+ open(A) |
|
59 |
+ open(B) |
|
60 | 60 |
} else { |
61 | 61 |
A <- matrix(0L, nr, nc, dimnames=dns) |
62 | 62 |
B <- matrix(0L, nr, nc, dimnames=dns) |
... | ... |
@@ -98,7 +98,8 @@ snprma <- function(filenames, mixtureSampleSize=10^5, |
98 | 98 |
if (!fitMixture) SNR <- mixtureParams <- NA |
99 | 99 |
save(A, file=AFile) |
100 | 100 |
save(B, file=BFile) |
101 |
- return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)) |
|
101 |
+ return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName, |
|
102 |
+ cnnames=rownames(cnProbes))) |
|
102 | 103 |
} |
103 | 104 |
|
104 | 105 |
fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44778 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,85 +1,104 @@ |
1 |
-snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){ |
|
2 |
- if (missing(sns)) sns <- basename(filenames) |
|
3 |
- ##ADD CHECK TO SEE IF LOADED |
|
4 |
- if (missing(cdfName)) |
|
5 |
- cdfName <- read.celfile.header(filenames[1])$cdfName |
|
6 |
- ## stuffDir <- changeToCrlmmAnnotationName(cdfName) |
|
7 |
- pkgname <- getCrlmmAnnotationName(cdfName) |
|
8 |
- if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
9 |
- suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
10 |
- msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
11 |
- message(strwrap(msg)) |
|
12 |
- stop("Package ", pkgname, " could not be found.") |
|
13 |
- rm(suggCall, msg) |
|
14 |
- } |
|
15 |
- if(verbose) message("Loading annotations and mixture model parameters.") |
|
16 |
- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
17 |
- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
18 |
- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
19 |
- autosomeIndex <- getVarInEnv("autosomeIndex") |
|
20 |
- pnsa <- getVarInEnv("pnsa") |
|
21 |
- pnsb <- getVarInEnv("pnsb") |
|
22 |
- fid <- getVarInEnv("fid") |
|
23 |
- reference <- getVarInEnv("reference") |
|
24 |
- aIndex <- getVarInEnv("aIndex") |
|
25 |
- bIndex <- getVarInEnv("bIndex") |
|
26 |
- SMEDIAN <- getVarInEnv("SMEDIAN") |
|
27 |
- theKnots <- getVarInEnv("theKnots") |
|
28 |
- gns <- getVarInEnv("gns") |
|
1 |
+snprma <- function(filenames, mixtureSampleSize=10^5, |
|
2 |
+ fitMixture=FALSE, eps=0.1, verbose=TRUE, |
|
3 |
+ seed=1, cdfName, sns, AFile="A.rda", BFile="B.rda"){ |
|
4 |
+ if (missing(sns)) sns <- basename(filenames) |
|
5 |
+ ##ADD CHECK TO SEE IF LOADED |
|
6 |
+ if (missing(cdfName)) |
|
7 |
+ cdfName <- read.celfile.header(filenames[1])$cdfName |
|
8 |
+ ## stuffDir <- changeToCrlmmAnnotationName(cdfName) |
|
9 |
+ pkgname <- getCrlmmAnnotationName(cdfName) |
|
10 |
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){ |
|
11 |
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="") |
|
12 |
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall) |
|
13 |
+ message(strwrap(msg)) |
|
14 |
+ stop("Package ", pkgname, " could not be found.") |
|
15 |
+ rm(suggCall, msg) |
|
16 |
+ } |
|
17 |
+ if(verbose) message("Loading annotations and mixture model parameters.") |
|
18 |
+ loader("preprocStuff.rda", .crlmmPkgEnv, pkgname) |
|
19 |
+ loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname) |
|
20 |
+ loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname) |
|
21 |
+ autosomeIndex <- getVarInEnv("autosomeIndex") |
|
22 |
+ pnsa <- getVarInEnv("pnsa") |
|
23 |
+ pnsb <- getVarInEnv("pnsb") |
|
24 |
+ fid <- getVarInEnv("fid") |
|
25 |
+ reference <- getVarInEnv("reference") |
|
26 |
+ aIndex <- getVarInEnv("aIndex") |
|
27 |
+ bIndex <- getVarInEnv("bIndex") |
|
28 |
+ SMEDIAN <- getVarInEnv("SMEDIAN") |
|
29 |
+ theKnots <- getVarInEnv("theKnots") |
|
30 |
+ gns <- getVarInEnv("gns") |
|
29 | 31 |
|
30 |
- ##We will read each cel file, summarize, and run EM one by one |
|
31 |
- ##We will save parameters of EM to use later |
|
32 |
- mixtureParams <- matrix(0, 4, length(filenames)) |
|
33 |
- SNR <- vector("numeric", length(filenames)) |
|
34 |
- SKW <- vector("numeric", length(filenames)) |
|
32 |
+ ##We will read each cel file, summarize, and run EM one by one |
|
33 |
+ ##We will save parameters of EM to use later |
|
34 |
+ mixtureParams <- matrix(0, 4, length(filenames)) |
|
35 |
+ SNR <- vector("numeric", length(filenames)) |
|
36 |
+ SKW <- vector("numeric", length(filenames)) |
|
35 | 37 |
|
36 |
- ## This is the sample for the fitting of splines |
|
37 |
- ## BC: I like better the idea of the user passing the seed, |
|
38 |
- ## because this might intefere with other analyses |
|
39 |
- ## (like what happened to GCRMA) |
|
40 |
- set.seed(seed) |
|
41 |
- idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
|
42 |
- ##S will hold (A+B)/2 and M will hold A-B |
|
43 |
- ##NOTE: We actually dont need to save S. Only for pics etc... |
|
44 |
- ##f is the correction. we save to avoid recomputing |
|
45 |
- A <- matrix(as.integer(0), length(pnsa), length(filenames)) |
|
46 |
- B <- matrix(as.integer(0), length(pnsb), length(filenames)) |
|
38 |
+ ## This is the sample for the fitting of splines |
|
39 |
+ ## BC: I like better the idea of the user passing the seed, |
|
40 |
+ ## because this might intefere with other analyses |
|
41 |
+ ## (like what happened to GCRMA) |
|
42 |
+ set.seed(seed) |
|
43 |
+ idx <- sort(sample(autosomeIndex, mixtureSampleSize)) |
|
44 |
+ ##S will hold (A+B)/2 and M will hold A-B |
|
45 |
+ ##NOTE: We actually dont need to save S. Only for pics etc... |
|
46 |
+ ##f is the correction. we save to avoid recomputing |
|
47 | 47 |
|
48 |
- if(verbose){ |
|
49 |
- message("Processing ", length(filenames), " files.") |
|
50 |
- if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3) |
|
51 |
- } |
|
52 |
- ##We start looping throug cel files |
|
53 |
- idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything |
|
54 |
- for(i in seq(along=filenames)){ |
|
55 |
- y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid]) |
|
56 |
- x <- log2(y[idx2]) |
|
57 |
- SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3) |
|
58 |
- rm(x) |
|
59 |
- y <- normalize.quantiles.use.target(y, target=reference) |
|
60 |