Browse code

commit after bioc rebase

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52859 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 16/02/2011 16:19:11
Showing 1 changed files
... ...
@@ -188,7 +188,6 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
188 188
 processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
189 189
                        mixtureParams, eps, seed, mixtureSampleSize,
190 190
                        pkgname){
191
-
192 191
   obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
193 192
   obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
194 193
   obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
Browse code

copied processCEL to rsprocessCEL. Removed call to snprma2 from genotype function.

Goal is to initialize container for A and B of the right dimension and avoid unnecessary read / writes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@52852 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 16/02/2011 15:59:24
Showing 1 changed files
... ...
@@ -15,7 +15,7 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
15 15
     message(strwrap(msg))
16 16
     stop("Package ", pkgname, " could not be found.")
17 17
   }
18
-  
18
+
19 19
   if(verbose) message("Loading annotations and mixture model parameters.")
20 20
   loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
21 21
   loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
... ...
@@ -30,7 +30,7 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
30 30
   SMEDIAN <- getVarInEnv("SMEDIAN")
31 31
   theKnots <- getVarInEnv("theKnots")
32 32
   gns <- getVarInEnv("gns")
33
-  
33
+
34 34
   ##We will read each cel file, summarize, and run EM one by one
35 35
   ##We will save parameters of EM to use later
36 36
   mixtureParams <- matrix(0, 4, length(filenames))
... ...
@@ -39,7 +39,7 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
39 39
 ##   mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames))
40 40
 ##   SNR <- initializeBigVector("crlmmSNR-", length(filenames), "numeric")
41 41
 ##   SKW <- initializeBigVector("crlmmSKW-", length(filenames), "numeric")
42
-  
42
+
43 43
   ## This is the sample for the fitting of splines
44 44
   ## BC: I like better the idea of the user passing the seed,
45 45
   ##     because this might intefere with other analyses
... ...
@@ -51,15 +51,15 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
51 51
   ##f is the correction. we save to avoid recomputing
52 52
   A <- matrix(as.integer(0), length(pnsa), length(filenames))
53 53
   B <- matrix(as.integer(0), length(pnsb), length(filenames))
54
-  
54
+
55 55
   if(verbose){
56 56
     message("Processing ", length(filenames), " files.")
57 57
     pb <- txtProgressBar(min=0, max=length(filenames), style=3)
58 58
   }
59
-  
59
+
60 60
   ##for skewness. no need to do everything
61 61
   idx2 <- sample(length(fid), 10^5)
62
-  
62
+
63 63
   ##We start looping throug cel files
64 64
   for(i in seq(along=filenames)){
65 65
     y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
... ...
@@ -73,10 +73,10 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
73 73
     if(fitMixture){
74 74
       S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
75 75
       M <- log2(A[idx, i])-log2(B[idx, i])
76
-      
76
+
77 77
       ##we need to test the choice of eps.. it is not the max diff between funcs
78 78
       tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
79
-      
79
+
80 80
       mixtureParams[, i] <- tmp[["coef"]]
81 81
       SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
82 82
     }
... ...
@@ -97,12 +97,12 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1
97 97
   mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1)
98 98
   sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3)
99 99
   sigmas[2] <- sigmas[2]/2
100
- 
100
+
101 101
   weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2]))
102 102
   previousF1 <- -Inf
103 103
   change <- eps+1
104 104
   it <- 0
105
- 
105
+
106 106
   if(verbose) message("Max change must be under ", eps, ".")
107 107
   matS <- stupidSplineBasis(S, knots)
108 108
   while (change > eps & it < maxit){
... ...
@@ -112,19 +112,19 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1
112 112
     LogLik <- rowSums(z)
113 113
     z <- sweep(z, 1, LogLik, "/")
114 114
     probs <- colMeans(z)
115
- 
115
+
116 116
     ## M
117 117
     fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M))
118
- 
118
+
119 119
     fit2 <- sum(z[, 2]*M)/sum(z[, 2])
120 120
     F1 <- matS%*%fit1
121 121
     sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1]))
122 122
     sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2]))
123
- 
123
+
124 124
     weights[, 1] <- dnorm(M, F1, sigmas[1])
125 125
     weights[, 2] <- dnorm(M, fit2, sigmas[2])
126 126
     weights[, 3] <- dnorm(M, -F1, sigmas[3])
127
-    
127
+
128 128
     change <- max(abs(F1-previousF1))
129 129
     previousF1 <- F1
130 130
     if(verbose) message("Iter ", it, ": ", change, ".")
... ...
@@ -140,7 +140,7 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
140 140
     cdfName <- read.celfile.header(filenames[1])[["cdfName"]]
141 141
   pkgname <- getCrlmmAnnotationName(cdfName)
142 142
   stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
143
-  
143
+
144 144
   if(verbose) message("Loading annotations and mixture model parameters.")
145 145
   obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
146 146
   pnsa <- getVarInEnv("pnsa")
... ...
@@ -148,14 +148,14 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
148 148
   gns <- getVarInEnv("gns")
149 149
   rm(list=obj, envir=.crlmmPkgEnv)
150 150
   rm(obj)
151
-  
151
+
152 152
   ##We will read each cel file, summarize, and run EM one by one
153 153
   ##We will save parameters of EM to use later
154 154
   if(verbose) message("Initializing objects.")
155 155
   mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
156 156
   SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double")
157 157
   SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double")
158
-  
158
+
159 159
   ## This is the sample for the fitting of splines
160 160
   ## BC: I like better the idea of the user passing the seed,
161 161
   ##     because this might intefere with other analyses
... ...
@@ -180,7 +180,7 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
180 180
   close(SKW)
181 181
   close(A)
182 182
   close(B)
183
-  
183
+
184 184
   list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
185 185
 }
186 186
 
... ...
@@ -188,7 +188,7 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
188 188
 processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
189 189
                        mixtureParams, eps, seed, mixtureSampleSize,
190 190
                        pkgname){
191
-  
191
+
192 192
   obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
193 193
   obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
194 194
   obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
... ...
@@ -226,7 +226,7 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
226 226
     A[, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
227 227
     B[, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
228 228
     rm(y)
229
-    
229
+
230 230
     if(fitMixture){
231 231
       S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
232 232
       M <- log2(A[idx, k])-log2(B[idx, k])
Browse code

Fixed problem with memory management

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@46584 bc3139a8-67e5-0310-9ffc-ced21a209358

Benilton Carvalho authored on 27/04/2010 14:40:30
Showing 1 changed files
... ...
@@ -142,10 +142,12 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
142 142
   stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
143 143
   
144 144
   if(verbose) message("Loading annotations and mixture model parameters.")
145
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
145
+  obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
146 146
   pnsa <- getVarInEnv("pnsa")
147 147
   pnsb <- getVarInEnv("pnsb")
148 148
   gns <- getVarInEnv("gns")
149
+  rm(list=obj, envir=.crlmmPkgEnv)
150
+  rm(obj)
149 151
   
150 152
   ##We will read each cel file, summarize, and run EM one by one
151 153
   ##We will save parameters of EM to use later
... ...
@@ -173,6 +175,11 @@ snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
173 175
            mixtureParams=mixtureParams, eps=eps, seed=seed,
174 176
            mixtureSampleSize=mixtureSampleSize, pkgname=pkgname,
175 177
            neededPkgs=c("crlmm", pkgname))
178
+  close(mixtureParams)
179
+  close(SNR)
180
+  close(SKW)
181
+  close(A)
182
+  close(B)
176 183
   
177 184
   list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
178 185
 }
... ...
@@ -182,9 +189,9 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
182 189
                        mixtureParams, eps, seed, mixtureSampleSize,
183 190
                        pkgname){
184 191
   
185
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
186
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
187
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
192
+  obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
193
+  obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
194
+  obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
188 195
   autosomeIndex <- getVarInEnv("autosomeIndex")
189 196
   pnsa <- getVarInEnv("pnsa")
190 197
   pnsb <- getVarInEnv("pnsb")
... ...
@@ -195,6 +202,8 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
195 202
   SMEDIAN <- getVarInEnv("SMEDIAN")
196 203
   theKnots <- getVarInEnv("theKnots")
197 204
   gns <- getVarInEnv("gns")
205
+  rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv)
206
+  rm(obj1, obj2, obj3)
198 207
 
199 208
   ## for mixture
200 209
   set.seed(seed)
... ...
@@ -222,8 +231,10 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
222 231
       S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
223 232
       M <- log2(A[idx, k])-log2(B[idx, k])
224 233
       tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
234
+      rm(S, M)
225 235
       mixtureParams[, k] <- tmp[["coef"]]
226 236
       SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
237
+      rm(tmp)
227 238
     } else {
228 239
       mixtureParams[, k] <- NA
229 240
       SNR[k] <- NA
... ...
@@ -234,5 +245,7 @@ processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
234 245
   close(SKW)
235 246
   close(mixtureParams)
236 247
   close(SNR)
248
+  rm(list=ls())
249
+  gc(verbose=FALSE)
237 250
   TRUE
238 251
 }
Browse code

HPC for snprma/crlmm on Affy

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45498 bc3139a8-67e5-0310-9ffc-ced21a209358

Benilton Carvalho authored on 25/03/2010 13:14:54
Showing 1 changed files
... ...
@@ -1,17 +1,21 @@
1
-snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
1
+snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
2
+                   eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
2 3
   if (missing(sns)) sns <- basename(filenames)
3
-  ##ADD CHECK TO SEE IF LOADED
4 4
   if (missing(cdfName))
5 5
     cdfName <- read.celfile.header(filenames[1])$cdfName
6
-  ##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7 6
   pkgname <- getCrlmmAnnotationName(cdfName)
7
+
8 8
   if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
9
+    suggCall <- paste("library(", pkgname,
10
+                      ", lib.loc='/Altern/Lib/Loc')",
11
+                      sep="")
12
+    msg <- paste("If", pkgname,
13
+                 "is installed on an alternative location,",
14
+                 "please load it manually by using", suggCall)
11 15
     message(strwrap(msg))
12 16
     stop("Package ", pkgname, " could not be found.")
13
-    rm(suggCall, msg)
14 17
   }
18
+  
15 19
   if(verbose) message("Loading annotations and mixture model parameters.")
16 20
   loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
17 21
   loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
... ...
@@ -32,6 +36,9 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1,
32 36
   mixtureParams <- matrix(0, 4, length(filenames))
33 37
   SNR <- vector("numeric", length(filenames))
34 38
   SKW <- vector("numeric", length(filenames))
39
+##   mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames))
40
+##   SNR <- initializeBigVector("crlmmSNR-", length(filenames), "numeric")
41
+##   SKW <- initializeBigVector("crlmmSKW-", length(filenames), "numeric")
35 42
   
36 43
   ## This is the sample for the fitting of splines
37 44
   ## BC: I like better the idea of the user passing the seed,
... ...
@@ -47,10 +54,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1,
47 54
   
48 55
   if(verbose){
49 56
     message("Processing ", length(filenames), " files.")
50
-    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
57
+    pb <- txtProgressBar(min=0, max=length(filenames), style=3)
51 58
   }
59
+  
60
+  ##for skewness. no need to do everything
61
+  idx2 <- sample(length(fid), 10^5)
62
+  
52 63
   ##We start looping throug cel files
53
-  idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
54 64
   for(i in seq(along=filenames)){
55 65
     y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
56 66
     x <- log2(y[idx2])
... ...
@@ -70,13 +80,9 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1,
70 80
       mixtureParams[, i] <- tmp[["coef"]]
71 81
       SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
72 82
     }
73
-    if (verbose)
74
-      if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
75
-      else cat(".")
83
+    if (verbose) setTxtProgressBar(pb, i)
76 84
   }
77
-  if (verbose)
78
-    if (getRversion() > '2.7.0') close(pb)
79
-    else cat("\n")
85
+  if (verbose) close(pb)
80 86
   if (!fitMixture) SNR <- mixtureParams <- NA
81 87
   ## gns comes from preprocStuff.rda
82 88
   list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
... ...
@@ -127,5 +133,106 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1
127 133
  return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2]))
128 134
 }
129 135
 
136
+snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
137
+                    eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
138
+  if (missing(sns)) sns <- basename(filenames)
139
+  if (missing(cdfName))
140
+    cdfName <- read.celfile.header(filenames[1])[["cdfName"]]
141
+  pkgname <- getCrlmmAnnotationName(cdfName)
142
+  stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
143
+  
144
+  if(verbose) message("Loading annotations and mixture model parameters.")
145
+  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
146
+  pnsa <- getVarInEnv("pnsa")
147
+  pnsb <- getVarInEnv("pnsb")
148
+  gns <- getVarInEnv("gns")
149
+  
150
+  ##We will read each cel file, summarize, and run EM one by one
151
+  ##We will save parameters of EM to use later
152
+  if(verbose) message("Initializing objects.")
153
+  mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
154
+  SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double")
155
+  SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double")
156
+  
157
+  ## This is the sample for the fitting of splines
158
+  ## BC: I like better the idea of the user passing the seed,
159
+  ##     because this might intefere with other analyses
160
+  ##     (like what happened to GCRMA)
161
+  ##S will hold (A+B)/2 and M will hold A-B
162
+  ##NOTE: We actually dont need to save S. Only for pics etc...
163
+  ##f is the correction. we save to avoid recomputing
164
+  A <- initializeBigMatrix("crlmmA-", length(pnsa), length(filenames), "integer")
165
+  B <- initializeBigMatrix("crlmmB-", length(pnsb), length(filenames), "integer")
166
+
167
+  sampleBatches <- splitIndicesByNode(seq(along=filenames))
168
+
169
+  if(verbose) message("Processing ", length(filenames), " files.")
170
+
171
+  ocLapply(sampleBatches, processCEL, filenames=filenames,
172
+           fitMixture=fitMixture, A=A, B=B, SKW=SKW, SNR=SNR,
173
+           mixtureParams=mixtureParams, eps=eps, seed=seed,
174
+           mixtureSampleSize=mixtureSampleSize, pkgname=pkgname,
175
+           neededPkgs=c("crlmm", pkgname))
176
+  
177
+  list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
178
+}
179
+
130 180
 
181
+processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
182
+                       mixtureParams, eps, seed, mixtureSampleSize,
183
+                       pkgname){
184
+  
185
+  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
186
+  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
187
+  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
188
+  autosomeIndex <- getVarInEnv("autosomeIndex")
189
+  pnsa <- getVarInEnv("pnsa")
190
+  pnsb <- getVarInEnv("pnsb")
191
+  fid <- getVarInEnv("fid")
192
+  reference <- getVarInEnv("reference")
193
+  aIndex <- getVarInEnv("aIndex")
194
+  bIndex <- getVarInEnv("bIndex")
195
+  SMEDIAN <- getVarInEnv("SMEDIAN")
196
+  theKnots <- getVarInEnv("theKnots")
197
+  gns <- getVarInEnv("gns")
198
+
199
+  ## for mixture
200
+  set.seed(seed)
201
+  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
202
+  ##for skewness. no need to do everything
203
+  idx2 <- sample(length(fid), 10^5)
131 204
 
205
+  open(A)
206
+  open(B)
207
+  open(SKW)
208
+  open(mixtureParams)
209
+  open(SNR)
210
+
211
+  for (k in i){
212
+    y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
213
+    x <- log2(y[idx2])
214
+    SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3)
215
+    rm(x)
216
+    y <- normalize.quantiles.use.target(y, target=reference)
217
+    A[, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
218
+    B[, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
219
+    rm(y)
220
+    
221
+    if(fitMixture){
222
+      S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
223
+      M <- log2(A[idx, k])-log2(B[idx, k])
224
+      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
225
+      mixtureParams[, k] <- tmp[["coef"]]
226
+      SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
227
+    } else {
228
+      mixtureParams[, k] <- NA
229
+      SNR[k] <- NA
230
+    }
231
+  }
232
+  close(A)
233
+  close(B)
234
+  close(SKW)
235
+  close(mixtureParams)
236
+  close(SNR)
237
+  TRUE
238
+}
Browse code

testing crlmmIlluminaRS in cnrma-functions

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45221 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 15/03/2010 01:43:23
Showing 1 changed files
... ...
@@ -127,3 +127,5 @@ fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=1
127 127
  return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2]))
128 128
 }
129 129
 
130
+
131
+
Browse code

updated genotype function. snprma now untouched

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45171 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 11/03/2010 16:22:32
Showing 1 changed files
... ...
@@ -42,11 +42,8 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1,
42 42
   ##S will hold (A+B)/2 and M will hold A-B
43 43
   ##NOTE: We actually dont need to save S. Only for pics etc...
44 44
   ##f is the correction. we save to avoid recomputing
45
-  ##**RS**
46
-  ##A <- matrix(as.integer(0), length(pnsa), length(filenames))
47
-  ##B <- matrix(as.integer(0), length(pnsb), length(filenames))
48
-  A <- initializeBigMatrix(name="tmpA", length(pnsa), length(filenames))
49
-  B <- initializeBigMatrix(name="tmpB", length(pnsa), length(filenames))
45
+  A <- matrix(as.integer(0), length(pnsa), length(filenames))
46
+  B <- matrix(as.integer(0), length(pnsb), length(filenames))
50 47
   
51 48
   if(verbose){
52 49
     message("Processing ", length(filenames), " files.")
Browse code

added genotype function to cnrma-functions. Added AllClasses.R file to set ff_matrix and ffdf classes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45165 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 11/03/2010 11:02:23
Showing 1 changed files
... ...
@@ -42,8 +42,11 @@ snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1,
42 42
   ##S will hold (A+B)/2 and M will hold A-B
43 43
   ##NOTE: We actually dont need to save S. Only for pics etc...
44 44
   ##f is the correction. we save to avoid recomputing
45
-  A <- matrix(as.integer(0), length(pnsa), length(filenames))
46
-  B <- matrix(as.integer(0), length(pnsb), length(filenames))
45
+  ##**RS**
46
+  ##A <- matrix(as.integer(0), length(pnsa), length(filenames))
47
+  ##B <- matrix(as.integer(0), length(pnsb), length(filenames))
48
+  A <- initializeBigMatrix(name="tmpA", length(pnsa), length(filenames))
49
+  B <- initializeBigMatrix(name="tmpB", length(pnsa), length(filenames))
47 50
   
48 51
   if(verbose){
49 52
     message("Processing ", length(filenames), " files.")
Browse code

roll back to crlmm version 1.5.24

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45126 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 10/03/2010 01:27:04
Showing 1 changed files
... ...
@@ -1,129 +1,85 @@
1
-setMethod("snprma", "AffymetrixAlleleSet", function(object, filenames){
2
-	snprma.AffymetrixAlleleSet(object, filenames)
3
-})
4
-setMethod("snprma", "NChannelSet", function(object, ops, ...){
5
-	object <- preprocessInfinium2(object, ops)
6
-})
7
-
8
-snprma.AffymetrixAlleleSet <- function(object, filenames){
9
-##		   filenames,
10
-##		   mixtureSampleSize=10^5,
11
-##		   fitMixture=FALSE,
12
-##		   eps=0.1,
13
-##		   verbose=TRUE,
14
-##		   seed=1,
15
-##		   cdfName,
16
-##		   AFile="A.rda",
17
-##		   BFile="B.rda"){
18
-	ops <- crlmmOptions(object)
19
-	rmaOps <- ops$snprmaOpts
20
-	mixtureSampleSize <- rmaOps$mixtureSampleSize
21
-	fitMixture <- rmaOps$fitMixture
22
-	eps <- rmaOps$eps
23
-	verbose <- ops$verbose
24
-	seed <- ops$seed
25
-	cdfName <- annotation(object)
26
-	A <- A(object)
27
-	B <- B(object)
28
-	sns <- basename(filenames)
29
-	##ADD CHECK TO SEE IF LOADED
30
-	if (missing(cdfName))
31
-		cdfName <- read.celfile.header(filenames[1])$cdfName
32
-	##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
33
-	pkgname <- getCrlmmAnnotationName(cdfName)
34
-	if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
35
-		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
36
-		msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
37
-		message(strwrap(msg))
38
-		stop("Package ", pkgname, " could not be found.")
39
-		rm(suggCall, msg)
40
-	}
41
-	if(verbose) message("Loading annotations and mixture model parameters.")
42
-	loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
43
-	loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
44
-	loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
45
-	autosomeIndex <- getVarInEnv("autosomeIndex")
46
-	pnsa <- getVarInEnv("pnsa")
47
-	pnsb <- getVarInEnv("pnsb")
48
-	fid <- getVarInEnv("fid")
49
-	reference <- getVarInEnv("reference")
50
-	aIndex <- getVarInEnv("aIndex")
51
-	bIndex <- getVarInEnv("bIndex")
52
-	SMEDIAN <- getVarInEnv("SMEDIAN")
53
-	theKnots <- getVarInEnv("theKnots")
54
-	gns <- getVarInEnv("gns")
55
-	snpIndex <- which(isSnp(object)==1)
56
-	stopifnot(identical(featureNames(object)[snpIndex], gns))
1
+snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
2
+  if (missing(sns)) sns <- basename(filenames)
3
+  ##ADD CHECK TO SEE IF LOADED
4
+  if (missing(cdfName))
5
+    cdfName <- read.celfile.header(filenames[1])$cdfName
6
+  ##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7
+  pkgname <- getCrlmmAnnotationName(cdfName)
8
+  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
+    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
+    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
11
+    message(strwrap(msg))
12
+    stop("Package ", pkgname, " could not be found.")
13
+    rm(suggCall, msg)
14
+  }
15
+  if(verbose) message("Loading annotations and mixture model parameters.")
16
+  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
17
+  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
18
+  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
19
+  autosomeIndex <- getVarInEnv("autosomeIndex")
20
+  pnsa <- getVarInEnv("pnsa")
21
+  pnsb <- getVarInEnv("pnsb")
22
+  fid <- getVarInEnv("fid")
23
+  reference <- getVarInEnv("reference")
24
+  aIndex <- getVarInEnv("aIndex")
25
+  bIndex <- getVarInEnv("bIndex")
26
+  SMEDIAN <- getVarInEnv("SMEDIAN")
27
+  theKnots <- getVarInEnv("theKnots")
28
+  gns <- getVarInEnv("gns")
29
+  
30
+  ##We will read each cel file, summarize, and run EM one by one
31
+  ##We will save parameters of EM to use later
32
+  mixtureParams <- matrix(0, 4, length(filenames))
33
+  SNR <- vector("numeric", length(filenames))
34
+  SKW <- vector("numeric", length(filenames))
57 35
   
58
-	##We will read each cel file, summarize, and run EM one by one
59
-	##We will save parameters of EM to use later
60
-	mixtureParams <- matrix(0, 4, length(filenames))
61
-	colnames(mixtureParams) <- basename(filenames)
62
-	SNR <- vector("numeric", length(filenames))
63
-	SKW <- vector("numeric", length(filenames))
36
+  ## This is the sample for the fitting of splines
37
+  ## BC: I like better the idea of the user passing the seed,
38
+  ##     because this might intefere with other analyses
39
+  ##     (like what happened to GCRMA)
40
+  set.seed(seed)
41
+  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
42
+  ##S will hold (A+B)/2 and M will hold A-B
43
+  ##NOTE: We actually dont need to save S. Only for pics etc...
44
+  ##f is the correction. we save to avoid recomputing
45
+  A <- matrix(as.integer(0), length(pnsa), length(filenames))
46
+  B <- matrix(as.integer(0), length(pnsb), length(filenames))
64 47
   
65
-	## This is the sample for the fitting of splines
66
-	## BC: I like better the idea of the user passing the seed,
67
-	##     because this might intefere with other analyses
68
-	##     (like what happened to GCRMA)
69
-	set.seed(seed)
70
-	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
71
-	##S will hold (A+B)/2 and M will hold A-B
72
-	##NOTE: We actually dont need to save S. Only for pics etc...
73
-	##f is the correction. we save to avoid recomputing
74
-	if(verbose){
75
-		message("Processing ", length(filenames), " files.")
76
-		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
77
-	}
78
-	##We start looping through cel files
79
-	idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
80
-	message("Quantile normalizing to HapMap target distribution")
81
-	for(i in seq(along=filenames)){
82
-		y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
83
-		x <- log2(y[idx2])
84
-		SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
85
-		rm(x)
86
-		y <- normalize.quantiles.use.target(y, target=reference)
87
-		A[1:length(pnsa), i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
88
-		B[1:length(pnsa), i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
89
-		##Now to fit the EM
90
-		if(fitMixture){
91
-			S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
92
-			M <- log2(A[idx, i])-log2(B[idx, i])
93
-			
94
-			##we need to test the choice of eps.. it is not the max diff between funcs
95
-			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
96
-			
97
-			mixtureParams[, i] <- tmp[["coef"]]
98
-			SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
99
-		}
100
-		if (verbose)
101
-			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
102
-			else cat(".")
103
-	}
104
-##	A(object) <- A
105
-##	B(object) <- B
106
-	object@assayData[["alleleA"]] <- A
107
-	object@assayData[["alleleB"]] <- B
108
-	##closeObject(object, A)
109
-	##closeObject(object, B)
110
-	##if(isPackageLoaded("ff")) { close(A); close(B)} else {save(A, file=AFile); save(B, file=BFile)}
111
-	if (verbose)
112
-		if (getRversion() > '2.7.0') close(pb) else cat("\n")
113
-	##else cat("\n")
114
-	if (!fitMixture) SNR <- mixtureParams <- NA
115
-	sampleStats <- data.frame(SKW=SKW,
116
-				  SNR=SNR)
117
-##				  batch=ops$cnOpts$batch)
118
-	crlmmOpts <- crlmmOptions(object)$crlmmOpts
119
-	crlmmOpts$mixtureParams <- mixtureParams
120
-	crlmmOptions(object)$crlmmOpts <- crlmmOpts
121
-	pD <- new("AnnotatedDataFrame",
122
-		  data=sampleStats,
123
-		  varMetadata=data.frame(labelDescription=colnames(sampleStats)))
124
-	sampleNames(pD) <- sampleNames(object)
125
-	phenoData(object) <- pD
126
-	return(object)
48
+  if(verbose){
49
+    message("Processing ", length(filenames), " files.")
50
+    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
51
+  }
52
+  ##We start looping throug cel files
53
+  idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
54
+  for(i in seq(along=filenames)){
55
+    y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
56
+    x <- log2(y[idx2])
57
+    SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
58
+    rm(x)
59
+    y <- normalize.quantiles.use.target(y, target=reference)
60
+    A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
61
+    B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
62
+    ##Now to fit the EM
63
+    if(fitMixture){
64
+      S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
65
+      M <- log2(A[idx, i])-log2(B[idx, i])
66
+      
67
+      ##we need to test the choice of eps.. it is not the max diff between funcs
68
+      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
69
+      
70
+      mixtureParams[, i] <- tmp[["coef"]]
71
+      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
72
+    }
73
+    if (verbose)
74
+      if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
75
+      else cat(".")
76
+  }
77
+  if (verbose)
78
+    if (getRversion() > '2.7.0') close(pb)
79
+    else cat("\n")
80
+  if (!fitMixture) SNR <- mixtureParams <- NA
81
+  ## gns comes from preprocStuff.rda
82
+  list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
127 83
 }
128 84
 
129 85
 fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
Browse code

several updates for ff. new classes for affy/illumina processing. More s4-style code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@45083 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 08/03/2010 04:46:55
Showing 1 changed files
... ...
@@ -1,7 +1,31 @@
1
-snprma <- function(filenames, mixtureSampleSize=10^5,
2
-		   fitMixture=FALSE, eps=0.1, verbose=TRUE,
3
-		   seed=1, cdfName, sns, AFile="A.rda", BFile="B.rda"){
4
-	if (missing(sns)) sns <- basename(filenames)
1
+setMethod("snprma", "AffymetrixAlleleSet", function(object, filenames){
2
+	snprma.AffymetrixAlleleSet(object, filenames)
3
+})
4
+setMethod("snprma", "NChannelSet", function(object, ops, ...){
5
+	object <- preprocessInfinium2(object, ops)
6
+})
7
+
8
+snprma.AffymetrixAlleleSet <- function(object, filenames){
9
+##		   filenames,
10
+##		   mixtureSampleSize=10^5,
11
+##		   fitMixture=FALSE,
12
+##		   eps=0.1,
13
+##		   verbose=TRUE,
14
+##		   seed=1,
15
+##		   cdfName,
16
+##		   AFile="A.rda",
17
+##		   BFile="B.rda"){
18
+	ops <- crlmmOptions(object)
19
+	rmaOps <- ops$snprmaOpts
20
+	mixtureSampleSize <- rmaOps$mixtureSampleSize
21
+	fitMixture <- rmaOps$fitMixture
22
+	eps <- rmaOps$eps
23
+	verbose <- ops$verbose
24
+	seed <- ops$seed
25
+	cdfName <- annotation(object)
26
+	A <- A(object)
27
+	B <- B(object)
28
+	sns <- basename(filenames)
5 29
 	##ADD CHECK TO SEE IF LOADED
6 30
 	if (missing(cdfName))
7 31
 		cdfName <- read.celfile.header(filenames[1])$cdfName
... ...
@@ -28,10 +52,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5,
28 52
 	SMEDIAN <- getVarInEnv("SMEDIAN")
29 53
 	theKnots <- getVarInEnv("theKnots")
30 54
 	gns <- getVarInEnv("gns")
55
+	snpIndex <- which(isSnp(object)==1)
56
+	stopifnot(identical(featureNames(object)[snpIndex], gns))
31 57
   
32 58
 	##We will read each cel file, summarize, and run EM one by one
33 59
 	##We will save parameters of EM to use later
34 60
 	mixtureParams <- matrix(0, 4, length(filenames))
61
+	colnames(mixtureParams) <- basename(filenames)
35 62
 	SNR <- vector("numeric", length(filenames))
36 63
 	SKW <- vector("numeric", length(filenames))
37 64
   
... ...
@@ -44,23 +71,6 @@ snprma <- function(filenames, mixtureSampleSize=10^5,
44 71
 	##S will hold (A+B)/2 and M will hold A-B
45 72
 	##NOTE: We actually dont need to save S. Only for pics etc...
46 73
 	##f is the correction. we save to avoid recomputing
47
-  
48
-	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
49
-	load(file.path(path, "cnProbes.rda"))
50
-	cnProbes <- get("cnProbes")
51
-	nr <- length(pnsa) + nrow(cnProbes)
52
-	nc <- length(filenames)
53
-	dns <- list(c(gns, rownames(cnProbes)), sns)
54
-	if(isPackageLoaded("ff")){
55
-		message("ff package is loaded.  Creating ff objects for storing normalized and summarized intensities")
56
-		A <- initializeBigMatrix(nr, nc)
57
-		B <- initializeBigMatrix(nr, nc)
58
-		open(A)
59
-		open(B)
60
-	}  else {
61
-		A <- matrix(0L, nr, nc, dimnames=dns)
62
-		B <- matrix(0L, nr, nc, dimnames=dns)
63
-	}
64 74
 	if(verbose){
65 75
 		message("Processing ", length(filenames), " files.")
66 76
 		if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
... ...
@@ -91,15 +101,29 @@ snprma <- function(filenames, mixtureSampleSize=10^5,
91 101
 			if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
92 102
 			else cat(".")
93 103
 	}
94
-	if(isPackageLoaded("ff")) { close(A); close(B)}
104
+##	A(object) <- A
105
+##	B(object) <- B
106
+	object@assayData[["alleleA"]] <- A
107
+	object@assayData[["alleleB"]] <- B
108
+	##closeObject(object, A)
109
+	##closeObject(object, B)
110
+	##if(isPackageLoaded("ff")) { close(A); close(B)} else {save(A, file=AFile); save(B, file=BFile)}
95 111
 	if (verbose)
96
-		if (getRversion() > '2.7.0') close(pb)
97
-		else cat("\n")
112
+		if (getRversion() > '2.7.0') close(pb) else cat("\n")
113
+	##else cat("\n")
98 114
 	if (!fitMixture) SNR <- mixtureParams <- NA
99
-	save(A, file=AFile)
100
-	save(B, file=BFile)
101
-	return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName,
102
-		    cnnames=rownames(cnProbes)))
115
+	sampleStats <- data.frame(SKW=SKW,
116
+				  SNR=SNR)
117
+##				  batch=ops$cnOpts$batch)
118
+	crlmmOpts <- crlmmOptions(object)$crlmmOpts
119
+	crlmmOpts$mixtureParams <- mixtureParams
120
+	crlmmOptions(object)$crlmmOpts <- crlmmOpts
121
+	pD <- new("AnnotatedDataFrame",
122
+		  data=sampleStats,
123
+		  varMetadata=data.frame(labelDescription=colnames(sampleStats)))
124
+	sampleNames(pD) <- sampleNames(object)
125
+	phenoData(object) <- pD
126
+	return(object)
103 127
 }
104 128
 
105 129
 fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
Browse code

more ff support

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44783 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 22/02/2010 03:25:51
Showing 1 changed files
... ...
@@ -50,13 +50,13 @@ snprma <- function(filenames, mixtureSampleSize=10^5,
50 50
 	cnProbes <- get("cnProbes")
51 51
 	nr <- length(pnsa) + nrow(cnProbes)
52 52
 	nc <- length(filenames)
53
-	dns <- list(c(gns, rownames(cnProbes)), sns)  
53
+	dns <- list(c(gns, rownames(cnProbes)), sns)
54 54
 	if(isPackageLoaded("ff")){
55 55
 		message("ff package is loaded.  Creating ff objects for storing normalized and summarized intensities")
56
-		A <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns,
57
-			overwrite=TRUE)
58
-		B <- ff(dim=c(nr, nc), vmode="integer", finalizer="close", dimnames=dns,
59
-			overwrite=TRUE)
56
+		A <- initializeBigMatrix(nr, nc)
57
+		B <- initializeBigMatrix(nr, nc)
58
+		open(A)
59
+		open(B)
60 60
 	}  else {
61 61
 		A <- matrix(0L, nr, nc, dimnames=dns)
62 62
 		B <- matrix(0L, nr, nc, dimnames=dns)
... ...
@@ -98,7 +98,8 @@ snprma <- function(filenames, mixtureSampleSize=10^5,
98 98
 	if (!fitMixture) SNR <- mixtureParams <- NA
99 99
 	save(A, file=AFile)
100 100
 	save(B, file=BFile)
101
-	return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName))
101
+	return(list(sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName,
102
+		    cnnames=rownames(cnProbes)))
102 103
 }
103 104
 
104 105
 fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
Browse code

begin adding support for ff. added an option to use poe

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@44778 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 20/02/2010 23:39:45
Showing 1 changed files
... ...
@@ -1,85 +1,104 @@
1
-snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE, eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
2
-  if (missing(sns)) sns <- basename(filenames)
3
-  ##ADD CHECK TO SEE IF LOADED
4
-  if (missing(cdfName))
5
-    cdfName <- read.celfile.header(filenames[1])$cdfName
6
-  ##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
7
-  pkgname <- getCrlmmAnnotationName(cdfName)
8
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
9
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
10
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
11
-    message(strwrap(msg))
12
-    stop("Package ", pkgname, " could not be found.")
13
-    rm(suggCall, msg)
14
-  }
15
-  if(verbose) message("Loading annotations and mixture model parameters.")
16
-  loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
17
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
18
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
19
-  autosomeIndex <- getVarInEnv("autosomeIndex")
20
-  pnsa <- getVarInEnv("pnsa")
21
-  pnsb <- getVarInEnv("pnsb")
22
-  fid <- getVarInEnv("fid")
23
-  reference <- getVarInEnv("reference")
24
-  aIndex <- getVarInEnv("aIndex")
25
-  bIndex <- getVarInEnv("bIndex")
26
-  SMEDIAN <- getVarInEnv("SMEDIAN")
27
-  theKnots <- getVarInEnv("theKnots")
28
-  gns <- getVarInEnv("gns")
1
+snprma <- function(filenames, mixtureSampleSize=10^5,
2
+		   fitMixture=FALSE, eps=0.1, verbose=TRUE,
3
+		   seed=1, cdfName, sns, AFile="A.rda", BFile="B.rda"){
4
+	if (missing(sns)) sns <- basename(filenames)
5
+	##ADD CHECK TO SEE IF LOADED
6
+	if (missing(cdfName))
7
+		cdfName <- read.celfile.header(filenames[1])$cdfName
8
+	##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
9
+	pkgname <- getCrlmmAnnotationName(cdfName)
10
+	if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
11
+		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
12
+		msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
13
+		message(strwrap(msg))
14
+		stop("Package ", pkgname, " could not be found.")
15
+		rm(suggCall, msg)
16
+	}
17
+	if(verbose) message("Loading annotations and mixture model parameters.")
18
+	loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
19
+	loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
20
+	loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
21
+	autosomeIndex <- getVarInEnv("autosomeIndex")
22
+	pnsa <- getVarInEnv("pnsa")
23
+	pnsb <- getVarInEnv("pnsb")
24
+	fid <- getVarInEnv("fid")
25
+	reference <- getVarInEnv("reference")
26
+	aIndex <- getVarInEnv("aIndex")
27
+	bIndex <- getVarInEnv("bIndex")
28
+	SMEDIAN <- getVarInEnv("SMEDIAN")
29
+	theKnots <- getVarInEnv("theKnots")
30
+	gns <- getVarInEnv("gns")
29 31
   
30
-  ##We will read each cel file, summarize, and run EM one by one
31
-  ##We will save parameters of EM to use later
32
-  mixtureParams <- matrix(0, 4, length(filenames))
33
-  SNR <- vector("numeric", length(filenames))
34
-  SKW <- vector("numeric", length(filenames))
32
+	##We will read each cel file, summarize, and run EM one by one
33
+	##We will save parameters of EM to use later
34
+	mixtureParams <- matrix(0, 4, length(filenames))
35
+	SNR <- vector("numeric", length(filenames))
36
+	SKW <- vector("numeric", length(filenames))
35 37
   
36
-  ## This is the sample for the fitting of splines
37
-  ## BC: I like better the idea of the user passing the seed,
38
-  ##     because this might intefere with other analyses
39
-  ##     (like what happened to GCRMA)
40
-  set.seed(seed)
41
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
42
-  ##S will hold (A+B)/2 and M will hold A-B
43
-  ##NOTE: We actually dont need to save S. Only for pics etc...
44
-  ##f is the correction. we save to avoid recomputing
45
-  A <- matrix(as.integer(0), length(pnsa), length(filenames))
46
-  B <- matrix(as.integer(0), length(pnsb), length(filenames))
38
+	## This is the sample for the fitting of splines
39
+	## BC: I like better the idea of the user passing the seed,
40
+	##     because this might intefere with other analyses
41
+	##     (like what happened to GCRMA)
42
+	set.seed(seed)
43
+	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
44
+	##S will hold (A+B)/2 and M will hold A-B
45
+	##NOTE: We actually dont need to save S. Only for pics etc...
46
+	##f is the correction. we save to avoid recomputing
47 47
   
48
-  if(verbose){
49
-    message("Processing ", length(filenames), " files.")
50
-    if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=length(filenames), style=3)
51
-  }
52
-  ##We start looping throug cel files
53
-  idx2 <- sample(length(fid), 10^5) ##for skewness. no need to do everything
54
-  for(i in seq(along=filenames)){
55
-    y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
56
-    x <- log2(y[idx2])
57
-    SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
58
-    rm(x)
59
-    y <- normalize.quantiles.use.target(y, target=reference)
60