Browse code

Commit made by the Bioconductor Git-SVN bridge. Consists of 1 commit.

Commit information:

Commit id: 7a7b52509b1e09bf8d3f517244f90f9045d102ca
Commit message: Set enough.men and enough.women to FALSE by default in fit.lm3 function.
Committed by: Rob Scharpf
Author Name: Rob Scharpf
Commit date: 2014-08-25 21:44:19 -0400
Author date: 2014-08-25 21:44:19 -0400



git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@93599 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 26/08/2014 01:45:03
Showing1 changed files
... ...
@@ -1034,7 +1034,8 @@ fit.lm3 <- function(strata,
1034 1034
 		medianB.Flist <- res[["medianB"]]
1035 1035
 		NN.Flist <- res[["NN"]]
1036 1036
 		rm(res)
1037
-	}
1037
+              }
1038
+        enough.men <- enough.women <- FALSE
1038 1039
 	for(k in seq_along(batches)){
1039 1040
 		sample.index <- batches[[k]]
1040 1041
 		if(is.null(sample.index)) next()
... ...
@@ -2844,4 +2845,3 @@ calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
2844 2845
 	lrr <- integerMatrix(lrr, 100)
2845 2846
 	return(list(baf=bf, lrr=lrr))
2846 2847
 }
2847
-
Browse code

fix syntax error

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@79516 bc3139a8-67e5-0310-9ffc-ced21a209358

Dan Tenenbaum authored on 16/08/2013 22:34:26
Showing1 changed files
... ...
@@ -2845,4 +2845,3 @@ calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
2845 2845
 	return(list(baf=bf, lrr=lrr))
2846 2846
 }
2847 2847
 
2848
-truncate <- func
Browse code

merging from collab

* collab:
add warning in vignette about NAs with BafLrrSetList function
Added Human Omni Express Exome 8 v1.1b as a supported chip
updated version number of pacakge and man pages to reflect these changes
skeleton for krlmm capability added. genotype.Illumina() can now take and XY object as input
update copynumber.Rnw to use BafLrrSetList
updates to vignettes
update namespace

# Please enter a commit message to explain why this merge is necessary,
# especially if it merges an updated upstream into a topic branch.
#
# Lines starting with '#' will be ignored, and an empty message aborts
# the commit.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@79138 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 31/07/2013 01:37:34
Showing1 changed files
... ...
@@ -2787,12 +2787,12 @@ calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
2787 2787
 	centersMatrix <- lapply(centers, function(x, i, j) x[i, j], j=j, i=feature.index)
2788 2788
 	lapply(centers, close)
2789 2789
 	centersMatrix <- do.call(cbind, centersMatrix)
2790
-	centersA <- centersMatrix[, 1:3]
2791
-	centersB <- centersMatrix[, 4:6]
2790
+	centersA <- centersMatrix[, 1:3, drop=FALSE]
2791
+	centersB <- centersMatrix[, 4:6, drop=FALSE]
2792 2792
 	rm(centers, centersMatrix); gc()
2793 2793
 	centersA[centersA < 64] <- 64
2794 2794
 	centersB[centersB < 64] <- 64
2795
-	theta <- atan2(centersB, centersA) * 2/pi
2795
+	theta <- as.matrix(atan2(centersB, centersA) * 2/pi)
2796 2796
 	centersA[is.na(centersA)] <- 0
2797 2797
 	centersB[is.na(centersB)] <- 0
2798 2798
 	if(length(index.np) > 0) theta[index.np, ] <- 1
... ...
@@ -2844,3 +2844,5 @@ calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
2844 2844
 	lrr <- integerMatrix(lrr, 100)
2845 2845
 	return(list(baf=bf, lrr=lrr))
2846 2846
 }
2847
+
2848
+truncate <- func
Browse code

merge with collab branch containing fix for dqrls and bug-fix for computeRBaf that can misalign sample index with batch index (when batch is not in alphabetical order)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@71295 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 17/11/2012 15:24:46
Showing1 changed files
... ...
@@ -41,15 +41,22 @@ getFeatureData <- function(cdfName, copynumber=FALSE, genome){
41 41
 	}
42 42
 	path <- system.file("extdata", package=pkgname)
43 43
 	##multiple.builds <- length(grep("hg19", list.files(path)) > 0)
44
-	if(missing(genome)){
45
-		snp.file <- list.files(path, pattern="snpProbes_hg")
46
-		if(length(snp.file) > 1){
47
-			## use hg19
48
-			message("genome build not specified. Using build hg19 for annotation.")
49
-			snp.file <- snp.file[1]
50
-		}
51
-		genome <- gsub(".rda", "", strsplit(snp.file, "snpProbes_")[[1]][[2]])
52
-	} else snp.file <- paste("snpProbes_", genome, ".rda", sep="")
44
+	snp.file <- list.files(path, pattern="snpProbes_hg")
45
+	if(length(snp.file)==0){
46
+		no.build <- TRUE
47
+		snp.file <- list.files(path, pattern="snpProbes.rda")
48
+	} else {
49
+		no.build <- FALSE
50
+		if(missing(genome)){
51
+			snp.file <- list.files(path, pattern="snpProbes_hg")
52
+			if(length(snp.file) > 1){
53
+				## use hg19
54
+				message("genome build not specified. Using build hg19 for annotation.")
55
+				snp.file <- snp.file[1]
56
+			}
57
+			genome <- gsub(".rda", "", strsplit(snp.file, "snpProbes_")[[1]][[2]])
58
+		} else snp.file <- paste("snpProbes_", genome, ".rda", sep="")
59
+	}
53 60
 ##	if(!multiple.builds){
54 61
 ##		load(file.path(path, "snpProbes.rda"))
55 62
 ##	} else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
... ...
@@ -58,7 +65,9 @@ getFeatureData <- function(cdfName, copynumber=FALSE, genome){
58 65
 	## if we use a different build we may throw out a number of snps...
59 66
 	snpProbes <- snpProbes[rownames(snpProbes) %in% gns, ]
60 67
 	if(copynumber){
61
-		cn.file <- paste("cnProbes_", genome, ".rda", sep="")
68
+		if(!no.build){
69
+			cn.file <- paste("cnProbes_", genome, ".rda", sep="")
70
+		} else cn.file <- "cnProbes.rda"
62 71
 		load(file.path(path, cn.file))
63 72
 		##		if(!multiple.builds){
64 73
 		##			load(file.path(path, "cnProbes.rda"))
... ...
@@ -307,11 +316,11 @@ genotype <- function(filenames,
307 316
 	if(!(is(A(cnSet), "ff") || is(A(cnSet), "ffdf"))){
308 317
 		stop("The ff package is required for this function.")
309 318
 	}
310
-	cnSet <- snprmaAffy(cnSet,
311
-			    mixtureSampleSize=mixtureSampleSize,
312
-			    eps=eps,
313
-			    seed=seed,
314
-			    verbose=verbose)
319
+	ok <- snprmaAffy(cnSet,
320
+			 mixtureSampleSize=mixtureSampleSize,
321
+			 eps=eps,
322
+			 seed=seed,
323
+			 verbose=verbose)
315 324
 	ok <- cnrmaAffy(cnSet=cnSet,
316 325
 			seed=seed,
317 326
 			verbose=verbose)
... ...
@@ -434,17 +443,20 @@ rowCors <- function(x, y, ...){
434 443
 	return(covar/(sd.x*sd.y))
435 444
 }
436 445
 
437
-dqrlsWrapper <- function(x, y, wts, tol=1e-7){
438
-	n <- NROW(y)
439
-	p <- ncol(x)
440
-	ny <- NCOL(y)
441
-	.Fortran("dqrls", qr=x*wts, n=n, p=p, y=y * wts, ny=ny,
442
-		 tol=as.double(tol), coefficients=mat.or.vec(p, ny),
443
-		 residuals=y, effects=mat.or.vec(n, ny),
444
-		 rank=integer(1L), pivot=1L:p, qraux=double(p),
445
-		 work=double(2 * p), PACKAGE="base")[["coefficients"]]
446
-}
446
+## dqrlsWrapper <- function(x, y, wts, tol=1e-7){
447
+## 	n <- NROW(y)
448
+## 	p <- ncol(x)
449
+## 	ny <- NCOL(y)
450
+## 	.Fortran("dqrls", qr=x*wts, n=n, p=p, y=y * wts, ny=ny,
451
+## 		 tol=as.double(tol), coefficients=mat.or.vec(p, ny),
452
+## 		 residuals=y, effects=mat.or.vec(n, ny),
453
+## 		 rank=integer(1L), pivot=1L:p, qraux=double(p),
454
+## 		 work=double(2 * p), PACKAGE="base")[["coefficients"]]
455
+## }
456
+
447 457
 
458
+dqrlsWrapper <- function(x, y, wts, ...)
459
+    fastLmPure(X=x*wts, y=y*wts, method=3)[['coefficients']]
448 460
 
449 461
 fit.wls <- function(NN, sigma, allele, Y, autosome, X){
450 462
 	Np <- NN
... ...
@@ -730,7 +742,8 @@ fit.lm2 <- function(strata,
730 742
 		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
731 743
 		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
732 744
 	}
733
-	snp.index <- sample(match(fns.noflags, featureNames(object)), 5000)
745
+	index.flag <- match(fns.noflags, featureNames(object))
746
+	snp.index <- sample(index.flag, min(c(length(index.flag), 5000)))
734 747
 	##
735 748
 	nuA.np <- as.matrix(nuA(object)[marker.index, batch.index])
736 749
 	phiA.np <- as.matrix(phiA(object)[marker.index, batch.index])
... ...
@@ -1191,7 +1204,8 @@ fit.lm4 <- function(strata,
1191 1204
 		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
1192 1205
 		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
1193 1206
 	}
1194
-	snp.index <- sample(match(fns.noflags, featureNames(object)), 10000)
1207
+	index.flag <- match(fns.noflags, featureNames(object))
1208
+	snp.index <- sample(index.flag, min(c(length(index.flag), 10000)))
1195 1209
 	##
1196 1210
 	N.AA <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
1197 1211
 	N.AB <- as.matrix(N.AB(object)[snp.index, batch.index, drop=FALSE])
Browse code

Merge branch 'collab'

* collab:
Update AffyGW.pdf and IlluminaPreprocessCN.pdf
crlmmIlluminaV2 calls crlmmGT. comment call to crlmmGT2
Update AffyGW vignette to step through construction of CNSet, normalization, and genotyping
v 1.15.27: Fix bug in crlmmGT2. clean up comments in crlmmIllumina.
update getFeatureData

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@69561 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 19/09/2012 12:31:51
Showing1 changed files
... ...
@@ -21,7 +21,8 @@ getProtocolData.Affy <- function(filenames){
21 21
 ##		  return(gns)
22 22
 ##	  })
23 23
 
24
-getFeatureData <- function(cdfName, copynumber=FALSE, genome=genome){
24
+
25
+getFeatureData <- function(cdfName, copynumber=FALSE, genome){
25 26
 	pkgname <- getCrlmmAnnotationName(cdfName)
26 27
 	if(!require(pkgname, character.only=TRUE)){
27 28
 		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
... ...
@@ -39,17 +40,29 @@ getFeatureData <- function(cdfName, copynumber=FALSE, genome=genome){
39 40
 		pkgname <- paste(pkgname, "Crlmm", sep="")
40 41
 	}
41 42
 	path <- system.file("extdata", package=pkgname)
42
-	multiple.builds <- length(grep("hg19", list.files(path)) > 0)
43
-	if(!multiple.builds){
44
-		load(file.path(path, "snpProbes.rda"))
45
-	} else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
43
+	##multiple.builds <- length(grep("hg19", list.files(path)) > 0)
44
+	if(missing(genome)){
45
+		snp.file <- list.files(path, pattern="snpProbes_hg")
46
+		if(length(snp.file) > 1){
47
+			## use hg19
48
+			message("genome build not specified. Using build hg19 for annotation.")
49
+			snp.file <- snp.file[1]
50
+		}
51
+		genome <- gsub(".rda", "", strsplit(snp.file, "snpProbes_")[[1]][[2]])
52
+	} else snp.file <- paste("snpProbes_", genome, ".rda", sep="")
53
+##	if(!multiple.builds){
54
+##		load(file.path(path, "snpProbes.rda"))
55
+##	} else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
56
+	load(file.path(path, snp.file))
46 57
 	snpProbes <- get("snpProbes")
47 58
 	## if we use a different build we may throw out a number of snps...
48 59
 	snpProbes <- snpProbes[rownames(snpProbes) %in% gns, ]
49 60
 	if(copynumber){
50
-		if(!multiple.builds){
51
-			load(file.path(path, "cnProbes.rda"))
52
-		} else load(file.path(path, paste("cnProbes_", genome, ".rda", sep="")))
61
+		cn.file <- paste("cnProbes_", genome, ".rda", sep="")
62
+		load(file.path(path, cn.file))
63
+		##		if(!multiple.builds){
64
+		##			load(file.path(path, "cnProbes.rda"))
65
+		##		} else load(file.path(path, paste("cnProbes_", genome, ".rda", sep="")))
53 66
 		cnProbes <- get("cnProbes")
54 67
 		snpIndex <- seq(along=gns)
55 68
 		npIndex <- seq(along=rownames(cnProbes)) + max(snpIndex)
... ...
@@ -300,7 +313,7 @@ genotype <- function(filenames,
300 313
 			    seed=seed,
301 314
 			    verbose=verbose)
302 315
 	ok <- cnrmaAffy(cnSet=cnSet,
303
-			cdfName=annotation(cnSet), seed=seed,
316
+			seed=seed,
304 317
 			verbose=verbose)
305 318
 	stopifnot(ok)
306 319
 	ok <- genotypeAffy(cnSet=cnSet,
... ...
@@ -336,6 +349,7 @@ genotypeAffy <- function(cnSet, SNRMin=5, recallMin=10,
336 349
 			badSNP=badSNP,
337 350
 			callsGt=calls(cnSet),
338 351
 			callsPr=snpCallProbability(cnSet))
352
+	cnSet$gender[] <- tmp$gender
339 353
 	if(verbose) message("Genotyping finished.")
340 354
 	return(TRUE)
341 355
 }
Browse code

Merge branch 'collab'

* collab:
update Rds
revert imputeGender to original api
bug fix for calculateRBafCNSet
update calculateRBafCNSet
Fix documentation for crlmmCopynumber -- added argument fitLinearModel
update crlmmGT2
Put rm(DD, ...) further down in crlmmGT2 function
remove message about cloning A and B
Open and close callsPr and callsGt in crlmmGT2 (when args not missing)
revert removed indices loaded in crlmmGT2
snprmaAffy no longer writes normalized intensities to calls and callProbability slots. crlmmGT2 takes arguments callsGt and callsPr. When present, crlmmGT2 will not overwrite A and B.
explicit coercion to matrix in imputeGender
Assign imputed gender to cnSet$gender within crlmmGT2 function.
Change sum(SNR > SNRmin) to sum(SNR[] > SNRmin) in imputeGender

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@69450 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 14/09/2012 20:07:37
Showing1 changed files
... ...
@@ -225,17 +225,12 @@ snprmaAffy <- function(cnSet,
225 225
 	idx2 <- sample(length(fid), 10^5)
226 226
 	A <- A(cnSet)
227 227
 	B <- B(cnSet)
228
-	C <- calls(cnSet)
229
-	D <- snpCallProbability(cnSet)
230 228
 	SKW <- cnSet$SKW; SNR <- cnSet$SNR
231 229
 	open(A)
232 230
 	open(B)
233
-	open(C)
234
-	open(D)
235 231
 	open(SKW)
236 232
 	open(mixtureParams)
237 233
 	open(SNR)
238
-	if(verbose) message("Cloning A and B matrices to store genotype calls and confidence scores.")
239 234
 	## RS ADDED
240 235
 	index <- match(gns, rownames(A))
241 236
 	rsprocessCEL <- function(i){
... ...
@@ -246,10 +241,10 @@ snprmaAffy <- function(cnSet,
246 241
 			rm(x)
247 242
 			y <- normalize.quantiles.use.target(y, target=reference)
248 243
 			## RS: add index for row assignment
249
-			A[index, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
250
-			B[index, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
251
-			C[index, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
252
-			D[index, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
244
+			ya <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
245
+			yb <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
246
+			A[index, k] <- ya
247
+			B[index, k] <- yb
253 248
 			rm(y)
254 249
 			S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
255 250
 			M <- log2(A[idx, k])-log2(B[idx, k])
... ...
@@ -264,8 +259,6 @@ snprmaAffy <- function(cnSet,
264 259
 	ocLapply(sampleBatches, rsprocessCEL, neededPkgs="crlmm")
265 260
 	close(A)
266 261
 	close(B)
267
-	close(C)
268
-	close(D)
269 262
 	close(SKW)
270 263
 	close(mixtureParams)
271 264
 	close(SNR)
... ...
@@ -325,8 +318,10 @@ genotypeAffy <- function(cnSet, SNRMin=5, recallMin=10,
325 318
 			 recallRegMin=1000,
326 319
 			 gender=NULL, badSNP=0.7, returnParams=TRUE,
327 320
 			 verbose=TRUE){
328
-	tmp <- crlmmGT2(A=calls(cnSet),
329
-			B=snpCallProbability(cnSet),
321
+	## The passed arguments A and B currently contain the intensities
322
+	## (not calls and call probabilities)
323
+	tmp <- crlmmGT2(A=A(cnSet),
324
+			B=B(cnSet),
330 325
 			SNR=cnSet$SNR,
331 326
 			mixtureParams=cnSet@mixtureParams,
332 327
 			cdfName=annotation(cnSet),
... ...
@@ -338,11 +333,10 @@ genotypeAffy <- function(cnSet, SNRMin=5, recallMin=10,
338 333
 			gender=gender,
339 334
 			verbose=verbose,
340 335
 			returnParams=returnParams,
341
-			badSNP=badSNP)
342
-	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
343
-	open(cnSet$gender)
344
-	cnSet$gender[,] <- tmp[["gender"]]
345
-	close(cnSet$gender)
336
+			badSNP=badSNP,
337
+			callsGt=calls(cnSet),
338
+			callsPr=snpCallProbability(cnSet))
339
+	if(verbose) message("Genotyping finished.")
346 340
 	return(TRUE)
347 341
 }
348 342
 
Browse code

Merge branch 'collab'

* collab:
Update CNSet objects in data/ with datadir slot and protocolData(object)$filename
update cnrmaAffy. processCEL2 located inside cnrmaAffy function. Uses lexical scope.
Change API for genotypeAffy -- remove mixtureParams argument. Update call to genotypeAffy in genotype function
snprmaAffy no longer initializes mixtureParams object, but accesses this information from the cnSet
constructAffyCNSet initializes mixtureParams slot of the appropriate dimensions
updated cnrmaAffy. Removed cnrma2, cnrma. cnrmaAffy uses lexical scope
Fix bug in crlmmGT2 caused by unequal batch sizes
Moved rsprocessCel inside of snprmaAffy for lexical scope. Moved imputeGender inside crlmmGT2 for lexical scope
Revert imputeGender to original approach for crlmm. Splitting samples across nodes does not work well if there are not a lot of samples in the individual nodes. Probably better to use fewer markers on chr X when large number of samples are processed
contains old process1 call
change gender <- unlist(gender) to gender <- unlist(genderList)
v1.15.15 Fix memory leak in imputeGender step by running this function in sample batches of size ocSamples(). Use lexical scope in calling process1 function in crlmmGT2.
set default values in summarizeNps
depends on v 1.19.39 of oligoClasses
v1.15.14: Export constructAffyCNSet. Used datadir slot in CNSet object added to v 1.19.39 of oligoClasses
update getFeatureData for use with annotation package that contains a number of SNPs not necessarily included in the genotyping. These additional snps are removed when constructing the featureData in constructAffy

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@67826 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 19/07/2012 03:57:08
Showing1 changed files
... ...
@@ -40,14 +40,16 @@ getFeatureData <- function(cdfName, copynumber=FALSE, genome=genome){
40 40
 	}
41 41
 	path <- system.file("extdata", package=pkgname)
42 42
 	multiple.builds <- length(grep("hg19", list.files(path)) > 0)
43
-	if(!multiple.builds)
43
+	if(!multiple.builds){
44 44
 		load(file.path(path, "snpProbes.rda"))
45
-	else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
45
+	} else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
46 46
 	snpProbes <- get("snpProbes")
47
+	## if we use a different build we may throw out a number of snps...
48
+	snpProbes <- snpProbes[rownames(snpProbes) %in% gns, ]
47 49
 	if(copynumber){
48
-		if(!multiple.builds)
50
+		if(!multiple.builds){
49 51
 			load(file.path(path, "cnProbes.rda"))
50
-		else load(file.path(path, paste("cnProbes_", genome, ".rda", sep="")))
52
+		} else load(file.path(path, paste("cnProbes_", genome, ".rda", sep="")))
51 53
 		cnProbes <- get("cnProbes")
52 54
 		snpIndex <- seq(along=gns)
53 55
 		npIndex <- seq(along=rownames(cnProbes)) + max(snpIndex)
... ...
@@ -126,7 +128,7 @@ construct <- function(filenames,
126 128
 	return(cnSet)
127 129
 }
128 130
 
129
-constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
131
+constructAffyCNSet <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
130 132
 	##stopifnot(!missing(filenames))
131 133
 	if(missing(sns)) sns <- basename(filenames)
132 134
 	stopifnot(!missing(batch))
... ...
@@ -158,8 +160,14 @@ constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
158 160
 	B <- initializeBigMatrix("crlmmB-", nr, length(filenames), "integer")
159 161
 	call <- initializeBigMatrix("call-", nr, length(filenames), "integer")
160 162
 	callPr <- initializeBigMatrix("callPr-", nr, length(filenames), "integer")
163
+	mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
161 164
 	rownames(A) <- rownames(B) <- featureNames(featureData)
162 165
 	rownames(call) <- rownames(callPr) <- featureNames(featureData)
166
+	dirNames <- dirname(filenames)
167
+	unames <- unique(dirNames)
168
+	dirNames <- factor(dirNames, levels=unames)
169
+	dd <- split(seq_len(length(filenames)), dirNames)
170
+	datadir <- list(dirname=names(dd), n=as.integer(sapply(dd, length)))
163 171
 	if(verbose) message("Instantiating CNSet container")
164 172
 	cnSet <- new("CNSet",
165 173
 		     alleleA=A,
... ...
@@ -169,9 +177,12 @@ constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
169 177
 		     featureData=featureData,
170 178
 		     annotation=cdfName,
171 179
 		     batch=as.character(batch),
172
-		     genome=genome)
180
+		     genome=genome,
181
+		     datadir=datadir)
182
+	cnSet@mixtureParams <- mixtureParams
173 183
 	sampleNames(cnSet) <- sns
174 184
 	protocolData <- getProtocolData.Affy(filenames)
185
+	protocolData$filename <- basename(filenames)
175 186
 	rownames(pData(protocolData)) <- sns
176 187
 	protocolData(cnSet) <- protocolData
177 188
 	cnSet$SKW <- SKW
... ...
@@ -181,33 +192,84 @@ constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
181 192
 	return(cnSet)
182 193
 }
183 194
 
184
-snprmaAffy <- function(cnSet, filenames,
195
+snprmaAffy <- function(cnSet,
185 196
 		       mixtureSampleSize=10^5,
186 197
 		       eps=0.1,
187 198
 		       seed=1,
188 199
 		       verbose=TRUE){
200
+	filenames <- celfileName(cnSet)
189 201
 	if(verbose) message("Preprocessing ", length(filenames), " files.")
190 202
 	pkgname <- getCrlmmAnnotationName(annotation(cnSet))
191 203
 	stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
192
-	mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
193
-	sampleBatches <- splitIndicesByLength(seq_along(filenames), ocSamples()/getDoParWorkers())
194
-##	if(length(sampleBatches) < getDoParWorkers())
195
-##		sampleBatches <- splitIndicesByNode(seq_along(filenames))
196
-	ocLapply(sampleBatches,
197
-		 rsprocessCEL,
198
-		 filenames=filenames,
199
-		 fitMixture=TRUE,
200
-		 A=A(cnSet), B=B(cnSet), C=calls(cnSet),
201
-		 D=snpCallProbability(cnSet),
202
-		 SKW=cnSet$SKW, SNR=cnSet$SNR,
203
-		 mixtureParams=mixtureParams,
204
-		 eps=eps,
205
-		 seed=seed,
206
-		 mixtureSampleSize=mixtureSampleSize,
207
-		 pkgname=pkgname,
208
-		 neededPkgs=c("crlmm", pkgname))
204
+	sampleBatches <- splitIndicesByNode(seq_along(filenames))
205
+	mixtureParams <- cnSet@mixtureParams
206
+	obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
207
+	obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
208
+	obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
209
+	autosomeIndex <- getVarInEnv("autosomeIndex")
210
+	pnsa <- getVarInEnv("pnsa")
211
+	pnsb <- getVarInEnv("pnsb")
212
+	fid <- getVarInEnv("fid")
213
+	reference <- getVarInEnv("reference")
214
+	aIndex <- getVarInEnv("aIndex")
215
+	bIndex <- getVarInEnv("bIndex")
216
+	SMEDIAN <- getVarInEnv("SMEDIAN")
217
+	theKnots <- getVarInEnv("theKnots")
218
+	gns <- getVarInEnv("gns")
219
+	rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv)
220
+	rm(obj1, obj2, obj3)
221
+	## for mixture
222
+	set.seed(seed)
223
+	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
224
+	##for skewness. no need to do everything
225
+	idx2 <- sample(length(fid), 10^5)
226
+	A <- A(cnSet)
227
+	B <- B(cnSet)
228
+	C <- calls(cnSet)
229
+	D <- snpCallProbability(cnSet)
230
+	SKW <- cnSet$SKW; SNR <- cnSet$SNR
231
+	open(A)
232
+	open(B)
233
+	open(C)
234
+	open(D)
235
+	open(SKW)
236
+	open(mixtureParams)
237
+	open(SNR)
209 238
 	if(verbose) message("Cloning A and B matrices to store genotype calls and confidence scores.")
210
-	return(mixtureParams)
239
+	## RS ADDED
240
+	index <- match(gns, rownames(A))
241
+	rsprocessCEL <- function(i){
242
+		for (k in i){
243
+			y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
244
+			x <- log2(y[idx2])
245
+			SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3)
246
+			rm(x)
247
+			y <- normalize.quantiles.use.target(y, target=reference)
248
+			## RS: add index for row assignment
249
+			A[index, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
250
+			B[index, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
251
+			C[index, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
252
+			D[index, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
253
+			rm(y)
254
+			S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
255
+			M <- log2(A[idx, k])-log2(B[idx, k])
256
+			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
257
+			rm(S, M)
258
+			mixtureParams[, k] <- tmp[["coef"]]
259
+			SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
260
+			rm(tmp)
261
+		}
262
+		TRUE
263
+	}
264
+	ocLapply(sampleBatches, rsprocessCEL, neededPkgs="crlmm")
265
+	close(A)
266
+	close(B)
267
+	close(C)
268
+	close(D)
269
+	close(SKW)
270
+	close(mixtureParams)
271
+	close(SNR)
272
+	return()
211 273
 }
212 274
 
213 275
 genotype <- function(filenames,
... ...
@@ -231,25 +293,24 @@ genotype <- function(filenames,
231 293
 	if (missing(sns)) sns <- basename(filenames)
232 294
 	if (any(duplicated(sns))) stop("sample identifiers must be unique")
233 295
 	genome <- match.arg(genome)
234
-	cnSet <- constructAffy(filenames=filenames,
235
-			       sns=sns,
236
-			       cdfName=cdfName,
237
-			       batch=batch, verbose=verbose,
238
-			       genome=genome)
296
+	cnSet <- constructAffyCNSet(filenames=filenames,
297
+				    sns=sns,
298
+				    cdfName=cdfName,
299
+				    batch=batch, verbose=verbose,
300
+				    genome=genome)
239 301
 	if(!(is(A(cnSet), "ff") || is(A(cnSet), "ffdf"))){
240 302
 		stop("The ff package is required for this function.")
241 303
 	}
242
-	cnSet@mixtureParams <- snprmaAffy(cnSet, filenames=filenames,
243
-					  mixtureSampleSize=mixtureSampleSize,
244
-					  eps=eps,
245
-					  seed=seed,
246
-					  verbose=verbose)
247
-	ok <- cnrmaAffy(cnSet=cnSet, filenames=filenames,
304
+	cnSet <- snprmaAffy(cnSet,
305
+			    mixtureSampleSize=mixtureSampleSize,
306
+			    eps=eps,
307
+			    seed=seed,
308
+			    verbose=verbose)
309
+	ok <- cnrmaAffy(cnSet=cnSet,
248 310
 			cdfName=annotation(cnSet), seed=seed,
249 311
 			verbose=verbose)
250 312
 	stopifnot(ok)
251 313
 	ok <- genotypeAffy(cnSet=cnSet,
252
-			   mixtureParams=cnSet@mixtureParams,
253 314
 			   SNRMin=SNRMin,
254 315
 			   recallMin=recallMin,
255 316
 			   recallRegMin=recallRegMin,
... ...
@@ -260,15 +321,14 @@ genotype <- function(filenames,
260 321
 	return(cnSet)
261 322
 }
262 323
 
263
-genotypeAffy <- function(cnSet, mixtureParams, SNRMin=5, recallMin=10,
324
+genotypeAffy <- function(cnSet, SNRMin=5, recallMin=10,
264 325
 			 recallRegMin=1000,
265 326
 			 gender=NULL, badSNP=0.7, returnParams=TRUE,
266 327
 			 verbose=TRUE){
267
-	##snp.index <- which(isSnp(cnSet))
268 328
 	tmp <- crlmmGT2(A=calls(cnSet),
269 329
 			B=snpCallProbability(cnSet),
270 330
 			SNR=cnSet$SNR,
271
-			mixtureParams=mixtureParams,
331
+			mixtureParams=cnSet@mixtureParams,
272 332
 			cdfName=annotation(cnSet),
273 333
 			row.names=NULL,
274 334
 			col.names=sampleNames(cnSet),
... ...
@@ -279,7 +339,6 @@ genotypeAffy <- function(cnSet, mixtureParams, SNRMin=5, recallMin=10,
279 339
 			verbose=verbose,
280 340
 			returnParams=returnParams,
281 341
 			badSNP=badSNP)
282
-##			snp.names=featureNames(cnSet)[snp.index])
283 342
 	if(verbose) message("Genotyping finished.  Updating container with genotype calls and confidence scores.")
284 343
 	open(cnSet$gender)
285 344
 	cnSet$gender[,] <- tmp[["gender"]]
... ...
@@ -287,92 +346,48 @@ genotypeAffy <- function(cnSet, mixtureParams, SNRMin=5, recallMin=10,
287 346
 	return(TRUE)
288 347
 }
289 348
 
290
-cnrmaAffy <- function(cnSet, filenames, cdfName, seed=1, verbose=TRUE){
291
-	snp.I <- isSnp(cnSet)
292
-	snp.index <- which(snp.I)
293
-	np.index <- which(!snp.I)
349
+cnrmaAffy <- function(cnSet, seed=1, verbose=TRUE){
350
+	filenames <- celfileName(cnSet)
351
+	np.index <- which(!isSnp(cnSet))
352
+	A <- A(cnSet)
353
+	cdfName <- annotation(cnSet)
294 354
 	if(verbose) message("Quantile normalizing nonpolymorphic markers")
295
-	cnrma2(A=A(cnSet),
296
-	       filenames=filenames,
297
-	       row.names=featureNames(cnSet)[np.index],
298
-	       cdfName=cdfName,
299
-	       sns=sampleNames(cnSet),
300
-	       seed=seed,
301
-	       verbose=verbose)
302
-	TRUE
303
-}
304
-
305
-rsprocessCEL <- function(i, filenames, fitMixture, A, B, C, D, SKW, SNR,
306
-			 mixtureParams, eps, seed, mixtureSampleSize,
307
-			 pkgname){
308
-	obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
309
-	obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
310
-	obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
311
-	autosomeIndex <- getVarInEnv("autosomeIndex")
312
-	pnsa <- getVarInEnv("pnsa")
313
-	pnsb <- getVarInEnv("pnsb")
314
-	fid <- getVarInEnv("fid")
355
+	##if(missing(cdfName)) stop("must specify cdfName")
356
+	pkgname <- getCrlmmAnnotationName(cdfName)
357
+	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
358
+	sampleBatches <- splitIndicesByNode(seq_along(filenames))
359
+	if(verbose) message("Processing nonpolymorphic probes for ", length(filenames), " files.")
360
+	if(pkgname=="genomewidesnp6Crlmm"){
361
+		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
362
+	}
363
+	if(pkgname=="genomewidesnp5Crlmm"){
364
+		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
365
+	}
315 366
 	reference <- getVarInEnv("reference")
316
-	aIndex <- getVarInEnv("aIndex")
317
-	bIndex <- getVarInEnv("bIndex")
318
-	SMEDIAN <- getVarInEnv("SMEDIAN")
319
-	theKnots <- getVarInEnv("theKnots")
320
-	gns <- getVarInEnv("gns")
321
-	rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv)
322
-	rm(obj1, obj2, obj3)
323
-
324
-	## for mixture
367
+        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
368
+	fid <- getVarInEnv("npProbesFid")
369
+	row.names <- featureNames(cnSet)[np.index]
370
+	row.names <- row.names[row.names %in% names(fid)] ##removes chromosome Y
371
+	fid <- fid[match(row.names, names(fid))]
372
+	np.index <- match(row.names, rownames(A))
373
+	gns <- names(fid)
325 374
 	set.seed(seed)
326
-	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
327
-	##for skewness. no need to do everything
328
-	idx2 <- sample(length(fid), 10^5)
329
-
375
+	## updates A
330 376
 	open(A)
331
-	open(B)
332
-	open(C)
333
-	open(D)
334
-	open(SKW)
335
-	open(mixtureParams)
336
-	open(SNR)
337
-
338
-	## RS ADDED
339
-	index <- match(gns, rownames(A))
340
-
341
-	for (k in i){
342
-		y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
343
-		x <- log2(y[idx2])
344
-		SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3)
345
-		rm(x)
346
-		y <- normalize.quantiles.use.target(y, target=reference)
347
-		## RS: add index for row assignment
348
-		A[index, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
349
-		B[index, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
350
-		C[index, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
351
-		D[index, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
352
-		rm(y)
353
-		if(fitMixture){
354
-			S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
355
-			M <- log2(A[idx, k])-log2(B[idx, k])
356
-			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
357
-			rm(S, M)
358
-			mixtureParams[, k] <- tmp[["coef"]]
359
-			SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
360
-			rm(tmp)
361
-		} else {
362
-			mixtureParams[, k] <- NA
363
-			SNR[k] <- NA
377
+	processCEL2 <- function(i){
378
+		for (k in i){
379
+			y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
380
+			A[np.index, k] <- as.integer(normalize.quantiles.use.target(y, target=reference))
364 381
 		}
382
+		return(TRUE)
365 383
 	}
384
+	ocLapply(sampleBatches, processCEL2, neededPkgs="crlmm")
366 385
 	close(A)
367
-	close(B)
368
-	close(SKW)
369
-	close(mixtureParams)
370
-	close(SNR)
371
-	rm(list=ls())
372
-	gc(verbose=FALSE)
373 386
 	TRUE
374 387
 }
375 388
 
389
+
390
+
376 391
 genotype2 <- function(){
377 392
 	.Defunct(msg="The genotype2 function has been deprecated. The function genotype should be used instead.  genotype will support large data using ff provided that the ff package is loaded.")
378 393
 }
... ...
@@ -1251,80 +1266,10 @@ whichPlatform <- function(cdfName){
1251 1266
 	return(platform)
1252 1267
 }
1253 1268
 
1254
-cnrma <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){
1255
-	if(missing(cdfName)) stop("must specify cdfName")
1256
-	pkgname <- getCrlmmAnnotationName(cdfName)
1257
-	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
1258
-	if (missing(sns)) sns <- basename(filenames)
1259
-	if(verbose) message("Loading annotations for nonpolymorphic probes")
1260
-        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
1261
-	fid <- getVarInEnv("npProbesFid")
1262
-	if(cdfName=="genomewidesnp6"){
1263
-		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
1264
-	}
1265
-	if(cdfName=="genomewidesnp5"){
1266
-		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
1267
-	}
1268
-	reference <- getVarInEnv("reference")
1269
-	fid <- fid[match(row.names, names(fid))]
1270
-	np.index <- match(row.names, rownames(A))
1271
-	gns <- names(fid)
1272
-	set.seed(seed)
1273
-	##idx2 <- sample(length(fid), 10^5)
1274
-	for (k in seq_along(filenames)){
1275
-		y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
1276
-		A[np.index, k] <- as.integer(normalize.quantiles.use.target(y, target=reference))
1277
-	}
1278
-	return(A)
1279
-}
1280
-
1281 1269
 cnrma2 <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){
1282
-	if(missing(cdfName)) stop("must specify cdfName")
1283
-	pkgname <- getCrlmmAnnotationName(cdfName)
1284
-	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
1285
-	if (missing(sns)) sns <- basename(filenames)
1286
-	sampleBatches <- splitIndicesByLength(seq_along(filenames), ocSamples()/getDoParWorkers())
1287
-	if(verbose) message("Processing nonpolymorphic probes for ", length(filenames), " files.")
1288
-	## updates A
1289
-	ocLapply(sampleBatches,
1290
-		 processCEL2,
1291
-		 row.names=row.names,
1292
-		 filenames=filenames,
1293
-		 A=A,
1294
-		 seed=seed,
1295
-		 pkgname=pkgname,
1296
-		 neededPkgs=c("crlmm", pkgname))
1297
-	##list(sns=sns, gns=row.names, SKW=SKW, cdfName=cdfName)
1298
-	return(A)
1299
-}
1300 1270
 
1301
-processCEL2 <- function(i, filenames, row.names, A, seed, pkgname){
1302
-	if(pkgname=="genomewidesnp6Crlmm"){
1303
-		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
1304
-	}
1305
-	if(pkgname=="genomewidesnp5Crlmm"){
1306
-		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
1307
-	}
1308
-	reference <- getVarInEnv("reference")
1309
-        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
1310
-	fid <- getVarInEnv("npProbesFid")
1311
-	row.names <- row.names[row.names %in% names(fid)] ##removes chromosome Y
1312
-	fid <- fid[match(row.names, names(fid))]
1313
-	##stopifnot(all.equal(names(fid), row.names))
1314
-	np.index <- match(row.names, rownames(A))
1315
-	gns <- names(fid)
1316
-	set.seed(seed)
1317
-	open(A)
1318
-	##idx2 <- sample(length(fid), 10^5)
1319
-	for (k in i){
1320
-		y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
1321
-		A[np.index, k] <- as.integer(normalize.quantiles.use.target(y, target=reference))
1322
-	}
1323
-	close(A)
1324
-	return(TRUE)
1325 1271
 }
1326 1272
 
1327
-
1328 1273
 imputeCenter <- function(muA, muB, index.complete, index.missing){
1329 1274
 	index <- index.missing
1330 1275
 	mnA <- muA
... ...
@@ -1618,9 +1563,6 @@ genotypeSummary <- function(object,
1618 1563
 	FUN <- get(myf)
1619 1564
 	if(is.lds){
1620 1565
 		index.list <- splitIndicesByLength(marker.index, ocProbesets())
1621
-##		if(parStatus()){
1622
-##			index.list <- splitIndicesByNode(marker.index)
1623
-##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
1624 1566
 		ocLapply(seq(along=index.list),
1625 1567
 			 FUN,
1626 1568
 			 index.list=index.list,
... ...
@@ -1653,7 +1595,7 @@ whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){
1653 1595
 }
1654 1596
 
1655 1597
 summarizeNps <- function(strata, index.list, object, batchSize,
1656
-			 GT.CONF.THR, verbose, CHR.X, ...){
1598
+			 GT.CONF.THR, verbose=TRUE, CHR.X=FALSE, ...){
1657 1599
 	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
1658 1600
 	if(is.lds) {physical <- get("physical"); open(object)}
1659 1601
 	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
... ...
@@ -1707,9 +1649,9 @@ summarizeNps <- function(strata, index.list, object, batchSize,
1707 1649
 summarizeSnps <- function(strata,
1708 1650
 			  index.list,
1709 1651
 			  object,
1710
-			  GT.CONF.THR,
1711
-			  verbose,
1712
-			  CHR.X, ...){
1652
+			  GT.CONF.THR=0.80,
1653
+			  verbose=TRUE,
1654
+			  CHR.X=FALSE, ...){
1713 1655
 ##	if(is.lds) {
1714 1656
 ##		physical <- get("physical")
1715 1657
 ##		open(object)
... ...
@@ -2012,7 +1954,8 @@ crlmmCopynumber <- function(object,
2012 1954
 			    MIN.NU=2^3,
2013 1955
 			    MIN.PHI=2^3,
2014 1956
 			    THR.NU.PHI=TRUE,
2015
-			    type=c("SNP", "NP", "X.SNP", "X.NP")){
1957
+			    type=c("SNP", "NP", "X.SNP", "X.NP"),
1958
+			    fit.linearModel=TRUE){
2016 1959
 	typeof <- paste(type, collapse=",")
2017 1960
 	stopifnot(typeof %in% c("SNP", "NP", "SNP,NP", "SNP,X.SNP", "SNP,X.NP", "SNP,NP,X.SNP", "SNP,NP,X.SNP,X.NP"))
2018 1961
 	if(GT.CONF.THR >= 1 | GT.CONF.THR < 0) stop("GT.CONF.THR must be in [0,1)")
... ...
@@ -2070,33 +2013,34 @@ crlmmCopynumber <- function(object,
2070 2013
 		}
2071 2014
 	}
2072 2015
 	if(verbose) message("Estimating parameters for copy number")##SNPs only
2073
-	for(i in seq_along(type)){
2074
-		marker.type <- type[i]
2075
-		message(paste("...", mylabel(marker.type)))
2076
-		CHR.X <- ifelse(marker.type %in% c("X.SNP", "X.NP"), TRUE, FALSE)
2077
-		marker.index <- whichMarkers(marker.type, is.snp,
2078
-					     is.autosome, is.annotated, is.X)
2079
-		if(length(marker.index) == 0) next()
2080
-		res <- estimateCnParameters(object=object,
2081
-					       type=marker.type,
2082
-					       SNRMin=SNRMin,
2083
-					       DF.PRIOR=DF.PRIOR,
2084
-					       GT.CONF.THR=GT.CONF.THR,
2085
-					       MIN.SAMPLES=MIN.SAMPLES,
2086
-					       MIN.OBS=MIN.OBS,
2087
-					       MIN.NU=MIN.NU,
2088
-					       MIN.PHI=MIN.PHI,
2089
-					       THR.NU.PHI=THR.NU.PHI,
2090
-					       verbose=verbose,
2091
-					       marker.index=marker.index,
2092
-					       is.lds=is.lds,
2093
-					       CHR.X=CHR.X)
2094
-		##if(!is.lds) {object <- res; rm(res); gc()}
2095
-		##if(!is.lds) {object <- res; rm(res); gc()}
2096
-	}
2097
-	close(object)
2016
+	if(fit.linearModel){
2017
+		for(i in seq_along(type)){
2018
+			marker.type <- type[i]
2019
+			message(paste("...", mylabel(marker.type)))
2020
+			CHR.X <- ifelse(marker.type %in% c("X.SNP", "X.NP"), TRUE, FALSE)
2021
+			marker.index <- whichMarkers(marker.type, is.snp,
2022
+						     is.autosome, is.annotated, is.X)
2023
+			if(length(marker.index) == 0) next()
2024
+			res <- estimateCnParameters(object=object,
2025
+						    type=marker.type,
2026
+						    SNRMin=SNRMin,
2027
+						    DF.PRIOR=DF.PRIOR,
2028
+						    GT.CONF.THR=GT.CONF.THR,
2029
+						    MIN.SAMPLES=MIN.SAMPLES,
2030
+						    MIN.OBS=MIN.OBS,
2031
+						    MIN.NU=MIN.NU,
2032
+						    MIN.PHI=MIN.PHI,
2033
+						    THR.NU.PHI=THR.NU.PHI,
2034
+						    verbose=verbose,
2035
+						    marker.index=marker.index,
2036
+						    is.lds=is.lds,
2037
+						    CHR.X=CHR.X)
2038
+		}
2039
+		close(object)
2040
+	}
2098 2041
 	if(is.lds) return(TRUE) else return(object)
2099 2042
 }
2043
+
2100 2044
 crlmmCopynumber2 <- function(){
2101 2045
 	.Defunct(msg="The crlmmCopynumber2 function has been deprecated. The function crlmmCopynumber should be used instead.  crlmmCopynumber will support large data using ff provided that the ff package is loaded.")
2102 2046
 }
Browse code

Merge branch 'collab'

* collab: (34 commits)
revert change to IlluminaPreprocessCN
fix bug in isValidCdfName
print warning when all features in a batch of probes are flagged, but allow processing to continue
add utility cleancdfnames
Add validCdfNames.Rd
export validCdfNames
imputeGender fix when chromosome Y not available
Use splitIndicesByLength(index, ocSamples/getDoParWorkers())
Can not allocate vector of size XG with genotype.Illumina. Use splitIndicesByNode() only if the length of the list is greater than the split from splitIndicesByLength(). Otherwise, split by length using ocSamples()
update .gitignore
Add make.unique for sampleSheet$Sample_ID in readIdatFiles
bug in description
ensure sample ids stored in samplesheet are unique when constructing cnSet object
update oligoClasses dependency
update unit test for genotype.Illumina
revert change in constructInf call from genotype.Illumina
Update genotype.Rd
edit ACN function
1.15.6 use make.unique(basename(arrayNames)) to allow processing of Illumina samples with duplicated barcodes
check that sample identifies are unique in crlmm function
...

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@67435 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 08/07/2012 19:00:03
Showing1 changed files
... ...
@@ -21,7 +21,7 @@ getProtocolData.Affy <- function(filenames){
21 21
 ##		  return(gns)
22 22
 ##	  })
23 23
 
24
-getFeatureData <- function(cdfName, copynumber=FALSE){
24
+getFeatureData <- function(cdfName, copynumber=FALSE, genome=genome){
25 25
 	pkgname <- getCrlmmAnnotationName(cdfName)
26 26
 	if(!require(pkgname, character.only=TRUE)){
27 27
 		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
... ...
@@ -39,10 +39,15 @@ getFeatureData <- function(cdfName, copynumber=FALSE){
39 39
 		pkgname <- paste(pkgname, "Crlmm", sep="")
40 40
 	}
41 41
 	path <- system.file("extdata", package=pkgname)
42
-	load(file.path(path, "snpProbes.rda"))
42
+	multiple.builds <- length(grep("hg19", list.files(path)) > 0)
43
+	if(!multiple.builds)
44
+		load(file.path(path, "snpProbes.rda"))
45
+	else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
43 46
 	snpProbes <- get("snpProbes")
44 47
 	if(copynumber){
45
-		load(file.path(path, "cnProbes.rda"))
48
+		if(!multiple.builds)
49
+			load(file.path(path, "cnProbes.rda"))
50
+		else load(file.path(path, paste("cnProbes_", genome, ".rda", sep="")))
46 51
 		cnProbes <- get("cnProbes")
47 52
 		snpIndex <- seq(along=gns)
48 53
 		npIndex <- seq(along=rownames(cnProbes)) + max(snpIndex)
... ...
@@ -121,8 +126,8 @@ construct <- function(filenames,
121 126
 	return(cnSet)
122 127
 }
123 128
 
124
-constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE){
125
-	stopifnot(!missing(filenames))
129
+constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
130
+	##stopifnot(!missing(filenames))
126 131
 	if(missing(sns)) sns <- basename(filenames)
127 132
 	stopifnot(!missing(batch))
128 133
 	##---------------------------------------------------------------------------
... ...
@@ -131,7 +136,6 @@ constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE){
131 136
 	## appropriate dimension for snps+nps
132 137
 	##
133 138
 	##---------------------------------------------------------------------------
134
-	if (missing(sns)) sns <- basename(filenames)
135 139
 	if (missing(cdfName))
136 140
 		cdfName <- read.celfile.header(filenames[1])[["cdfName"]]
137 141
 	pkgname <- getCrlmmAnnotationName(cdfName)
... ...
@@ -148,7 +152,7 @@ constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE){
148 152
 	SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double")
149 153
 	SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double")
150 154
 	genderff <- initializeBigVector("gender", length(filenames), "integer")
151
-	featureData <- getFeatureData(cdfName, copynumber=TRUE)
155
+	featureData <- getFeatureData(cdfName, copynumber=TRUE, genome=genome)
152 156
 	nr <- nrow(featureData); nc <- length(sns)
153 157
 	A <- initializeBigMatrix("crlmmA-", nr, length(filenames), "integer")
154 158
 	B <- initializeBigMatrix("crlmmB-", nr, length(filenames), "integer")
... ...
@@ -164,18 +168,12 @@ constructAffy <- function(filenames, sns, cdfName, batch, verbose=TRUE){
164 168
 		     callProbability=callPr,
165 169
 		     featureData=featureData,
166 170
 		     annotation=cdfName,
167
-		     batch=as.character(batch))
168
-	if(!missing(sns)){
169
-		sampleNames(cnSet) <- sns
170
-	} else {
171
-		sampleNames(cnSet) <- basename(filenames)
172
-	}
171
+		     batch=as.character(batch),
172
+		     genome=genome)
173
+	sampleNames(cnSet) <- sns
173 174
 	protocolData <- getProtocolData.Affy(filenames)
174 175
 	rownames(pData(protocolData)) <- sns
175 176
 	protocolData(cnSet) <- protocolData
176
-	##pd <- data.frame(matrix(NA, nc, 3), row.names=sns)
177
-	##colnames(pd)=c("SKW", "SNR", "gender")
178
-	##phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
179 177
 	cnSet$SKW <- SKW
180 178
 	cnSet$SNR <- SNR
181 179
 	cnSet$gender <- genderff
... ...
@@ -192,7 +190,9 @@ snprmaAffy <- function(cnSet, filenames,
192 190
 	pkgname <- getCrlmmAnnotationName(annotation(cnSet))
193 191
 	stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
194 192
 	mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
195
-	sampleBatches <- splitIndicesByNode(seq(along=filenames))
193
+	sampleBatches <- splitIndicesByLength(seq_along(filenames), ocSamples()/getDoParWorkers())
194
+##	if(length(sampleBatches) < getDoParWorkers())
195
+##		sampleBatches <- splitIndicesByNode(seq_along(filenames))
196 196
 	ocLapply(sampleBatches,
197 197
 		 rsprocessCEL,
198 198
 		 filenames=filenames,
... ...
@@ -225,19 +225,25 @@ genotype <- function(filenames,
225 225
 		     recallRegMin=1000,
226 226
 		     gender=NULL,
227 227
 		     returnParams=TRUE,
228
-		     badSNP=0.7){
229
-	stopifnot(isPackageLoaded("ff"))
228
+		     badSNP=0.7,
229
+		     genome=c("hg19", "hg18")){
230
+	if(!isPackageLoaded("ff")) stop("ff package must be loaded")
231
+	if (missing(sns)) sns <- basename(filenames)
232
+	if (any(duplicated(sns))) stop("sample identifiers must be unique")
233
+	genome <- match.arg(genome)
230 234
 	cnSet <- constructAffy(filenames=filenames,
235
+			       sns=sns,
231 236
 			       cdfName=cdfName,
232
-			       batch=batch, verbose=verbose)
237
+			       batch=batch, verbose=verbose,
238
+			       genome=genome)
233 239
 	if(!(is(A(cnSet), "ff") || is(A(cnSet), "ffdf"))){
234 240
 		stop("The ff package is required for this function.")
235 241
 	}
236 242
 	cnSet@mixtureParams <- snprmaAffy(cnSet, filenames=filenames,
237
-				    mixtureSampleSize=mixtureSampleSize,
238
-				    eps=eps,
239
-				    seed=seed,
240
-				    verbose=verbose)
243
+					  mixtureSampleSize=mixtureSampleSize,
244
+					  eps=eps,
245
+					  seed=seed,
246
+					  verbose=verbose)
241 247
 	ok <- cnrmaAffy(cnSet=cnSet, filenames=filenames,
242 248
 			cdfName=annotation(cnSet), seed=seed,
243 249
 			verbose=verbose)
... ...
@@ -697,6 +703,10 @@ fit.lm2 <- function(strata,
697 703
 	flags <- as.matrix(flags(object)[ii, batch.index])
698 704
 	fns <- featureNames(object)[ii]
699 705
 	fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
706
+	if(length(fns.noflags) == 0) {
707
+		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
708
+		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
709
+	}
700 710
 	snp.index <- sample(match(fns.noflags, featureNames(object)), 5000)
701 711
 	##
702 712
 	nuA.np <- as.matrix(nuA(object)[marker.index, batch.index])
... ...
@@ -1089,6 +1099,15 @@ fit.lm3 <- function(strata,
1089 1099
 	phiB(object)[marker.index, batch.index] <- phiB
1090 1100
 	phiPrimeA(object)[marker.index, batch.index] <- phiA2
1091 1101
 	phiPrimeB(object)[marker.index, batch.index] <- phiB2
1102
+	##
1103
+	if(enough.women){
1104
+		medianA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 1]))
1105
+		medianA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 2]))
1106
+		medianA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 3]))
1107
+		medianB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 1]))
1108
+		medianB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 2]))
1109
+		medianB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 3]))
1110
+	}
1092 1111
 	if(is.lds) {close(object); return(TRUE)} else return(object)
1093 1112
 }
1094 1113
 
... ...
@@ -1145,6 +1164,10 @@ fit.lm4 <- function(strata,
1145 1164
 	fns <- featureNames(object)[ii]
1146 1165
 	flags <- as.matrix(flags(object)[ii, batch.index])
1147 1166
 	fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
1167
+	if(length(fns.noflags) == 0){
1168
+		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
1169
+		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
1170
+	}
1148 1171
 	snp.index <- sample(match(fns.noflags, featureNames(object)), 10000)
1149 1172
 	##
1150 1173
 	N.AA <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
... ...
@@ -1260,7 +1283,7 @@ cnrma2 <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){
1260 1283
 	pkgname <- getCrlmmAnnotationName(cdfName)
1261 1284
 	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
1262 1285
 	if (missing(sns)) sns <- basename(filenames)
1263
-	sampleBatches <- splitIndicesByNode(seq(along=filenames))
1286
+	sampleBatches <- splitIndicesByLength(seq_along(filenames), ocSamples()/getDoParWorkers())
1264 1287
 	if(verbose) message("Processing nonpolymorphic probes for ", length(filenames), " files.")
1265 1288
 	## updates A
1266 1289
 	ocLapply(sampleBatches,
... ...
@@ -1540,10 +1563,10 @@ shrinkSummary <- function(object,
1540 1563
 		marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
1541 1564
 	}
1542 1565
 	if(is.lds){
1543
-		##index.list <- splitIndicesByLength(marker.index, ocProbesets())
1544
-		if(parStatus()){
1545
-			index.list <- splitIndicesByNode(marker.index)
1546
-		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
1566
+		index.list <- splitIndicesByLength(marker.index, ocProbesets())
1567
+##		if(parStatus()){
1568
+##			index.list <- splitIndicesByNode(marker.index)
1569
+##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
1547 1570
 		ocLapply(seq(along=index.list),
1548 1571
 			 shrinkGenotypeSummaries,
1549 1572
 			 index.list=index.list,
... ...
@@ -1594,10 +1617,10 @@ genotypeSummary <- function(object,
1594 1617
 	myf <- summaryFxn(type[[1]])
1595 1618
 	FUN <- get(myf)
1596 1619
 	if(is.lds){
1597
-		##index.list <- splitIndicesByLength(marker.index, ocProbesets())
1598
-		if(parStatus()){
1599
-			index.list <- splitIndicesByNode(marker.index)
1600
-		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
1620
+		index.list <- splitIndicesByLength(marker.index, ocProbesets())
1621
+##		if(parStatus()){
1622
+##			index.list <- splitIndicesByNode(marker.index)
1623
+##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
1601 1624
 		ocLapply(seq(along=index.list),
1602 1625
 			 FUN,
1603 1626
 			 index.list=index.list,
... ...
@@ -2116,9 +2139,10 @@ estimateCnParameters <- function(object,
2116 2139
 	myfun <- lmFxn(type[[1]])
2117 2140
 	FUN <- get(myfun)
2118 2141
 	if(is.lds){
2119
-		if(parStatus()){
2120
-			index.list <- splitIndicesByNode(marker.index)
2121
-		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
2142
+		index.list <- splitIndicesByLength(marker.index, ocProbesets())
2143
+##		if(parStatus()){
2144
+##			index.list <- splitIndicesByNode(marker.index)
2145
+##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
2122 2146
 		ocLapply(seq(along=index.list),
2123 2147
 			 FUN,
2124 2148
 			 index.list=index.list,
... ...
@@ -2784,89 +2808,73 @@ ABpanel <- function(x, y, predictRegion,
2784 2808
 	}
2785 2809
 }
2786 2810
 
2787
-calculateRTheta <- function(cnSet, genotype=c("AA", "AB", "BB"), batch.name){
2788
-	genotype <- match.arg(genotype)
2789
-	j <- match(batch.name, batchNames(cnSet))
2790
-	##centers <- medians(cnSet, i=snp.index, j)
2791
-	centers <- medians(cnSet, i=seq_len(nrow(cnSet)), j)
2792
-	theta <- matrix(NA, nrow(cnSet), 2)
2793
-	colnames(theta) <- c("theta", "R")
2794
-	x <- centers[, "A", genotype, j]
2795
-	y <- centers[, "B", genotype, j]
2796
-	## small imputed values -- should fix imputeCenter
2797
-	x[x < 64] <- 64
2798
-	y[y < 64] <- 64
2799
-	theta[, "theta"] <- atan2(y, x)*2/pi
2800
-##	if(any(is.na(theta[, "theta"]))){
2801
-##		##stop("NA's in thetas.  Can not compute theta / R values for batches with fewer than 10 samples")
2802
-##	}
2803
-	## so that 'R' is available for NP probes
2804
-	y[is.na(y)] <- 0
2805
-	theta[, "R"] <- x+y
2806
-	theta[!isSnp(cnSet), 1] <- 1
2807
-	if(any(is.na(theta[, "theta"]))){
2808
-		stop("NA's in thetas.  Can not compute theta / R values for batches with fewer than 10 samples")
2809
-	}
2810
-	return(theta)
2811
-}
2812
-
2813
-
2814
-
2815
-
2816
-calculateBAFandLRR <- function(cnSet) {
2817
-  ## Calculates B allele frequency and log2 R ratio for all snps for a given cnSet
2818
-  ## Returns a 3-D array of size Num Of Snps x Num of Samples x 2
2819
-  ## where result[,,1] is B allele frequency and result[,,2] is log2 R ratio
2820
-
2821
-  snp.index <- which(isSnp(cnSet))
2822
-  b <- (B(cnSet))[snp.index, ]
2823
-  a <- (A(cnSet))[snp.index, ]
2824
-  centers<-medians(cnSet, i=snp.index, j=1:length(unique(batch(cnSet))))
2825
-  bafAndLrr <- array(NA_integer_, dim=c(length(snp.index), length(sampleNames(cnSet)), 2), dimnames=list(featureNames(cnSet)[snp.index], sampleNames(cnSet),c("baf", "lrr")))
2826
-
2827
-  for (batch in unique(batch(cnSet))) {
2828
-
2829
-    ## get batch- and snp-specific centers
2830
-    center.aa.x <- centers[, 1, 1, batch]
2831
-    center.aa.y <- centers[, 2, 1, batch]
2832
-    center.ab.x <- centers[, 1, 2, batch]
2833
-    center.ab.y <- centers[, 2, 2, batch]
2834
-    center.bb.x <- centers[, 1, 3, batch]
2835
-    center.bb.y <- centers[, 2, 3, batch]
2836
-    theta.aa <- atan2(center.aa.y, center.aa.x) * 2 / pi
2837
-    r.aa <- center.aa.x + center.aa.y
2838
-    theta.ab <- atan2(center.ab.y, center.ab.x) * 2 / pi
2839
-    r.ab <- center.ab.x + center.ab.y
2840
-    theta.bb <- atan2(center.bb.y, center.bb.x) * 2 / pi
2841
-    r.bb <- center.bb.x + center.bb.y
2842
-
2843
-    index.sample <- which(batch(cnSet) %in% batch)
2844
-    for (i in index.sample) {
2845
-      theta <- atan2(b[, i], a[, i]) * 2 / pi
2846
-      r <- a[, i] + b[, i]
2847
-
2848
-      baf <- vector(mode="numeric", length=length(theta))
2849
-      baf[theta < theta.aa] <- 0
2850
-      baf[theta >= theta.bb] <- 1
2851
-      idx.ab <- which(theta >= theta.aa & theta < theta.ab)
2852
-      baf[idx.ab] <- .5 * (theta[idx.ab] - theta.aa[idx.ab]) / (theta.ab[idx.ab] - theta.aa[idx.ab])
2853
-      idx.bb <- which(theta >= theta.ab & theta < theta.bb)
2854
-      baf[idx.bb] <- .5 * (theta[idx.bb] - theta.ab[idx.bb]) / (theta.bb[idx.bb] - theta.ab[idx.bb]) + .5
2855
-      bafAndLrr[, i ,1] <- baf
2856
-
2857
-      r.expected <- vector(mode="numeric", length=length(r))
2858
-      r.expected[theta < theta.aa] <- r.aa[theta < theta.aa]
2859
-      r.expected[theta >= theta.bb] <- r.bb[theta >= theta.bb]
2860
-
2861
-      idx.ab <- which(theta >= theta.aa & theta < theta.ab)
2862
-      r.expected[idx.ab] <- (theta[idx.ab] - theta.aa[idx.ab]) * (r.ab[idx.ab]-r.aa[idx.ab]) /
2863
-        (theta.ab[idx.ab] - theta.aa[idx.ab]) + r.aa[idx.ab]
2864
-      idx.bb <- which(theta >= theta.ab & theta < theta.bb)
2865
-      r.expected[idx.bb] <- (theta[idx.bb] - theta.ab[idx.bb]) * (r.bb[idx.bb]-r.ab[idx.bb])/
2866
-        (theta.bb[idx.bb] - theta.ab[idx.bb]) + r.ab[idx.bb]
2867
-      bafAndLrr[, i, 2] <- log2(r / r.expected)
2868
-
2869
-    }
2870
-  }
2871
-  return(bafAndLrr)
2811
+calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
2812
+			    batch.name,
2813
+			    feature.index){
2814
+	index.np <- which(!(isSnp(object)[feature.index]))
2815
+	j <- match(batch.name, batchNames(object))
2816
+	if(missing(j)) stop("batch.name did not match in batchNames(object)")
2817
+	CHR <- unique(chromosome(object)[feature.index])
2818
+	isX <- CHR == "X"
2819
+	centers <- getMedians(batchStatistics(object))
2820
+	lapply(centers, open)
2821
+	centersMatrix <- lapply(centers, function(x, i, j) x[i, j], j=j, i=feature.index)
2822
+	lapply(centers, close)
2823
+	centersMatrix <- do.call(cbind, centersMatrix)
2824
+	centersA <- centersMatrix[, 1:3]
2825
+	centersB <- centersMatrix[, 4:6]
2826
+	rm(centers, centersMatrix); gc()
2827
+	centersA[centersA < 64] <- 64
2828
+	centersB[centersB < 64] <- 64
2829
+	theta <- atan2(centersB, centersA) * 2/pi
2830
+	centersA[is.na(centersA)] <- 0
2831
+	centersB[is.na(centersB)] <- 0
2832
+	if(length(index.np) > 0) theta[index.np, ] <- 1
2833
+	##
2834
+	r <- centersA+centersB
2835
+	J <- which(batch(object)==batch.name)
2836
+	open(A(object))
2837
+	open(B(object))
2838
+	a <- as.matrix(A(object)[feature.index, J, drop=FALSE])
2839
+	b <- as.matrix(B(object)[feature.index, J, drop=FALSE])
2840
+	close(A(object))
2841
+	close(B(object))
2842
+	if(length(index.np) > 0) b[index.np, ] <- 0L
2843
+	obs.theta <- atan2(b, a)*2/pi
2844
+	calculateBandR <- function(o, r, theta, not.na, M){
2845
+		lessAA <- o < theta[, 1]
2846
+		lessAB <- o < theta[, 2]
2847
+		lessBB <- o < theta[, 3]
2848
+		ii <- which(lessAA & not.na)
2849
+		jj <- which(!lessBB & not.na)
2850
+		i <- which(!lessAA & lessAB & not.na)
2851
+		j <- which(!lessAB & lessBB & not.na)
2852
+		M[i, 1] <- 0.5*(o[i]-theta[i,1])/(theta[i,2]-theta[i,1])
2853
+		M[j, 1] <- 0.5*(o[j]-theta[j,2])/(theta[j,3]-theta[j,2]) + 0.5
2854
+		M[ii, 1] <- 0
2855
+		M[jj, 1] <- 1
2856
+		M[i, 2] <- (o[i]-theta[i,1])*(r[i,2]-r[i,1])/(theta[i,2]-theta[i,1]) + r[i, 1]
2857
+		M[j, 2] <- (o[j]-theta[j,2])*(r[j,3]-r[j,2])/(theta[j,3]-theta[j,2]) + r[j, 2]
2858
+		M[ii, 2] <- r[ii, 1]
2859
+		M[jj, 2] <- r[jj, 3]
2860
+		return(M)
2861
+	}
2862
+	not.na <- !is.na(theta[,1])
2863
+	r.expected <- bf <- matrix(NA, length(feature.index), ncol(a))
2864
+	M <- matrix(NA, length(feature.index), 2)
2865
+	for(j in seq_len(ncol(a))){
2866
+		tmp <- calculateBandR(obs.theta[, j], r=r, theta=theta, not.na=not.na, M=M)
2867
+		bf[, j] <- tmp[,1]
2868
+		r.expected[,j] <- tmp[,2]
2869
+	}
2870
+	bf[bf < 0] <- 0
2871
+	bf[bf > 1] <- 1
2872
+	if(length(index.np) > 0) r.expected[index.np, ] <- centersA[index.np, 1]
2873
+	obs.r <- a+b
2874
+	lrr <- log2(obs.r/r.expected)
2875
+	dimnames(bf) <- dimnames(lrr) <- list(featureNames(object)[feature.index],
2876
+					      sampleNames(object)[J])
2877
+	bf <- integerMatrix(bf, 1000)
2878
+	lrr <- integerMatrix(lrr, 100)
2879
+	return(list(baf=bf, lrr=lrr))
2872 2880
 }
Browse code

Merge branch 'collab'

* collab:
bump version
made cnSetExample smaller. Fix notes
Trying to revert bad commit
remove cn-functions. update description
comment most of cn-functions.r
Resaved rdas
update data/cnSetExample.rda and data/cnSetExample2.rda
bump version
coercion method from CNSet to oligoSnpSet makes integer matrices of BAFs and lrr's
import ff_or_matrix from oligoClasses. bump dependency on oligoClasses version. Use library(oligoClasses) in some of the crlmm examples.
Cleaning pkg loading process: work still required
move Biobase and methods to imports

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64324 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 23/03/2012 03:34:50
Showing1 changed files
... ...
@@ -1896,85 +1896,85 @@ isCall <- function(G, call){
1896 1896
 	G.call
1897 1897
 }
1898 1898
 
1899
-computeSummary <- function(object, G.call, call, I, allele, Ns, index){
1900
-	k <- match("grandMean", batchNames(object))
1901
-	stats <- summaryStats(G.call, I, FUNS=c("rowMedians", "rowMAD"))
1902
-	Ns[, k] <- rowSums(G.call, na.rm=TRUE)
1903
-	updateStats(stats, Ns, object, call, allele, index)
1904
-	gc()
1905
-	return()
1906
-}
1899
+##computeSummary <- function(object, G.call, call, I, allele, Ns, index){
1900
+##	k <- match("grandMean", batchNames(object))
1901
+##	stats <- summaryStats(G.call, I, FUNS=c("rowMedians", "rowMAD"))
1902
+##	Ns[, k] <- rowSums(G.call, na.rm=TRUE)
1903
+##	updateStats(stats, Ns, object, call, allele, index)
1904
+##	gc()
1905
+##	return()
1906
+##}
1907 1907
 
1908
-updateTau <- function(object, tau2, G.call, call, I, allele, index){
1909
-	k <- match("grandMean", batchNames(object))
1910
-	logI <- log2(I)
1911
-	rm(I); gc()
1912
-	tau2[, k] <- summaryStats(G.call, logI, FUNS="rowMAD")^2
1913
-	if(call==1 & allele=="A"){
1914
-		tau2A.AA(object)[index, ] <- tau2
1915
-	}
1916
-	if(call==1 & allele=="B"){
1917
-		tau2B.AA(object)[index, ] <- tau2
1918
-	}
1919
-	if(call==3 & allele=="A"){
1920
-		tau2A.BB(object)[index, ] <- tau2
1921
-	}
1922
-	if(call==3 & allele=="B"){
1923
-		tau2B.BB(object)[index, ] <- tau2
1924
-	}
1925
-	NULL
1926
-}
1908
+##updateTau <- function(object, tau2, G.call, call, I, allele, index){
1909
+##	k <- match("grandMean", batchNames(object))
1910
+##	logI <- log2(I)
1911
+##	rm(I); gc()
1912
+##	tau2[, k] <- summaryStats(G.call, logI, FUNS="rowMAD")^2
1913
+##	if(call==1 & allele=="A"){
1914
+##		tau2A.AA(object)[index, ] <- tau2
1915
+##	}
1916
+##	if(call==1 & allele=="B"){
1917
+##		tau2B.AA(object)[index, ] <- tau2
1918
+##	}
1919
+##	if(call==3 & allele=="A"){
1920
+##		tau2A.BB(object)[index, ] <- tau2
1921
+##	}
1922
+##	if(call==3 & allele=="B"){
1923
+##		tau2B.BB(object)[index, ] <- tau2
1924
+##	}
1925
+##	NULL
1926
+##}
1927 1927
 
1928
-updateCors <- function(cors, G.call, I){
1929
-	k <- match("grandMean", batchNames(object))
1930
-	cors[, k] <- rowCors(I[[1]]*G.call, I[[2]]*G.call, na.rm=TRUE)
1931
-	if(call==1){
1932
-		corrAA(object)[index, ] <- cors
1933
-	}
1934
-	if(call==2){
1935
-		corrAB(object)[index, ] <- cors
1936
-	}
1937
-	if(call==3){
1938
-		corrBB(object)[index, ] <- cors
1939
-	}
1940
-}
1928
+##updateCors <- function(cors, G.call, I){
1929
+##	k <- match("grandMean", batchNames(object))
1930
+##	cors[, k] <- rowCors(I[[1]]*G.call, I[[2]]*G.call, na.rm=TRUE)
1931
+##	if(call==1){
1932
+##		corrAA(object)[index, ] <- cors
1933
+##	}
1934
+##	if(call==2){
1935
+##		corrAB(object)[index, ] <- cors
1936
+##	}
1937
+##	if(call==3){
1938
+##		corrBB(object)[index, ] <- cors
1939
+##	}
1940
+##}
1941 1941
 
1942
-updateStats <- function(stats, Ns, object, call, allele, tau2, index){
1943
-	if(call==1){
1944
-		Ns.AA(object)[index, ] <- Ns
1945
-		if(allele=="A"){
1946
-			medianA.AA(object)[index, k] <- stats[, 1]
1947
-			madA.AA(object)[index, k] <- stats[, 2]
1948
-			updateTau(object, tau2, G.call, call, I, allele, index)
1949
-		} else {
1950
-			medianB.AA(object)[index, k] <- stats[, 1]
1951
-			madB.AA(object)[index, k] <- stats[, 2]
1952
-			updateTau(object, tau2, G.call, call, I, allele, index)
1953
-		}
1954
-	}
1955
-	if(call==2){
1956
-		Ns.AB(object)[index, ] <- Ns
1957
-		if(allele=="A"){
1958
-			medianA.AB(object)[index, k] <- stats[, 1]
1959
-			madA.AB(object)[index, k] <- stats[, 2]
1960
-		} else {
1961
-			medianB.AB(object)[index, k] <- stats[, 1]
1962
-			madB.AB(object)[index, k] <- stats[, 2]
1963
-		}
1964
-	}
1965
-	if(call==3){
1966
-		Ns.BB(object)[index, ] <- Ns
1967
-		if(allele=="A"){
1968
-			medianA.BB(object)[index, k] <- stats[, 1]
1969
-			madA.BB(object)[index, k] <- stats[, 2]
1970
-			updateTau(object, tau2, G.call, call, I, allele, index)
1971
-		} else {
1972
-			medianB.BB(object)[index, k] <- stats[, 1]
1973
-			madB.BB(object)[index, k] <- stats[, 2]
1974
-			updateTau(object, tau2, G.call, call, I, allele, index)
1975
-		}
1976
-	}
1977
-}
1942
+##updateStats <- function(stats, Ns, object, call, allele, tau2, index){
1943
+##	if(call==1){
1944
+##		Ns.AA(object)[index, ] <- Ns
1945
+##		if(allele=="A"){
1946
+##			medianA.AA(object)[index, k] <- stats[, 1]
1947
+##			madA.AA(object)[index, k] <- stats[, 2]
1948
+##			updateTau(object, tau2, G.call, call, I, allele, index)
1949
+##		} else {
1950
+##			medianB.AA(object)[index, k] <- stats[, 1]
1951
+##			madB.AA(object)[index, k] <- stats[, 2]
1952
+##			updateTau(object, tau2, G.call, call, I, allele, index)
1953
+##		}
1954
+##	}
1955
+##	if(call==2){
1956
+##		Ns.AB(object)[index, ] <- Ns
1957
+##		if(allele=="A"){
1958
+##			medianA.AB(object)[index, k] <- stats[, 1]
1959
+##			madA.AB(object)[index, k] <- stats[, 2]
1960
+##		} else {
1961
+##			medianB.AB(object)[index, k] <- stats[, 1]
1962
+##			madB.AB(object)[index, k] <- stats[, 2]
1963
+##		}
1964
+##	}
1965
+##	if(call==3){
1966
+##		Ns.BB(object)[index, ] <- Ns
1967
+##		if(allele=="A"){
1968
+##			medianA.BB(object)[index, k] <- stats[, 1]
1969
+##			madA.BB(object)[index, k] <- stats[, 2]
1970
+##			updateTau(object, tau2, G.call, call, I, allele, index)
1971
+##		} else {
1972
+##			medianB.BB(object)[index, k] <- stats[, 1]
1973
+##			madB.BB(object)[index, k] <- stats[, 2]
1974
+##			updateTau(object, tau2, G.call, call, I, allele, index)
1975
+##		}
1976
+##	}
1977
+##}
1978 1978
 
1979 1979
 crlmmCopynumber <- function(object,
1980 1980
 			    MIN.SAMPLES=10,
Browse code

Merge branch 'collab'

* collab:
remove getCluster() calls and replace with parStatus()
update man pages for crlmm and genotype.Illumina with respect to the setup for parallelization
add neededPkgs argument to ocLapply calls in crlmmGT2
bump dependency on oligoClasses
Update R/crlmm-illumina.R
contructInf, preprocessInf and genotypeInf no longer exported

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64211 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 21/03/2012 02:52:50
Showing1 changed files
... ...
@@ -1541,7 +1541,7 @@ shrinkSummary <- function(object,
1541 1541
 	}
1542 1542
 	if(is.lds){
1543 1543
 		##index.list <- splitIndicesByLength(marker.index, ocProbesets())
1544
-		if(!is.null(getCluster()) & isPackageLoaded("snow")){
1544
+		if(parStatus()){
1545 1545
 			index.list <- splitIndicesByNode(marker.index)
1546 1546
 		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
1547 1547
 		ocLapply(seq(along=index.list),
... ...
@@ -1595,7 +1595,7 @@ genotypeSummary <- function(object,
1595 1595
 	FUN <- get(myf)
1596 1596
 	if(is.lds){
1597 1597
 		##index.list <- splitIndicesByLength(marker.index, ocProbesets())
1598
-		if(!is.null(getCluster()) & isPackageLoaded("snow")){
1598
+		if(parStatus()){
1599 1599
 			index.list <- splitIndicesByNode(marker.index)
1600 1600
 		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
1601 1601
 		ocLapply(seq(along=index.list),
... ...
@@ -2116,7 +2116,7 @@ estimateCnParameters <- function(object,
2116 2116
 	myfun <- lmFxn(type[[1]])
2117 2117
 	FUN <- get(myfun)
2118 2118
 	if(is.lds){
2119
-		if(!is.null(getCluster()) & isPackageLoaded("snow")){
2119
+		if(parStatus()){
2120 2120
 			index.list <- splitIndicesByNode(marker.index)
2121 2121
 		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
2122 2122
 		ocLapply(seq(along=index.list),
Browse code

Merge branch 'collab'

* collab:
replace splitIndicesByLength with splitIndicesByNode throughout cnrma-functions.R (check that snow is loaded and getCluster is not null)
add an example to genotype.Illumina that indicates how parallelization would be enabled. The example requires local data and is not run.
change outdir in IlluminaPreprocessCN and AffyGW
Update R/crlmm-illumina.R
bump version for parallelization of genotype.Illumina
Update R/crlmm-illumina.R

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@64151 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 20/03/2012 13:55:50
Showing1 changed files