Browse code

merge fix for genotype function

* collab_release:
comment requarding dqrls
Bug fix in genotype function. snprmaAffy returns TRUE, and should not be assigned to the symbol 'cnSet'
Put illumina sample sheet in inst/extdata

inst/extdata

# Please enter a commit message to explain why this merge is necessary,
# especially if it merges an updated upstream into a topic branch.
#
# Lines starting with '#' will be ignored, and an empty message aborts
# the commit.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/branches/RELEASE_2_11/madman/Rpacks/crlmm@70914 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 02/11/2012 00:21:02
Showing3 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.16.2
4
+Version: 1.16.3
5 5
 Author: Benilton S Carvalho, Robert Scharpf, Matt Ritchie, Ingo Ruczinski, Rafael A Irizarry
6 6
 Maintainer: Benilton S Carvalho <Benilton.Carvalho@cancer.org.uk>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Description: Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays, as well as a copy number tool specific to 5.0, 6.0, and Illumina platforms
... ...
@@ -41,15 +41,6 @@ getFeatureData <- function(cdfName, copynumber=FALSE, genome){
41 41
 	}
42 42
 	path <- system.file("extdata", package=pkgname)
43 43
 	##multiple.builds <- length(grep("hg19", list.files(path)) > 0)
44
-##	if(missing(genome)){
45
-##		snp.file <- list.files(path, pattern="snpProbes_hg")
46
-##		if(length(snp.file) > 1){
47
-##			## use hg19
48
-##			message("genome build not specified. Using build hg19 for annotation.")
49
-##			snp.file <- snp.file[1]
50
-##		}
51
-##		genome <- gsub(".rda", "", strsplit(snp.file, "snpProbes_")[[1]][[2]])
52
-##	} else snp.file <- paste("snpProbes_", genome, ".rda", sep="")
53 44
 	snp.file <- list.files(path, pattern="snpProbes_hg")
54 45
 	if(length(snp.file)==0){
55 46
 		no.build <- TRUE
... ...
@@ -74,7 +65,6 @@ getFeatureData <- function(cdfName, copynumber=FALSE, genome){
74 65
 	## if we use a different build we may throw out a number of snps...
75 66
 	snpProbes <- snpProbes[rownames(snpProbes) %in% gns, ]
76 67
 	if(copynumber){
77
-##		cn.file <- paste("cnProbes_", genome, ".rda", sep="")
78 68
 		if(!no.build){
79 69
 			cn.file <- paste("cnProbes_", genome, ".rda", sep="")
80 70
 		} else cn.file <- "cnProbes.rda"
... ...
@@ -326,11 +316,11 @@ genotype <- function(filenames,
326 316
 	if(!(is(A(cnSet), "ff") || is(A(cnSet), "ffdf"))){
327 317
 		stop("The ff package is required for this function.")
328 318
 	}
329
-	cnSet <- snprmaAffy(cnSet,
330
-			    mixtureSampleSize=mixtureSampleSize,
331
-			    eps=eps,
332
-			    seed=seed,
333
-			    verbose=verbose)
319
+	ok <- snprmaAffy(cnSet,
320
+			 mixtureSampleSize=mixtureSampleSize,
321
+			 eps=eps,
322
+			 seed=seed,
323
+			 verbose=verbose)
334 324
 	ok <- cnrmaAffy(cnSet=cnSet,
335 325
 			seed=seed,
336 326
 			verbose=verbose)
... ...
@@ -457,6 +447,7 @@ dqrlsWrapper <- function(x, y, wts, tol=1e-7){
457 447
 	n <- NROW(y)
458 448
 	p <- ncol(x)
459 449
 	ny <- NCOL(y)
450
+	## perhaps use .Call("C_Cdqrls", x, y, tolerance)  instead -- see lsfit, for example
460 451
 	.Fortran("dqrls", qr=x*wts, n=n, p=p, y=y * wts, ny=ny,
461 452
 		 tol=as.double(tol), coefficients=mat.or.vec(p, ny),
462 453
 		 residuals=y, effects=mat.or.vec(n, ny),
... ...
@@ -749,7 +740,8 @@ fit.lm2 <- function(strata,
749 740
 		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
750 741
 		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
751 742
 	}
752
-	snp.index <- sample(match(fns.noflags, featureNames(object)), 5000)
743
+	index.flag <- match(fns.noflags, featureNames(object))
744
+	snp.index <- sample(index.flag, min(c(length(index.flag), 5000)))
753 745
 	##
754 746
 	nuA.np <- as.matrix(nuA(object)[marker.index, batch.index])
755 747
 	phiA.np <- as.matrix(phiA(object)[marker.index, batch.index])
... ...
@@ -1210,7 +1202,8 @@ fit.lm4 <- function(strata,
1210 1202
 		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
1211 1203
 		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
1212 1204
 	}
1213
-	snp.index <- sample(match(fns.noflags, featureNames(object)), 10000)
1205
+	index.flag <- match(fns.noflags, featureNames(object))
1206
+	snp.index <- sample(index.flag, min(c(length(index.flag), 10000)))
1214 1207
 	##
1215 1208
 	N.AA <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
1216 1209
 	N.AB <- as.matrix(N.AB(object)[snp.index, batch.index, drop=FALSE])
... ...
@@ -98,7 +98,8 @@ We begin by specifying the path containing the raw IDAT files for a
98 98
 set of samples from the Infinium 370k platform.
99 99
 
100 100
 <<datadir>>=
101
-datadir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k"
101
+idatdir <- "/thumper/ctsa/snpmicroarray/illumina/IDATS/370k"
102
+ssdir <- system.file("extdata", package="crlmm")
102 103
 @
103 104
 
104 105
 For Infinium platforms, an Illumina sample sheet containing
... ...
@@ -108,7 +109,8 @@ information.  The following code reads in the samplesheet for the IDAT
108 109
 files on our local server.
109 110
 
110 111
 <<samplesheet>>=
111
-samplesheet = read.csv(file.path(datadir, "HumanHap370Duo_Sample_Map.csv"), header=TRUE, as.is=TRUE)
112
+samplesheet <- read.csv(file.path(ssdir, "HumanHap370Duo_Sample_Map.csv"), header=TRUE, as.is=TRUE)
113
+head(samplesheet)
112 114
 @
113 115
 
114 116
 For the purposes of this vignette, we indicate that we only wish to