Browse code

Added weighted.lm argument to fit.lm1, fit.lm2 and computeCN. Added computeCN.

computeCN creates a separate object for the following markers:
- autosomal SNPs
- autosomal NPs
- chromosome X SNPs
- chromosome X NPs
One could run separate jobs for computing CN on these indices.

- code to combine the results from these objects is not yet available.

- the NAMESPACE currently exports a few things that are temporary

- the weighted.lm argument is to allow one to more quickly estimate
the linear model parameters. Code for the unweighted regression is
not yet available.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@48924 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 21/08/2010 02:47:27
Showing3 changed files

... ...
@@ -73,6 +73,7 @@ export(crlmm,
73 73
        crlmmCopynumber2, crlmmCopynumberLD)
74 74
 export(constructIlluminaCNSet)
75 75
 export(linesCNSetLM)
76
+export(linearModel.noweights, computeCN)
76 77
 
77 78
 
78 79
 
... ...
@@ -24,7 +24,7 @@ getFeatureData.Affy <- function(cdfName, copynumber=FALSE){
24 24
 	gns <- getVarInEnv("gns")
25 25
 	path <- system.file("extdata", package=paste(cdfName, "Crlmm", sep=""))
26 26
 	load(file.path(path, "snpProbes.rda"))
27
-	snpProbes <- getVarInEnv("snpProbes")
27
+	snpProbes <- get("snpProbes")
28 28
 	if(copynumber){
29 29
 		load(file.path(path, "cnProbes.rda"))
30 30
 		cnProbes <- get("cnProbes")
... ...
@@ -759,6 +759,32 @@ nuphiAllele2 <- function(allele, Ystar, W, Ns){
759 759
 	return(list(nu, phi))
760 760
 }
761 761
 
762
+## linear regression without weights -- design matrix is same for all snps
763
+linearModel.noweights <- function(allele, Ystar, W, Ns){
764
+	complete <- rowSums(is.na(W)) == 0 
765
+	if(any(!is.finite(W))){## | any(!is.finite(V))){
766
+		i <- which(rowSums(!is.finite(W)) > 0)
767
+		stop("Possible zeros in the within-genotype estimates of the spread (vA, vB). ")
768
+	}
769
+	NOHET <- mean(Ns[, 2], na.rm=TRUE) < 0.05
770
+	if(missing(allele)) stop("must specify allele")
771
+	if(allele == "A") X <- cbind(1, 2:0) else X <- cbind(1, 0:2)
772
+	if(NOHET) X <- X[-2, ] ##more than 1 X chromosome, but all homozygous		
773
+	##How to quickly generate Xstar, Xstar = diag(W) %*% X
774
+	Xstar <- apply(W, 1, generateX, X)
775
+	IXTX <- apply(Xstar, 2, generateIXTX, nrow=nrow(X))
776
+	betahat <- matrix(NA, 2, nrow(Ystar))
777
+	ses <- matrix(NA, 2, nrow(Ystar))
778
+	for(i in 1:nrow(Ystar)){
779
+		betahat[, i] <- crossprod(matrix(IXTX[, i], ncol(X), ncol(X)), crossprod(matrix(Xstar[, i], nrow=nrow(X)), Ystar[i, ]))
780
+		ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
781
+		ses[, i] <- sqrt(diag(matrix(IXTX[, i], ncol(X), ncol(X)) * ssr))
782
+	}
783
+	nu <- betahat[1, ]
784
+	phi <- betahat[2, ]
785
+	return(list(nu, phi))
786
+}
787
+
762 788
 nuphiAlleleX <- function(allele, Ystar, W, Ns, chrX=FALSE){
763 789
 	complete <- rowSums(is.na(W)) == 0 
764 790
 	if(any(!is.finite(W))){## | any(!is.finite(V))){
... ...
@@ -933,7 +959,8 @@ crlmmCopynumber <- function(object,
933 959
 			    MIN.NU=2^3,
934 960
 			    MIN.PHI=2^3,
935 961
 			    THR.NU.PHI=TRUE,
936
-			    thresholdCopynumber=TRUE){
962
+			    thresholdCopynumber=TRUE,
963
+			    weighted.lm=TRUE){
937 964
 	ffIsLoaded <- class(calls(object))[[1]] == "ff"
938 965
 	if(ffIsLoaded){
939 966
 		open(object)
... ...
@@ -1040,7 +1067,8 @@ crlmmCopynumberLD <- function(object,
1040 1067
 			    MIN.NU=2^3,
1041 1068
 			    MIN.PHI=2^3,
1042 1069
 			    THR.NU.PHI=TRUE,
1043
-			    thresholdCopynumber=TRUE){
1070
+			    thresholdCopynumber=TRUE,
1071
+			      weighted.lm=TRUE){
1044 1072
 	stopifnot("batch" %in% varLabels(protocolData(object)))
1045 1073
 	stopifnot("chromosome" %in% fvarLabels(object))
1046 1074
 	stopifnot("position" %in% fvarLabels(object))
... ...
@@ -1074,7 +1102,7 @@ crlmmCopynumberLD <- function(object,
1074 1102
 	snpBatches <- splitIndicesByLength(autosomeIndex.snps, ocProbesets())
1075 1103
 	ocLapply(seq(along=snpBatches),
1076 1104
 		 fit.lm1,
1077
-		 autosomeIndex=autosomeIndex.snps,
1105
+		 marker.index=autosomeIndex.snps,
1078 1106
 		 object=object,
1079 1107
 		 Ns=Ns,
1080 1108
 		 normal=normal,
... ...
@@ -1096,7 +1124,7 @@ crlmmCopynumberLD <- function(object,
1096 1124
 	if(verbose) message("Estimating total copy number at nonpolymorphic loci")
1097 1125
 	ocLapply(seq(along=snpBatches),
1098 1126
 			fit.lm2,
1099
-			autosomeIndex=autosomeIndex.nps,
1127
+			marker.index=autosomeIndex.nps,
1100 1128
 			object=object,
1101 1129
 			Ns=Ns,
1102 1130
 			normal=normal,
... ...
@@ -1117,7 +1145,7 @@ crlmmCopynumberLD <- function(object,
1117 1145
 	if(verbose) message("Estimating allele-specific copy number at polymorphic loci on chromosome X")
1118 1146
 	ocLapply(seq(along=snpBatches),
1119 1147
 			fit.lm3,
1120
-			autosomeIndex=XIndex.snps,
1148
+			marker.index=XIndex.snps,
1121 1149
 			object=object,
1122 1150
 			Ns=Ns,
1123 1151
 			normal=normal,
... ...
@@ -1138,7 +1166,7 @@ crlmmCopynumberLD <- function(object,
1138 1166
 	snpBatches <- splitIndicesByLength(XIndex.nps, ocProbesets())
1139 1167
 	tmp <- ocLapply(seq(along=snpBatches),
1140 1168
 			fit.lm4,
1141
-			XIndex=XIndex.nps,
1169
+			marker.index=XIndex.nps,
1142 1170
 			object=object,
1143 1171
 			Ns=Ns,
1144 1172
 			normal=normal,
... ...
@@ -1161,7 +1189,8 @@ crlmmCopynumber2 <- crlmmCopynumberLD
1161 1189
 
1162 1190
 fit.lm1 <- function(idxBatch,
1163 1191
 		    snpBatches,
1164
-		    autosomeIndex,
1192
+##		    autosomeIndex,
1193
+		    marker.index,
1165 1194
 		    object,
1166 1195
 		    Ns,
1167 1196
 		    normal,
... ...
@@ -1175,7 +1204,8 @@ fit.lm1 <- function(idxBatch,
1175 1204
 		    THR.NU.PHI,
1176 1205
 		    MIN.NU,
1177 1206
 		    MIN.PHI,
1178
-		    verbose, ...){
1207
+		    verbose,
1208
+		    weighted.lm, ...){
1179 1209
 	physical <- get("physical")
1180 1210
 	if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches))
1181 1211
 	snps <- snpBatches[[idxBatch]]
... ...
@@ -1285,10 +1315,14 @@ fit.lm1 <- function(idxBatch,
1285 1315
 		wB <- sqrt(1/vB2)
1286 1316
 		YA <- muA*wA
1287 1317
 		YB <- muB*wB
1288
-		res <- nuphiAllele2(allele="A", Ystar=YA, W=wA, Ns=Ns)
1318
+		if(weighted.lm){
1319
+			res <- nuphiAllele2(allele="A", Ystar=YA, W=wA, Ns=Ns)
1320
+		} else res <- linearModel.noweights(allele="A", Ystar=YA, W=wA, Ns=Ns)
1289 1321
 		nuA[, J] <- res[[1]]
1290 1322
 		phiA[, J] <- res[[2]]
1291
-		res <- nuphiAllele2(allele="B", Ystar=YB, W=wB, Ns=Ns)
1323
+		if(!weighted.lm){
1324
+			res <- nuphiAllele2(allele="B", Ystar=YB, W=wB, Ns=Ns)
1325
+		} else res <- linearModel.noweights(allele="B", Ystar=YB, W=wB, Ns=Ns)
1292 1326
 		nuB[, J] <- res[[1]]
1293 1327
 		phiB[, J] <- res[[2]]
1294 1328
 		if(THR.NU.PHI){
... ...
@@ -1364,7 +1398,7 @@ fit.lm1 <- function(idxBatch,
1364 1398
 
1365 1399
 fit.lm2 <- function(idxBatch,
1366 1400
 		    snpBatches,
1367
-		    autosomeIndex,
1401
+		    marker.index,
1368 1402
 		    object,
1369 1403
 		    Ns,
1370 1404
 		    normal,
... ...
@@ -1378,7 +1412,8 @@ fit.lm2 <- function(idxBatch,
1378 1412
 		    THR.NU.PHI,
1379 1413
 		    MIN.NU,
1380 1414
 		    MIN.PHI,
1381
-		    verbose, ...){
1415
+		    verbose,
1416
+		    weighted.lm, ...){
1382 1417
 	physical <- get("physical")
1383 1418
 	if(verbose) message("Probe batch ", idxBatch, " of ", length(snpBatches))
1384 1419
 	snps <- snpBatches[[idxBatch]]
... ...
@@ -1481,7 +1516,7 @@ fit.lm2 <- function(idxBatch,
1481 1516
 
1482 1517
 fit.lm3 <- function(idxBatch,
1483 1518
 		    snpBatches,
1484
-		    XIndex,
1519
+		    marker.index,
1485 1520
 		    object,
1486 1521
 		    Ns,
1487 1522
 		    normal,
... ...
@@ -1719,7 +1754,7 @@ fit.lm3 <- function(idxBatch,
1719 1754
 
1720 1755
 fit.lm4 <- function(idxBatch,
1721 1756
 		    snpBatches,
1722
-		    XIndex,
1757
+		    marker.index,
1723 1758
 		    object,
1724 1759
 		    Ns,
1725 1760
 		    normal,
... ...
@@ -3183,7 +3218,7 @@ constructIlluminaCNSet <- function(crlmmResult,
3183 3218
 				   
3184 3219
 
3185 3220
 ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){
3186
-	ubatch <- unique(batch(cnSet))[batch]
3221
+	ubatch <- unique(batch(object))[batch]
3187 3222
 	Nu <- nu(object, allele)[index, batch]
3188 3223
 	Phi <- phi(object, allele)[index, batch]
3189 3224
 	centers <- list(Nu, Nu+Phi, Nu+2*Phi)
... ...
@@ -3202,3 +3237,141 @@ ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){
3202 3237
 	centers$fns <- fns
3203 3238
 	return(centers)	
3204 3239
 }
3240
+
3241
+computeCN <- function(filenames,
3242
+		      object,
3243
+		      which.batches,
3244
+		      MIN.SAMPLES=10,
3245
+		      SNRMin=5,
3246
+		      MIN.OBS=1,
3247
+		      DF.PRIOR=50,
3248
+		      bias.adj=FALSE,
3249
+		      prior.prob=rep(1/4,4),
3250
+		      seed=1,
3251
+		      verbose=TRUE,
3252
+		      GT.CONF.THR=0.99,
3253
+		      PHI.THR=2^6,
3254
+		      nHOM.THR=5,
3255
+		      MIN.NU=2^3,
3256
+		      MIN.PHI=2^3,
3257
+		      THR.NU.PHI=TRUE,
3258
+		      thresholdCopynumber=TRUE,
3259
+		      type=c("autosome.snps", "autosome.nps", "X.snps", "X.nps"),
3260
+		      weighted.lm=TRUE){
3261
+	stopifnot("batch" %in% varLabels(protocolData(object)))
3262
+	stopifnot("chromosome" %in% fvarLabels(object))
3263
+	stopifnot("position" %in% fvarLabels(object))
3264
+	stopifnot("isSnp" %in% fvarLabels(object))
3265
+	batch <- batch(object)
3266
+	if(type=="autosome.snps"){
3267
+		marker.index <- which(chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object)))
3268
+		nr <- length(marker.index)
3269
+		Ns <- initializeBigMatrix("Ns", nr, 5)
3270
+		colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
3271
+		if(!file.exists(file.path(ldPath(), "normal.snps.rda"))){
3272
+			normal <- initializeBigMatrix("normal.snps", nr, ncol(object), vmode="integer")
3273
+			normal[,] <- 1L
3274
+			save(normal, file=file.path(ldPath(), "normal.snps.rda"))
3275
+		} else load(file.path(ldPath(), "normal.snps.rda"))
3276
+		if(!file.exists(file.path(ldPath(), "flags.snps.rda"))){
3277
+			flags <- initializeBigMatrix("flags.snps", nr, length(unique(batch(object))), vmode="integer")
3278
+			flags[,] <- 0L
3279
+			save(flags, file=file.path(ldPath(), "flags.snps.rda"))
3280
+		} else{
3281
+			load(file.path(ldPath(), "flags.snps.rda"))
3282
+		} 
3283
+		if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
3284
+		FUN <- "fit.lm1"
3285
+	}
3286
+	if(type=="autosome.nps"){
3287
+		marker.index <- which(chromosome(object) < 23 & !isSnp(object) & !is.na(chromosome(object)))		
3288
+		##marker.index <- which(chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object)))
3289
+		nr <- length(marker.index)
3290
+		Ns <- initializeBigMatrix("Ns", nr, 5)
3291
+		colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
3292
+		if(!file.exists(file.path(ldPath(), "normal.snps.rda"))){
3293
+			normal <- initializeBigMatrix("normal.nps", nr, ncol(object), vmode="integer")
3294
+			normal[,] <- 1L
3295
+			save(normal, file=file.path(ldPath(), "normal.nps.rda"))
3296
+		} else load(file.path(ldPath(), "normal.nps.rda"))
3297
+		if(!file.exists(file.path(ldPath(), "flags.nps.rda"))){
3298
+			flags <- initializeBigMatrix("flags.nps", nr, length(unique(batch(object))), vmode="integer")
3299
+			flags[,] <- 0L
3300
+			save(flags, file=file.path(ldPath(), "flags.nps.rda"))
3301
+		} else{
3302
+			load(file.path(ldPath(), "flags.nps.rda"))
3303
+		} 
3304
+		if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
3305
+		FUN <- "fit.lm2"
3306
+	}
3307
+	if(type=="X.snps"){
3308
+		marker.index <- which(chromosome(object) == 23 & isSnp(object) & !is.na(chromosome(object)))
3309
+		##marker.index <- which(chromosome(object) < 23 & isSnp(object) & !is.na(chromosome(object)))
3310
+		nr <- length(marker.index)
3311
+		Ns <- initializeBigMatrix("Ns", nr, 5)
3312
+		colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
3313
+		if(!file.exists(file.path(ldPath(), "normal.X.snps.rda"))){
3314
+			normal <- initializeBigMatrix("normal.X.snps", nr, ncol(object), vmode="integer")
3315
+			normal[,] <- 1L
3316
+			save(normal, file=file.path(ldPath(), "normal.X.snps.rda"))
3317
+		} else load(file.path(ldPath(), "normal.X.snps.rda"))
3318
+		if(!file.exists(file.path(ldPath(), "flags.X.snps.rda"))){
3319
+			flags <- initializeBigMatrix("flags.X.snps", nr, length(unique(batch(object))), vmode="integer")
3320
+			flags[,] <- 0L
3321
+			save(flags, file=file.path(ldPath(), "flags.X.snps.rda"))
3322
+		} else{
3323
+			load(file.path(ldPath(), "flags.X.snps.rda"))
3324
+		} 
3325
+		if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
3326
+		FUN <- "fit.lm3"
3327
+	}
3328
+	if(type=="X.nps"){
3329
+		marker.index <- which(chromosome(object) == 23 & !isSnp(object) & !is.na(chromosome(object)))		
3330
+		nr <- length(marker.index)
3331
+		Ns <- initializeBigMatrix("Ns", nr, 5)
3332
+		colnames(Ns) <- c("A", "B", "AA", "AB", "BB")
3333
+		if(!file.exists(file.path(ldPath(), "normal.X.nps.rda"))){
3334
+			normal <- initializeBigMatrix("normal.X.nps", nr, ncol(object), vmode="integer")
3335
+			normal[,] <- 1L
3336
+			save(normal, file=file.path(ldPath(), "normal.X.nps.rda"))
3337
+		} else load(file.path(ldPath(), "normal.X.nps.rda"))
3338
+		if(!file.exists(file.path(ldPath(), "flags.X.nps.rda"))){
3339
+			flags <- initializeBigMatrix("flags.X.nps", nr, length(unique(batch(object))), vmode="integer")
3340
+			flags[,] <- 0L
3341
+			save(flags, file=file.path(ldPath(), "flags.X.nps.rda"))
3342
+		} else{
3343
+			load(file.path(ldPath(), "flags.X.nps.rda"))
3344
+		} 
3345
+		if(verbose) message("Estimating allele-specific copy number at autosomal SNPs")
3346
+		FUN <- "fit.lm4"
3347
+	}
3348
+	index.strata <- splitIndicesByLength(marker.index, ocProbesets())
3349
+	obj <- construct(filenames=filenames,
3350
+			 cdfName=annotation(object),
3351
+			 copynumber=TRUE,
3352
+			 batch=batch(object),
3353
+			 sns=sampleNames(object),
3354
+			 fns=featureNames(object)[marker.index])
3355
+	ocLapply(seq(along=index.strata),
3356
+		 match.fun(FUN),
3357
+		 marker.index=marker.index,
3358
+		 object=object,
3359
+		 Ns=Ns,
3360
+		 normal=normal,
3361
+		 snpflags=flags,
3362
+		 snpBatches=index.strata,
3363
+		 batchSize=ocProbesets(),
3364
+		 SNRMin=SNRMin,
3365
+		 MIN.SAMPLES=MIN.SAMPLES,
3366
+		 MIN.OBS=MIN.OBS,
3367
+		 DF=DF.PRIOR,
3368
+		 GT.CONF.THR=GT.CONF.THR,
3369
+		 THR.NU.PHI=THR.NU.PHI,
3370
+		 MIN.NU=MIN.NU,
3371
+		 MIN.PHI=MIN.PHI,
3372
+		 verbose=verbose,
3373
+		 neededPkgs="crlmm",
3374
+		 weighted.lm=weighted.lm)
3375
+	message("finished")
3376
+	return(obj)
3377
+}
... ...
@@ -11,13 +11,13 @@ crlmmCopynumber(object, chromosome = 1:23, which.batches, MIN.SAMPLES
11 11
 = 10, SNRMin = 5, MIN.OBS = 3, DF.PRIOR = 50, bias.adj = FALSE,
12 12
 prior.prob = rep(1/4, 4), seed = 1, verbose = TRUE, GT.CONF.THR =
13 13
 0.99, PHI.THR = 2^6, nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3,
14
-THR.NU.PHI = TRUE, thresholdCopynumber = TRUE)
14
+THR.NU.PHI = TRUE, thresholdCopynumber = TRUE, weighted.lm=TRUE)
15 15
 
16 16
 crlmmCopynumberLD(object, which.batches, MIN.SAMPLES = 10, SNRMin = 5,
17 17
 MIN.OBS = 1, DF.PRIOR = 50, bias.adj = FALSE, prior.prob = rep(1/4,
18 18
 4), seed = 1, verbose = TRUE, GT.CONF.THR = 0.99, PHI.THR = 2^6,
19 19
 nHOM.THR = 5, MIN.NU = 2^3, MIN.PHI = 2^3, THR.NU.PHI = TRUE,
20
-thresholdCopynumber = TRUE)
20
+thresholdCopynumber = TRUE, weighted.lm=TRUE)
21 21
 
22 22
 }
23 23
 \arguments{
... ...
@@ -136,6 +136,12 @@ thresholdCopynumber = TRUE)
136 136
   truncated.
137 137
 
138 138
 }
139
+
140
+\item{weighted.lm}{ If \code{TRUE}, a linear model is fit using
141
+weighted least squares. Otherwise, a linear model is fit without
142
+weights. The run-time for weighted least squares is substantially
143
+longer.}
144
+
139 145
 }
140 146
 
141 147
 \details{
... ...
@@ -151,15 +157,4 @@ thresholdCopynumber = TRUE)
151 157
 
152 158
 }
153 159
 \author{R. Scharpf}
154
-\examples{
155
-## data(example.callSet)
156
-## cnSet <- crlmmCopynumberLD(example.callSet)
157
-## total copy number
158
-## cn <- copyNumber(cnSet)
159
-## allele-specific copy number
160
-## ca <- CA(cnSet)/100 ## A dosage
161
-## cb <- CB(cnSet)/100 ## B dosage
162
-}
163
-% Add one or more standard keywords, see file 'KEYWORDS' in the
164
-% R documentation directory.
165 160
 \keyword{manip}