Browse code

added copy number vignette for illumina. see inst/scripts

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38665 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 09/04/2009 18:09:04
Showing 4 changed files

... ...
@@ -143,3 +143,7 @@ is decoded and scanned
143 143
 * Fixed downstream vignette to account for the SnpSet object being returned by crlmm
144 144
 
145 145
 * Fixed some minor status messages
146
+
147
+2009-04-09 R Scharpf - committed version 1.0.81
148
+
149
+* added a skeleton of a copy number vignette for the illumina platform (illumina_copynumber.Rnw) to inst/scripts
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.0.80
4
+Version: 1.0.81
5 5
 Date: 2008-12-30
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -188,7 +188,8 @@ goodSnps <- function(phi.thr, envir, fewAA=20, fewBB=20){
188 188
 
189 189
 instantiateObjects <- function(calls, conf, NP, plate, envir, chrom, A, B,
190 190
 			       gender, SNRmin=5, SNR,
191
-                               pkgname){
191
+                               pkgname,
192
+			       locusSet){
192 193
 	pkgname <- paste(pkgname, "Crlmm", sep="")
193 194
 	envir[["chrom"]] <- chrom
194 195
 	CHR_INDEX <- paste(chrom, "index", sep="")
... ...
@@ -615,7 +616,7 @@ oneBatch <- function(plateIndex,
615 616
 	CHR <- envir[["chrom"]]
616 617
 	if(bias.adj){
617 618
 		nuA <- envir[["nuA"]]
618
-		if(all(is.na(nuA))) {
619
+		if(all(is.na(nuA))){
619 620
 			message("Background and signal coefficients have not yet been estimated -- can not do bias correction yet")
620 621
 			stop("Must run computeCopynumber a second time with bias.adj=TRUE to do the adjustment")
621 622
 		}
... ...
@@ -1021,7 +1022,7 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
1021 1022
 	hemDel <- hemDel/total
1022 1023
 	norm <- norm/total
1023 1024
 	amp <- amp/total
1024
-	envir[["posteriorProb"]] <- list(hemDel=hemDel, norm=norm, amp=amp) 
1025
+	envir[["posteriorProb"]] <- list(hemDel=hemDel, norm=norm, amp=amp)
1025 1026
 	tmp <- array(NA, dim=c(nrow(A), ncol(A), 4))
1026 1027
 	tmp[, , 1] <- homDel
1027 1028
 	tmp[, , 2] <- hemDel
... ...
@@ -1047,18 +1048,21 @@ biasAdj <- function(plateIndex, envir, priorProb, PROP=0.75){
1047 1048
 	##whether more are up or down
1048 1049
 	if(CHR != 23){
1049 1050
 		moreup <- rowSums(tmp2 > 3) > rowSums(tmp2 < 3) ##3 is normal
1050
-		##notUp <-  tmp2[ii & moreup, ] <= 3
1051
-		##notDown <- tmp2[ii & !moreup, ] >= 3
1052
-		
1051
+		down <-  tmp2[ii & moreup, ] <= 3
1052
+		up <- tmp2[ii & !moreup, ] >= 3
1053 1053
 		##What if we just keep X% of the ones with the highest
1054 1054
 		##posterior probability for normal
1055
-		prNorm <- tmp[, , 3]
1056
-		rankNorm <- t(apply(prNorm, 1, order, decreasing=TRUE))
1057
-		percentage <- round(0.75*ncol(prNorm),0)
1058
-		NORM <- rankNorm <= percentage
1059
-##		NORM <- matrix(NA, nrow(A), ncol(A))
1060
-##		NORM[ii & moreup, ] <- notUp
1061
-##		NORM[ii & !moreup, ] <- notDown
1055
+##		prNorm <- tmp[, , 3]
1056
+##		rankNorm <- t(apply(prNorm, 1, order, decreasing=TRUE))
1057
+
1058
+		rankUP <- t(apply(tmp[moreup & ii, , 4]/tmp[moreup & ii, , 3], 1, order, decreasing=TRUE))
1059
+		rankDown <- t(apply(tmp[!moreup & ii, , 2]/tmp[!moreup & ii, , 3], 1, order, decreasing=TRUE))		
1060
+		maxNumberToDrop <- round(0.25*ncol(tmp2),0)
1061
+##		NORM <- rankNorm <= percentage
1062
+		NORM <- matrix(TRUE, nrow(A), ncol(A))
1063
+		NORM[ii & moreup, ] <- rankUP >= maxNumberToDrop
1064
+		NORM[ii & !moreup, ] <- rankDown >= maxNumberToDrop
1065
+##		NORM[ii & !moreup, ] <- up
1062 1066
 		##Define NORM so that we can iterate this step
1063 1067
 		##NA's in the previous iteration (normal) will be propogated
1064 1068
 		normal <- NORM*normal
1065 1069
new file mode 100755
... ...
@@ -0,0 +1,80 @@
1
+<<>>=
2
+# start R from /home/bst/other/mritchie/R/R-300309/bin/R
3
+# as this has current crlmm and region package installed
4
+# if you want to ue your own R, the current region package is at
5
+# /thumper/ctsa/snpmicroarray/illumina/crlmm/370k/human370v1cCrlmm/
6
+library(Biobase)
7
+library(crlmm)
8
+setwd("/thumper/ctsa/snpmicroarray/illumina/IDATS/370k")
9
+@ 
10
+
11
+<<readIdat>>=
12
+samplesheet5 = read.csv("HumanHap370Duo_Sample_Map.csv", header=TRUE, as.is=TRUE)[-c(28:46,61:75,78:79),]
13
+if(!exists("RG")){
14
+	## remove samples which I don't have .idats for
15
+	## subset further to allow for quicker testing
16
+	##samplesheet5 = samplesheet5[1:5,]
17
+	## read in the .idats
18
+	RG <- readIdatFiles(samplesheet5, arrayInfoColNames=list(barcode=NULL, position="SentrixPosition"), saveDate=TRUE)
19
+}
20
+@ 
21
+
22
+Get dates for each array - scan date is likely to be most meaningful
23
+
24
+<<dates>>=
25
+pd <- pData(RG)
26
+scandatetime = strptime(as.character(pd$ScanDate), "%m/%d/%Y %H:%M:%S %p")
27
+decodedatetime = strptime(as.character(pd$DecodeDate), "%m/%d/%Y %H:%M:%S %p")
28
+table(format(scandatetime, "%d %b %Y"))
29
+table(format(scandatetime, "%b"))
30
+@ 
31
+
32
+Note: the code below is run for testing purposes only. None of these
33
+functions are exported, so would not routinely be run directly by the
34
+user.  A typical analysis would involve runnning readIdatFiles()
35
+followed by crlmmIllumina()
36
+
37
+<<crlmm>>=
38
+if(!exists("res.rda")){
39
+	outdir <- "/thumper/ctsa/snpmicroarray/rs/data/hapmap/illumina/HumanCNV370-Duo"	
40
+	if(!file.exists(file.path(outdir, "res.rda"))){
41
+		crlmmOut <- crlmmIllumina(RG=RG, cdfName="human370v1c", sns=pData(RG)$ID, returnParams=TRUE, save.it=TRUE, intensityFile=file.path(outdir, "res.rda"))
42
+		save(crlmmOut, file=file.path(outdir, "crlmmOut.rda"))				
43
+	} else{
44
+		message("Loading...")		
45
+		load(file.path(outdir, "res.rda"))
46
+		load(file.path(outdir, "crlmmOut.rda"))		
47
+	}
48
+}
49
+@ 
50
+
51
+<<SNR>>=
52
+hist(crlmmOut$SNR) ##approx. 5-fold higher than what we see in Affy!
53
+@ 
54
+
55
+<<>>=
56
+##cnAB was assigned to the global environment
57
+NP = (cnAB$A+cnAB$B)/2 # average normalized A and B intensities
58
+colnames(NP) <- colnames(crlmm:::calls(crlmmOut))
59
+cnenv = new.env()
60
+chr22 <- computeCopynumber(chrom=22, A=res[["A"]], B=res[["B"]], 
61
+			   calls=crlmm:::calls(crlmmOut),
62
+			   conf=confs(crlmmOut), 
63
+			   plate=rep(1, ncol(NP)),
64
+			   NP=NP, 
65
+			   cdfName="human370v1c", 
66
+			   SNR=res[["SNR"]], 
67
+			   envir=cnenv)
68
+@ 
69
+
70
+<<>>=
71
+copyA=cnenv[["CA"]]
72
+copyB=cnenv[["CB"]]
73
+copyT=(copyA + copyB)/100
74
+copyT[copyT > 6] <- 6
75
+copyT[copyT < 0] <- 0
76
+plot(copyT[, 1], pch=".")
77
+@ 
78
+
79
+
80
+