Browse code

updated readIdatFiles

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@41017 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 04/08/2009 15:18:36
Showing 5 changed files

... ...
@@ -252,3 +252,7 @@ is decoded and scanned
252 252
   a) samplesheet5 by get("samplesheet5")
253 253
   b) path by get("path")
254 254
   c) res by get("res")
255
+
256
+2009-08-04 R. Scharpf - committed version 1.3.17
257
+
258
+* changed readIdatFiles function to check whether arrayNames is NULL
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for Affymetrix SNP 5.0 and 6.0 and Illumina arrays.
4
-Version: 1.3.16
4
+Version: 1.3.17
5 5
 Date: 2009-07-22
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>, Matt Ritchie <mritchie@wehi.EDU.AU>
... ...
@@ -819,7 +819,7 @@ thresholdCopyNumberSet <- function(object){
819 819
 			       priorProb,
820 820
 			       gender=NULL,
821 821
 			       SNR,
822
-			       SNRmin=5, seed=123,
822
+			       SNRmin, seed=123,
823 823
 			       cdfName,
824 824
 			       verbose=TRUE, ...){
825 825
 	if(missing(cdfName)) stop("cdfName must be provided")
... ...
@@ -7,21 +7,23 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
7 7
 			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
8 8
 			  highDensity=FALSE, sep="_", fileExt=list(green="Grn.idat", red="Red.idat"), saveDate=FALSE) {
9 9
        if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
10
-	       arrayNames=NULL
11
-	       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
12
-		       barcode = sampleSheet[,arrayInfoColNames$barcode]
13
-		       arrayNames=barcode
14
-	       }
15
-	       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
16
-		       position = sampleSheet[,arrayInfoColNames$position]
17
-		       if(is.null(arrayNames))
18
-			       arrayNames=position
19
-		       else
20
-			       arrayNames = paste(arrayNames, position, sep=sep)
21
-		       if(highDensity) {
22
-			       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
23
-			       for(i in names(hdExt))
24
-				       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
10
+	       if(!is.null(arrayNames)){
11
+		       ##arrayNames=NULL
12
+		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
13
+			       barcode = sampleSheet[,arrayInfoColNames$barcode]
14
+			       arrayNames=barcode
15
+		       }
16
+		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {  
17
+			       position = sampleSheet[,arrayInfoColNames$position]
18
+			       if(is.null(arrayNames))
19
+				       arrayNames=position
20
+			       else
21
+				       arrayNames = paste(arrayNames, position, sep=sep)
22
+			       if(highDensity) {
23
+				       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
24
+				       for(i in names(hdExt))
25
+					       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
26
+			       }
25 27
 		       }
26 28
 	       }
27 29
 	       pd = new("AnnotatedDataFrame", data = sampleSheet)
... ...
@@ -46,17 +48,14 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
46 48
 		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
47 49
        grnidats = file.path(path, grnfiles)
48 50
        redidats = file.path(path, redfiles)
49
-       
50 51
        headerInfo = list(nProbes = rep(NA, narrays),
51 52
                          Barcode = rep(NA, narrays),
52 53
                          ChipType = rep(NA, narrays),
53 54
                          Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
54 55
                          Position = rep(NA, narrays)) # this may also vary a bit
55
-
56 56
        dates = list(decode=rep(NA, narrays),
57 57
                     scan=rep(NA, narrays))
58
-
59
-       # read in the data
58
+       ## read in the data
60 59
        for(i in seq(along=arrayNames)) {
61 60
 	       cat("reading", arrayNames[i], "\t")
62 61
 	       idsG = idsR = G = R = NULL
... ...
@@ -68,7 +67,6 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
68 67
 	       headerInfo$ChipType[i] = G$ChipType
69 68
 	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
70 69
 	       headerInfo$Position[i] = G$Unknowns$MostlyA
71
-         
72 70
 	       if(headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
73 71
 		       ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
74 72
 		       ## but differed by a few SNPs for some reason - most of the chip was the same though
... ...
@@ -76,29 +74,23 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
76 74
 		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
77 75
 		       next()
78 76
 	       }
79
-	       
80 77
 	       dates$decode[i] = G$RunInfo[1, 1]
81 78
 	       dates$scan[i] = G$RunInfo[2, 1]
82
-
83 79
 	       if(i==1) {
84
-		       if(is.null(ids) && !is.null(G))
80
+		       if(is.null(ids) && !is.null(G)){
85 81
 			       ids = idsG
86
-		       else
87
-			       stop("Could not find probe IDs")
82
+		       }else stop("Could not find probe IDs")
88 83
 		       nprobes = length(ids)
89 84
 		       narrays = length(arrayNames)
90
-           
91 85
 		       tmpmat = matrix(NA, nprobes, narrays)
92 86
 		       rownames(tmpmat) = ids
93
-		       if(!is.null(sampleSheet))
87
+		       if(!is.null(sampleSheet)){
94 88
 			       colnames(tmpmat) = sampleSheet$Sample_ID
95
-		       else
96
-			       colnames(tmpmat) = arrayNames
97
-
98
-		       RG = new("NChannelSet",
99
-		       R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat,
100
-		       Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1],
101
-		       phenoData=pd, storage.mode="environment")
89
+		       }else  colnames(tmpmat) = arrayNames
90
+		       RG <- new("NChannelSet",
91
+				 R=tmpmat, G=tmpmat, Rnb=tmpmat, Gnb=tmpmat,
92
+				 Rse=tmpmat, Gse=tmpmat, annotation=headerInfo$Manifest[1],
93
+				 phenoData=pd, storage.mode="environment")
102 94
 		       rm(tmpmat)
103 95
 		       gc()
104 96
 	       }
... ...
@@ -109,8 +101,7 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
109 101
 			       RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
110 102
 			       RG@assayData$Gse[,i] = G$Quants[, "SD"]
111 103
 		       }
112
-	       }
113
-	       else {
104
+	       } else {
114 105
 		       indG = match(ids, idsG)
115 106
 		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
116 107
 		       RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
... ...
@@ -129,8 +120,7 @@ readIdatFiles <- function(sampleSheet=NULL, arrayNames=NULL, ids=NULL, path=".",
129 120
 			       RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
130 121
 			       RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
131 122
 		       }
132
-	       }
133
-	       else {
123
+	       } else {
134 124
 		       indR = match(ids, idsR)
135 125
 		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
136 126
 		       RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
... ...
@@ -14,7 +14,7 @@
14 14
 \newcommand{\R}{\textsf{R}}
15 15
 
16 16
 \begin{document}
17
-\title{Trisomy analysis}
17
+\title{Copy number estimation}
18 18
 \date{May, 2009}
19 19
 \author{Rob Scharpf}
20 20
 \maketitle
... ...
@@ -171,7 +171,7 @@ Plot physical position versus copy number for the first sample.  Recall
171 171
 that the copy number estimates were multiplied by 100 and stored as an
172 172
 integer.
173 173
 
174
-<<oneSample, fig=TRUE, width=8, height=4, include=FALSE, eval=FALSE>>=
174
+<<oneSample, fig=TRUE, width=8, height=4, include=FALSE>>=
175 175
 par(las=1)
176 176
 plot(position(crlmmResults), copyNumber(crlmmResults)[, 1], pch=".", cex=2, xaxt="n", col="grey20", ylim=c(0,6), 
177 177
      ylab="copy number", xlab="physical position (Mb)",
... ...
@@ -199,7 +199,7 @@ plate. Plotting symbols are the genotype calls (1=AA, 2=AB, 3=BB); light
199 199
 grey points are from other plates. One could also add the prediction
200 200
 regions for 0-4 copies, but it gets crowded.
201 201
 
202
-<<genotypeCalls, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>=
202
+<<genotypeCalls, fig=TRUE, width=8, height=8, include=FALSE>>=
203 203
 xlim <- ylim <- c(6.5,13)
204 204
 pch <- 21
205 205
 colors <- c("red", "blue", "green3")
... ...
@@ -221,7 +221,7 @@ for(i in indices[[j]]){
221 221
 }
222 222
 @ 
223 223
 
224
-<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE, eval=FALSE>>=
224
+<<predictionRegion, fig=TRUE, width=8, height=8, include=FALSE>>=
225 225
 require(RColorBrewer)
226 226
 greens <- brewer.pal(9, "Greens")
227 227
 J <- split(1:ncol(crlmmResults), batch(crlmmResults))