... |
... |
@@ -12,193 +12,12 @@ readIdatFiles <- function(sampleSheet=NULL,
|
12 |
12 |
sep="_",
|
13 |
13 |
fileExt=list(green="Grn.idat", red="Red.idat"),
|
14 |
14 |
saveDate=FALSE) {
|
15 |
|
-# if(!is.null(arrayNames)) {
|
16 |
|
-# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
17 |
|
-# if(!is.null(sampleSheet)) {
|
18 |
|
-# sampleSheet=NULL
|
19 |
|
-# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
20 |
|
-# }
|
21 |
|
-# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
22 |
|
-# }
|
23 |
|
- if(!is.null(arrayNames)) {
|
24 |
|
- pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
25 |
|
- }
|
26 |
|
- if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
|
27 |
|
- if(is.null(arrayNames)){
|
28 |
|
- ##arrayNames=NULL
|
29 |
|
- if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
|
30 |
|
- barcode = sampleSheet[,arrayInfoColNames$barcode]
|
31 |
|
- arrayNames=barcode
|
32 |
|
- }
|
33 |
|
- if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
|
34 |
|
- position = sampleSheet[,arrayInfoColNames$position]
|
35 |
|
- if(is.null(arrayNames))
|
36 |
|
- arrayNames=position
|
37 |
|
- else
|
38 |
|
- arrayNames = paste(arrayNames, position, sep=sep)
|
39 |
|
- if(highDensity) {
|
40 |
|
- hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
|
41 |
|
- for(i in names(hdExt))
|
42 |
|
- arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
|
43 |
|
- }
|
44 |
|
- }
|
45 |
|
- }
|
46 |
|
- pd = new("AnnotatedDataFrame", data = sampleSheet)
|
47 |
|
- sampleNames(pd) <- basename(arrayNames)
|
48 |
|
- }
|
49 |
|
- if(is.null(arrayNames)) {
|
50 |
|
- arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
51 |
|
- if(!is.null(sampleSheet)) {
|
52 |
|
- sampleSheet=NULL
|
53 |
|
- cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
54 |
|
- }
|
55 |
|
- pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
56 |
|
- }
|
57 |
|
- narrays = length(arrayNames)
|
58 |
|
- grnfiles = paste(arrayNames, fileExt$green, sep=sep)
|
59 |
|
- redfiles = paste(arrayNames, fileExt$red, sep=sep)
|
60 |
|
- if(length(grnfiles)==0 || length(redfiles)==0)
|
61 |
|
- stop("Cannot find .idat files")
|
62 |
|
- if(length(grnfiles)!=length(redfiles))
|
63 |
|
- stop("Cannot find matching .idat files")
|
64 |
|
- if(path[1] != "."){
|
65 |
|
- grnidats = file.path(path, grnfiles)
|
66 |
|
- redidats = file.path(path, redfiles)
|
67 |
|
- } else {
|
68 |
|
- message("path arg not set. Assuming files are in local directory")
|
69 |
|
- grnidats <- grnfiles
|
70 |
|
- redidats <- redfiles
|
71 |
|
- }
|
72 |
|
- if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
|
73 |
|
- if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
|
74 |
|
-## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
|
75 |
|
-## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
|
76 |
|
-## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
|
77 |
|
-## }
|
78 |
|
- headerInfo = list(nProbes = rep(NA, narrays),
|
79 |
|
- Barcode = rep(NA, narrays),
|
80 |
|
- ChipType = rep(NA, narrays),
|
81 |
|
- Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
|
82 |
|
- Position = rep(NA, narrays)) # this may also vary a bit
|
83 |
|
- dates = list(decode=rep(NA, narrays),
|
84 |
|
- scan=rep(NA, narrays))
|
85 |
|
- ## read in the data
|
86 |
|
- for(i in seq(along=arrayNames)) {
|
87 |
|
- cat("reading", arrayNames[i], "\t")
|
88 |
|
- idsG = idsR = G = R = NULL
|
89 |
|
- cat(paste(sep, fileExt$green, sep=""), "\t")
|
90 |
|
- G = readIDAT(grnidats[i])
|
91 |
|
- idsG = rownames(G$Quants)
|
92 |
|
- headerInfo$nProbes[i] = G$nSNPsRead
|
93 |
|
- headerInfo$Barcode[i] = G$Barcode
|
94 |
|
- headerInfo$ChipType[i] = G$ChipType
|
95 |
|
- headerInfo$Manifest[i] = G$Unknown$MostlyNull
|
96 |
|
- headerInfo$Position[i] = G$Unknowns$MostlyA
|
97 |
|
- if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
|
98 |
|
- ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
|
99 |
|
- ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
|
100 |
|
- ## but differed by a few SNPs for some reason - most of the chip was the same though
|
101 |
|
- ## stop("Chips are not of all of the same type - please check your data")
|
102 |
|
- warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
|
103 |
|
- next()
|
104 |
|
- }
|
105 |
|
- dates$decode[i] = G$RunInfo[1, 1]
|
106 |
|
- dates$scan[i] = G$RunInfo[2, 1]
|
107 |
|
- if(i==1) {
|
108 |
|
- if(is.null(ids) && !is.null(G)){
|
109 |
|
- ids = idsG
|
110 |
|
- }else stop("Could not find probe IDs")
|
111 |
|
- nprobes = length(ids)
|
112 |
|
- narrays = length(arrayNames)
|
113 |
|
- tmpmat = matrix(NA, nprobes, narrays)
|
114 |
|
- rownames(tmpmat) = ids
|
115 |
|
- if(!is.null(sampleSheet)){
|
116 |
|
- colnames(tmpmat) = sampleSheet$Sample_ID
|
117 |
|
- }else colnames(tmpmat) = arrayNames
|
118 |
|
- RG <- new("NChannelSet",
|
119 |
|
- R=tmpmat,
|
120 |
|
- G=tmpmat,
|
121 |
|
- zero=tmpmat,
|
122 |
|
-# Rnb=tmpmat,
|
123 |
|
-# Gnb=tmpmat,
|
124 |
|
-# Rse=tmpmat,
|
125 |
|
-# Gse=tmpmat,
|
126 |
|
- annotation=headerInfo$Manifest[1],
|
127 |
|
- phenoData=pd,
|
128 |
|
- storage.mode="environment")
|
129 |
|
- rm(tmpmat)
|
130 |
|
- gc()
|
131 |
|
- }
|
132 |
|
- if(length(ids)==length(idsG)) {
|
133 |
|
- if(sum(ids==idsG)==nprobes) {
|
134 |
|
- RG@assayData$G[,i] = G$Quants[, "Mean"]
|
135 |
|
- zeroG = G$Quants[, "NBeads"]==0
|
136 |
|
-# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
|
137 |
|
-# RG@assayData$Gse[,i] = G$Quants[, "SD"]
|
138 |
|
- }
|
139 |
|
- } else {
|
140 |
|
- indG = match(ids, idsG)
|
141 |
|
- RG@assayData$G[,i] = G$Quants[indG, "Mean"]
|
142 |
|
- zeroG = G$Quants[indG, "NBeads"]==0
|
143 |
|
-# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
|
144 |
|
-# RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
|
145 |
|
- }
|
146 |
|
- rm(G)
|
147 |
|
- gc()
|
148 |
|
-
|
149 |
|
- cat(paste(sep, fileExt$red, sep=""), "\n")
|
150 |
|
- R = readIDAT(redidats[i])
|
151 |
|
- idsR = rownames(R$Quants)
|
152 |
|
-
|
153 |
|
- if(length(ids)==length(idsG)) {
|
154 |
|
- if(sum(ids==idsR)==nprobes) {
|
155 |
|
- RG@assayData$R[,i] = R$Quants[ ,"Mean"]
|
156 |
|
- zeroR = R$Quants[ ,"NBeads"]==0
|
157 |
|
-# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
|
158 |
|
-# RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
|
159 |
|
- }
|
160 |
|
- } else {
|
161 |
|
- indR = match(ids, idsR)
|
162 |
|
- RG@assayData$R[,i] = R$Quants[indR, "Mean"]
|
163 |
|
- zeroR = R$Quants[indR, "NBeads"]==0
|
164 |
|
-# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
|
165 |
|
-# RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
|
166 |
|
- }
|
167 |
|
- RG@assayData$zero[,i] = zeroG | zeroR
|
168 |
|
- rm(R, zeroG, zeroR)
|
169 |
|
- gc()
|
170 |
|
- }
|
171 |
|
- if(saveDate) {
|
172 |
|
- protocolData(RG)[["ScanDate"]] = dates$scan
|
173 |
|
- }
|
174 |
|
- storageMode(RG) = "lockedEnvironment"
|
175 |
|
- RG
|
176 |
|
-}
|
177 |
|
-
|
178 |
|
-
|
179 |
|
-readIdatFiles2 <- function(sampleSheet=NULL,
|
180 |
|
- arrayNames=NULL,
|
181 |
|
- ids=NULL,
|
182 |
|
- path=".",
|
183 |
|
- arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
|
184 |
|
- highDensity=FALSE,
|
185 |
|
- sep="_",
|
186 |
|
- fileExt=list(green="Grn.idat", red="Red.idat"),
|
187 |
|
- saveDate=FALSE) {
|
188 |
|
-# if(!is.null(arrayNames)) {
|
189 |
|
-# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
190 |
|
-# if(!is.null(sampleSheet)) {
|
191 |
|
-# sampleSheet=NULL
|
192 |
|
-# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
193 |
|
-# }
|
194 |
|
-# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
195 |
|
-# }
|
|
15 |
+
|
196 |
16 |
if(!is.null(arrayNames)) {
|
197 |
17 |
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
198 |
18 |
}
|
199 |
19 |
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
|
200 |
20 |
if(is.null(arrayNames)){
|
201 |
|
- ##arrayNames=NULL
|
202 |
21 |
if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
|
203 |
22 |
barcode = sampleSheet[,arrayInfoColNames$barcode]
|
204 |
23 |
arrayNames=barcode
|
... |
... |
@@ -245,10 +64,6 @@ readIdatFiles2 <- function(sampleSheet=NULL,
|
245 |
64 |
}
|
246 |
65 |
if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
|
247 |
66 |
if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
|
248 |
|
-## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
|
249 |
|
-## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
|
250 |
|
-## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
|
251 |
|
-## }
|
252 |
67 |
headerInfo = list(nProbes = rep(NA, narrays),
|
253 |
68 |
Barcode = rep(NA, narrays),
|
254 |
69 |
ChipType = rep(NA, narrays),
|
... |
... |
@@ -268,11 +83,7 @@ readIdatFiles2 <- function(sampleSheet=NULL,
|
268 |
83 |
headerInfo$ChipType[i] = G$ChipType
|
269 |
84 |
headerInfo$Manifest[i] = G$Unknown$MostlyNull
|
270 |
85 |
headerInfo$Position[i] = G$Unknowns$MostlyA
|
271 |
|
- if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
|
272 |
|
- ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
|
273 |
|
- ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
|
274 |
|
- ## but differed by a few SNPs for some reason - most of the chip was the same though
|
275 |
|
- ## stop("Chips are not of all of the same type - please check your data")
|
|
86 |
+ if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
|
276 |
87 |
warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
|
277 |
88 |
next()
|
278 |
89 |
}
|
... |
... |
@@ -284,27 +95,13 @@ readIdatFiles2 <- function(sampleSheet=NULL,
|
284 |
95 |
} else stop("Could not find probe IDs")
|
285 |
96 |
nprobes = length(ids)
|
286 |
97 |
narrays = length(arrayNames)
|
287 |
|
-# tmpmat = matrix(NA, nprobes, narrays)
|
288 |
|
-# rownames(tmpmat) = ids
|
289 |
|
-# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
|
290 |
98 |
RG <- new("NChannelSet",
|
291 |
99 |
R=initializeBigMatrix(name="R", nr=nprobes, nc=narrays, vmode="integer"),
|
292 |
100 |
G=initializeBigMatrix(name="G", nr=nprobes, nc=narrays, vmode="integer"),
|
293 |
101 |
zero=initializeBigMatrix(name="zero", nr=nprobes, nc=narrays, vmode="integer"),
|
294 |
|
-# featureData=fD,
|
295 |
|
-# annotation=cdfName)
|
296 |
|
-# R=tmpmat,
|
297 |
|
-# G=tmpmat,
|
298 |
|
-# zero=tmpmat,
|
299 |
|
-# Rnb=tmpmat,
|
300 |
|
-# Gnb=tmpmat,
|
301 |
|
-# Rse=tmpmat,
|
302 |
|
-# Gse=tmpmat,
|
303 |
|
- annotation=headerInfo$Manifest[1],
|
304 |
|
- phenoData=pd,
|
305 |
|
- storage.mode="environment")
|
306 |
|
- featureNames(RG) = ids
|
307 |
|
-# rm(tmpmat)
|
|
102 |
+ annotation=headerInfo$Manifest[1],
|
|
103 |
+ phenoData=pd, storage.mode="environment")
|
|
104 |
+ featureNames(RG) = ids
|
308 |
105 |
if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
|
309 |
106 |
sampleNames(RG) = sampleSheet$Sample_ID
|
310 |
107 |
} else sampleNames(RG) = arrayNames
|
... |
... |
@@ -314,15 +111,11 @@ readIdatFiles2 <- function(sampleSheet=NULL,
|
314 |
111 |
if(sum(ids==idsG)==nprobes) {
|
315 |
112 |
RG@assayData$G[,i] = G$Quants[, "Mean"]
|
316 |
113 |
zeroG = G$Quants[, "NBeads"]==0
|
317 |
|
-# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
|
318 |
|
-# RG@assayData$Gse[,i] = G$Quants[, "SD"]
|
319 |
114 |
}
|
320 |
115 |
} else {
|
321 |
116 |
indG = match(ids, idsG)
|
322 |
117 |
RG@assayData$G[,i] = G$Quants[indG, "Mean"]
|
323 |
118 |
zeroG = G$Quants[indG, "NBeads"]==0
|
324 |
|
-# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
|
325 |
|
-# RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
|
326 |
119 |
}
|
327 |
120 |
rm(G)
|
328 |
121 |
gc()
|
... |
... |
@@ -334,16 +127,12 @@ readIdatFiles2 <- function(sampleSheet=NULL,
|
334 |
127 |
if(length(ids)==length(idsG)) {
|
335 |
128 |
if(sum(ids==idsR)==nprobes) {
|
336 |
129 |
RG@assayData$R[,i] = R$Quants[ ,"Mean"]
|
337 |
|
- zeroR = R$Quants[ ,"NBeads"]==0
|
338 |
|
-# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
|
339 |
|
-# RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
|
|
130 |
+ zeroR = R$Quants[ ,"NBeads"]==0
|
340 |
131 |
}
|
341 |
132 |
} else {
|
342 |
133 |
indR = match(ids, idsR)
|
343 |
134 |
RG@assayData$R[,i] = R$Quants[indR, "Mean"]
|
344 |
135 |
zeroR = R$Quants[indR, "NBeads"]==0
|
345 |
|
-# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
|
346 |
|
-# RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
|
347 |
136 |
}
|
348 |
137 |
RG@assayData$zero[,i] = zeroG | zeroR
|
349 |
138 |
rm(R, zeroG, zeroR)
|
... |
... |
@@ -615,114 +404,8 @@ readBPM <- function(bpmFile){
|
615 |
404 |
|
616 |
405 |
}
|
617 |
406 |
|
618 |
|
-RGtoXY = function(RG, chipType, verbose=TRUE) {
|
619 |
|
- chipList = c("human1mv1c", # 1M
|
620 |
|
- "human370v1c", # 370CNV
|
621 |
|
- "human650v3a", # 650Y
|
622 |
|
- "human610quadv1b", # 610 quad
|
623 |
|
- "human660quadv1a", # 660 quad
|
624 |
|
- "human370quadv3c", # 370CNV quad
|
625 |
|
- "human550v3b", # 550K
|
626 |
|
- "human1mduov3b", # 1M Duo
|
627 |
|
- "humanomni1quadv1b", # Omni1 quad
|
628 |
|
- "humanomniexpress12v1b") # Omni express 12
|
629 |
|
- if(missing(chipType)){
|
630 |
|
- chipType = match.arg(annotation(RG), chipList)
|
631 |
|
- } else chipType = match.arg(chipType, chipList)
|
632 |
|
-
|
633 |
|
- pkgname <- getCrlmmAnnotationName(chipType)
|
634 |
|
- if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
635 |
|
- suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
|
636 |
|
- msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
|
637 |
|
- message(strwrap(msg))
|
638 |
|
- stop("Package ", pkgname, " could not be found.")
|
639 |
|
- rm(suggCall, msg)
|
640 |
|
- }
|
641 |
|
- if(verbose) message("Loading chip annotation information.")
|
642 |
|
- loader("address.rda", .crlmmPkgEnv, pkgname)
|
643 |
|
-# data(annotation, package=pkgname, envir=.crlmmPkgEnv)
|
644 |
|
- aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
|
645 |
|
- bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
|
646 |
|
- ids <- names(aids)
|
647 |
|
- snpbase <- getVarInEnv("base")
|
648 |
|
-
|
649 |
|
- nsnps = length(aids)
|
650 |
|
- narrays = ncol(RG)
|
651 |
|
-
|
652 |
|
-# aidcol = match("AddressA_ID", colnames(annot))
|
653 |
|
-# if(is.na(aidcol))
|
654 |
|
-# aidcol = match("Address", colnames(annot))
|
655 |
|
-# bidcol = match("AddressB_ID", colnames(annot))
|
656 |
|
-# if(is.na(bidcol))
|
657 |
|
-# bidcol = match("Address2", colnames(annot))
|
658 |
|
-# aids = annot[, aidcol]
|
659 |
|
-# bids = annot[, bidcol]
|
660 |
|
-# snpids = annot[,"Name"]
|
661 |
|
-# snpbase = annot[,"SNP"]
|
662 |
|
- infI = !is.na(bids) & bids!=0
|
663 |
|
- aord = match(aids, featureNames(RG)) # NAs are possible here
|
664 |
|
- bord = match(bids, featureNames(RG)) # and here
|
665 |
|
-# argrg = aids[rrgg]
|
666 |
|
-# brgrg = bids[rrgg]
|
667 |
407 |
|
668 |
|
- tmpmat = matrix(as.integer(0), nsnps, narrays)
|
669 |
|
- rownames(tmpmat) = ids
|
670 |
|
- colnames(tmpmat) = sampleNames(RG)
|
671 |
|
- XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
|
672 |
|
- annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment")
|
673 |
|
- rm(tmpmat)
|
674 |
|
- gc()
|
675 |
|
-
|
676 |
|
- # First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe
|
677 |
|
- XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
|
678 |
|
- XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
|
679 |
|
- XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
|
680 |
|
-# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
|
681 |
|
-# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
|
682 |
|
-# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
|
683 |
|
-# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
|
684 |
|
- gc()
|
685 |
|
-
|
686 |
|
- ## Warning - not 100% sure that the code below is correct - could be more complicated than this
|
687 |
|
-
|
688 |
|
- # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
|
689 |
|
-# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
|
690 |
|
-
|
691 |
|
-# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
|
692 |
|
-# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
|
693 |
|
-# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
|
694 |
|
-# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
|
695 |
|
-# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
|
696 |
|
-# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
|
697 |
|
-
|
698 |
|
- # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
|
699 |
|
-# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
|
700 |
|
-
|
701 |
|
-# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
|
702 |
|
-# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
|
703 |
|
-# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
|
704 |
|
-# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
|
705 |
|
-# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
|
706 |
|
-# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
|
707 |
|
-
|
708 |
|
- # For now zero out Infinium I probes
|
709 |
|
- XY@assayData$X[infI,] = 0
|
710 |
|
- XY@assayData$Y[infI,] = 0
|
711 |
|
- XY@assayData$zero[infI,] = 0
|
712 |
|
-# XY@assayData$Xnb[infI,] = 0
|
713 |
|
-# XY@assayData$Ynb[infI,] = 0
|
714 |
|
-# XY@assayData$Xse[infI,] = 0
|
715 |
|
-# XY@assayData$Yse[infI,] = 0
|
716 |
|
-
|
717 |
|
-# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
|
718 |
|
- gc()
|
719 |
|
-
|
720 |
|
-# storageMode(XY) = "lockedEnvironment"
|
721 |
|
- XY
|
722 |
|
-}
|
723 |
|
-
|
724 |
|
-
|
725 |
|
-RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
|
408 |
+RGtoXY = function(RG, chipType, verbose=TRUE) {
|
726 |
409 |
chipList = c("human1mv1c", # 1M
|
727 |
410 |
"human370v1c", # 370CNV
|
728 |
411 |
"human650v3a", # 650Y
|
... |
... |
@@ -747,7 +430,6 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
747 |
430 |
}
|
748 |
431 |
if(verbose) message("Loading chip annotation information.")
|
749 |
432 |
loader("address.rda", .crlmmPkgEnv, pkgname)
|
750 |
|
-# data(annotation, package=pkgname, envir=.crlmmPkgEnv)
|
751 |
433 |
aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
|
752 |
434 |
bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
|
753 |
435 |
ids <- names(aids)
|
... |
... |
@@ -772,12 +454,11 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
772 |
454 |
# argrg = aids[rrgg]
|
773 |
455 |
# brgrg = bids[rrgg]
|
774 |
456 |
|
775 |
|
-# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
|
776 |
457 |
XY <- new("NChannelSet",
|
777 |
458 |
X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
|
778 |
459 |
Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
|
779 |
460 |
zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
|
780 |
|
- annotation=chipType, phenoData=RG@phenoData, # featureData=fD
|
|
461 |
+ annotation=chipType, phenoData=RG@phenoData,
|
781 |
462 |
protocolData=RG@protocolData, storage.mode="environment")
|
782 |
463 |
featureNames(XY) = ids # featureNames(RG)
|
783 |
464 |
gc()
|
... |
... |
@@ -790,10 +471,6 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
790 |
471 |
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
|
791 |
472 |
XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
|
792 |
473 |
XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
|
793 |
|
-# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
|
794 |
|
-# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
|
795 |
|
-# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
|
796 |
|
-# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
|
797 |
474 |
gc()
|
798 |
475 |
|
799 |
476 |
## Warning - not 100% sure that the code below is correct - could be more complicated than this
|
... |
... |
@@ -803,31 +480,17 @@ RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
803 |
480 |
|
804 |
481 |
# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
|
805 |
482 |
# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
|
806 |
|
-# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
|
807 |
|
-# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
|
808 |
|
-# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
|
809 |
|
-# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
|
810 |
483 |
|
811 |
484 |
# Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
|
812 |
485 |
# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
|
813 |
486 |
|
814 |
487 |
# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
|
815 |
488 |
# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
|
816 |
|
-# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
|
817 |
|
-# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
|
818 |
|
-# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
|
819 |
|
-# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
|
820 |
489 |
|
821 |
490 |
# For now zero out Infinium I probes
|
822 |
491 |
XY@assayData$X[infI,] = 0
|
823 |
492 |
XY@assayData$Y[infI,] = 0
|
824 |
493 |
XY@assayData$zero[infI,] = 0
|
825 |
|
-# XY@assayData$Xnb[infI,] = 0
|
826 |
|
-# XY@assayData$Ynb[infI,] = 0
|
827 |
|
-# XY@assayData$Xse[infI,] = 0
|
828 |
|
-# XY@assayData$Yse[infI,] = 0
|
829 |
|
-
|
830 |
|
-# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
|
831 |
494 |
gc()
|
832 |
495 |
|
833 |
496 |
# storageMode(XY) = "lockedEnvironment"
|
... |
... |
@@ -846,17 +509,12 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
|
846 |
509 |
}
|
847 |
510 |
if(verbose) message("Loading strip and reference normalization information.")
|
848 |
511 |
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
|
849 |
|
-# data(preprocStuff, package=pkgname, envir=.crlmmPkgEnv)
|
850 |
512 |
|
851 |
513 |
stripnum <- getVarInEnv("stripnum")
|
852 |
514 |
|
853 |
515 |
if(useTarget)
|
854 |
516 |
targetdist = getVarInEnv("reference")
|
855 |
517 |
|
856 |
|
-# Xqws = Yqws = matrix(0, nrow(XY), ncol(XY))
|
857 |
|
-# colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X
|
858 |
|
-# rownames(Xqws) = rownames(Yqws) = featureNames(XY)
|
859 |
|
-
|
860 |
518 |
if(verbose){
|
861 |
519 |
message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
|
862 |
520 |
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
|
... |
... |
@@ -883,160 +541,10 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
|
883 |
541 |
if(verbose)
|
884 |
542 |
cat("\n")
|
885 |
543 |
XY
|
886 |
|
-# XYNorm = new("NChannelSet",
|
887 |
|
-# X=Xqws+16,
|
888 |
|
-# Y=Yqws+16,
|
889 |
|
-# Xnb=exprs(channel(XY, "Xnb")),
|
890 |
|
-# Ynb=exprs(channel(XY, "Ynb")),
|
891 |
|
-# Xse=exprs(channel(XY, "Xse")),
|
892 |
|
-# Yse=exprs(channel(XY, "Yse")),
|
893 |
|
-# zero=exprs(channel(XY, "zero")),
|
894 |
|
-# annotation=annotation(XY),
|
895 |
|
-# phenoData=XY@phenoData)
|
896 |
544 |
}
|
897 |
545 |
|
898 |
546 |
|
899 |
547 |
preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
|
900 |
|
- fitMixture=TRUE,
|
901 |
|
- eps=0.1,
|
902 |
|
- verbose=TRUE,
|
903 |
|
- seed=1,
|
904 |
|
- cdfName,
|
905 |
|
- sns,
|
906 |
|
- stripNorm=TRUE,
|
907 |
|
- useTarget=TRUE,
|
908 |
|
- save.it=FALSE,
|
909 |
|
- snpFile,
|
910 |
|
- cnFile) {
|
911 |
|
- if(stripNorm)
|
912 |
|
- XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
|
913 |
|
-
|
914 |
|
-## MR: the code below is mostly straight from snprma.R
|
915 |
|
- if (missing(sns)) sns <- sampleNames(XY) #$X
|
916 |
|
- if(missing(cdfName))
|
917 |
|
- cdfName <- annotation(XY)
|
918 |
|
-## stuffDir <- changeToCrlmmAnnotationName(cdfName)
|
919 |
|
- pkgname <- getCrlmmAnnotationName(cdfName)
|
920 |
|
- if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
921 |
|
- suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
|
922 |
|
- msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
|
923 |
|
- message(strwrap(msg))
|
924 |
|
- stop("Package ", pkgname, " could not be found.")
|
925 |
|
- rm(suggCall, msg)
|
926 |
|
- }
|
927 |
|
- if(verbose) message("Loading snp annotation and mixture model parameters.")
|
928 |
|
- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
|
929 |
|
- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
|
930 |
|
- loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
|
931 |
|
- loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
|
932 |
|
-# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
|
933 |
|
- autosomeIndex <- getVarInEnv("autosomeIndex")
|
934 |
|
- # pnsa <- getVarInEnv("pnsa")
|
935 |
|
- # pnsb <- getVarInEnv("pnsb")
|
936 |
|
- # fid <- getVarInEnv("fid")
|
937 |
|
- # reference <- getVarInEnv("reference")
|
938 |
|
- # aIndex <- getVarInEnv("aIndex")
|
939 |
|
- # bIndex <- getVarInEnv("bIndex")
|
940 |
|
- SMEDIAN <- getVarInEnv("SMEDIAN")
|
941 |
|
- theKnots <- getVarInEnv("theKnots")
|
942 |
|
- gns <- getVarInEnv("gns")
|
943 |
|
-
|
944 |
|
- # separate out copy number probes
|
945 |
|
- npIndex = getVarInEnv("npProbesFid")
|
946 |
|
- nprobes = length(npIndex)
|
947 |
|
- if(length(nprobes)>0) {
|
948 |
|
- narrays = ncol(XY)
|
949 |
|
- A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
|
950 |
|
- B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
|
951 |
|
-
|
952 |
|
- # new lines below - useful to keep track of zeroed out probes
|
953 |
|
- zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
|
954 |
|
-
|
955 |
|
- colnames(A) <- colnames(B) <- colnames(zero) <- sns
|
956 |
|
- rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
|
957 |
|
-
|
958 |
|
- cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
|
959 |
|
- if(save.it & !missing(cnFile)) {
|
960 |
|
- t0 <- proc.time()
|
961 |
|
- save(cnAB, file=cnFile)
|
962 |
|
- t0 <- proc.time()-t0
|
963 |
|
- if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
|
964 |
|
- }
|
965 |
|
- rm(cnAB, B, zero)
|
966 |
|
- }
|
967 |
|
-
|
968 |
|
- # next process snp probes
|
969 |
|
- snpIndex = getVarInEnv("snpProbesFid")
|
970 |
|
- nprobes <- length(snpIndex)
|
971 |
|
-
|
972 |
|
- ##We will read each cel file, summarize, and run EM one by one
|
973 |
|
- ##We will save parameters of EM to use later
|
974 |
|
- mixtureParams <- matrix(0, 4, narrays)
|
975 |
|
- SNR <- vector("numeric", narrays)
|
976 |
|
- SKW <- vector("numeric", narrays)
|
977 |
|
-
|
978 |
|
- ## This is the sample for the fitting of splines
|
979 |
|
- ## BC: I like better the idea of the user passing the seed,
|
980 |
|
- ## because this might intefere with other analyses
|
981 |
|
- ## (like what happened to GCRMA)
|
982 |
|
- set.seed(seed)
|
983 |
|
- idx <- sort(sample(autosomeIndex, mixtureSampleSize))
|
984 |
|
- idx2 <- sample(nprobes, 10^5)
|
985 |
|
-
|
986 |
|
- ##S will hold (A+B)/2 and M will hold A-B
|
987 |
|
- ##NOTE: We actually dont need to save S. Only for pics etc...
|
988 |
|
- ##f is the correction. we save to avoid recomputing
|
989 |
|
- A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
|
990 |
|
- B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
|
991 |
|
-
|
992 |
|
- # new lines below - useful to keep track of zeroed out SNPs
|
993 |
|
- zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays)
|
994 |
|
-
|
995 |
|
- colnames(A) <- colnames(B) <- colnames(zero) <- sns
|
996 |
|
- rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
|
997 |
|
-
|
998 |
|
- if(verbose){
|
999 |
|
- message("Calibrating ", narrays, " arrays.")
|
1000 |
|
- if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
|
1001 |
|
- }
|
1002 |
|
-
|
1003 |
|
-
|
1004 |
|
- for(i in 1:narrays){
|
1005 |
|
- SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
|
1006 |
|
- if(fitMixture){
|
1007 |
|
- S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
|
1008 |
|
- M <- log2(A[idx, i])-log2(B[idx, i])
|
1009 |
|
-
|
1010 |
|
- ##we need to test the choice of eps.. it is not the max diff between funcs
|
1011 |
|
- tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
|
1012 |
|
-
|
1013 |
|
- mixtureParams[, i] <- tmp[["coef"]]
|
1014 |
|
- SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
|
1015 |
|
- }
|
1016 |
|
- if(verbose) {
|
1017 |
|
- if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
|
1018 |
|
- else cat(".")
|
1019 |
|
- }
|
1020 |
|
- }
|
1021 |
|
- if (verbose) {
|
1022 |
|
- if (getRversion() > '2.7.0') close(pb)
|
1023 |
|
- else cat("\n")
|
1024 |
|
- }
|
1025 |
|
- if (!fitMixture) SNR <- mixtureParams <- NA
|
1026 |
|
- ## gns comes from preprocStuff.rda
|
1027 |
|
- res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
|
1028 |
|
-
|
1029 |
|
- if(save.it & !missing(snpFile)) {
|
1030 |
|
- t0 <- proc.time()
|
1031 |
|
- save(res, file=snpFile)
|
1032 |
|
- t0 <- proc.time()-t0
|
1033 |
|
- if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
|
1034 |
|
- }
|
1035 |
|
- return(res)
|
1036 |
|
-}
|
1037 |
|
-
|
1038 |
|
-
|
1039 |
|
-preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
1040 |
548 |
fitMixture=TRUE,
|
1041 |
549 |
eps=0.1,
|
1042 |
550 |
verbose=TRUE,
|
... |
... |
@@ -1055,7 +563,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
1055 |
563 |
if (missing(sns)) sns <- sampleNames(XY) #$X
|
1056 |
564 |
if(missing(cdfName))
|
1057 |
565 |
cdfName <- annotation(XY)
|
1058 |
|
-## stuffDir <- changeToCrlmmAnnotationName(cdfName)
|
1059 |
566 |
pkgname <- getCrlmmAnnotationName(cdfName)
|
1060 |
567 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
1061 |
568 |
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
|
... |
... |
@@ -1069,17 +576,9 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
1069 |
576 |
loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
|
1070 |
577 |
loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
|
1071 |
578 |
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
|
1072 |
|
-# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
|
1073 |
579 |
autosomeIndex <- getVarInEnv("autosomeIndex")
|
1074 |
|
- # pnsa <- getVarInEnv("pnsa")
|
1075 |
|
- # pnsb <- getVarInEnv("pnsb")
|
1076 |
|
- # fid <- getVarInEnv("fid")
|
1077 |
|
- # reference <- getVarInEnv("reference")
|
1078 |
|
- # aIndex <- getVarInEnv("aIndex")
|
1079 |
|
- # bIndex <- getVarInEnv("bIndex")
|
1080 |
580 |
SMEDIAN <- getVarInEnv("SMEDIAN")
|
1081 |
581 |
theKnots <- getVarInEnv("theKnots")
|
1082 |
|
-# gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps
|
1083 |
582 |
narrays = ncol(XY)
|
1084 |
583 |
|
1085 |
584 |
if(save.it & !missing(cnFile)) {
|
... |
... |
@@ -1115,9 +614,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
1115 |
614 |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
|
1116 |
615 |
SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
|
1117 |
616 |
SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
|
1118 |
|
-# mixtureParams <- matrix(0, 4, narrays)
|
1119 |
|
-# SNR <- vector("numeric", narrays)
|
1120 |
|
-# SKW <- vector("numeric", narrays)
|
1121 |
617 |
|
1122 |
618 |
## This is the sample for the fitting of splines
|
1123 |
619 |
## BC: I like better the idea of the user passing the seed,
|
... |
... |
@@ -1130,25 +626,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
1130 |
626 |
##S will hold (A+B)/2 and M will hold A-B
|
1131 |
627 |
##NOTE: We actually dont need to save S. Only for pics etc...
|
1132 |
628 |
##f is the correction. we save to avoid recomputing
|
1133 |
|
-# A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
|
1134 |
|
-# B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
|
1135 |
|
-
|
1136 |
|
- # new lines below - useful to keep track of zeroed out SNPs
|
1137 |
|
-# zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
|
1138 |
|
-
|
1139 |
|
-# if(!is.matrix(A)) {
|
1140 |
|
-# A = A[,]
|
1141 |
|
-# B = B[,]
|
1142 |
|
-# zero = zero[,]
|
1143 |
|
-# }
|
1144 |
|
-
|
1145 |
|
-# if(!is.integer(A)) {
|
1146 |
|
-# A = matrix(as.integer(A), nrow(A), ncol(A))
|
1147 |
|
-# B = matrix(as.integer(B), nrow(B), ncol(B))
|
1148 |
|
-# }
|
1149 |
|
-
|
1150 |
|
-# colnames(A) <- colnames(B) <- colnames(zero) <- sns
|
1151 |
|
-# rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
|
1152 |
629 |
|
1153 |
630 |
A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
|
1154 |
631 |
B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
|
... |
... |
@@ -1188,7 +665,7 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
1188 |
665 |
## gns comes from preprocStuff.rda
|
1189 |
666 |
res = list(A=A, B=B,
|
1190 |
667 |
zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
|
1191 |
|
- mixtureParams=mixtureParams, cdfName=cdfName) # , snpIndex=snpIndex, npIndex=npIndex)
|
|
668 |
+ mixtureParams=mixtureParams, cdfName=cdfName)
|
1192 |
669 |
|
1193 |
670 |
if(save.it & !missing(snpFile)) {
|
1194 |
671 |
t0 <- proc.time()
|
... |
... |
@@ -1235,61 +712,6 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
1235 |
712 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
1236 |
713 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
|
1237 |
714 |
save.it=save.it, snpFile=snpFile, cnFile=cnFile)
|
1238 |
|
- }else{
|
1239 |
|
- if(verbose) message("Loading ", snpFile, ".")
|
1240 |
|
- obj <- load(snpFile)
|
1241 |
|
- if(verbose) message("Done.")
|
1242 |
|
- if(!any(obj == "res"))
|
1243 |
|
- stop("Object in ", snpFile, " seems to be invalid.")
|
1244 |
|
- }
|
1245 |
|
- if(row.names) row.names=res[["gns"]] else row.names=NULL
|
1246 |
|
- if(col.names) col.names=res[["sns"]] else col.names=NULL
|
1247 |
|
-
|
1248 |
|
- res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
|
1249 |
|
- res[["mixtureParams"]], res[["cdfName"]],
|
1250 |
|
- gender=gender, row.names=row.names,
|
1251 |
|
- col.names=col.names, recallMin=recallMin,
|
1252 |
|
- recallRegMin=1000, SNRMin=SNRMin,
|
1253 |
|
- returnParams=returnParams, badSNP=badSNP,
|
1254 |
|
- verbose=verbose)
|
1255 |
|
-
|
1256 |
|
- res2[["SNR"]] <- res[["SNR"]]
|
1257 |
|
- res2[["SKW"]] <- res[["SKW"]]
|
1258 |
|
- rm(res)
|
1259 |
|
- gc()
|
1260 |
|
- return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
|
1261 |
|
-}
|
1262 |
|
-
|
1263 |
|
-
|
1264 |
|
-crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
1265 |
|
- row.names=TRUE, col.names=TRUE,
|
1266 |
|
- probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
1267 |
|
- seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile,
|
1268 |
|
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
1269 |
|
- cdfName, sns, recallMin=10, recallRegMin=1000,
|
1270 |
|
- returnParams=FALSE, badSNP=.7) {
|
1271 |
|
- if (save.it & (missing(snpFile) | missing(cnFile)))
|
1272 |
|
- stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
|
1273 |
|
- if (load.it & missing(snpFile))
|
1274 |
|
- stop("'snpFile' is missing and you chose to load.it")
|
1275 |
|
- if (!missing(snpFile))
|
1276 |
|
- if (load.it & !file.exists(snpFile)){
|
1277 |
|
- load.it <- FALSE
|
1278 |
|
- message("File ", snpFile, " does not exist.")
|
1279 |
|
- stop("Cannot load SNP data.")
|
1280 |
|
- }
|
1281 |
|
- if (!load.it){
|
1282 |
|
- if(!missing(RG)) {
|
1283 |
|
- if(missing(XY))
|
1284 |
|
- XY = RGtoXY2(RG, chipType=cdfName)
|
1285 |
|
- else
|
1286 |
|
- stop("Both RG and XY specified - please use one or the other")
|
1287 |
|
- }
|
1288 |
|
- if (missing(sns)) sns <- sampleNames(XY) #$X
|
1289 |
|
-
|
1290 |
|
- res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
1291 |
|
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
|
1292 |
|
- # save.it=save.it, snpFile=snpFile, cnFile=cnFile)
|
1293 |
715 |
|
1294 |
716 |
# fD = featureData(XY)
|
1295 |
717 |
# phenD = XY@phenoData
|
... |
... |
@@ -1378,10 +800,10 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
|
1378 |
800 |
# rgFile,
|
1379 |
801 |
stripNorm=TRUE,
|
1380 |
802 |
useTarget=TRUE,
|
1381 |
|
- row.names=TRUE,
|
|
803 |
+ row.names=TRUE,
|
1382 |
804 |
col.names=TRUE,
|
1383 |
805 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
1384 |
|
- seed=1, save.ab=FALSE, snpFile, cnFile,
|
|
806 |
+ seed=1, save.it=FALSE, snpFile, cnFile,
|
1385 |
807 |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
1386 |
808 |
cdfName, sns, recallMin=10, recallRegMin=1000,
|
1387 |
809 |
returnParams=FALSE, badSNP=.7) {
|
... |
... |
@@ -1392,8 +814,8 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
|
1392 |
814 |
|
1393 |
815 |
# if (save.rg & missing(rgFile))
|
1394 |
816 |
# stop("'rgFile' is missing, and you chose save.rg")
|
1395 |
|
- if (save.ab & (missing(snpFile) | missing(cnFile)))
|
1396 |
|
- stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
|
|
817 |
+ if (save.it & (missing(snpFile) | missing(cnFile)))
|
|
818 |
+ stop("'snpFile' or 'cnFile' is missing and you chose save.it")
|
1397 |
819 |
# batches = NULL
|
1398 |
820 |
# if(!is.null(arrayNames))
|
1399 |
821 |
# batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
|
... |
... |
@@ -1407,20 +829,20 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
|
1407 |
829 |
# RG = readIdatFiles(sampleSheet=sampleSheet[j,], arrayNames=arrayNames[j],
|
1408 |
830 |
# ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
|
1409 |
831 |
# highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
|
1410 |
|
- RG = readIdatFiles2(sampleSheet=sampleSheet, arrayNames=arrayNames,
|
|
832 |
+ RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
|
1411 |
833 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
|
1412 |
834 |
highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
|
1413 |
835 |
|
1414 |
836 |
# if(save.rg)
|
1415 |
837 |
# save(RG, file=rgFile)
|
1416 |
838 |
|
1417 |
|
- XY = RGtoXY2(RG, chipType=cdfName)
|
|
839 |
+ XY = RGtoXY(RG, chipType=cdfName)
|
1418 |
840 |
rm(RG); gc()
|
1419 |
841 |
if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
|
1420 |
842 |
} # else subsns = sns[j]
|
1421 |
|
- res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
|
843 |
+ res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
1422 |
844 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, #) # sns=subsns
|
1423 |
|
- save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
|
|
845 |
+ save.it=save.it, snpFile=snpFile, cnFile=cnFile)
|
1424 |
846 |
# fD = featureData(XY)
|
1425 |
847 |
# phenD = XY@phenoData
|
1426 |
848 |
# protD = XY@protocolData
|