... |
... |
@@ -177,6 +177,187 @@ readIdatFiles <- function(sampleSheet=NULL,
|
177 |
177 |
}
|
178 |
178 |
|
179 |
179 |
|
|
180 |
+readIdatFiles2 <- function(sampleSheet=NULL,
|
|
181 |
+ arrayNames=NULL,
|
|
182 |
+ ids=NULL,
|
|
183 |
+ path=".",
|
|
184 |
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
|
|
185 |
+ highDensity=FALSE,
|
|
186 |
+ sep="_",
|
|
187 |
+ fileExt=list(green="Grn.idat", red="Red.idat"),
|
|
188 |
+ saveDate=FALSE) {
|
|
189 |
+# if(!is.null(arrayNames)) {
|
|
190 |
+# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
|
191 |
+# if(!is.null(sampleSheet)) {
|
|
192 |
+# sampleSheet=NULL
|
|
193 |
+# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
|
194 |
+# }
|
|
195 |
+# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
|
196 |
+# }
|
|
197 |
+ if(!is.null(arrayNames)) {
|
|
198 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
|
199 |
+ }
|
|
200 |
+ if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
|
|
201 |
+ if(is.null(arrayNames)){
|
|
202 |
+ ##arrayNames=NULL
|
|
203 |
+ if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
|
|
204 |
+ barcode = sampleSheet[,arrayInfoColNames$barcode]
|
|
205 |
+ arrayNames=barcode
|
|
206 |
+ }
|
|
207 |
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
|
|
208 |
+ position = sampleSheet[,arrayInfoColNames$position]
|
|
209 |
+ if(is.null(arrayNames))
|
|
210 |
+ arrayNames=position
|
|
211 |
+ else
|
|
212 |
+ arrayNames = paste(arrayNames, position, sep=sep)
|
|
213 |
+ if(highDensity) {
|
|
214 |
+ hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
|
|
215 |
+ for(i in names(hdExt))
|
|
216 |
+ arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
|
|
217 |
+ }
|
|
218 |
+ }
|
|
219 |
+ }
|
|
220 |
+ pd = new("AnnotatedDataFrame", data = sampleSheet)
|
|
221 |
+ sampleNames(pd) <- basename(arrayNames)
|
|
222 |
+ }
|
|
223 |
+ if(is.null(arrayNames)) {
|
|
224 |
+ arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
|
225 |
+ if(!is.null(sampleSheet)) {
|
|
226 |
+ sampleSheet=NULL
|
|
227 |
+ cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
|
228 |
+ }
|
|
229 |
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
|
230 |
+ }
|
|
231 |
+
|
|
232 |
+ narrays = length(arrayNames)
|
|
233 |
+ grnfiles = paste(arrayNames, fileExt$green, sep=sep)
|
|
234 |
+ redfiles = paste(arrayNames, fileExt$red, sep=sep)
|
|
235 |
+ if(length(grnfiles)==0 || length(redfiles)==0)
|
|
236 |
+ stop("Cannot find .idat files")
|
|
237 |
+ if(length(grnfiles)!=length(redfiles))
|
|
238 |
+ stop("Cannot find matching .idat files")
|
|
239 |
+ if(path[1] != "."){
|
|
240 |
+ grnidats = file.path(path, grnfiles)
|
|
241 |
+ redidats = file.path(path, redfiles)
|
|
242 |
+ } else {
|
|
243 |
+ message("path arg not set. Assuming files are in local directory")
|
|
244 |
+ grnidats <- grnfiles
|
|
245 |
+ redidats <- redfiles
|
|
246 |
+ }
|
|
247 |
+ if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
|
|
248 |
+ if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
|
|
249 |
+## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
|
|
250 |
+## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
|
|
251 |
+## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
|
|
252 |
+## }
|
|
253 |
+ headerInfo = list(nProbes = rep(NA, narrays),
|
|
254 |
+ Barcode = rep(NA, narrays),
|
|
255 |
+ ChipType = rep(NA, narrays),
|
|
256 |
+ Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
|
|
257 |
+ Position = rep(NA, narrays)) # this may also vary a bit
|
|
258 |
+ dates = list(decode=rep(NA, narrays),
|
|
259 |
+ scan=rep(NA, narrays))
|
|
260 |
+ ## read in the data
|
|
261 |
+ for(i in seq(along=arrayNames)) {
|
|
262 |
+ cat("reading", arrayNames[i], "\t")
|
|
263 |
+ idsG = idsR = G = R = NULL
|
|
264 |
+ cat(paste(sep, fileExt$green, sep=""), "\t")
|
|
265 |
+ G = readIDAT(grnidats[i])
|
|
266 |
+ idsG = rownames(G$Quants)
|
|
267 |
+ headerInfo$nProbes[i] = G$nSNPsRead
|
|
268 |
+ headerInfo$Barcode[i] = G$Barcode
|
|
269 |
+ headerInfo$ChipType[i] = G$ChipType
|
|
270 |
+ headerInfo$Manifest[i] = G$Unknown$MostlyNull
|
|
271 |
+ headerInfo$Position[i] = G$Unknowns$MostlyA
|
|
272 |
+ if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
|
|
273 |
+ ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
|
|
274 |
+ ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
|
|
275 |
+ ## but differed by a few SNPs for some reason - most of the chip was the same though
|
|
276 |
+ ## stop("Chips are not of all of the same type - please check your data")
|
|
277 |
+ warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
|
|
278 |
+ next()
|
|
279 |
+ }
|
|
280 |
+ dates$decode[i] = G$RunInfo[1, 1]
|
|
281 |
+ dates$scan[i] = G$RunInfo[2, 1]
|
|
282 |
+ if(i==1) {
|
|
283 |
+ if(is.null(ids) && !is.null(G)){
|
|
284 |
+ ids = idsG
|
|
285 |
+ } else stop("Could not find probe IDs")
|
|
286 |
+ nprobes = length(ids)
|
|
287 |
+ narrays = length(arrayNames)
|
|
288 |
+# tmpmat = matrix(NA, nprobes, narrays)
|
|
289 |
+# rownames(tmpmat) = ids
|
|
290 |
+# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
|
|
291 |
+ RG <- new("NChannelSet",
|
|
292 |
+ R=initializeBigMatrix(name="R", nr=nprobes, nc=narrays, vmode="integer"),
|
|
293 |
+ G=initializeBigMatrix(name="G", nr=nprobes, nc=narrays, vmode="integer"),
|
|
294 |
+ zero=initializeBigMatrix(name="zero", nr=nprobes, nc=narrays, vmode="integer"),
|
|
295 |
+# featureData=fD,
|
|
296 |
+# annotation=cdfName)
|
|
297 |
+# R=tmpmat,
|
|
298 |
+# G=tmpmat,
|
|
299 |
+# zero=tmpmat,
|
|
300 |
+# Rnb=tmpmat,
|
|
301 |
+# Gnb=tmpmat,
|
|
302 |
+# Rse=tmpmat,
|
|
303 |
+# Gse=tmpmat,
|
|
304 |
+ annotation=headerInfo$Manifest[1],
|
|
305 |
+ phenoData=pd,
|
|
306 |
+ storage.mode="environment")
|
|
307 |
+ featureNames(RG) = ids
|
|
308 |
+# rm(tmpmat)
|
|
309 |
+ if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
|
|
310 |
+ sampleNames(RG) = sampleSheet$Sample_ID
|
|
311 |
+ } else sampleNames(RG) = arrayNames
|
|
312 |
+ gc()
|
|
313 |
+ }
|
|
314 |
+ if(length(ids)==length(idsG)) {
|
|
315 |
+ if(sum(ids==idsG)==nprobes) {
|
|
316 |
+ RG@assayData$G[,i] = G$Quants[, "Mean"]
|
|
317 |
+ zeroG = G$Quants[, "NBeads"]==0
|
|
318 |
+# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
|
|
319 |
+# RG@assayData$Gse[,i] = G$Quants[, "SD"]
|
|
320 |
+ }
|
|
321 |
+ } else {
|
|
322 |
+ indG = match(ids, idsG)
|
|
323 |
+ RG@assayData$G[,i] = G$Quants[indG, "Mean"]
|
|
324 |
+ zeroG = G$Quants[indG, "NBeads"]==0
|
|
325 |
+# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
|
|
326 |
+# RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
|
|
327 |
+ }
|
|
328 |
+ rm(G)
|
|
329 |
+ gc()
|
|
330 |
+
|
|
331 |
+ cat(paste(sep, fileExt$red, sep=""), "\n")
|
|
332 |
+ R = readIDAT(redidats[i])
|
|
333 |
+ idsR = rownames(R$Quants)
|
|
334 |
+
|
|
335 |
+ if(length(ids)==length(idsG)) {
|
|
336 |
+ if(sum(ids==idsR)==nprobes) {
|
|
337 |
+ RG@assayData$R[,i] = R$Quants[ ,"Mean"]
|
|
338 |
+ zeroR = R$Quants[ ,"NBeads"]==0
|
|
339 |
+# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
|
|
340 |
+# RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
|
|
341 |
+ }
|
|
342 |
+ } else {
|
|
343 |
+ indR = match(ids, idsR)
|
|
344 |
+ RG@assayData$R[,i] = R$Quants[indR, "Mean"]
|
|
345 |
+ zeroR = R$Quants[indR, "NBeads"]==0
|
|
346 |
+# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
|
|
347 |
+# RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
|
|
348 |
+ }
|
|
349 |
+ RG@assayData$zero[,i] = zeroG | zeroR
|
|
350 |
+ rm(R, zeroG, zeroR)
|
|
351 |
+ gc()
|
|
352 |
+ }
|
|
353 |
+ if(saveDate) {
|
|
354 |
+ protocolData(RG)[["ScanDate"]] = dates$scan
|
|
355 |
+ }
|
|
356 |
+ storageMode(RG) = "lockedEnvironment"
|
|
357 |
+ RG
|
|
358 |
+}
|
|
359 |
+
|
|
360 |
+
|
180 |
361 |
## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
|
181 |
362 |
readIDAT <- function(idatFile){
|
182 |
363 |
|
... |
... |
@@ -433,9 +614,6 @@ readBPM <- function(bpmFile){
|
433 |
614 |
|
434 |
615 |
}
|
435 |
616 |
|
436 |
|
-
|
437 |
|
-
|
438 |
|
-
|
439 |
617 |
RGtoXY = function(RG, chipType, verbose=TRUE) {
|
440 |
618 |
chipList = c("human1mv1c", # 1M
|
441 |
619 |
"human370v1c", # 370CNV
|
... |
... |
@@ -541,6 +719,115 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
|
541 |
719 |
XY
|
542 |
720 |
}
|
543 |
721 |
|
|
722 |
+
|
|
723 |
+RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
|
724 |
+ chipList = c("human1mv1c", # 1M
|
|
725 |
+ "human370v1c", # 370CNV
|
|
726 |
+ "human650v3a", # 650Y
|
|
727 |
+ "human610quadv1b", # 610 quad
|
|
728 |
+ "human660quadv1a", # 660 quad
|
|
729 |
+ "human370quadv3c", # 370CNV quad
|
|
730 |
+ "human550v3b", # 550K
|
|
731 |
+ "human1mduov3b", # 1M Duo
|
|
732 |
+ "humanomni1quadv1b") # Omni1 quad
|
|
733 |
+ if(missing(chipType)){
|
|
734 |
+ chipType = match.arg(annotation(RG), chipList)
|
|
735 |
+ } else chipType = match.arg(chipType, chipList)
|
|
736 |
+
|
|
737 |
+ pkgname <- getCrlmmAnnotationName(chipType)
|
|
738 |
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
|
739 |
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
|
|
740 |
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
|
|
741 |
+ message(strwrap(msg))
|
|
742 |
+ stop("Package ", pkgname, " could not be found.")
|
|
743 |
+ rm(suggCall, msg)
|
|
744 |
+ }
|
|
745 |
+ if(verbose) message("Loading chip annotation information.")
|
|
746 |
+ loader("address.rda", .crlmmPkgEnv, pkgname)
|
|
747 |
+# data(annotation, package=pkgname, envir=.crlmmPkgEnv)
|
|
748 |
+ aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
|
|
749 |
+ bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
|
|
750 |
+ ids <- names(aids)
|
|
751 |
+ snpbase <- getVarInEnv("base")
|
|
752 |
+
|
|
753 |
+ nsnps = length(aids)
|
|
754 |
+ narrays = ncol(RG)
|
|
755 |
+
|
|
756 |
+# aidcol = match("AddressA_ID", colnames(annot))
|
|
757 |
+# if(is.na(aidcol))
|
|
758 |
+# aidcol = match("Address", colnames(annot))
|
|
759 |
+# bidcol = match("AddressB_ID", colnames(annot))
|
|
760 |
+# if(is.na(bidcol))
|
|
761 |
+# bidcol = match("Address2", colnames(annot))
|
|
762 |
+# aids = annot[, aidcol]
|
|
763 |
+# bids = annot[, bidcol]
|
|
764 |
+# snpids = annot[,"Name"]
|
|
765 |
+# snpbase = annot[,"SNP"]
|
|
766 |
+ infI = !is.na(bids) & bids!=0
|
|
767 |
+ aord = match(aids, featureNames(RG)) # NAs are possible here
|
|
768 |
+ bord = match(bids, featureNames(RG)) # and here
|
|
769 |
+# argrg = aids[rrgg]
|
|
770 |
+# brgrg = bids[rrgg]
|
|
771 |
+
|
|
772 |
+# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
|
|
773 |
+ XY <- new("NChannelSet",
|
|
774 |
+ X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
|
|
775 |
+ Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
|
|
776 |
+ zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
|
|
777 |
+ annotation=chipType, phenoData=RG@phenoData, # featureData=fD
|
|
778 |
+ protocolData=RG@protocolData, storage.mode="environment")
|
|
779 |
+ featureNames(XY) = ids # featureNames(RG)
|
|
780 |
+ gc()
|
|
781 |
+
|
|
782 |
+ # First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe
|
|
783 |
+ XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
|
|
784 |
+ XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
|
|
785 |
+ XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
|
|
786 |
+# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
|
|
787 |
+# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
|
|
788 |
+# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
|
|
789 |
+# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
|
|
790 |
+ gc()
|
|
791 |
+
|
|
792 |
+ ## Warning - not 100% sure that the code below is correct - could be more complicated than this
|
|
793 |
+
|
|
794 |
+ # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
|
|
795 |
+# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
|
|
796 |
+
|
|
797 |
+# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
|
|
798 |
+# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
|
|
799 |
+# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
|
|
800 |
+# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
|
|
801 |
+# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
|
|
802 |
+# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
|
|
803 |
+
|
|
804 |
+ # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
|
|
805 |
+# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
|
|
806 |
+
|
|
807 |
+# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
|
|
808 |
+# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
|
|
809 |
+# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
|
|
810 |
+# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
|
|
811 |
+# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
|
|
812 |
+# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
|
|
813 |
+
|
|
814 |
+ # For now zero out Infinium I probes
|
|
815 |
+ XY@assayData$X[infI,] = 0
|
|
816 |
+ XY@assayData$Y[infI,] = 0
|
|
817 |
+ XY@assayData$zero[infI,] = 0
|
|
818 |
+# XY@assayData$Xnb[infI,] = 0
|
|
819 |
+# XY@assayData$Ynb[infI,] = 0
|
|
820 |
+# XY@assayData$Xse[infI,] = 0
|
|
821 |
+# XY@assayData$Yse[infI,] = 0
|
|
822 |
+
|
|
823 |
+# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
|
|
824 |
+ gc()
|
|
825 |
+
|
|
826 |
+# storageMode(XY) = "lockedEnvironment"
|
|
827 |
+ XY
|
|
828 |
+}
|
|
829 |
+
|
|
830 |
+
|
544 |
831 |
stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
|
545 |
832 |
pkgname <- getCrlmmAnnotationName(annotation(XY))
|
546 |
833 |
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
... |
... |
@@ -739,8 +1026,157 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
|
739 |
1026 |
}
|
740 |
1027 |
|
741 |
1028 |
|
742 |
|
-## MR: Could add arguments to allow this function to read in idat data as well,
|
743 |
|
-## although this would add a further 7 arguments, which might over-complicate things
|
|
1029 |
+preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
|
1030 |
+ fitMixture=TRUE,
|
|
1031 |
+ eps=0.1,
|
|
1032 |
+ verbose=TRUE,
|
|
1033 |
+ seed=1,
|
|
1034 |
+ cdfName,
|
|
1035 |
+ sns,
|
|
1036 |
+ stripNorm=TRUE,
|
|
1037 |
+ useTarget=TRUE) { #,
|
|
1038 |
+# save.it=FALSE,
|
|
1039 |
+# snpFile,
|
|
1040 |
+# cnFile) {
|
|
1041 |
+ if(stripNorm)
|
|
1042 |
+ XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
|
|
1043 |
+
|
|
1044 |
+## MR: the code below is mostly straight from snprma.R
|
|
1045 |
+ if (missing(sns)) sns <- sampleNames(XY) #$X
|
|
1046 |
+ if(missing(cdfName))
|
|
1047 |
+ cdfName <- annotation(XY)
|
|
1048 |
+## stuffDir <- changeToCrlmmAnnotationName(cdfName)
|
|
1049 |
+ pkgname <- getCrlmmAnnotationName(cdfName)
|
|
1050 |
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
|
1051 |
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
|
|
1052 |
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
|
|
1053 |
+ message(strwrap(msg))
|
|
1054 |
+ stop("Package ", pkgname, " could not be found.")
|
|
1055 |
+ rm(suggCall, msg)
|
|
1056 |
+ }
|
|
1057 |
+ if(verbose) message("Loading snp annotation and mixture model parameters.")
|
|
1058 |
+ loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
|
|
1059 |
+ loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
|
|
1060 |
+ loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
|
|
1061 |
+ loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
|
|
1062 |
+# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
|
|
1063 |
+ autosomeIndex <- getVarInEnv("autosomeIndex")
|
|
1064 |
+ # pnsa <- getVarInEnv("pnsa")
|
|
1065 |
+ # pnsb <- getVarInEnv("pnsb")
|
|
1066 |
+ # fid <- getVarInEnv("fid")
|
|
1067 |
+ # reference <- getVarInEnv("reference")
|
|
1068 |
+ # aIndex <- getVarInEnv("aIndex")
|
|
1069 |
+ # bIndex <- getVarInEnv("bIndex")
|
|
1070 |
+ SMEDIAN <- getVarInEnv("SMEDIAN")
|
|
1071 |
+ theKnots <- getVarInEnv("theKnots")
|
|
1072 |
+ gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps
|
|
1073 |
+
|
|
1074 |
+ # separate out copy number probes
|
|
1075 |
+ npIndex = getVarInEnv("npProbesFid")
|
|
1076 |
+# nprobes = length(npIndex)
|
|
1077 |
+ narrays = ncol(XY)
|
|
1078 |
+# A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
|
|
1079 |
+# B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
|
|
1080 |
+
|
|
1081 |
+ # new lines below - useful to keep track of zeroed out probes
|
|
1082 |
+# zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
|
|
1083 |
+
|
|
1084 |
+# colnames(A) <- colnames(B) <- colnames(zero) <- sns
|
|
1085 |
+# rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
|
|
1086 |
+
|
|
1087 |
+# cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
|
|
1088 |
+# if(save.it & !missing(cnFile)) {
|
|
1089 |
+# t0 <- proc.time()
|
|
1090 |
+# save(cnAB, file=cnFile)
|
|
1091 |
+# t0 <- proc.time()-t0
|
|
1092 |
+# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
|
|
1093 |
+# }
|
|
1094 |
+# rm(cnAB, B, zero)
|
|
1095 |
+
|
|
1096 |
+ # next process snp probes
|
|
1097 |
+ snpIndex = getVarInEnv("snpProbesFid")
|
|
1098 |
+ nprobes <- length(snpIndex)
|
|
1099 |
+
|
|
1100 |
+ ##We will read each cel file, summarize, and run EM one by one
|
|
1101 |
+ ##We will save parameters of EM to use later
|
|
1102 |
+# mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(narrays), "double")
|
|
1103 |
+# SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
|
|
1104 |
+# SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
|
|
1105 |
+ mixtureParams <- matrix(0, 4, narrays)
|
|
1106 |
+ SNR <- vector("numeric", narrays)
|
|
1107 |
+ SKW <- vector("numeric", narrays)
|
|
1108 |
+
|
|
1109 |
+ ## This is the sample for the fitting of splines
|
|
1110 |
+ ## BC: I like better the idea of the user passing the seed,
|
|
1111 |
+ ## because this might intefere with other analyses
|
|
1112 |
+ ## (like what happened to GCRMA)
|
|
1113 |
+ set.seed(seed)
|
|
1114 |
+ idx <- sort(sample(autosomeIndex, mixtureSampleSize))
|
|
1115 |
+
|
|
1116 |
+ ##S will hold (A+B)/2 and M will hold A-B
|
|
1117 |
+ ##NOTE: We actually dont need to save S. Only for pics etc...
|
|
1118 |
+ ##f is the correction. we save to avoid recomputing
|
|
1119 |
+ A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
|
|
1120 |
+ B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
|
|
1121 |
+
|
|
1122 |
+ # new lines below - useful to keep track of zeroed out SNPs
|
|
1123 |
+ zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
|
|
1124 |
+
|
|
1125 |
+# if(!is.matrix(A)) {
|
|
1126 |
+# A = A[,]
|
|
1127 |
+# B = B[,]
|
|
1128 |
+# zero = zero[,]
|
|
1129 |
+# }
|
|
1130 |
+
|
|
1131 |
+ if(!is.integer(A)) {
|
|
1132 |
+ A = matrix(as.integer(A), nrow(A), ncol(A))
|
|
1133 |
+ B = matrix(as.integer(B), nrow(B), ncol(B))
|
|
1134 |
+ }
|
|
1135 |
+
|
|
1136 |
+# colnames(A) <- colnames(B) <- colnames(zero) <- sns
|
|
1137 |
+# rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
|
|
1138 |
+
|
|
1139 |
+ if(verbose){
|
|
1140 |
+ message("Calibrating ", narrays, " arrays.")
|
|
1141 |
+ if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
|
|
1142 |
+ }
|
|
1143 |
+
|
|
1144 |
+ idx2 <- sample(nprobes, 10^5)
|
|
1145 |
+ for(i in 1:narrays){
|
|
1146 |
+ SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
|
|
1147 |
+ if(fitMixture){
|
|
1148 |
+ S <- (log2(A[snpIndex,i][idx])+log2(B[snpIndex,i][idx]))/2 - SMEDIAN
|
|
1149 |
+ M <- log2(A[snpIndex,i][idx])-log2(B[snpIndex,i][idx])
|
|
1150 |
+
|
|
1151 |
+ ##we need to test the choice of eps.. it is not the max diff between funcs
|
|
1152 |
+ tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
|
|
1153 |
+
|
|
1154 |
+ mixtureParams[, i] <- tmp[["coef"]]
|
|
1155 |
+ SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
|
|
1156 |
+ }
|
|
1157 |
+ if(verbose) {
|
|
1158 |
+ if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
|
|
1159 |
+ else cat(".")
|
|
1160 |
+ }
|
|
1161 |
+ }
|
|
1162 |
+ if (verbose) {
|
|
1163 |
+ if (getRversion() > '2.7.0') close(pb)
|
|
1164 |
+ else cat("\n")
|
|
1165 |
+ }
|
|
1166 |
+ if (!fitMixture) SNR <- mixtureParams <- NA
|
|
1167 |
+ ## gns comes from preprocStuff.rda
|
|
1168 |
+ res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName, snpIndex=snpIndex, npIndex=npIndex)
|
|
1169 |
+
|
|
1170 |
+# if(save.it & !missing(snpFile)) {
|
|
1171 |
+# t0 <- proc.time()
|
|
1172 |
+# save(res, file=snpFile)
|
|
1173 |
+# t0 <- proc.time()-t0
|
|
1174 |
+# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
|
|
1175 |
+# }
|
|
1176 |
+ return(res)
|
|
1177 |
+}
|
|
1178 |
+
|
|
1179 |
+
|
744 |
1180 |
crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
745 |
1181 |
row.names=TRUE, col.names=TRUE,
|
746 |
1182 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
... |
... |
@@ -795,11 +1231,104 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
795 |
1231 |
return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
|
796 |
1232 |
}
|
797 |
1233 |
|
|
1234 |
+
|
|
1235 |
+crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
|
1236 |
+ row.names=TRUE, col.names=TRUE,
|
|
1237 |
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
|
1238 |
+ seed=1, # save.it=FALSE, load.it=FALSE, snpFile, cnFile,
|
|
1239 |
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
|
1240 |
+ cdfName, sns, recallMin=10, recallRegMin=1000,
|
|
1241 |
+ returnParams=FALSE, badSNP=.7) {
|
|
1242 |
+# if (save.it & (missing(snpFile) | missing(cnFile)))
|
|
1243 |
+# stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
|
|
1244 |
+# if (load.it & missing(snpFile))
|
|
1245 |
+# stop("'snpFile' is missing and you chose to load.it")
|
|
1246 |
+# if (!missing(snpFile))
|
|
1247 |
+# if (load.it & !file.exists(snpFile)){
|
|
1248 |
+# load.it <- FALSE
|
|
1249 |
+# message("File ", snpFile, " does not exist.")
|
|
1250 |
+# stop("Cannot load SNP data.")
|
|
1251 |
+# }
|
|
1252 |
+# if (!load.it){
|
|
1253 |
+ if(!missing(RG)) {
|
|
1254 |
+ if(missing(XY))
|
|
1255 |
+ XY = RGtoXY2(RG, chipType=cdfName)
|
|
1256 |
+ else
|
|
1257 |
+ stop("Both RG and XY specified - please use one or the other")
|
|
1258 |
+ }
|
|
1259 |
+ if (missing(sns)) sns <- sampleNames(XY) #$X
|
|
1260 |
+
|
|
1261 |
+ res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
|
1262 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
|
|
1263 |
+ # save.it=save.it, snpFile=snpFile, cnFile=cnFile)
|
|
1264 |
+
|
|
1265 |
+ fD = featureData(XY)
|
|
1266 |
+ phenD = XY@phenoData
|
|
1267 |
+ protD = XY@protocolData
|
|
1268 |
+ rm(XY)
|
|
1269 |
+ gc()
|
|
1270 |
+ if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
|
|
1271 |
+ callSet <- new("SnpSuperSet",
|
|
1272 |
+ alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1273 |
+ alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1274 |
+ call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1275 |
+ callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1276 |
+ annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
|
|
1277 |
+ sampleNames(callSet) <- sns
|
|
1278 |
+ featureNames(callSet) <- res[["gns"]]
|
|
1279 |
+ pData(callSet)$SKW <- rep(NA, length(sns))
|
|
1280 |
+ pData(callSet)$SNR <- rep(NA, length(sns))
|
|
1281 |
+ pData(callSet)$gender <- rep(NA, length(sns))
|
|
1282 |
+
|
|
1283 |
+# }else{
|
|
1284 |
+# if(verbose) message("Loading ", snpFile, ".")
|
|
1285 |
+# obj <- load(snpFile)
|
|
1286 |
+# if(verbose) message("Done.")
|
|
1287 |
+# if(!any(obj == "res"))
|
|
1288 |
+# stop("Object in ", snpFile, " seems to be invalid.")
|
|
1289 |
+# }
|
|
1290 |
+
|
|
1291 |
+ rm(phenD, protD , fD)
|
|
1292 |
+
|
|
1293 |
+ snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
|
|
1294 |
+ suppressWarnings(A(callSet) <- res[["A"]])
|
|
1295 |
+ suppressWarnings(B(callSet) <- res[["B"]])
|
|
1296 |
+ pData(callSet)$SKW <- res$SKW
|
|
1297 |
+ pData(callSet)$SNR <- res$SNR
|
|
1298 |
+ mixtureParams <- res$mixtureParams
|
|
1299 |
+ rm(res); gc()
|
|
1300 |
+ tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
|
|
1301 |
+ B=as.matrix(B(callSet)[snp.index,]), # j]),
|
|
1302 |
+ SNR=callSet$SNR, # [j],
|
|
1303 |
+ mixtureParams=mixtureParams,
|
|
1304 |
+ cdfName=annotation(callSet),
|
|
1305 |
+# row.names=featureNames(callSet)[snp.index],
|
|
1306 |
+# col.names=sampleNames(callSet), #[j],
|
|
1307 |
+ probs=probs,
|
|
1308 |
+ DF=DF,
|
|
1309 |
+ SNRMin=SNRMin,
|
|
1310 |
+ recallMin=recallMin,
|
|
1311 |
+ recallRegMin=recallRegMin,
|
|
1312 |
+ gender=gender,
|
|
1313 |
+ verbose=verbose,
|
|
1314 |
+ returnParams=returnParams,
|
|
1315 |
+ badSNP=badSNP)
|
|
1316 |
+# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
|
|
1317 |
+# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
|
|
1318 |
+# callSet$gender[j] <- tmp$gender
|
|
1319 |
+ suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
|
|
1320 |
+ suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
|
|
1321 |
+ callSet$gender <- tmp$gender
|
|
1322 |
+ rm(tmp); gc()
|
|
1323 |
+ return(callSet)
|
|
1324 |
+}
|
|
1325 |
+
|
|
1326 |
+
|
798 |
1327 |
## MR: Below is a more memory efficient version of crlmmIllumina() which
|
799 |
1328 |
## reads in the .idats and genotypes in the one function and removes objects
|
800 |
1329 |
## after they have been used
|
801 |
1330 |
crlmmIlluminaV2 = function(sampleSheet=NULL,
|
802 |
|
- arrayNames=NULL,
|
|
1331 |
+ arrayNames=NULL,
|
803 |
1332 |
ids=NULL,
|
804 |
1333 |
path=".",
|
805 |
1334 |
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
|
... |
... |
@@ -807,53 +1336,129 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
|
807 |
1336 |
sep="_",
|
808 |
1337 |
fileExt=list(green="Grn.idat", red="Red.idat"),
|
809 |
1338 |
saveDate=FALSE,
|
810 |
|
- save.rg=FALSE,
|
811 |
|
- rgFile,
|
|
1339 |
+# save.rg=FALSE,
|
|
1340 |
+# rgFile,
|
812 |
1341 |
stripNorm=TRUE,
|
813 |
1342 |
useTarget=TRUE,
|
814 |
|
- row.names=TRUE,
|
|
1343 |
+ row.names=TRUE,
|
815 |
1344 |
col.names=TRUE,
|
816 |
1345 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
817 |
|
- seed=1, save.ab=FALSE, snpFile, cnFile,
|
818 |
|
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
819 |
|
- cdfName, sns, recallMin=10, recallRegMin=1000,
|
820 |
|
- returnParams=FALSE, badSNP=.7) {
|
|
1346 |
+ seed=1, # save.ab=FALSE, snpFile, cnFile,
|
|
1347 |
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
|
1348 |
+ cdfName, sns, recallMin=10, recallRegMin=1000,
|
|
1349 |
+ returnParams=FALSE, badSNP=.7) {
|
|
1350 |
+
|
|
1351 |
+ if(missing(cdfName)) stop("must specify cdfName")
|
|
1352 |
+ if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
|
|
1353 |
+# if(missing(sns)) sns <- basename(arrayNames)
|
821 |
1354 |
|
822 |
|
- if (save.rg & missing(rgFile))
|
823 |
|
- stop("'rgFile' is missing, and you chose save.rg")
|
824 |
|
- if (save.ab & (missing(snpFile) | missing(cnFile)))
|
825 |
|
- stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
|
826 |
|
-
|
827 |
|
- RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
|
|
1355 |
+# if (save.rg & missing(rgFile))
|
|
1356 |
+# stop("'rgFile' is missing, and you chose save.rg")
|
|
1357 |
+# if (save.ab & (missing(snpFile) | missing(cnFile)))
|
|
1358 |
+# stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
|
|
1359 |
+# batches = NULL
|
|
1360 |
+# if(!is.null(arrayNames))
|
|
1361 |
+# batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
|
|
1362 |
+# if(!is.null(sampleSheet))
|
|
1363 |
+# batches <- rep(1, nrow(sampleSheet))
|
|
1364 |
+# if(is.null(batches))
|
|
1365 |
+# batches=1
|
|
1366 |
+# k <- 1
|
|
1367 |
+# for(j in batches){
|
|
1368 |
+# if(verbose) message("Batch ", k, " of ", length(batches))
|
|
1369 |
+# RG = readIdatFiles(sampleSheet=sampleSheet[j,], arrayNames=arrayNames[j],
|
|
1370 |
+# ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
|
|
1371 |
+# highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
|
|
1372 |
+ RG = readIdatFiles2(sampleSheet=sampleSheet, arrayNames=arrayNames,
|
828 |
1373 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
|
829 |
1374 |
highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
|
830 |
|
- if(save.rg)
|
831 |
|
- save(RG, file=rgFile)
|
832 |
1375 |
|
833 |
|
- XY = RGtoXY(RG, chipType=cdfName)
|
834 |
|
- rm(RG)
|
835 |
|
- gc()
|
836 |
|
- if (missing(sns)) sns = sampleNames(XY)
|
837 |
|
-
|
838 |
|
- res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
839 |
|
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
|
840 |
|
- save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
|
841 |
|
- rm(XY)
|
842 |
|
- gc()
|
843 |
|
- if(row.names) row.names=res$gns else row.names=NULL
|
844 |
|
- if(col.names) col.names=res$sns else col.names=NULL
|
|
1376 |
+# if(save.rg)
|
|
1377 |
+# save(RG, file=rgFile)
|
845 |
1378 |
|
846 |
|
- res2 = crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
|
847 |
|
- res[["mixtureParams"]], res[["cdfName"]],
|
848 |
|
- gender=gender, row.names=row.names,
|
849 |
|
- col.names=col.names, recallMin=recallMin,
|
850 |
|
- recallRegMin=1000, SNRMin=SNRMin,
|
851 |
|
- returnParams=returnParams, badSNP=badSNP,
|
852 |
|
- verbose=verbose)
|
853 |
|
-
|
854 |
|
- res2[["SNR"]] = res[["SNR"]]
|
855 |
|
- res2[["SKW"]] = res[["SKW"]]
|
856 |
|
- rm(res)
|
857 |
|
- gc()
|
858 |
|
- return(list2SnpSet(res2, returnParams=returnParams))
|
|
1379 |
+ XY = RGtoXY2(RG, chipType=cdfName)
|
|
1380 |
+ rm(RG)
|
|
1381 |
+ gc()
|
|
1382 |
+ if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
|
|
1383 |
+ } else subsns = sns[j]
|
|
1384 |
+ res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
|
1385 |
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) # sns=subsns
|
|
1386 |
+# save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
|
|
1387 |
+ fD = featureData(XY)
|
|
1388 |
+ phenD = XY@phenoData
|
|
1389 |
+ protD = XY@protocolData
|
|
1390 |
+ rm(XY)
|
|
1391 |
+ gc()
|
|
1392 |
+# if(k == 1){
|
|
1393 |
+ if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
|
|
1394 |
+ callSet <- new("SnpSuperSet",
|
|
1395 |
+ alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1396 |
+ alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1397 |
+ call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1398 |
+ callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1399 |
+ annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
|
|
1400 |
+ sampleNames(callSet) <- sns
|
|
1401 |
+# phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
|
|
1402 |
+# arrayNames=sns,
|
|
1403 |
+# arrayInfoColNames=arrayInfoColNames)
|
|
1404 |
+# pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns)
|
|
1405 |
+# colnames(pD) <- "ScanDate"
|
|
1406 |
+# protocolData(callSet) <- pData(protD) # new("AnnotatedDataFrame", data=pD)
|
|
1407 |
+# pData(protocolData(callSet))[j, ] <- pData(protocolData)
|
|
1408 |
+ featureNames(callSet) <- res[["gns"]]
|
|
1409 |
+ pData(callSet)$SKW <- rep(NA, length(sns))
|
|
1410 |
+ pData(callSet)$SNR <- rep(NA, length(sns))
|
|
1411 |
+ pData(callSet)$gender <- rep(NA, length(sns))
|
|
1412 |
+# }
|
|
1413 |
+# pData(callSet)[j,] <- phenD
|
|
1414 |
+# pData(protocolData(callSet))[j,] <- protD
|
|
1415 |
+# pData(callSet) <- phenD
|
|
1416 |
+# pData(protocolData(callSet)) <- protD
|
|
1417 |
+
|
|
1418 |
+ rm(phenD, protD, fD)
|
|
1419 |
+
|
|
1420 |
+# if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
|
|
1421 |
+ ##RS: I don't understand why the IDATS for the
|
|
1422 |
+ ##same platform potentially have different lengths
|
|
1423 |
+# res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ]
|
|
1424 |
+# res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
|
|
1425 |
+# }
|
|
1426 |
+
|
|
1427 |
+ snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
|
|
1428 |
+# suppressWarnings(A(callSet)[, j] <- res[["A"]])
|
|
1429 |
+# suppressWarnings(B(callSet)[, j] <- res[["B"]])
|
|
1430 |
+ suppressWarnings(A(callSet) <- res[["A"]])
|
|
1431 |
+ suppressWarnings(B(callSet) <- res[["B"]])
|
|
1432 |
+# pData(callSet)$SKW[j] <- res$SKW
|
|
1433 |
+# pData(callSet)$SNR[j] <- res$SNR
|
|
1434 |
+ pData(callSet)$SKW <- res$SKW
|
|
1435 |
+ pData(callSet)$SNR <- res$SNR
|
|
1436 |
+ mixtureParams <- res$mixtureParams
|
|
1437 |
+ rm(res); gc()
|
|
1438 |
+ tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
|
|
1439 |
+ B=as.matrix(B(callSet)[snp.index,]), # j]),
|
|
1440 |
+ SNR=callSet$SNR, # [j],
|
|
1441 |
+ mixtureParams=mixtureParams,
|
|
1442 |
+ cdfName=annotation(callSet),
|
|
1443 |
+ row.names=featureNames(callSet)[snp.index],
|
|
1444 |
+ col.names=sampleNames(callSet), #[j],
|
|
1445 |
+ probs=probs,
|
|
1446 |
+ DF=DF,
|
|
1447 |
+ SNRMin=SNRMin,
|
|
1448 |
+ recallMin=recallMin,
|
|
1449 |
+ recallRegMin=recallRegMin,
|
|
1450 |
+ gender=gender,
|
|
1451 |
+ verbose=verbose,
|
|
1452 |
+ returnParams=returnParams,
|
|
1453 |
+ badSNP=badSNP)
|
|
1454 |
+# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
|
|
1455 |
+# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
|
|
1456 |
+# callSet$gender[j] <- tmp$gender
|
|
1457 |
+ suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
|
|
1458 |
+ suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
|
|
1459 |
+ callSet$gender <- tmp$gender
|
|
1460 |
+ rm(tmp); gc()
|
|
1461 |
+# k <- k+1
|
|
1462 |
+# }
|
|
1463 |
+ return(callSet)
|
859 |
1464 |
}
|