...
|
...
|
@@ -177,6 +177,187 @@ readIdatFiles <- function(sampleSheet=NULL,
|
177
|
177
|
}
|
178
|
178
|
|
179
|
179
|
|
|
180
|
+readIdatFiles2 <- function(sampleSheet=NULL,
|
|
181
|
+ arrayNames=NULL,
|
|
182
|
+ ids=NULL,
|
|
183
|
+ path=".",
|
|
184
|
+ arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
|
|
185
|
+ highDensity=FALSE,
|
|
186
|
+ sep="_",
|
|
187
|
+ fileExt=list(green="Grn.idat", red="Red.idat"),
|
|
188
|
+ saveDate=FALSE) {
|
|
189
|
+# if(!is.null(arrayNames)) {
|
|
190
|
+# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
|
191
|
+# if(!is.null(sampleSheet)) {
|
|
192
|
+# sampleSheet=NULL
|
|
193
|
+# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
|
194
|
+# }
|
|
195
|
+# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
|
196
|
+# }
|
|
197
|
+ if(!is.null(arrayNames)) {
|
|
198
|
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
|
199
|
+ }
|
|
200
|
+ if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
|
|
201
|
+ if(is.null(arrayNames)){
|
|
202
|
+ ##arrayNames=NULL
|
|
203
|
+ if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
|
|
204
|
+ barcode = sampleSheet[,arrayInfoColNames$barcode]
|
|
205
|
+ arrayNames=barcode
|
|
206
|
+ }
|
|
207
|
+ if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
|
|
208
|
+ position = sampleSheet[,arrayInfoColNames$position]
|
|
209
|
+ if(is.null(arrayNames))
|
|
210
|
+ arrayNames=position
|
|
211
|
+ else
|
|
212
|
+ arrayNames = paste(arrayNames, position, sep=sep)
|
|
213
|
+ if(highDensity) {
|
|
214
|
+ hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
|
|
215
|
+ for(i in names(hdExt))
|
|
216
|
+ arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
|
|
217
|
+ }
|
|
218
|
+ }
|
|
219
|
+ }
|
|
220
|
+ pd = new("AnnotatedDataFrame", data = sampleSheet)
|
|
221
|
+ sampleNames(pd) <- basename(arrayNames)
|
|
222
|
+ }
|
|
223
|
+ if(is.null(arrayNames)) {
|
|
224
|
+ arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
|
|
225
|
+ if(!is.null(sampleSheet)) {
|
|
226
|
+ sampleSheet=NULL
|
|
227
|
+ cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
|
|
228
|
+ }
|
|
229
|
+ pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
|
|
230
|
+ }
|
|
231
|
+
|
|
232
|
+ narrays = length(arrayNames)
|
|
233
|
+ grnfiles = paste(arrayNames, fileExt$green, sep=sep)
|
|
234
|
+ redfiles = paste(arrayNames, fileExt$red, sep=sep)
|
|
235
|
+ if(length(grnfiles)==0 || length(redfiles)==0)
|
|
236
|
+ stop("Cannot find .idat files")
|
|
237
|
+ if(length(grnfiles)!=length(redfiles))
|
|
238
|
+ stop("Cannot find matching .idat files")
|
|
239
|
+ if(path[1] != "."){
|
|
240
|
+ grnidats = file.path(path, grnfiles)
|
|
241
|
+ redidats = file.path(path, redfiles)
|
|
242
|
+ } else {
|
|
243
|
+ message("path arg not set. Assuming files are in local directory")
|
|
244
|
+ grnidats <- grnfiles
|
|
245
|
+ redidats <- redfiles
|
|
246
|
+ }
|
|
247
|
+ if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
|
|
248
|
+ if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
|
|
249
|
+## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
|
|
250
|
+## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
|
|
251
|
+## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
|
|
252
|
+## }
|
|
253
|
+ headerInfo = list(nProbes = rep(NA, narrays),
|
|
254
|
+ Barcode = rep(NA, narrays),
|
|
255
|
+ ChipType = rep(NA, narrays),
|
|
256
|
+ Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
|
|
257
|
+ Position = rep(NA, narrays)) # this may also vary a bit
|
|
258
|
+ dates = list(decode=rep(NA, narrays),
|
|
259
|
+ scan=rep(NA, narrays))
|
|
260
|
+ ## read in the data
|
|
261
|
+ for(i in seq(along=arrayNames)) {
|
|
262
|
+ cat("reading", arrayNames[i], "\t")
|
|
263
|
+ idsG = idsR = G = R = NULL
|
|
264
|
+ cat(paste(sep, fileExt$green, sep=""), "\t")
|
|
265
|
+ G = readIDAT(grnidats[i])
|
|
266
|
+ idsG = rownames(G$Quants)
|
|
267
|
+ headerInfo$nProbes[i] = G$nSNPsRead
|
|
268
|
+ headerInfo$Barcode[i] = G$Barcode
|
|
269
|
+ headerInfo$ChipType[i] = G$ChipType
|
|
270
|
+ headerInfo$Manifest[i] = G$Unknown$MostlyNull
|
|
271
|
+ headerInfo$Position[i] = G$Unknowns$MostlyA
|
|
272
|
+ if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
|
|
273
|
+ ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
|
|
274
|
+ ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
|
|
275
|
+ ## but differed by a few SNPs for some reason - most of the chip was the same though
|
|
276
|
+ ## stop("Chips are not of all of the same type - please check your data")
|
|
277
|
+ warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
|
|
278
|
+ next()
|
|
279
|
+ }
|
|
280
|
+ dates$decode[i] = G$RunInfo[1, 1]
|
|
281
|
+ dates$scan[i] = G$RunInfo[2, 1]
|
|
282
|
+ if(i==1) {
|
|
283
|
+ if(is.null(ids) && !is.null(G)){
|
|
284
|
+ ids = idsG
|
|
285
|
+ } else stop("Could not find probe IDs")
|
|
286
|
+ nprobes = length(ids)
|
|
287
|
+ narrays = length(arrayNames)
|
|
288
|
+# tmpmat = matrix(NA, nprobes, narrays)
|
|
289
|
+# rownames(tmpmat) = ids
|
|
290
|
+# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
|
|
291
|
+ RG <- new("NChannelSet",
|
|
292
|
+ R=initializeBigMatrix(name="R", nr=nprobes, nc=narrays, vmode="integer"),
|
|
293
|
+ G=initializeBigMatrix(name="G", nr=nprobes, nc=narrays, vmode="integer"),
|
|
294
|
+ zero=initializeBigMatrix(name="zero", nr=nprobes, nc=narrays, vmode="integer"),
|
|
295
|
+# featureData=fD,
|
|
296
|
+# annotation=cdfName)
|
|
297
|
+# R=tmpmat,
|
|
298
|
+# G=tmpmat,
|
|
299
|
+# zero=tmpmat,
|
|
300
|
+# Rnb=tmpmat,
|
|
301
|
+# Gnb=tmpmat,
|
|
302
|
+# Rse=tmpmat,
|
|
303
|
+# Gse=tmpmat,
|
|
304
|
+ annotation=headerInfo$Manifest[1],
|
|
305
|
+ phenoData=pd,
|
|
306
|
+ storage.mode="environment")
|
|
307
|
+ featureNames(RG) = ids
|
|
308
|
+# rm(tmpmat)
|
|
309
|
+ if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
|
|
310
|
+ sampleNames(RG) = sampleSheet$Sample_ID
|
|
311
|
+ } else sampleNames(RG) = arrayNames
|
|
312
|
+ gc()
|
|
313
|
+ }
|
|
314
|
+ if(length(ids)==length(idsG)) {
|
|
315
|
+ if(sum(ids==idsG)==nprobes) {
|
|
316
|
+ RG@assayData$G[,i] = G$Quants[, "Mean"]
|
|
317
|
+ zeroG = G$Quants[, "NBeads"]==0
|
|
318
|
+# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
|
|
319
|
+# RG@assayData$Gse[,i] = G$Quants[, "SD"]
|
|
320
|
+ }
|
|
321
|
+ } else {
|
|
322
|
+ indG = match(ids, idsG)
|
|
323
|
+ RG@assayData$G[,i] = G$Quants[indG, "Mean"]
|
|
324
|
+ zeroG = G$Quants[indG, "NBeads"]==0
|
|
325
|
+# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
|
|
326
|
+# RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
|
|
327
|
+ }
|
|
328
|
+ rm(G)
|
|
329
|
+ gc()
|
|
330
|
+
|
|
331
|
+ cat(paste(sep, fileExt$red, sep=""), "\n")
|
|
332
|
+ R = readIDAT(redidats[i])
|
|
333
|
+ idsR = rownames(R$Quants)
|
|
334
|
+
|
|
335
|
+ if(length(ids)==length(idsG)) {
|
|
336
|
+ if(sum(ids==idsR)==nprobes) {
|
|
337
|
+ RG@assayData$R[,i] = R$Quants[ ,"Mean"]
|
|
338
|
+ zeroR = R$Quants[ ,"NBeads"]==0
|
|
339
|
+# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
|
|
340
|
+# RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
|
|
341
|
+ }
|
|
342
|
+ } else {
|
|
343
|
+ indR = match(ids, idsR)
|
|
344
|
+ RG@assayData$R[,i] = R$Quants[indR, "Mean"]
|
|
345
|
+ zeroR = R$Quants[indR, "NBeads"]==0
|
|
346
|
+# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
|
|
347
|
+# RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
|
|
348
|
+ }
|
|
349
|
+ RG@assayData$zero[,i] = zeroG | zeroR
|
|
350
|
+ rm(R, zeroG, zeroR)
|
|
351
|
+ gc()
|
|
352
|
+ }
|
|
353
|
+ if(saveDate) {
|
|
354
|
+ protocolData(RG)[["ScanDate"]] = dates$scan
|
|
355
|
+ }
|
|
356
|
+ storageMode(RG) = "lockedEnvironment"
|
|
357
|
+ RG
|
|
358
|
+}
|
|
359
|
+
|
|
360
|
+
|
180
|
361
|
## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
|
181
|
362
|
readIDAT <- function(idatFile){
|
182
|
363
|
|
...
|
...
|
@@ -433,9 +614,6 @@ readBPM <- function(bpmFile){
|
433
|
614
|
|
434
|
615
|
}
|
435
|
616
|
|
436
|
|
-
|
437
|
|
-
|
438
|
|
-
|
439
|
617
|
RGtoXY = function(RG, chipType, verbose=TRUE) {
|
440
|
618
|
chipList = c("human1mv1c", # 1M
|
441
|
619
|
"human370v1c", # 370CNV
|
...
|
...
|
@@ -541,6 +719,115 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
|
541
|
719
|
XY
|
542
|
720
|
}
|
543
|
721
|
|
|
722
|
+
|
|
723
|
+RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
|
724
|
+ chipList = c("human1mv1c", # 1M
|
|
725
|
+ "human370v1c", # 370CNV
|
|
726
|
+ "human650v3a", # 650Y
|
|
727
|
+ "human610quadv1b", # 610 quad
|
|
728
|
+ "human660quadv1a", # 660 quad
|
|
729
|
+ "human370quadv3c", # 370CNV quad
|
|
730
|
+ "human550v3b", # 550K
|
|
731
|
+ "human1mduov3b", # 1M Duo
|
|
732
|
+ "humanomni1quadv1b") # Omni1 quad
|
|
733
|
+ if(missing(chipType)){
|
|
734
|
+ chipType = match.arg(annotation(RG), chipList)
|
|
735
|
+ } else chipType = match.arg(chipType, chipList)
|
|
736
|
+
|
|
737
|
+ pkgname <- getCrlmmAnnotationName(chipType)
|
|
738
|
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
|
739
|
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
|
|
740
|
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
|
|
741
|
+ message(strwrap(msg))
|
|
742
|
+ stop("Package ", pkgname, " could not be found.")
|
|
743
|
+ rm(suggCall, msg)
|
|
744
|
+ }
|
|
745
|
+ if(verbose) message("Loading chip annotation information.")
|
|
746
|
+ loader("address.rda", .crlmmPkgEnv, pkgname)
|
|
747
|
+# data(annotation, package=pkgname, envir=.crlmmPkgEnv)
|
|
748
|
+ aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
|
|
749
|
+ bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
|
|
750
|
+ ids <- names(aids)
|
|
751
|
+ snpbase <- getVarInEnv("base")
|
|
752
|
+
|
|
753
|
+ nsnps = length(aids)
|
|
754
|
+ narrays = ncol(RG)
|
|
755
|
+
|
|
756
|
+# aidcol = match("AddressA_ID", colnames(annot))
|
|
757
|
+# if(is.na(aidcol))
|
|
758
|
+# aidcol = match("Address", colnames(annot))
|
|
759
|
+# bidcol = match("AddressB_ID", colnames(annot))
|
|
760
|
+# if(is.na(bidcol))
|
|
761
|
+# bidcol = match("Address2", colnames(annot))
|
|
762
|
+# aids = annot[, aidcol]
|
|
763
|
+# bids = annot[, bidcol]
|
|
764
|
+# snpids = annot[,"Name"]
|
|
765
|
+# snpbase = annot[,"SNP"]
|
|
766
|
+ infI = !is.na(bids) & bids!=0
|
|
767
|
+ aord = match(aids, featureNames(RG)) # NAs are possible here
|
|
768
|
+ bord = match(bids, featureNames(RG)) # and here
|
|
769
|
+# argrg = aids[rrgg]
|
|
770
|
+# brgrg = bids[rrgg]
|
|
771
|
+
|
|
772
|
+# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
|
|
773
|
+ XY <- new("NChannelSet",
|
|
774
|
+ X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
|
|
775
|
+ Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
|
|
776
|
+ zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
|
|
777
|
+ annotation=chipType, phenoData=RG@phenoData, # featureData=fD
|
|
778
|
+ protocolData=RG@protocolData, storage.mode="environment")
|
|
779
|
+ featureNames(XY) = ids # featureNames(RG)
|
|
780
|
+ gc()
|
|
781
|
+
|
|
782
|
+ # First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe
|
|
783
|
+ XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
|
|
784
|
+ XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
|
|
785
|
+ XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
|
|
786
|
+# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
|
|
787
|
+# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
|
|
788
|
+# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
|
|
789
|
+# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
|
|
790
|
+ gc()
|
|
791
|
+
|
|
792
|
+ ## Warning - not 100% sure that the code below is correct - could be more complicated than this
|
|
793
|
+
|
|
794
|
+ # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
|
|
795
|
+# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
|
|
796
|
+
|
|
797
|
+# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
|
|
798
|
+# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
|
|
799
|
+# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
|
|
800
|
+# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
|
|
801
|
+# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
|
|
802
|
+# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
|
|
803
|
+
|
|
804
|
+ # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
|
|
805
|
+# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
|
|
806
|
+
|
|
807
|
+# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
|
|
808
|
+# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
|
|
809
|
+# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
|
|
810
|
+# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
|
|
811
|
+# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
|
|
812
|
+# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
|
|
813
|
+
|
|
814
|
+ # For now zero out Infinium I probes
|
|
815
|
+ XY@assayData$X[infI,] = 0
|
|
816
|
+ XY@assayData$Y[infI,] = 0
|
|
817
|
+ XY@assayData$zero[infI,] = 0
|
|
818
|
+# XY@assayData$Xnb[infI,] = 0
|
|
819
|
+# XY@assayData$Ynb[infI,] = 0
|
|
820
|
+# XY@assayData$Xse[infI,] = 0
|
|
821
|
+# XY@assayData$Yse[infI,] = 0
|
|
822
|
+
|
|
823
|
+# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
|
|
824
|
+ gc()
|
|
825
|
+
|
|
826
|
+# storageMode(XY) = "lockedEnvironment"
|
|
827
|
+ XY
|
|
828
|
+}
|
|
829
|
+
|
|
830
|
+
|
544
|
831
|
stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
|
545
|
832
|
pkgname <- getCrlmmAnnotationName(annotation(XY))
|
546
|
833
|
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
...
|
...
|
@@ -739,8 +1026,157 @@ preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
|
739
|
1026
|
}
|
740
|
1027
|
|
741
|
1028
|
|
742
|
|
-## MR: Could add arguments to allow this function to read in idat data as well,
|
743
|
|
-## although this would add a further 7 arguments, which might over-complicate things
|
|
1029
|
+preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
|
|
1030
|
+ fitMixture=TRUE,
|
|
1031
|
+ eps=0.1,
|
|
1032
|
+ verbose=TRUE,
|
|
1033
|
+ seed=1,
|
|
1034
|
+ cdfName,
|
|
1035
|
+ sns,
|
|
1036
|
+ stripNorm=TRUE,
|
|
1037
|
+ useTarget=TRUE) { #,
|
|
1038
|
+# save.it=FALSE,
|
|
1039
|
+# snpFile,
|
|
1040
|
+# cnFile) {
|
|
1041
|
+ if(stripNorm)
|
|
1042
|
+ XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
|
|
1043
|
+
|
|
1044
|
+## MR: the code below is mostly straight from snprma.R
|
|
1045
|
+ if (missing(sns)) sns <- sampleNames(XY) #$X
|
|
1046
|
+ if(missing(cdfName))
|
|
1047
|
+ cdfName <- annotation(XY)
|
|
1048
|
+## stuffDir <- changeToCrlmmAnnotationName(cdfName)
|
|
1049
|
+ pkgname <- getCrlmmAnnotationName(cdfName)
|
|
1050
|
+ if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
|
|
1051
|
+ suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
|
|
1052
|
+ msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
|
|
1053
|
+ message(strwrap(msg))
|
|
1054
|
+ stop("Package ", pkgname, " could not be found.")
|
|
1055
|
+ rm(suggCall, msg)
|
|
1056
|
+ }
|
|
1057
|
+ if(verbose) message("Loading snp annotation and mixture model parameters.")
|
|
1058
|
+ loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
|
|
1059
|
+ loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
|
|
1060
|
+ loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
|
|
1061
|
+ loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
|
|
1062
|
+# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
|
|
1063
|
+ autosomeIndex <- getVarInEnv("autosomeIndex")
|
|
1064
|
+ # pnsa <- getVarInEnv("pnsa")
|
|
1065
|
+ # pnsb <- getVarInEnv("pnsb")
|
|
1066
|
+ # fid <- getVarInEnv("fid")
|
|
1067
|
+ # reference <- getVarInEnv("reference")
|
|
1068
|
+ # aIndex <- getVarInEnv("aIndex")
|
|
1069
|
+ # bIndex <- getVarInEnv("bIndex")
|
|
1070
|
+ SMEDIAN <- getVarInEnv("SMEDIAN")
|
|
1071
|
+ theKnots <- getVarInEnv("theKnots")
|
|
1072
|
+ gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps
|
|
1073
|
+
|
|
1074
|
+ # separate out copy number probes
|
|
1075
|
+ npIndex = getVarInEnv("npProbesFid")
|
|
1076
|
+# nprobes = length(npIndex)
|
|
1077
|
+ narrays = ncol(XY)
|
|
1078
|
+# A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
|
|
1079
|
+# B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
|
|
1080
|
+
|
|
1081
|
+ # new lines below - useful to keep track of zeroed out probes
|
|
1082
|
+# zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
|
|
1083
|
+
|
|
1084
|
+# colnames(A) <- colnames(B) <- colnames(zero) <- sns
|
|
1085
|
+# rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
|
|
1086
|
+
|
|
1087
|
+# cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
|
|
1088
|
+# if(save.it & !missing(cnFile)) {
|
|
1089
|
+# t0 <- proc.time()
|
|
1090
|
+# save(cnAB, file=cnFile)
|
|
1091
|
+# t0 <- proc.time()-t0
|
|
1092
|
+# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
|
|
1093
|
+# }
|
|
1094
|
+# rm(cnAB, B, zero)
|
|
1095
|
+
|
|
1096
|
+ # next process snp probes
|
|
1097
|
+ snpIndex = getVarInEnv("snpProbesFid")
|
|
1098
|
+ nprobes <- length(snpIndex)
|
|
1099
|
+
|
|
1100
|
+ ##We will read each cel file, summarize, and run EM one by one
|
|
1101
|
+ ##We will save parameters of EM to use later
|
|
1102
|
+# mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(narrays), "double")
|
|
1103
|
+# SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
|
|
1104
|
+# SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
|
|
1105
|
+ mixtureParams <- matrix(0, 4, narrays)
|
|
1106
|
+ SNR <- vector("numeric", narrays)
|
|
1107
|
+ SKW <- vector("numeric", narrays)
|
|
1108
|
+
|
|
1109
|
+ ## This is the sample for the fitting of splines
|
|
1110
|
+ ## BC: I like better the idea of the user passing the seed,
|
|
1111
|
+ ## because this might intefere with other analyses
|
|
1112
|
+ ## (like what happened to GCRMA)
|
|
1113
|
+ set.seed(seed)
|
|
1114
|
+ idx <- sort(sample(autosomeIndex, mixtureSampleSize))
|
|
1115
|
+
|
|
1116
|
+ ##S will hold (A+B)/2 and M will hold A-B
|
|
1117
|
+ ##NOTE: We actually dont need to save S. Only for pics etc...
|
|
1118
|
+ ##f is the correction. we save to avoid recomputing
|
|
1119
|
+ A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
|
|
1120
|
+ B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
|
|
1121
|
+
|
|
1122
|
+ # new lines below - useful to keep track of zeroed out SNPs
|
|
1123
|
+ zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
|
|
1124
|
+
|
|
1125
|
+# if(!is.matrix(A)) {
|
|
1126
|
+# A = A[,]
|
|
1127
|
+# B = B[,]
|
|
1128
|
+# zero = zero[,]
|
|
1129
|
+# }
|
|
1130
|
+
|
|
1131
|
+ if(!is.integer(A)) {
|
|
1132
|
+ A = matrix(as.integer(A), nrow(A), ncol(A))
|
|
1133
|
+ B = matrix(as.integer(B), nrow(B), ncol(B))
|
|
1134
|
+ }
|
|
1135
|
+
|
|
1136
|
+# colnames(A) <- colnames(B) <- colnames(zero) <- sns
|
|
1137
|
+# rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
|
|
1138
|
+
|
|
1139
|
+ if(verbose){
|
|
1140
|
+ message("Calibrating ", narrays, " arrays.")
|
|
1141
|
+ if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
|
|
1142
|
+ }
|
|
1143
|
+
|
|
1144
|
+ idx2 <- sample(nprobes, 10^5)
|
|
1145
|
+ for(i in 1:narrays){
|
|
1146
|
+ SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
|
|
1147
|
+ if(fitMixture){
|
|
1148
|
+ S <- (log2(A[snpIndex,i][idx])+log2(B[snpIndex,i][idx]))/2 - SMEDIAN
|
|
1149
|
+ M <- log2(A[snpIndex,i][idx])-log2(B[snpIndex,i][idx])
|
|
1150
|
+
|
|
1151
|
+ ##we need to test the choice of eps.. it is not the max diff between funcs
|
|
1152
|
+ tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
|
|
1153
|
+
|
|
1154
|
+ mixtureParams[, i] <- tmp[["coef"]]
|
|
1155
|
+ SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
|
|
1156
|
+ }
|
|
1157
|
+ if(verbose) {
|
|
1158
|
+ if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
|
|
1159
|
+ else cat(".")
|
|
1160
|
+ }
|
|
1161
|
+ }
|
|
1162
|
+ if (verbose) {
|
|
1163
|
+ if (getRversion() > '2.7.0') close(pb)
|
|
1164
|
+ else cat("\n")
|
|
1165
|
+ }
|
|
1166
|
+ if (!fitMixture) SNR <- mixtureParams <- NA
|
|
1167
|
+ ## gns comes from preprocStuff.rda
|
|
1168
|
+ res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName, snpIndex=snpIndex, npIndex=npIndex)
|
|
1169
|
+
|
|
1170
|
+# if(save.it & !missing(snpFile)) {
|
|
1171
|
+# t0 <- proc.time()
|
|
1172
|
+# save(res, file=snpFile)
|
|
1173
|
+# t0 <- proc.time()-t0
|
|
1174
|
+# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
|
|
1175
|
+# }
|
|
1176
|
+ return(res)
|
|
1177
|
+}
|
|
1178
|
+
|
|
1179
|
+
|
744
|
1180
|
crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
745
|
1181
|
row.names=TRUE, col.names=TRUE,
|
746
|
1182
|
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
...
|
...
|
@@ -795,11 +1231,104 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
795
|
1231
|
return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
|
796
|
1232
|
}
|
797
|
1233
|
|
|
1234
|
+
|
|
1235
|
+crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
|
|
1236
|
+ row.names=TRUE, col.names=TRUE,
|
|
1237
|
+ probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
|
1238
|
+ seed=1, # save.it=FALSE, load.it=FALSE, snpFile, cnFile,
|
|
1239
|
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
|
1240
|
+ cdfName, sns, recallMin=10, recallRegMin=1000,
|
|
1241
|
+ returnParams=FALSE, badSNP=.7) {
|
|
1242
|
+# if (save.it & (missing(snpFile) | missing(cnFile)))
|
|
1243
|
+# stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
|
|
1244
|
+# if (load.it & missing(snpFile))
|
|
1245
|
+# stop("'snpFile' is missing and you chose to load.it")
|
|
1246
|
+# if (!missing(snpFile))
|
|
1247
|
+# if (load.it & !file.exists(snpFile)){
|
|
1248
|
+# load.it <- FALSE
|
|
1249
|
+# message("File ", snpFile, " does not exist.")
|
|
1250
|
+# stop("Cannot load SNP data.")
|
|
1251
|
+# }
|
|
1252
|
+# if (!load.it){
|
|
1253
|
+ if(!missing(RG)) {
|
|
1254
|
+ if(missing(XY))
|
|
1255
|
+ XY = RGtoXY2(RG, chipType=cdfName)
|
|
1256
|
+ else
|
|
1257
|
+ stop("Both RG and XY specified - please use one or the other")
|
|
1258
|
+ }
|
|
1259
|
+ if (missing(sns)) sns <- sampleNames(XY) #$X
|
|
1260
|
+
|
|
1261
|
+ res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
|
1262
|
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
|
|
1263
|
+ # save.it=save.it, snpFile=snpFile, cnFile=cnFile)
|
|
1264
|
+
|
|
1265
|
+ fD = featureData(XY)
|
|
1266
|
+ phenD = XY@phenoData
|
|
1267
|
+ protD = XY@protocolData
|
|
1268
|
+ rm(XY)
|
|
1269
|
+ gc()
|
|
1270
|
+ if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
|
|
1271
|
+ callSet <- new("SnpSuperSet",
|
|
1272
|
+ alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1273
|
+ alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1274
|
+ call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1275
|
+ callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1276
|
+ annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
|
|
1277
|
+ sampleNames(callSet) <- sns
|
|
1278
|
+ featureNames(callSet) <- res[["gns"]]
|
|
1279
|
+ pData(callSet)$SKW <- rep(NA, length(sns))
|
|
1280
|
+ pData(callSet)$SNR <- rep(NA, length(sns))
|
|
1281
|
+ pData(callSet)$gender <- rep(NA, length(sns))
|
|
1282
|
+
|
|
1283
|
+# }else{
|
|
1284
|
+# if(verbose) message("Loading ", snpFile, ".")
|
|
1285
|
+# obj <- load(snpFile)
|
|
1286
|
+# if(verbose) message("Done.")
|
|
1287
|
+# if(!any(obj == "res"))
|
|
1288
|
+# stop("Object in ", snpFile, " seems to be invalid.")
|
|
1289
|
+# }
|
|
1290
|
+
|
|
1291
|
+ rm(phenD, protD , fD)
|
|
1292
|
+
|
|
1293
|
+ snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
|
|
1294
|
+ suppressWarnings(A(callSet) <- res[["A"]])
|
|
1295
|
+ suppressWarnings(B(callSet) <- res[["B"]])
|
|
1296
|
+ pData(callSet)$SKW <- res$SKW
|
|
1297
|
+ pData(callSet)$SNR <- res$SNR
|
|
1298
|
+ mixtureParams <- res$mixtureParams
|
|
1299
|
+ rm(res); gc()
|
|
1300
|
+ tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
|
|
1301
|
+ B=as.matrix(B(callSet)[snp.index,]), # j]),
|
|
1302
|
+ SNR=callSet$SNR, # [j],
|
|
1303
|
+ mixtureParams=mixtureParams,
|
|
1304
|
+ cdfName=annotation(callSet),
|
|
1305
|
+# row.names=featureNames(callSet)[snp.index],
|
|
1306
|
+# col.names=sampleNames(callSet), #[j],
|
|
1307
|
+ probs=probs,
|
|
1308
|
+ DF=DF,
|
|
1309
|
+ SNRMin=SNRMin,
|
|
1310
|
+ recallMin=recallMin,
|
|
1311
|
+ recallRegMin=recallRegMin,
|
|
1312
|
+ gender=gender,
|
|
1313
|
+ verbose=verbose,
|
|
1314
|
+ returnParams=returnParams,
|
|
1315
|
+ badSNP=badSNP)
|
|
1316
|
+# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
|
|
1317
|
+# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
|
|
1318
|
+# callSet$gender[j] <- tmp$gender
|
|
1319
|
+ suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
|
|
1320
|
+ suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
|
|
1321
|
+ callSet$gender <- tmp$gender
|
|
1322
|
+ rm(tmp); gc()
|
|
1323
|
+ return(callSet)
|
|
1324
|
+}
|
|
1325
|
+
|
|
1326
|
+
|
798
|
1327
|
## MR: Below is a more memory efficient version of crlmmIllumina() which
|
799
|
1328
|
## reads in the .idats and genotypes in the one function and removes objects
|
800
|
1329
|
## after they have been used
|
801
|
1330
|
crlmmIlluminaV2 = function(sampleSheet=NULL,
|
802
|
|
- arrayNames=NULL,
|
|
1331
|
+ arrayNames=NULL,
|
803
|
1332
|
ids=NULL,
|
804
|
1333
|
path=".",
|
805
|
1334
|
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
|
...
|
...
|
@@ -807,53 +1336,129 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
|
807
|
1336
|
sep="_",
|
808
|
1337
|
fileExt=list(green="Grn.idat", red="Red.idat"),
|
809
|
1338
|
saveDate=FALSE,
|
810
|
|
- save.rg=FALSE,
|
811
|
|
- rgFile,
|
|
1339
|
+# save.rg=FALSE,
|
|
1340
|
+# rgFile,
|
812
|
1341
|
stripNorm=TRUE,
|
813
|
1342
|
useTarget=TRUE,
|
814
|
|
- row.names=TRUE,
|
|
1343
|
+ row.names=TRUE,
|
815
|
1344
|
col.names=TRUE,
|
816
|
1345
|
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
817
|
|
- seed=1, save.ab=FALSE, snpFile, cnFile,
|
818
|
|
- mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
819
|
|
- cdfName, sns, recallMin=10, recallRegMin=1000,
|
820
|
|
- returnParams=FALSE, badSNP=.7) {
|
|
1346
|
+ seed=1, # save.ab=FALSE, snpFile, cnFile,
|
|
1347
|
+ mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
|
|
1348
|
+ cdfName, sns, recallMin=10, recallRegMin=1000,
|
|
1349
|
+ returnParams=FALSE, badSNP=.7) {
|
|
1350
|
+
|
|
1351
|
+ if(missing(cdfName)) stop("must specify cdfName")
|
|
1352
|
+ if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
|
|
1353
|
+# if(missing(sns)) sns <- basename(arrayNames)
|
821
|
1354
|
|
822
|
|
- if (save.rg & missing(rgFile))
|
823
|
|
- stop("'rgFile' is missing, and you chose save.rg")
|
824
|
|
- if (save.ab & (missing(snpFile) | missing(cnFile)))
|
825
|
|
- stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
|
826
|
|
-
|
827
|
|
- RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
|
|
1355
|
+# if (save.rg & missing(rgFile))
|
|
1356
|
+# stop("'rgFile' is missing, and you chose save.rg")
|
|
1357
|
+# if (save.ab & (missing(snpFile) | missing(cnFile)))
|
|
1358
|
+# stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
|
|
1359
|
+# batches = NULL
|
|
1360
|
+# if(!is.null(arrayNames))
|
|
1361
|
+# batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
|
|
1362
|
+# if(!is.null(sampleSheet))
|
|
1363
|
+# batches <- rep(1, nrow(sampleSheet))
|
|
1364
|
+# if(is.null(batches))
|
|
1365
|
+# batches=1
|
|
1366
|
+# k <- 1
|
|
1367
|
+# for(j in batches){
|
|
1368
|
+# if(verbose) message("Batch ", k, " of ", length(batches))
|
|
1369
|
+# RG = readIdatFiles(sampleSheet=sampleSheet[j,], arrayNames=arrayNames[j],
|
|
1370
|
+# ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
|
|
1371
|
+# highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
|
|
1372
|
+ RG = readIdatFiles2(sampleSheet=sampleSheet, arrayNames=arrayNames,
|
828
|
1373
|
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
|
829
|
1374
|
highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
|
830
|
|
- if(save.rg)
|
831
|
|
- save(RG, file=rgFile)
|
832
|
1375
|
|
833
|
|
- XY = RGtoXY(RG, chipType=cdfName)
|
834
|
|
- rm(RG)
|
835
|
|
- gc()
|
836
|
|
- if (missing(sns)) sns = sampleNames(XY)
|
837
|
|
-
|
838
|
|
- res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
839
|
|
- seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
|
840
|
|
- save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
|
841
|
|
- rm(XY)
|
842
|
|
- gc()
|
843
|
|
- if(row.names) row.names=res$gns else row.names=NULL
|
844
|
|
- if(col.names) col.names=res$sns else col.names=NULL
|
|
1376
|
+# if(save.rg)
|
|
1377
|
+# save(RG, file=rgFile)
|
845
|
1378
|
|
846
|
|
- res2 = crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
|
847
|
|
- res[["mixtureParams"]], res[["cdfName"]],
|
848
|
|
- gender=gender, row.names=row.names,
|
849
|
|
- col.names=col.names, recallMin=recallMin,
|
850
|
|
- recallRegMin=1000, SNRMin=SNRMin,
|
851
|
|
- returnParams=returnParams, badSNP=badSNP,
|
852
|
|
- verbose=verbose)
|
853
|
|
-
|
854
|
|
- res2[["SNR"]] = res[["SNR"]]
|
855
|
|
- res2[["SKW"]] = res[["SKW"]]
|
856
|
|
- rm(res)
|
857
|
|
- gc()
|
858
|
|
- return(list2SnpSet(res2, returnParams=returnParams))
|
|
1379
|
+ XY = RGtoXY2(RG, chipType=cdfName)
|
|
1380
|
+ rm(RG)
|
|
1381
|
+ gc()
|
|
1382
|
+ if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
|
|
1383
|
+ } else subsns = sns[j]
|
|
1384
|
+ res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
|
1385
|
+ seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) # sns=subsns
|
|
1386
|
+# save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
|
|
1387
|
+ fD = featureData(XY)
|
|
1388
|
+ phenD = XY@phenoData
|
|
1389
|
+ protD = XY@protocolData
|
|
1390
|
+ rm(XY)
|
|
1391
|
+ gc()
|
|
1392
|
+# if(k == 1){
|
|
1393
|
+ if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
|
|
1394
|
+ callSet <- new("SnpSuperSet",
|
|
1395
|
+ alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1396
|
+ alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1397
|
+ call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1398
|
+ callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
|
|
1399
|
+ annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
|
|
1400
|
+ sampleNames(callSet) <- sns
|
|
1401
|
+# phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
|
|
1402
|
+# arrayNames=sns,
|
|
1403
|
+# arrayInfoColNames=arrayInfoColNames)
|
|
1404
|
+# pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns)
|
|
1405
|
+# colnames(pD) <- "ScanDate"
|
|
1406
|
+# protocolData(callSet) <- pData(protD) # new("AnnotatedDataFrame", data=pD)
|
|
1407
|
+# pData(protocolData(callSet))[j, ] <- pData(protocolData)
|
|
1408
|
+ featureNames(callSet) <- res[["gns"]]
|
|
1409
|
+ pData(callSet)$SKW <- rep(NA, length(sns))
|
|
1410
|
+ pData(callSet)$SNR <- rep(NA, length(sns))
|
|
1411
|
+ pData(callSet)$gender <- rep(NA, length(sns))
|
|
1412
|
+# }
|
|
1413
|
+# pData(callSet)[j,] <- phenD
|
|
1414
|
+# pData(protocolData(callSet))[j,] <- protD
|
|
1415
|
+# pData(callSet) <- phenD
|
|
1416
|
+# pData(protocolData(callSet)) <- protD
|
|
1417
|
+
|
|
1418
|
+ rm(phenD, protD, fD)
|
|
1419
|
+
|
|
1420
|
+# if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
|
|
1421
|
+ ##RS: I don't understand why the IDATS for the
|
|
1422
|
+ ##same platform potentially have different lengths
|
|
1423
|
+# res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ]
|
|
1424
|
+# res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
|
|
1425
|
+# }
|
|
1426
|
+
|
|
1427
|
+ snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
|
|
1428
|
+# suppressWarnings(A(callSet)[, j] <- res[["A"]])
|
|
1429
|
+# suppressWarnings(B(callSet)[, j] <- res[["B"]])
|
|
1430
|
+ suppressWarnings(A(callSet) <- res[["A"]])
|
|
1431
|
+ suppressWarnings(B(callSet) <- res[["B"]])
|
|
1432
|
+# pData(callSet)$SKW[j] <- res$SKW
|
|
1433
|
+# pData(callSet)$SNR[j] <- res$SNR
|
|
1434
|
+ pData(callSet)$SKW <- res$SKW
|
|
1435
|
+ pData(callSet)$SNR <- res$SNR
|
|
1436
|
+ mixtureParams <- res$mixtureParams
|
|
1437
|
+ rm(res); gc()
|
|
1438
|
+ tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
|
|
1439
|
+ B=as.matrix(B(callSet)[snp.index,]), # j]),
|
|
1440
|
+ SNR=callSet$SNR, # [j],
|
|
1441
|
+ mixtureParams=mixtureParams,
|
|
1442
|
+ cdfName=annotation(callSet),
|
|
1443
|
+ row.names=featureNames(callSet)[snp.index],
|
|
1444
|
+ col.names=sampleNames(callSet), #[j],
|
|
1445
|
+ probs=probs,
|
|
1446
|
+ DF=DF,
|
|
1447
|
+ SNRMin=SNRMin,
|
|
1448
|
+ recallMin=recallMin,
|
|
1449
|
+ recallRegMin=recallRegMin,
|
|
1450
|
+ gender=gender,
|
|
1451
|
+ verbose=verbose,
|
|
1452
|
+ returnParams=returnParams,
|
|
1453
|
+ badSNP=badSNP)
|
|
1454
|
+# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
|
|
1455
|
+# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
|
|
1456
|
+# callSet$gender[j] <- tmp$gender
|
|
1457
|
+ suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
|
|
1458
|
+ suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
|
|
1459
|
+ callSet$gender <- tmp$gender
|
|
1460
|
+ rm(tmp); gc()
|
|
1461
|
+# k <- k+1
|
|
1462
|
+# }
|
|
1463
|
+ return(callSet)
|
859
|
1464
|
}
|