Browse code

Bug fixes to illumina genotype calling when using cdfName="nopackage" option

mritchie authored on 19/04/2018 07:17:14
Showing3 changed files

... ...
@@ -2,7 +2,7 @@ Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for
4 4
         Affymetrix SNP 5.0 and 6.0 and Illumina arrays
5
-Version: 1.37.0
5
+Version: 1.37.1
6 6
 Author: Benilton S Carvalho, Robert Scharpf, Matt Ritchie, Ingo
7 7
         Ruczinski, Rafael A Irizarry
8 8
 Maintainer: Benilton S Carvalho <benilton@unicamp.br>,
... ...
@@ -324,186 +324,139 @@ readGenCallOutput = function(filenames, path=".", cdfName,
324 324
 
325 325
 
326 326
 
327
-RGtoXY = function(RG, chipType, anno, verbose=TRUE) {
328
-  if(chipType!="nopackage") {
329
-  needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded))
330
-  if(needToLoad){
331
-	  chipList = c("human1mv1c",# 1M
332
-	  "human370v1c",            # 370CNV
333
-	  "human650v3a",            # 650Y
334
-	  "human610quadv1b",        # 610 quad
335
-	  "human660quadv1a",        # 660 quad
336
-	  "human370quadv3c",        # 370CNV quad
337
-	  "human550v3b",            # 550K
338
-	  "human1mduov3b",          # 1M Duo
339
-	  "humanomni1quadv1b",      # Omni1 quad
340
-	  "humanomni25quadv1b",     # Omni2.5 quad
341
-	  "humanomni258v1a",        # Omni2.5 8 v1 A
342
-          "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
343
-          "humanomni5quadv1b",      # Omni5 quad  
344
-	  "humanomniexpress12v1b",  # Omni express 12
345
-	  "humanimmuno12v1b",       # Immuno chip 12
346
-          "humancytosnp12v2p1h",    # CytoSNP 12
347
-          "humanexome12v1p2a",      # Exome 12 v1.2 A
348
-          "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
349
-	  ## RS: added cleancdfname()
350
-	  if(missing(chipType)){
351
-		  chipType = match.arg(cleancdfname(annotation(RG)), chipList)
352
-	  } else chipType = match.arg(cleancdfname(chipType), chipList)
353
-	  pkgname = getCrlmmAnnotationName(chipType)
354
-	  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
355
-		  suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
356
-		  msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
357
-		  message(strwrap(msg))
358
-		  stop("Package ", pkgname, " could not be found.")
359
-		  rm(suggCall, msg)
360
-	  }
361
-	  if(verbose) message("Loading chip annotation information.")
362
-	  loader("address.rda", .crlmmPkgEnv, pkgname)
363
-  }
364
-  aids = getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
365
-  bids = getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
366
-  snpbase = getVarInEnv("base")
367
-##  loader(‘file.rda’)
368
-##  x = getVarInEnv(‘x’)
369
-##  y = getVarInEnv(‘y’)
370
-##
371
-##  I’d consider using something like:
372
-##
373
-##	  needToLoad = !all(sapply(c(‘x’, ‘y’), isLoaded))
374
-##  if (needToLoad){
375
-##	  loader(‘file.rda’)
376
-##	  x = getVarInEnv(‘x’)
377
-##	  y = getVarInEnv(‘y’)
378
-##  }
379
-  ids = names(aids)
380
-  nsnps = length(aids)
381
-  narrays = ncol(RG)
382
-#  aidcol = match("AddressA_ID", colnames(annot))
383
-#  if(is.na(aidcol))
384
-#    aidcol = match("Address", colnames(annot))
385
-#  if(is.na(bidcol))
386
-#    bidcol = match("Address2", colnames(annot))
387
-#  aids = annot[, aidcol]
388
-#  bids = annot[, bidcol]
389
-#  snpids = annot[,"Name"]
390
-#  snpbase = annot[,"SNP"]
391
-  infI = !is.na(bids) & bids!=0
392
-  aord = match(aids, featureNames(RG)) # NAs are possible here
393
-  bord = match(bids, featureNames(RG)) # and here
394
-#  argrg = aids[rrgg]
395
-#  brgrg = bids[rrgg]
396
-  xyPhenoData = AnnotatedDataFrame(data=RG@phenoData@data,varMetadata=RG@phenoData@varMetadata)
397
-  levels(xyPhenoData@varMetadata$channel) = c("X","Y","zero","_ALL_")
398
-  XY <- new("NChannelSet",
399
-        assayDataNew(X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
400
-                     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
401
-                     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")),
402
-        phenoData=xyPhenoData, protocolData=RG@protocolData, annotation=chipType)
403
-   storageMode(XY) = "environment"
404
-#  XY <- new("NChannelSet",
405
-#	    X=matrix(0, nsnps, narrays),
406
-#	    Y=matrix(0, nsnps, narrays),
407
-#	    zero=matrix(0, nsnps, narrays),
408
-#	    annotation=chipType, phenoData=RG@phenoData,
409
-#	    protocolData=RG@protocolData, storage.mode="environment")
410
-  featureNames(XY) = ids
411
-  sampleNames(XY) = sampleNames(RG)
412
-  ## RS
413
-  rm(list=c("bord", "infI", "aids", "bids", "ids", "snpbase"))
414
-#  print(gc())
415
-  gc(verbose=FALSE)
416
-  # Need to initialize - matrices filled with NAs to begin with
417
-#  XY@assayData$X[1:nsnps,] = 0
418
-#  XY@assayData$Y[1:nsnps,] = 0
419
-#  XY@assayData$zero[1:nsnps,] = 0
420
-
421
-  # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
422
-  ## RS added
423
-  not.na <- !is.na(aord)
424
-  not.na.aord <- aord[not.na]
425
-  ## RS substitution  !is.na(aord) -> not.na
426
-  ##r <- as.matrix(exprs(channel(RG, "R"))[not.na.aord,]) # mostly red
427
-  r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
428
-  XY@assayData$X[not.na,] <- r
429
-  rm(r);gc(verbose=FALSE)
430
-  g <- as.matrix(assayData(RG)[["G"]][not.na.aord,]) # mostly green
431
-  XY@assayData$Y[not.na,] <- g
432
-  rm(g); gc(verbose=FALSE)
433
-  ##z <- as.matrix(exprs(channel(RG, "zero"))[not.na.aord,]) # mostly green
434
-  z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
435
-  XY@assayData$zero[not.na,] <- z
436
-  rm(z); gc(verbose=FALSE)
437
-  ##RS added
438
-  rm(RG)
439
-#  print(gc())
440
-  gc(verbose=FALSE)
441
-  ## Warning - not 100% sure that the code below is correct - could be more complicated than this
442
-#  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
443
-
444
-#  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
445
-#  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
446
-
447
-  # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
448
-#  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
449
-
450
-#  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
451
-#  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
327
+RGtoXY = function (RG, chipType, anno, verbose = TRUE) 
328
+{
329
+    if (chipType != "nopackage") {
330
+        needToLoad <- !all(sapply(c("addressA", "addressB", "base"), 
331
+            isLoaded))
332
+        if (needToLoad) {
333
+            chipList = c("human1mv1c", "human370v1c", "human650v3a", 
334
+                "human610quadv1b", "human660quadv1a", "human370quadv3c", 
335
+                "human550v3b", "human1mduov3b", "humanomni1quadv1b", 
336
+                "humanomni25quadv1b", "humanomni258v1a", "humanomni258v1p1b", 
337
+                "humanomni5quadv1b", "humanomniexpress12v1b", 
338
+                "humanimmuno12v1b", "humancytosnp12v2p1h", "humanexome12v1p2a", 
339
+                "humanomniexpexome8v1p1b")
340
+            if (missing(chipType)) {
341
+                chipType = match.arg(cleancdfname(annotation(RG)), 
342
+                  chipList)
343
+            }
344
+            else chipType = match.arg(cleancdfname(chipType), 
345
+                chipList)
346
+            pkgname = getCrlmmAnnotationName(chipType)
347
+            if (!require(pkgname, character.only = TRUE, quietly = !verbose)) {
348
+                suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", 
349
+                  sep = "")
350
+                msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", 
351
+                  suggCall)
352
+                message(strwrap(msg))
353
+                stop("Package ", pkgname, " could not be found.")
354
+                rm(suggCall, msg)
355
+            }
356
+            if (verbose) 
357
+                message("Loading chip annotation information.")
358
+            loader("address.rda", .crlmmPkgEnv, pkgname)
359
+        }
360
+        aids = getVarInEnv("addressA")
361
+        bids = getVarInEnv("addressB")
362
+        snpbase = getVarInEnv("base")
363
+        ids = names(aids)
364
+        nsnps = length(aids)
365
+        narrays = ncol(RG)
366
+        infI = !is.na(bids) & bids != 0
367
+        aord = match(aids, featureNames(RG))
368
+        bord = match(bids, featureNames(RG))
369
+        xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
370
+            varMetadata = RG@phenoData@varMetadata)
371
+        levels(xyPhenoData@varMetadata$channel) = c("X", "Y", 
372
+            "zero", "_ALL_")
373
+        XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", 
374
+            nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", 
375
+            nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", 
376
+            nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, 
377
+            protocolData = RG@protocolData, annotation = chipType)
378
+        storageMode(XY) = "environment"
379
+        featureNames(XY) = ids
380
+        sampleNames(XY) = sampleNames(RG)
381
+        rm(list = c("bord", "infI", "aids", "bids", "ids", "snpbase"))
382
+        gc(verbose = FALSE)
383
+        not.na <- !is.na(aord)
384
+        not.na.aord <- aord[not.na]
385
+        r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
386
+        XY@assayData$X[not.na, ] <- r
387
+        rm(r)
388
+        gc(verbose = FALSE)
389
+        g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
390
+        XY@assayData$Y[not.na, ] <- g
391
+        rm(g)
392
+        gc(verbose = FALSE)
393
+        z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
394
+        XY@assayData$zero[not.na, ] <- z
395
+        rm(z)
396
+        gc(verbose = FALSE)
397
+        rm(RG)
398
+        gc(verbose = FALSE)
399
+    }
400
+    if (chipType == "nopackage" && !is.null(anno)) {
401
+        pkgname = NULL
402
+        if(sum(colnames(anno@data)=="AddressA_ID")!=1){
403
+           
404
+           message("annotation format is wrong, could not find a column with name: AddressA_ID")
405
+         }else{
406
+         anno2=anno@data
407
+           # chr = as.character(anno$Chr)
408
+  #chr[chr=="X"] = 23
409
+  #  chr[chr=="Y"] = 24
410
+  #  chr[chr=="XY"] = 25
411
+#chr[chr=="MT"] = 26
412
+#anno[,"chromosome"]=as.integer(chr)
452 413
 
453
-  #  For now zero out Infinium I probes
454
-#  XY@assayData$X[infI,] = 0
455
-#  XY@assayData$Y[infI,] = 0
456
-#  XY@assayData$zero[infI,] = 0
457
-#  gc(verbose=FALSE)
458
-  }
459
-  if(chipType=="nopackage" && !is.null(anno)) {
460
-            pkgname=NULL
461
-#            anno=read.csv(annot,header=TRUE, as.is=TRUE, check.names=FALSE,skip=7)
462
-            aids=anno[,"AddressA_ID"]
463
-            bids=anno[,"AddressB_ID"]
464
-            snpbase=anno[,"SNP"]
465
-            names(aids)=ids=anno[,"Name"]
466
-#            m=match(aids,rownames(RG@assayData$R))
467
-#            ARG=RG[m[!is.na(m)],]
468
-#            mm=match(rownames(ARG@assayData$R),aids)
469
-#            aids=aids[mm]
470
-#            bids=bids[mm]
471
-#            snpbas=snpbase[mm]
472
-            nsnps = length(aids)
473
-            narrays = ncol(RG)
474
-            infI = !is.na(bids) & bids != 0
475
-            aord = match(aids, featureNames(RG))
476
-            bord = match(bids, featureNames(RG))
477
-            xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
478
-                                             varMetadata = RG@phenoData@varMetadata)
479
-            levels(xyPhenoData@varMetadata$channel) = c("X", "Y", "zero", "_ALL_")
480
-            XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
481
-                                                  Y = initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
482
-                                                  zero = initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")),
483
-                      phenoData = xyPhenoData, protocolData = RG@protocolData)
484
-            storageMode(XY) = "environment"
485
-            featureNames(XY) = names(aids)
486
-            sampleNames(XY) = sampleNames(RG)
487
-            rm(list = c("bord", "infI", "aids", "bids", "snpbase"))
488
-            gc(verbose = FALSE)
489
-            not.na <- !is.na(aord)
490
-            not.na.aord <- aord[not.na]
491
-            r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
492
-            XY@assayData$X[not.na, ] <- r
493
-            rm(r)
494
-            gc(verbose = FALSE)
495
-            g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
496
-            XY@assayData$Y[not.na, ] <- g
497
-            rm(g)
498
-            gc(verbose = FALSE)
499
-            z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
500
-            XY@assayData$zero[not.na, ] <- z
501
-            rm(z)
502
-            gc(verbose = FALSE)
503
-            rm(RG)
504
-            gc(verbose = FALSE)
505
-   }
506
-   XY
414
+       aids = anno2[, "AddressA_ID"]
415
+        bids = anno2[, "AddressB_ID"]
416
+        snpbase = anno2[, "SNP"]
417
+        names(aids) = ids = anno2[, "featureNames"]
418
+        nsnps = length(aids)
419
+        narrays = ncol(RG)
420
+        infI = !is.na(bids) & bids != 0
421
+        aord = match(aids, featureNames(RG))
422
+        bord = match(bids,featureNames(RG))
423
+        xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
424
+            varMetadata = RG@phenoData@varMetadata)
425
+        levels(xyPhenoData@varMetadata$channel) = c("X", "Y", 
426
+            "zero", "_ALL_")
427
+        XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", 
428
+            nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", 
429
+            nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", 
430
+            nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, 
431
+            protocolData = RG@protocolData)
432
+        storageMode(XY) = "environment"
433
+       # featureNames(XY) = names(aids)
434
+        sampleNames(XY) = sampleNames(RG)
435
+        rm(list = c("bord", "infI", "aids", "bids", "snpbase"))
436
+        gc(verbose = FALSE)
437
+        not.na <- !is.na(aord)
438
+        not.na.aord <- aord[not.na]
439
+        r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
440
+        XY@assayData$X[not.na, ] <- r
441
+        rm(r)
442
+        gc(verbose = FALSE)
443
+        g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
444
+        XY@assayData$Y[not.na, ] <- g
445
+        rm(g)
446
+        gc(verbose = FALSE)
447
+        z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
448
+        XY@assayData$zero[not.na, ] <- z
449
+        rm(z)
450
+        gc(verbose = FALSE)
451
+        rm(RG)
452
+        gc(verbose = FALSE)
453
+   
454
+    
455
+    
456
+    
457
+         }
458
+      }
459
+           XY 
507 460
 }
508 461
 
509 462
 
... ...
@@ -1155,7 +1108,10 @@ constructInf <- function(sampleSheet=NULL,
1155 1108
             genome <- getAvailableIlluminaGenomeBuild(path)
1156 1109
             featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
1157 1110
           } else {
1158
-            featureData=anno
1111
+                      anno = as(anno, "AnnotatedDataFrame")
1112
+ anno = as(anno, "GenomeAnnotatedDataFrame")
1113
+featureData=anno
1114
+           
1159 1115
           }
1160 1116
           nr <- nrow(featureData)
1161 1117
           nc <- narrays
... ...
@@ -144,7 +144,11 @@ priormeans=prepriormeans
144 144
     XIndex = seq(1:nrow(cnSet))[chromosome(cnSet)==23 & isSnp(cnSet)]
145 145
 ### impute gender if gender information not provided
146 146
     if (is.null(gender)) {
147
-       gender = krlmmImputeGender(cnSet, annotation, YIndex, verbose, offset=offset)
147
+       if(sum(is.na(YIndex))==length(YIndex)){
148
+		gender=NULL
149
+       }else{
150
+	gender = krlmmImputeGender(cnSet, annotation, YIndex, verbose, offset=offset)
151
+       }
148 152
     }
149 153
 
150 154
 ### double-check ChrY ChrX SNP cluster, impute gender if gender information not provided