Browse code

Bug fixes to illumina genotype calling when using cdfName="nopackage" option

mritchie authored on 19/04/2018 07:17:14
Showing1 changed files
... ...
@@ -324,186 +324,139 @@ readGenCallOutput = function(filenames, path=".", cdfName,
324 324
 
325 325
 
326 326
 
327
-RGtoXY = function(RG, chipType, anno, verbose=TRUE) {
328
-  if(chipType!="nopackage") {
329
-  needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded))
330
-  if(needToLoad){
331
-	  chipList = c("human1mv1c",# 1M
332
-	  "human370v1c",            # 370CNV
333
-	  "human650v3a",            # 650Y
334
-	  "human610quadv1b",        # 610 quad
335
-	  "human660quadv1a",        # 660 quad
336
-	  "human370quadv3c",        # 370CNV quad
337
-	  "human550v3b",            # 550K
338
-	  "human1mduov3b",          # 1M Duo
339
-	  "humanomni1quadv1b",      # Omni1 quad
340
-	  "humanomni25quadv1b",     # Omni2.5 quad
341
-	  "humanomni258v1a",        # Omni2.5 8 v1 A
342
-          "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
343
-          "humanomni5quadv1b",      # Omni5 quad  
344
-	  "humanomniexpress12v1b",  # Omni express 12
345
-	  "humanimmuno12v1b",       # Immuno chip 12
346
-          "humancytosnp12v2p1h",    # CytoSNP 12
347
-          "humanexome12v1p2a",      # Exome 12 v1.2 A
348
-          "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
349
-	  ## RS: added cleancdfname()
350
-	  if(missing(chipType)){
351
-		  chipType = match.arg(cleancdfname(annotation(RG)), chipList)
352
-	  } else chipType = match.arg(cleancdfname(chipType), chipList)
353
-	  pkgname = getCrlmmAnnotationName(chipType)
354
-	  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
355
-		  suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
356
-		  msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
357
-		  message(strwrap(msg))
358
-		  stop("Package ", pkgname, " could not be found.")
359
-		  rm(suggCall, msg)
360
-	  }
361
-	  if(verbose) message("Loading chip annotation information.")
362
-	  loader("address.rda", .crlmmPkgEnv, pkgname)
363
-  }
364
-  aids = getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
365
-  bids = getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
366
-  snpbase = getVarInEnv("base")
367
-##  loader(‘file.rda’)
368
-##  x = getVarInEnv(‘x’)
369
-##  y = getVarInEnv(‘y’)
370
-##
371
-##  I’d consider using something like:
372
-##
373
-##	  needToLoad = !all(sapply(c(‘x’, ‘y’), isLoaded))
374
-##  if (needToLoad){
375
-##	  loader(‘file.rda’)
376
-##	  x = getVarInEnv(‘x’)
377
-##	  y = getVarInEnv(‘y’)
378
-##  }
379
-  ids = names(aids)
380
-  nsnps = length(aids)
381
-  narrays = ncol(RG)
382
-#  aidcol = match("AddressA_ID", colnames(annot))
383
-#  if(is.na(aidcol))
384
-#    aidcol = match("Address", colnames(annot))
385
-#  if(is.na(bidcol))
386
-#    bidcol = match("Address2", colnames(annot))
387
-#  aids = annot[, aidcol]
388
-#  bids = annot[, bidcol]
389
-#  snpids = annot[,"Name"]
390
-#  snpbase = annot[,"SNP"]
391
-  infI = !is.na(bids) & bids!=0
392
-  aord = match(aids, featureNames(RG)) # NAs are possible here
393
-  bord = match(bids, featureNames(RG)) # and here
394
-#  argrg = aids[rrgg]
395
-#  brgrg = bids[rrgg]
396
-  xyPhenoData = AnnotatedDataFrame(data=RG@phenoData@data,varMetadata=RG@phenoData@varMetadata)
397
-  levels(xyPhenoData@varMetadata$channel) = c("X","Y","zero","_ALL_")
398
-  XY <- new("NChannelSet",
399
-        assayDataNew(X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
400
-                     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
401
-                     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")),
402
-        phenoData=xyPhenoData, protocolData=RG@protocolData, annotation=chipType)
403
-   storageMode(XY) = "environment"
404
-#  XY <- new("NChannelSet",
405
-#	    X=matrix(0, nsnps, narrays),
406
-#	    Y=matrix(0, nsnps, narrays),
407
-#	    zero=matrix(0, nsnps, narrays),
408
-#	    annotation=chipType, phenoData=RG@phenoData,
409
-#	    protocolData=RG@protocolData, storage.mode="environment")
410
-  featureNames(XY) = ids
411
-  sampleNames(XY) = sampleNames(RG)
412
-  ## RS
413
-  rm(list=c("bord", "infI", "aids", "bids", "ids", "snpbase"))
414
-#  print(gc())
415
-  gc(verbose=FALSE)
416
-  # Need to initialize - matrices filled with NAs to begin with
417
-#  XY@assayData$X[1:nsnps,] = 0
418
-#  XY@assayData$Y[1:nsnps,] = 0
419
-#  XY@assayData$zero[1:nsnps,] = 0
420
-
421
-  # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
422
-  ## RS added
423
-  not.na <- !is.na(aord)
424
-  not.na.aord <- aord[not.na]
425
-  ## RS substitution  !is.na(aord) -> not.na
426
-  ##r <- as.matrix(exprs(channel(RG, "R"))[not.na.aord,]) # mostly red
427
-  r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
428
-  XY@assayData$X[not.na,] <- r
429
-  rm(r);gc(verbose=FALSE)
430
-  g <- as.matrix(assayData(RG)[["G"]][not.na.aord,]) # mostly green
431
-  XY@assayData$Y[not.na,] <- g
432
-  rm(g); gc(verbose=FALSE)
433
-  ##z <- as.matrix(exprs(channel(RG, "zero"))[not.na.aord,]) # mostly green
434
-  z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
435
-  XY@assayData$zero[not.na,] <- z
436
-  rm(z); gc(verbose=FALSE)
437
-  ##RS added
438
-  rm(RG)
439
-#  print(gc())
440
-  gc(verbose=FALSE)
441
-  ## Warning - not 100% sure that the code below is correct - could be more complicated than this
442
-#  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
443
-
444
-#  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
445
-#  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
446
-
447
-  # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
448
-#  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
449
-
450
-#  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
451
-#  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
327
+RGtoXY = function (RG, chipType, anno, verbose = TRUE) 
328
+{
329
+    if (chipType != "nopackage") {
330
+        needToLoad <- !all(sapply(c("addressA", "addressB", "base"), 
331
+            isLoaded))
332
+        if (needToLoad) {
333
+            chipList = c("human1mv1c", "human370v1c", "human650v3a", 
334
+                "human610quadv1b", "human660quadv1a", "human370quadv3c", 
335
+                "human550v3b", "human1mduov3b", "humanomni1quadv1b", 
336
+                "humanomni25quadv1b", "humanomni258v1a", "humanomni258v1p1b", 
337
+                "humanomni5quadv1b", "humanomniexpress12v1b", 
338
+                "humanimmuno12v1b", "humancytosnp12v2p1h", "humanexome12v1p2a", 
339
+                "humanomniexpexome8v1p1b")
340
+            if (missing(chipType)) {
341
+                chipType = match.arg(cleancdfname(annotation(RG)), 
342
+                  chipList)
343
+            }
344
+            else chipType = match.arg(cleancdfname(chipType), 
345
+                chipList)
346
+            pkgname = getCrlmmAnnotationName(chipType)
347
+            if (!require(pkgname, character.only = TRUE, quietly = !verbose)) {
348
+                suggCall = paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", 
349
+                  sep = "")
350
+                msg = paste("If", pkgname, "is installed on an alternative location, please load it manually by using", 
351
+                  suggCall)
352
+                message(strwrap(msg))
353
+                stop("Package ", pkgname, " could not be found.")
354
+                rm(suggCall, msg)
355
+            }
356
+            if (verbose) 
357
+                message("Loading chip annotation information.")
358
+            loader("address.rda", .crlmmPkgEnv, pkgname)
359
+        }
360
+        aids = getVarInEnv("addressA")
361
+        bids = getVarInEnv("addressB")
362
+        snpbase = getVarInEnv("base")
363
+        ids = names(aids)
364
+        nsnps = length(aids)
365
+        narrays = ncol(RG)
366
+        infI = !is.na(bids) & bids != 0
367
+        aord = match(aids, featureNames(RG))
368
+        bord = match(bids, featureNames(RG))
369
+        xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
370
+            varMetadata = RG@phenoData@varMetadata)
371
+        levels(xyPhenoData@varMetadata$channel) = c("X", "Y", 
372
+            "zero", "_ALL_")
373
+        XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", 
374
+            nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", 
375
+            nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", 
376
+            nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, 
377
+            protocolData = RG@protocolData, annotation = chipType)
378
+        storageMode(XY) = "environment"
379
+        featureNames(XY) = ids
380
+        sampleNames(XY) = sampleNames(RG)
381
+        rm(list = c("bord", "infI", "aids", "bids", "ids", "snpbase"))
382
+        gc(verbose = FALSE)
383
+        not.na <- !is.na(aord)
384
+        not.na.aord <- aord[not.na]
385
+        r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
386
+        XY@assayData$X[not.na, ] <- r
387
+        rm(r)
388
+        gc(verbose = FALSE)
389
+        g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
390
+        XY@assayData$Y[not.na, ] <- g
391
+        rm(g)
392
+        gc(verbose = FALSE)
393
+        z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
394
+        XY@assayData$zero[not.na, ] <- z
395
+        rm(z)
396
+        gc(verbose = FALSE)
397
+        rm(RG)
398
+        gc(verbose = FALSE)
399
+    }
400
+    if (chipType == "nopackage" && !is.null(anno)) {
401
+        pkgname = NULL
402
+        if(sum(colnames(anno@data)=="AddressA_ID")!=1){
403
+           
404
+           message("annotation format is wrong, could not find a column with name: AddressA_ID")
405
+         }else{
406
+         anno2=anno@data
407
+           # chr = as.character(anno$Chr)
408
+  #chr[chr=="X"] = 23
409
+  #  chr[chr=="Y"] = 24
410
+  #  chr[chr=="XY"] = 25
411
+#chr[chr=="MT"] = 26
412
+#anno[,"chromosome"]=as.integer(chr)
452 413
 
453
-  #  For now zero out Infinium I probes
454
-#  XY@assayData$X[infI,] = 0
455
-#  XY@assayData$Y[infI,] = 0
456
-#  XY@assayData$zero[infI,] = 0
457
-#  gc(verbose=FALSE)
458
-  }
459
-  if(chipType=="nopackage" && !is.null(anno)) {
460
-            pkgname=NULL
461
-#            anno=read.csv(annot,header=TRUE, as.is=TRUE, check.names=FALSE,skip=7)
462
-            aids=anno[,"AddressA_ID"]
463
-            bids=anno[,"AddressB_ID"]
464
-            snpbase=anno[,"SNP"]
465
-            names(aids)=ids=anno[,"Name"]
466
-#            m=match(aids,rownames(RG@assayData$R))
467
-#            ARG=RG[m[!is.na(m)],]
468
-#            mm=match(rownames(ARG@assayData$R),aids)
469
-#            aids=aids[mm]
470
-#            bids=bids[mm]
471
-#            snpbas=snpbase[mm]
472
-            nsnps = length(aids)
473
-            narrays = ncol(RG)
474
-            infI = !is.na(bids) & bids != 0
475
-            aord = match(aids, featureNames(RG))
476
-            bord = match(bids, featureNames(RG))
477
-            xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
478
-                                             varMetadata = RG@phenoData@varMetadata)
479
-            levels(xyPhenoData@varMetadata$channel) = c("X", "Y", "zero", "_ALL_")
480
-            XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
481
-                                                  Y = initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
482
-                                                  zero = initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")),
483
-                      phenoData = xyPhenoData, protocolData = RG@protocolData)
484
-            storageMode(XY) = "environment"
485
-            featureNames(XY) = names(aids)
486
-            sampleNames(XY) = sampleNames(RG)
487
-            rm(list = c("bord", "infI", "aids", "bids", "snpbase"))
488
-            gc(verbose = FALSE)
489
-            not.na <- !is.na(aord)
490
-            not.na.aord <- aord[not.na]
491
-            r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
492
-            XY@assayData$X[not.na, ] <- r
493
-            rm(r)
494
-            gc(verbose = FALSE)
495
-            g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
496
-            XY@assayData$Y[not.na, ] <- g
497
-            rm(g)
498
-            gc(verbose = FALSE)
499
-            z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
500
-            XY@assayData$zero[not.na, ] <- z
501
-            rm(z)
502
-            gc(verbose = FALSE)
503
-            rm(RG)
504
-            gc(verbose = FALSE)
505
-   }
506
-   XY
414
+       aids = anno2[, "AddressA_ID"]
415
+        bids = anno2[, "AddressB_ID"]
416
+        snpbase = anno2[, "SNP"]
417
+        names(aids) = ids = anno2[, "featureNames"]
418
+        nsnps = length(aids)
419
+        narrays = ncol(RG)
420
+        infI = !is.na(bids) & bids != 0
421
+        aord = match(aids, featureNames(RG))
422
+        bord = match(bids,featureNames(RG))
423
+        xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
424
+            varMetadata = RG@phenoData@varMetadata)
425
+        levels(xyPhenoData@varMetadata$channel) = c("X", "Y", 
426
+            "zero", "_ALL_")
427
+        XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name = "X", 
428
+            nr = nsnps, nc = narrays, vmode = "integer"), Y = initializeBigMatrix(name = "Y", 
429
+            nr = nsnps, nc = narrays, vmode = "integer"), zero = initializeBigMatrix(name = "zero", 
430
+            nr = nsnps, nc = narrays, vmode = "integer")), phenoData = xyPhenoData, 
431
+            protocolData = RG@protocolData)
432
+        storageMode(XY) = "environment"
433
+       # featureNames(XY) = names(aids)
434
+        sampleNames(XY) = sampleNames(RG)
435
+        rm(list = c("bord", "infI", "aids", "bids", "snpbase"))
436
+        gc(verbose = FALSE)
437
+        not.na <- !is.na(aord)
438
+        not.na.aord <- aord[not.na]
439
+        r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
440
+        XY@assayData$X[not.na, ] <- r
441
+        rm(r)
442
+        gc(verbose = FALSE)
443
+        g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
444
+        XY@assayData$Y[not.na, ] <- g
445
+        rm(g)
446
+        gc(verbose = FALSE)
447
+        z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
448
+        XY@assayData$zero[not.na, ] <- z
449
+        rm(z)
450
+        gc(verbose = FALSE)
451
+        rm(RG)
452
+        gc(verbose = FALSE)
453
+   
454
+    
455
+    
456
+    
457
+         }
458
+      }
459
+           XY 
507 460
 }
508 461
 
509 462
 
... ...
@@ -1155,7 +1108,10 @@ constructInf <- function(sampleSheet=NULL,
1155 1108
             genome <- getAvailableIlluminaGenomeBuild(path)
1156 1109
             featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
1157 1110
           } else {
1158
-            featureData=anno
1111
+                      anno = as(anno, "AnnotatedDataFrame")
1112
+ anno = as(anno, "GenomeAnnotatedDataFrame")
1113
+featureData=anno
1114
+           
1159 1115
           }
1160 1116
           nr <- nrow(featureData)
1161 1117
           nc <- narrays
Browse code

Added nopackage option for krlmm

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@117087 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 03/05/2016 23:39:54
Showing1 changed files
... ...
@@ -12,7 +12,7 @@ readIdatFiles = function(sampleSheet=NULL,
12 12
 			  sep="_",
13 13
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
14 14
 			  saveDate=FALSE, verbose=FALSE) {
15
-	verbose <- FALSE
15
+       verbose <- FALSE
16 16
        if(!is.null(arrayNames)) {
17 17
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
18 18
        }
... ...
@@ -57,7 +57,7 @@ readIdatFiles = function(sampleSheet=NULL,
57 57
 	       grnidats = file.path(path, grnfiles)
58 58
 	       redidats = file.path(path, redfiles)
59 59
        }  else {
60
-	       message("path arg not set.  Assuming files are in local directory, or that complete path is provided")
60
+	       message("'path' arg not set.  Assuming files are in local directory, or that complete path is provided in 'arrayNames'")
61 61
 	       grnidats = grnfiles
62 62
 	       redidats = redfiles
63 63
        }
... ...
@@ -102,13 +102,23 @@ readIdatFiles = function(sampleSheet=NULL,
102 102
 		       } # else stop("Could not find probe IDs")
103 103
 		       nprobes = length(ids)
104 104
 		       narrays = length(arrayNames)
105
-		       RG = new("NChannelSet",
106
-		                 R=matrix(0, nprobes, narrays),
107
-		                 G=matrix(0, nprobes, narrays),
108
-		                 zero=matrix(1, nprobes, narrays),
105
+#                       if(cdfName=="nopackage") {
106
+    	               RG = new("NChannelSet",
107
+                                 R = initializeBigMatrix(name = "R", nr = nprobes, nc = narrays, vmode = "integer"),
108
+                                 G = initializeBigMatrix(name = "G", nr = nprobes, nc = narrays, vmode = "integer"),
109
+		                 zero = initializeBigMatrix(name = "zero", nr = nprobes, nc = narrays, vmode = "integer"),
109 110
 				 annotation=headerInfo$Manifest[1],
110 111
 				 phenoData=pd, storage.mode="environment")
112
+#                       } else {
113
+#    	               RG = new("NChannelSet",
114
+#                                 R = matrix(0, nprobes, narrays),
115
+#                                 G = matrix(0, nprobes, narrays),
116
+#		                 zero = matrix(1, nprobes, narrays),
117
+#				 annotation=headerInfo$Manifest[1],
118
+#				 phenoData=pd, storage.mode="environment")                           
119
+#                       }
111 120
 		       featureNames(RG) = ids
121
+                       
112 122
 		       if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
113 123
 		            sampleNames(RG) = make.unique(sampleSheet$Sample_ID)
114 124
 		       } else  sampleNames(RG) = make.unique(basename(arrayNames))
... ...
@@ -289,6 +299,9 @@ readGenCallOutput = function(filenames, path=".", cdfName,
289 299
 	    # matching on SNP names? now assume they come in order
290 300
 	}
291 301
 	maxIndex = baseIndex + sampleCounts[i] - 1
302
+        m=match(totSNPNames, rownames(valuesThisFile$Xvalues))
303
+        valuesThisFile$Xvalues=valuesThisFile$Xvalues[m,]
304
+        valuesThisFile$Yvalues=valuesThisFile$Yvalues[m,]
292 305
 	X[, baseIndex:maxIndex] = valuesThisFile$Xvalues
293 306
 	Y[, baseIndex:maxIndex] = valuesThisFile$Yvalues
294 307
         zero[, baseIndex:maxIndex] = (X[, baseIndex:maxIndex] == 0) || (Y[, baseIndex:maxIndex] == 0)
... ...
@@ -311,7 +324,8 @@ readGenCallOutput = function(filenames, path=".", cdfName,
311 324
 
312 325
 
313 326
 
314
-RGtoXY = function(RG, chipType, verbose=TRUE) {
327
+RGtoXY = function(RG, chipType, anno, verbose=TRUE) {
328
+  if(chipType!="nopackage") {
315 329
   needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded))
316 330
   if(needToLoad){
317 331
 	  chipList = c("human1mv1c",# 1M
... ...
@@ -368,7 +382,6 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
368 382
 #  aidcol = match("AddressA_ID", colnames(annot))
369 383
 #  if(is.na(aidcol))
370 384
 #    aidcol = match("Address", colnames(annot))
371
-#  bidcol = match("AddressB_ID", colnames(annot))
372 385
 #  if(is.na(bidcol))
373 386
 #    bidcol = match("Address2", colnames(annot))
374 387
 #  aids = annot[, aidcol]
... ...
@@ -383,7 +396,9 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
383 396
   xyPhenoData = AnnotatedDataFrame(data=RG@phenoData@data,varMetadata=RG@phenoData@varMetadata)
384 397
   levels(xyPhenoData@varMetadata$channel) = c("X","Y","zero","_ALL_")
385 398
   XY <- new("NChannelSet",
386
-        assayDataNew(X=matrix(0, nsnps, narrays),Y=matrix(0, nsnps, narrays),zero=matrix(0, nsnps, narrays)),
399
+        assayDataNew(X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
400
+                     Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
401
+                     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")),
387 402
         phenoData=xyPhenoData, protocolData=RG@protocolData, annotation=chipType)
388 403
    storageMode(XY) = "environment"
389 404
 #  XY <- new("NChannelSet",
... ...
@@ -440,7 +455,55 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
440 455
 #  XY@assayData$Y[infI,] = 0
441 456
 #  XY@assayData$zero[infI,] = 0
442 457
 #  gc(verbose=FALSE)
443
-  XY
458
+  }
459
+  if(chipType=="nopackage" && !is.null(anno)) {
460
+            pkgname=NULL
461
+#            anno=read.csv(annot,header=TRUE, as.is=TRUE, check.names=FALSE,skip=7)
462
+            aids=anno[,"AddressA_ID"]
463
+            bids=anno[,"AddressB_ID"]
464
+            snpbase=anno[,"SNP"]
465
+            names(aids)=ids=anno[,"Name"]
466
+#            m=match(aids,rownames(RG@assayData$R))
467
+#            ARG=RG[m[!is.na(m)],]
468
+#            mm=match(rownames(ARG@assayData$R),aids)
469
+#            aids=aids[mm]
470
+#            bids=bids[mm]
471
+#            snpbas=snpbase[mm]
472
+            nsnps = length(aids)
473
+            narrays = ncol(RG)
474
+            infI = !is.na(bids) & bids != 0
475
+            aord = match(aids, featureNames(RG))
476
+            bord = match(bids, featureNames(RG))
477
+            xyPhenoData = AnnotatedDataFrame(data = RG@phenoData@data, 
478
+                                             varMetadata = RG@phenoData@varMetadata)
479
+            levels(xyPhenoData@varMetadata$channel) = c("X", "Y", "zero", "_ALL_")
480
+            XY <- new("NChannelSet", assayDataNew(X = initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
481
+                                                  Y = initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
482
+                                                  zero = initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer")),
483
+                      phenoData = xyPhenoData, protocolData = RG@protocolData)
484
+            storageMode(XY) = "environment"
485
+            featureNames(XY) = names(aids)
486
+            sampleNames(XY) = sampleNames(RG)
487
+            rm(list = c("bord", "infI", "aids", "bids", "snpbase"))
488
+            gc(verbose = FALSE)
489
+            not.na <- !is.na(aord)
490
+            not.na.aord <- aord[not.na]
491
+            r <- as.matrix(assayData(RG)[["R"]][not.na.aord, ])
492
+            XY@assayData$X[not.na, ] <- r
493
+            rm(r)
494
+            gc(verbose = FALSE)
495
+            g <- as.matrix(assayData(RG)[["G"]][not.na.aord, ])
496
+            XY@assayData$Y[not.na, ] <- g
497
+            rm(g)
498
+            gc(verbose = FALSE)
499
+            z <- as.matrix(assayData(RG)[["zero"]][not.na.aord, ])
500
+            XY@assayData$zero[not.na, ] <- z
501
+            rm(z)
502
+            gc(verbose = FALSE)
503
+            rm(RG)
504
+            gc(verbose = FALSE)
505
+   }
506
+   XY
444 507
 }
445 508
 
446 509
 
... ...
@@ -704,186 +767,188 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
704 767
   return(res)
705 768
 }
706 769
 
707
-
708
-crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
709
-			  row.names=TRUE, col.names=TRUE,
710
-			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
711
-			  seed=1,
712
-			  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
713
-			  cdfName, sns, recallMin=10, recallRegMin=1000,
714
-			  returnParams=FALSE, badSNP=.7) {
715
-	if(missing(cdfName)) {
716
-		if(!missing(RG))
717
-			cdfName = annotation(RG)
718
-		if(!missing(XY))
719
-			cdfName = annotation(XY)
720
-	}
721
-	if(!isValidCdfName(cdfName))
722
-		stop("cdfName not valid.  see validCdfNames")
723
-	if(!missing(RG)) {
724
-		if(missing(XY))
725
-			XY = RGtoXY(RG, chipType=cdfName)
726
-		else
727
-			stop("Both RG and XY specified - please use one or the other")
728
-	}
729
-	if (missing(sns)) sns = sampleNames(XY)
730
-	res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize,
731
-				   fitMixture=TRUE, verbose=verbose,
732
-				   seed=seed, eps=eps, cdfName=cdfName, sns=sns,
733
-				   stripNorm=stripNorm, useTarget=useTarget) #,
734
-	if(row.names) row.names=res$gns else row.names=NULL
735
-	if(col.names) col.names=res$sns else col.names=NULL
736
-	res2 <- crlmmGT(A=res[["A"]],
737
-			B=res[["B"]],
738
-			SNR=res[["SNR"]],
739
-			mixtureParams=res[["mixtureParams"]],
740
-			cdfName=cdfName,
741
-			row.names=row.names,
742
-			col.names=col.names,
743
-			probs=probs,
744
-			DF=DF,
745
-			SNRMin=SNRMin,
746
-			recallMin=recallMin,
747
-			recallRegMin=recallRegMin,
748
-			gender=gender,
749
-			verbose=verbose,
750
-			returnParams=returnParams,
751
-			badSNP=badSNP)
752
-	res2[["SNR"]] = res[["SNR"]]
753
-	res2[["SKW"]] = res[["SKW"]]
754
-	rm(res); gc(verbose=FALSE)
755
-	return(list2SnpSet(res2, returnParams=returnParams))
756
-}
757
-
758
-
770
+## crlmmIllumina and genotype.Illumina are now the same function
771
+## Use genotype.Illumina instead
772
+#crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
773
+#			  row.names=TRUE, col.names=TRUE,
774
+#			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
775
+#			  seed=1,
776
+#			  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
777
+#			  cdfName, sns, recallMin=10, recallRegMin=1000,
778
+#			  returnParams=FALSE, badSNP=.7) {
779
+#	if(missing(cdfName)) {
780
+#		if(!missing(RG))
781
+#			cdfName = annotation(RG)
782
+#		if(!missing(XY))
783
+#			cdfName = annotation(XY)
784
+#	}
785
+#	if(!isValidCdfName(cdfName))
786
+#		stop("cdfName not valid.  see validCdfNames")
787
+#	if(!missing(RG)) {
788
+#		if(missing(XY))
789
+#			XY = RGtoXY(RG, chipType=cdfName)
790
+#		else
791
+#			stop("Both RG and XY specified - please use one or the other")
792
+#	}
793
+#	if (missing(sns)) sns = sampleNames(XY)
794
+#	res <- preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize,
795
+#				   fitMixture=TRUE, verbose=verbose,
796
+#				   seed=seed, eps=eps, cdfName=cdfName, sns=sns,
797
+#				   stripNorm=stripNorm, useTarget=useTarget) #,
798
+#	if(row.names) row.names=res$gns else row.names=NULL
799
+#	if(col.names) col.names=res$sns else col.names=NULL
800
+#	res2 <- crlmmGT(A=res[["A"]],
801
+#			B=res[["B"]],
802
+#			SNR=res[["SNR"]],
803
+#			mixtureParams=res[["mixtureParams"]],
804
+#			cdfName=cdfName,
805
+#			row.names=row.names,
806
+#			col.names=col.names,
807
+#			probs=probs,
808
+#			DF=DF,
809
+#			SNRMin=SNRMin,
810
+#			recallMin=recallMin,
811
+#			recallRegMin=recallRegMin,
812
+#			gender=gender,
813
+#			verbose=verbose,
814
+#			returnParams=returnParams,
815
+#			badSNP=badSNP)
816
+#	res2[["SNR"]] = res[["SNR"]]
817
+#	res2[["SKW"]] = res[["SKW"]]
818
+#	rm(res); gc(verbose=FALSE)
819
+#	return(list2SnpSet(res2, returnParams=returnParams))
820
+#}
821
+
822
+
823
+## Deprecate crlmmIlluminaV2 function
759 824
 ## MR: Below is a more memory efficient version of crlmmIllumina() which
760 825
 ## reads in the .idats and genotypes in the one function and removes objects
761 826
 ## after they have been used
762
-crlmmIlluminaV2 = function(sampleSheet=NULL,
763
-			  arrayNames=NULL,
764
-			  ids=NULL,
765
-			  path=".",
766
-			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
767
-			  highDensity=FALSE,
768
-			  sep="_",
769
-			  fileExt=list(green="Grn.idat", red="Red.idat"),
770
-			  saveDate=FALSE,
771
-			  stripNorm=TRUE,
772
-			  useTarget=TRUE,
773
- #                         outdir=".",
774
-			  row.names=TRUE,
775
-			  col.names=TRUE,
776
-			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
777
-                          seed=1,  # save.it=FALSE, snpFile, cnFile,
778
-                          mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
779
-                          cdfName, sns, recallMin=10, recallRegMin=1000,
780
-                          returnParams=FALSE, badSNP=.7) {
781
-
782
-    if(missing(cdfName)) stop("must specify cdfName")
783
-    if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
784
-
785
-#    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
786
-    is.lds=FALSE
787
-    RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
788
-                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
789
-                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
790
-
791
-
792
-    XY = RGtoXY(RG, chipType=cdfName)
793
-#    if(is.lds) {
794
-#      open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero)
795
-#      delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero)
796
-#    }
797
-    rm(RG); gc(verbose=FALSE)
798
-
799
-    if (missing(sns)) { sns = sampleNames(XY)
800
-    }
801
-    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
802
-                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir) #,
803
-#                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
804
-
805
-#    if(is.lds) {
806
-#      open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
807
-#      delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero)
808
-#    }
809
-    rm(XY); gc(verbose=FALSE)
810
-
811
-    if(row.names) row.names=res$gns else row.names=NULL
812
-    if(col.names) col.names=res$sns else col.names=NULL
813
-##
814
-##  if(is.lds){
815
-##	  res2 <- crlmmGT2(A=res[["A"]],
816
-##			   B=res[["B"]],
817
-##			   SNR=res[["SNR"]],
818
-##			   mixtureParams=res[["mixtureParams"]],
819
-##			   cdfName=cdfName,
820
-##			   row.names=row.names,
821
-##			   col.names=col.names,
822
-##			   probs=probs,
823
-##			   DF=DF,
824
-##			   SNRMin=SNRMin,
825
-##			   recallMin=recallMin,
826
-##			   recallRegMin=recallRegMin,
827
-##			   gender=gender,
828
-##			   verbose=verbose,
829
-##			   returnParams=returnParams,
830
-##			   badSNP=badSNP)
831
-##  } else {
832
-	  res2 <- crlmmGT(A=res[["A"]],
833
-			   B=res[["B"]],
834
-			   SNR=res[["SNR"]],
835
-			   mixtureParams=res[["mixtureParams"]],
836
-			   cdfName=cdfName,
837
-			   row.names=row.names,
838
-			   col.names=col.names,
839
-			   probs=probs,
840
-			   DF=DF,
841
-			   SNRMin=SNRMin,
842
-			   recallMin=recallMin,
843
-			   recallRegMin=recallRegMin,
844
-			   gender=gender,
845
-			   verbose=verbose,
846
-			   returnParams=returnParams,
847
-			   badSNP=badSNP)
848
-##  }
849
-
850
-##    FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
851
-##    ## genotyping
852
-##    crlmmGTfxn = function(FUN,...){
853
-##		switch(FUN,
854
-##		       crlmmGT2=crlmmGT2(...),
855
-##		       crlmmGT=crlmmGT(...))
856
-##              }
857
-##    res2 = crlmmGTfxn(FUN,
858
-##                     A=res[["A"]],
859
-##                     B=res[["B"]],
860
-##                     SNR=res[["SNR"]],
861
-##                     mixtureParams=res[["mixtureParams"]],
862
-##                     cdfName=cdfName,
863
-##                     row.names=row.names,
864
-##                     col.names=col.names,
865
-##                     probs=probs,
866
-##                     DF=DF,
867
-##                     SNRMin=SNRMin,
868
-##                     recallMin=recallMin,
869
-##                     recallRegMin=recallRegMin,
870
-##                     gender=gender,
871
-##                     verbose=verbose,
872
-##                     returnParams=returnParams,
873
-##                     badSNP=badSNP)
874
-
875
-#    if(is.lds) {
876
-#      open(res[["SNR"]]); open(res[["SKW"]])
827
+#crlmmIlluminaV2 = function(sampleSheet=NULL,
828
+#			  arrayNames=NULL,
829
+#			  ids=NULL,
830
+#			  path=".",
831
+#			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
832
+#			  highDensity=FALSE,
833
+#			  sep="_",
834
+#			  fileExt=list(green="Grn.idat", red="Red.idat"),
835
+#			  saveDate=FALSE,
836
+#			  stripNorm=TRUE,
837
+#			  useTarget=TRUE,
838
+# #                         outdir=".",
839
+#			  row.names=TRUE,
840
+#			  col.names=TRUE,
841
+#			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
842
+#                          seed=1,  # save.it=FALSE, snpFile, cnFile,
843
+#                          mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
844
+#                          cdfName, sns, recallMin=10, recallRegMin=1000,
845
+#                          returnParams=FALSE, badSNP=.7) {
846
+#
847
+#    if(missing(cdfName)) stop("must specify cdfName")
848
+#    if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
849
+#
850
+##    is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
851
+#    is.lds=FALSE
852
+#    RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
853
+#                       ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
854
+#                       highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
855
+#
856
+#
857
+#    XY = RGtoXY(RG, chipType=cdfName)
858
+##    if(is.lds) {
859
+##      open(RG@assayData$R); open(RG@assayData$G); open(RG@assayData$zero)
860
+##      delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero)
861
+##    }
862
+#    rm(RG); gc(verbose=FALSE)
863
+#
864
+#    if (missing(sns)) { sns = sampleNames(XY)
877 865
 #    }
878
-    res2[["SNR"]] = res[["SNR"]]
879
-    res2[["SKW"]] = res[["SKW"]]
880
- #  if(is.lds) {
881
- #    delete(res[["A"]]); delete(res[["B"]])
882
- #    delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
883
- #  }
884
-    rm(res); gc(verbose=FALSE)
885
-    return(list2SnpSet(res2, returnParams=returnParams))
886
-}
866
+#    res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
867
+#                               seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, outdir=outdir) #,
868
+##                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
869
+#
870
+##    if(is.lds) {
871
+##      open(XY@assayData$X); open(XY@assayData$Y); open(XY@assayData$zero)
872
+##      delete(XY@assayData$X); delete(XY@assayData$Y); delete(XY@assayData$zero)
873
+##    }
874
+#    rm(XY); gc(verbose=FALSE)
875
+#
876
+#    if(row.names) row.names=res$gns else row.names=NULL
877
+#    if(col.names) col.names=res$sns else col.names=NULL
878
+###
879
+###  if(is.lds){
880
+###	  res2 <- crlmmGT2(A=res[["A"]],
881
+###			   B=res[["B"]],
882
+###			   SNR=res[["SNR"]],
883
+###			   mixtureParams=res[["mixtureParams"]],
884
+###			   cdfName=cdfName,
885
+###			   row.names=row.names,
886
+###			   col.names=col.names,
887
+###			   probs=probs,
888
+###			   DF=DF,
889
+###			   SNRMin=SNRMin,
890
+###			   recallMin=recallMin,
891
+###			   recallRegMin=recallRegMin,
892
+###			   gender=gender,
893
+###			   verbose=verbose,
894
+###			   returnParams=returnParams,
895
+###			   badSNP=badSNP)
896
+###  } else {
897
+#	  res2 <- crlmmGT(A=res[["A"]],
898
+#			   B=res[["B"]],
899
+#			   SNR=res[["SNR"]],
900
+#			   mixtureParams=res[["mixtureParams"]],
901
+#			   cdfName=cdfName,
902
+#			   row.names=row.names,
903
+#			   col.names=col.names,
904
+#			   probs=probs,
905
+#			   DF=DF,
906
+#			   SNRMin=SNRMin,
907
+#			   recallMin=recallMin,
908
+#			   recallRegMin=recallRegMin,
909
+#			   gender=gender,
910
+#			   verbose=verbose,
911
+#			   returnParams=returnParams,
912
+#			   badSNP=badSNP)
913
+###  }
914
+#
915
+###    FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
916
+###    ## genotyping
917
+###    crlmmGTfxn = function(FUN,...){
918
+###		switch(FUN,
919
+###		       crlmmGT2=crlmmGT2(...),
920
+###		       crlmmGT=crlmmGT(...))
921
+###              }
922
+###    res2 = crlmmGTfxn(FUN,
923
+###                     A=res[["A"]],
924
+###                     B=res[["B"]],
925
+###                     SNR=res[["SNR"]],
926
+###                     mixtureParams=res[["mixtureParams"]],
927
+###                     cdfName=cdfName,
928
+###                     row.names=row.names,
929
+###                     col.names=col.names,
930
+###                     probs=probs,
931
+###                     DF=DF,
932
+###                     SNRMin=SNRMin,
933
+###                     recallMin=recallMin,
934
+###                     recallRegMin=recallRegMin,
935
+###                     gender=gender,
936
+###                     verbose=verbose,
937
+###                     returnParams=returnParams,
938
+###                     badSNP=badSNP)
939
+#
940
+##    if(is.lds) {
941
+##      open(res[["SNR"]]); open(res[["SKW"]])
942
+##    }
943
+#    res2[["SNR"]] = res[["SNR"]]
944
+#    res2[["SKW"]] = res[["SKW"]]
945
+# #  if(is.lds) {
946
+# #    delete(res[["A"]]); delete(res[["B"]])
947
+# #    delete(res[["SKW"]]); delete(res[["SNR"]]); delete(res[["mixtureParams"]])
948
+# #  }
949
+#    rm(res); gc(verbose=FALSE)
950
+#    return(list2SnpSet(res2, returnParams=returnParams))
951
+#}
887 952
 
888 953
 # Functions analogous to Rob's Affy functions to set up container
889 954
 getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat", verbose=FALSE) {
... ...
@@ -950,6 +1015,8 @@ constructInf <- function(sampleSheet=NULL,
950 1015
 			 fileExt=list(green="Grn.idat", red="Red.idat"),
951 1016
                          XY,
952 1017
 			 cdfName,
1018
+                         anno,
1019
+                         genome,
953 1020
 			 verbose=FALSE,
954 1021
 			 batch=NULL, #fns,
955 1022
 			 saveDate=TRUE) { #, outdir="."){
... ...
@@ -1015,16 +1082,31 @@ constructInf <- function(sampleSheet=NULL,
1015 1082
 	  if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
1016 1083
 
1017 1084
 	  if(verbose) message("Initializing container for genotyping and copy number estimation")
1018
-	  pkgname <- getCrlmmAnnotationName(cdfName)
1019
-	  path <- system.file("extdata", package=pkgname)
1020
-	  genome <- getAvailableIlluminaGenomeBuild(path)
1021
-	  featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
1022
-	  nr = nrow(featureData); nc = narrays
1023
-	  sns <-  if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
1085
+          if(cdfName!="nopackage") {
1086
+	    pkgname <- getCrlmmAnnotationName(cdfName)
1087
+	    path <- system.file("extdata", package=pkgname)
1088
+	    genome <- getAvailableIlluminaGenomeBuild(path)
1089
+	    featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
1090
+          } else {
1091
+            if(!is.integer(anno$chr)) {
1092
+                  chr = as.character(anno$chromosome)
1093
+                  chr[chr=="X"] = 23
1094
+                  chr[chr=="Y"] = 24
1095
+                  chr[chr=="XY"] = 25
1096
+            }
1097
+            featureData = new("GenomeAnnotatedDataFrame",
1098
+                          isSnp=as.logical(anno$isSnp),
1099
+                          position=as.integer(anno$position),
1100
+                          chromosome=as.integer(chr),
1101
+                          row.names=anno$featureNames)
1102
+          }
1103
+            nr <- nrow(featureData)
1104
+            nc <- narrays
1105
+	    sns <-  if (!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)) {
1024 1106
 		make.unique(sampleSheet$Sample_ID)
1025
-          } else{
1107
+            } else{
1026 1108
 		make.unique(basename(arrayNames))
1027
-          }
1109
+            } 
1028 1110
 	  biga <- initializeBigMatrix(name="A", nr, nc)
1029 1111
 	  bigb <- initializeBigMatrix(name="B", nr, nc)
1030 1112
 	  bigc <- initializeBigMatrix(name="call", nr, nc)
... ...
@@ -1067,11 +1149,16 @@ constructInf <- function(sampleSheet=NULL,
1067 1149
               batch = rep("batch1", narrays) # assume only one batch stop("Must specify 'batch'")
1068 1150
           }
1069 1151
           if(is(batch, "factor")) batch = as.character(batch)
1070
-          pkgname <- getCrlmmAnnotationName(cdfName)
1071
-          path <- system.file("extdata", package=pkgname)
1072
-          genome <- getAvailableIlluminaGenomeBuild(path)
1073
-          featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
1074
-          nr = nrow(featureData); nc = narrays
1152
+          if(cdfName!="nopackage") {
1153
+            pkgname <- getCrlmmAnnotationName(cdfName)
1154
+            path <- system.file("extdata", package=pkgname)
1155
+            genome <- getAvailableIlluminaGenomeBuild(path)
1156
+            featureData = getFeatureData(cdfName, copynumber=TRUE, genome=genome)
1157
+          } else {
1158
+            featureData=anno
1159
+          }
1160
+          nr <- nrow(featureData)
1161
+          nc <- narrays
1075 1162
           sns <-  sampleNames(XY)
1076 1163
           biga <- initializeBigMatrix(name="A", nr, nc)
1077 1164
           bigb <- initializeBigMatrix(name="B", nr, nc)
... ...
@@ -1109,6 +1196,7 @@ preprocessInf <- function(cnSet,
1109 1196
 		       sep="_",
1110 1197
 		       fileExt=list(green="Grn.idat", red="Red.idat"),
1111 1198
                        XY,
1199
+                       anno,
1112 1200
 		       saveDate=TRUE,
1113 1201
 		       stripNorm=TRUE,
1114 1202
 		       useTarget=TRUE,
... ...
@@ -1123,7 +1211,7 @@ preprocessInf <- function(cnSet,
1123 1211
 	open(B(cnSet))
1124 1212
 	sns <- sampleNames(cnSet)
1125 1213
 	pkgname = getCrlmmAnnotationName(annotation(cnSet))
1126
-	is.snp = isSnp(cnSet)
1214
+        is.snp = isSnp(cnSet)
1127 1215
 	snp.index = which(is.snp)
1128 1216
 	narrays = ncol(cnSet)
1129 1217
 	sampleBatches <- splitIndicesByLength(seq_len(ncol(cnSet)), ocSamples()/getDoParWorkers())
... ...
@@ -1140,6 +1228,7 @@ preprocessInf <- function(cnSet,
1140 1228
 		 sep=sep,
1141 1229
 		 fileExt=fileExt,
1142 1230
                  XY=XY,
1231
+                 anno=anno,
1143 1232
 		 saveDate=saveDate,
1144 1233
 		 verbose=verbose,
1145 1234
 		 mixtureSampleSize=mixtureSampleSize,
... ...
@@ -1174,6 +1263,7 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3),
1174 1263
 			gender=NULL,
1175 1264
 			DF=6,
1176 1265
 			cdfName,
1266
+                        nopackage.norm="quantile",
1177 1267
                         call.method="crlmm",
1178 1268
                         trueCalls=NULL){
1179 1269
 	is.snp = isSnp(cnSet)
... ...
@@ -1215,9 +1305,14 @@ genotypeInf <- function(cnSet, mixtureParams, probs=rep(1/3,3),
1215 1305
            cnSet$gender[,] = tmp$gender
1216 1306
            close(cnSet$gender)
1217 1307
         }
1218
-        if(call.method=="krlmm")
1219
-          tmp <- krlmm(cnSet, cdfName, gender=gender, trueCalls=trueCalls, verbose=verbose) # new function required...  cnSet, cdfName and gender are probably the only arguments you need...
1308
+        if(call.method=="krlmm") {
1309
+            if(cdfName=="nopackage") {
1310
+                tmp <- krlmmnopkg(cnSet, offset=16, gender=gender, normalize.method=nopackage.norm, annotation=anno, verbose=verbose)
1311
+            } else {
1312
+              tmp <- krlmm(cnSet, cdfName, gender=gender, trueCalls=trueCalls, verbose=verbose)
1220 1313
 	## snp.names=featureNames(cnSet)[snp.index])
1314
+            }
1315
+        }
1221 1316
 	if(verbose) message("Genotyping finished.") # Updating container with genotype calls and confidence scores.")
1222 1317
 	TRUE
1223 1318
 }
... ...
@@ -1232,6 +1327,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1232 1327
 			      sep="_",
1233 1328
 			      fileExt=list(green="Grn.idat", red="Red.idat"),
1234 1329
                               XY=NULL,
1330
+                              anno,
1331
+                              genome,
1235 1332
                               call.method="crlmm",
1236 1333
                               trueCalls=NULL,
1237 1334
 			      cdfName,
... ...
@@ -1240,7 +1337,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1240 1337
 			      saveDate=FALSE,
1241 1338
 			      stripNorm=TRUE,
1242 1339
 			      useTarget=TRUE,
1243
-                              quantile.method="between",                             
1340
+                              quantile.method="between",
1341
+                              nopackage.norm="quantile",
1244 1342
 			      mixtureSampleSize=10^5,
1245 1343
 			      fitMixture=TRUE,
1246 1344
 			      eps=0.1,
... ...
@@ -1261,7 +1359,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1261 1359
                             "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1262 1360
                             "humanomni5quadv1b",      # Omni5 quad
1263 1361
 			    "humanexome12v1p2a",      # Exome 12 v1.2 A
1264
-                            "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
1362
+                            "humanomniexpexome8v1p1b", # Omni Express Exome 8 v1.1b
1363
+                            "nopackage")
1265 1364
         crlmm.supported = c("human1mv1c",             # 1M
1266 1365
                             "human370v1c",            # 370CNV
1267 1366
 	                    "human650v3a",            # 650Y
... ...
@@ -1309,6 +1408,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1309 1408
 				    sep=sep,
1310 1409
 				    fileExt=fileExt,
1311 1410
                                     XY=XY,
1411
+                                    anno=anno,
1412
+                                    genome=genome,
1312 1413
 				    cdfName=cdfName,
1313 1414
 				    verbose=verbose,
1314 1415
 				    batch=batch,
... ...
@@ -1327,6 +1428,7 @@ genotype.Illumina <- function(sampleSheet=NULL,
1327 1428
 				    fileExt=fileExt,
1328 1429
 				    saveDate=saveDate,
1329 1430
                                     XY=XY,
1431
+                                    anno=anno, 
1330 1432
 				    stripNorm=stripNorm,
1331 1433
 				    useTarget=useTarget,
1332 1434
 				    mixtureSampleSize=mixtureSampleSize,
... ...
@@ -1336,7 +1438,7 @@ genotype.Illumina <- function(sampleSheet=NULL,
1336 1438
 				    verbose=verbose,
1337 1439
 				    seed=seed,
1338 1440
 				    cdfName=cdfName)
1339
-	message("Preprocessing complete.  Begin genotyping...")
1441
+	message("Begin genotyping...")
1340 1442
 	genotypeInf(cnSet=cnSet, mixtureParams=mixtureParams,
1341 1443
 		    probs=probs,
1342 1444
 		    SNRMin=SNRMin,
... ...
@@ -1348,11 +1450,15 @@ genotype.Illumina <- function(sampleSheet=NULL,
1348 1450
 		    gender=gender,
1349 1451
 		    DF=DF,
1350 1452
 		    cdfName=cdfName,
1453
+                    nopackage.norm=nopackage.norm,
1351 1454
                     call.method=call.method,
1352 1455
                     trueCalls=trueCalls)
1353 1456
 	return(cnSet)
1354 1457
 }
1355 1458
 
1459
+crlmmIllumina <- genotype.Illumina
1460
+
1461
+
1356 1462
 
1357 1463
 processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1358 1464
 			arrayNames=NULL,
... ...
@@ -1363,6 +1469,7 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1363 1469
 			sep="_",
1364 1470
 			fileExt=list(green="Grn.idat", red="Red.idat"),
1365 1471
                         XY,
1472
+                        anno,
1366 1473
 			saveDate=FALSE,
1367 1474
 			verbose=TRUE,
1368 1475
 			mixtureSampleSize=10^5,
... ...
@@ -1385,19 +1492,30 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1385 1492
           RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
1386 1493
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1387 1494
                        highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose)
1388
-          XY = RGtoXY(RG, chipType=cdfName)
1495
+          XY = RGtoXY(RG, chipType=cdfName, anno=anno)
1389 1496
           rm(RG)
1390 1497
           gc(verbose=FALSE)
1391 1498
           if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
1392
-          res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1499
+          if(cdfName!="nopackage") {
1500
+             res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1393 1501
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
1394
-                               quantile.method=quantile.method) #, outdir=outdir)
1395
-          rm(XY)
1396
-        }else{ #XY already available
1397
-          if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)  
1398
-          res = preprocessInfinium2(XY[,sel], mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1502
+                               quantile.method=quantile.method) #, outdir=outdir
1503
+             rm(XY)
1504
+          } else {
1505
+              res = list(A=XY@assayData$X, B=XY@assayData$Y, zero=XY@assayData$zero, sns=sns, gns=names(XY), cdfName=cdfName,
1506
+                  SNR=NA, SKW=NA, mixtureParams=NA)
1507
+              rm(XY)
1508
+          }
1509
+          } else{ #XY already available
1510
+          if(missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
1511
+          if(cdfName!="nopackage") {
1512
+            res = preprocessInfinium2(XY[,sel], mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1399 1513
                                            seed=seed, eps=eps, cdfName=cdfName, sns=sns[sel], stripNorm=stripNorm, useTarget=useTarget,
1400
-                                           quantile.method=quantile.method) 
1514
+                                           quantile.method=quantile.method)
1515
+          } else {
1516
+              res = list(A=XY@assayData$X, B=XY@assayData$Y, zero=XY@assayData$zero, sns=sns, gns=names(XY), cdfName=cdfName,
1517
+                  SNR=NA, SKW=NA, mixtureParams=NA)
1518
+          }
1401 1519
         }
1402 1520
         gc(verbose=FALSE)
1403 1521
 	if(verbose) message("Finished preprocessing.")
... ...
@@ -1414,6 +1532,8 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1414 1532
 	## calls and call probabilities by the crlmmGT2 function. The A
1415 1533
 	## and B slots will retain the normalized intensities for the
1416 1534
 	## A and B alleles
1535
+        ## The loop below gives rise to warnings: In `[<-.ff_array`(`*tmp*`, snp.index, jj, value = structure(c(6648L,  ... :
1536
+        ## number of elements to replace is not multiple of values for replacement
1417 1537
 	for(j in seq_along(sel)){
1418 1538
 		jj <- sel[j]
1419 1539
 		A[snp.index, jj] <- Amatrix[, j]
... ...
@@ -1444,3 +1564,4 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1444 1564
 	gc(verbose=FALSE)
1445 1565
         TRUE
1446 1566
       }
1567
+
Browse code

bug fix to krlmm

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@101808 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 03/04/2015 00:01:45
Showing1 changed files
... ...
@@ -1260,7 +1260,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1260 1260
 	                    "humanomni258v1a",        # Omni2.5 8 v1 A
1261 1261
                             "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1262 1262
                             "humanomni5quadv1b",      # Omni5 quad
1263
-			    "humanexome12v1p2a")      # Exome 12 v1.2 A
1263
+			    "humanexome12v1p2a",      # Exome 12 v1.2 A
1264
+                            "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
1264 1265
         crlmm.supported = c("human1mv1c",             # 1M
1265 1266
                             "human370v1c",            # 370CNV
1266 1267
 	                    "human650v3a",            # 650Y
Browse code

Added support for humanexome12v1.2a chip

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@94521 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 25/09/2014 11:06:52
Showing1 changed files
... ...
@@ -330,6 +330,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
330 330
 	  "humanomniexpress12v1b",  # Omni express 12
331 331
 	  "humanimmuno12v1b",       # Immuno chip 12
332 332
           "humancytosnp12v2p1h",    # CytoSNP 12
333
+          "humanexome12v1p2a",      # Exome 12 v1.2 A
333 334
           "humanomniexpexome8v1p1b") # Omni Express Exome 8 v1.1b
334 335
 	  ## RS: added cleancdfname()
335 336
 	  if(missing(chipType)){
... ...
@@ -1258,7 +1259,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1258 1259
 	                    "humanomni25quadv1b",     # Omni2.5 quad
1259 1260
 	                    "humanomni258v1a",        # Omni2.5 8 v1 A
1260 1261
                             "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1261
-                            "humanomni5quadv1b")      # Omni5 quad
1262
+                            "humanomni5quadv1b",      # Omni5 quad
1263
+			    "humanexome12v1p2a")      # Exome 12 v1.2 A
1262 1264
         crlmm.supported = c("human1mv1c",             # 1M
1263 1265
                             "human370v1c",            # 370CNV
1264 1266
 	                    "human650v3a",            # 650Y
Browse code

Added the requirement for more than 8 samples when running krlmm

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@91251 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 11/06/2014 01:17:05
Showing1 changed files
... ...
@@ -1311,6 +1311,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1311 1311
 				    batch=batch,
1312 1312
 				    saveDate=saveDate)
1313 1313
 #        }
1314
+        if(call.method=="krlmm" && ncol(cnSet)<8)
1315
+           stop(paste("To run krlmm you need at least 8 samples (in general, the more the better).  You currently have", ncol(cnSet)))
1314 1316
 	mixtureParams <- preprocessInf(cnSet=cnSet,
1315 1317
 				    sampleSheet=sampleSheet,
1316 1318
 				    arrayNames=arrayNames,
Browse code

Changed tolerance allowed for reading in arrays with different numbers of probes (was 10000, now 4% of total number of probes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@91227 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 10/06/2014 23:43:33
Showing1 changed files
... ...
@@ -86,7 +86,7 @@ readIdatFiles = function(sampleSheet=NULL,
86 86
 	       headerInfo$ChipType[i] = G$ChipType
87 87
 	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
88 88
 	       headerInfo$Position[i] = G$Unknowns$MostlyA
89
-               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
89
+               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+headerInfo$nProbes[1]*0.04) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-headerInfo$nProbes[1]*0.04)) {
90 90
 		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
91 91
 		       next()
92 92
 	       }
Browse code

Added Illumina Omni 2.5 8 v1.1b chip as a supported chip type

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@88305 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 02/04/2014 12:28:38
Showing1 changed files
... ...
@@ -1268,8 +1268,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1268 1268
 	                    "human550v3b",            # 550K
1269 1269
 	                    "human1mduov3b",          # 1M Duo
1270 1270
                             "humanomni1quadv1b",      # Omni1 quad
1271
-			    "humanomni258v1a",        # Omni2.5 8 v1 A
1272
-                            "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1271
+#			    "humanomni258v1a",        # Omni2.5 8 v1 A
1272
+#                           "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1273 1273
 	                    "humanomniexpress12v1b",  # Omni express 12
1274 1274
 	                    "humanimmuno12v1b",       # Immuno chip 12
1275 1275
                             "humancytosnp12v2p1h",    # CytoSNP 12
Browse code

Changes to krlmm to avoid error that occurs in k-means clustering when all intensities are identical

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@88225 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 01/04/2014 02:51:49
Showing1 changed files
... ...
@@ -324,7 +324,8 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
324 324
 	  "human1mduov3b",          # 1M Duo
325 325
 	  "humanomni1quadv1b",      # Omni1 quad
326 326
 	  "humanomni25quadv1b",     # Omni2.5 quad
327
-	  "humanomni258v1a",        # Omni2.5 8
327
+	  "humanomni258v1a",        # Omni2.5 8 v1 A
328
+          "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
328 329
           "humanomni5quadv1b",      # Omni5 quad  
329 330
 	  "humanomniexpress12v1b",  # Omni express 12
330 331
 	  "humanimmuno12v1b",       # Immuno chip 12
... ...
@@ -1255,7 +1256,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1255 1256
 			      badSNP=0.7) {
1256 1257
         krlmm.supported = c("humanomni1quadv1b",      # Omni1 quad
1257 1258
 	                    "humanomni25quadv1b",     # Omni2.5 quad
1258
-	                    "humanomni258v1a",        # Omni2.5 8
1259
+	                    "humanomni258v1a",        # Omni2.5 8 v1 A
1260
+                            "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1259 1261
                             "humanomni5quadv1b")      # Omni5 quad
1260 1262
         crlmm.supported = c("human1mv1c",             # 1M
1261 1263
                             "human370v1c",            # 370CNV
... ...
@@ -1266,6 +1268,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1266 1268
 	                    "human550v3b",            # 550K
1267 1269
 	                    "human1mduov3b",          # 1M Duo
1268 1270
                             "humanomni1quadv1b",      # Omni1 quad
1271
+			    "humanomni258v1a",        # Omni2.5 8 v1 A
1272
+                            "humanomni258v1p1b",      # Omni2.5 8 v1.1 B
1269 1273
 	                    "humanomniexpress12v1b",  # Omni express 12
1270 1274
 	                    "humanimmuno12v1b",       # Immuno chip 12
1271 1275
                             "humancytosnp12v2p1h",    # CytoSNP 12
Browse code

Removed crlmm::: from .R files and set saveDate=FALSE in genotype.Illumina() man page to clean up Notes and Warnings

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@88085 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 28/03/2014 12:43:01
Showing1 changed files
... ...
@@ -1127,7 +1127,7 @@ preprocessInf <- function(cnSet,
1127 1127
 	sampleBatches <- splitIndicesByLength(seq_len(ncol(cnSet)), ocSamples()/getDoParWorkers())
1128 1128
 	mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1129 1129
 	ocLapply(seq_along(sampleBatches),
1130
-		 crlmm:::processIDAT,
1130
+		 processIDAT, # crlmm:::
1131 1131
 		 sampleBatches=sampleBatches,
1132 1132
 		 sampleSheet=sampleSheet,
1133 1133
 		 arrayNames=arrayNames,
Browse code

Added back support for human1mv1c chip (accidentally deleted)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@87249 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 10/03/2014 04:38:28
Showing1 changed files
... ...
@@ -1257,7 +1257,8 @@ genotype.Illumina <- function(sampleSheet=NULL,
1257 1257
 	                    "humanomni25quadv1b",     # Omni2.5 quad
1258 1258
 	                    "humanomni258v1a",        # Omni2.5 8
1259 1259
                             "humanomni5quadv1b")      # Omni5 quad
1260
-        crlmm.supported = c("human370v1c",            # 370CNV
1260
+        crlmm.supported = c("human1mv1c",             # 1M
1261
+                            "human370v1c",            # 370CNV
1261 1262
 	                    "human650v3a",            # 650Y
1262 1263
 	                    "human610quadv1b",        # 610 quad
1263 1264
 	                    "human660quadv1a",        # 660 quad
Browse code

merged matt's changes on the collab branch

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@86002 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 03/02/2014 14:03:16
Showing1 changed files
... ...
@@ -90,8 +90,12 @@ readIdatFiles = function(sampleSheet=NULL,
90 90
 		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
91 91
 		       next()
92 92
 	       }
93
-	       dates$decode[i] = G$RunInfo[1, 1]
94
-	       dates$scan[i] = G$RunInfo[2, 1]
93
+               if(saveDate) {
94
+                      if(nrow(G$RunInfo)>=2) {
95
+	              dates$decode[i] = G$RunInfo[1, 1]
96
+	              dates$scan[i] = G$RunInfo[2, 1]
97
+                      }
98
+               }
95 99
 	       if(i==1) {
96 100
 		       if(is.null(ids) && !is.null(G)){
97 101
 			       ids = idsG
... ...
@@ -99,9 +103,9 @@ readIdatFiles = function(sampleSheet=NULL,
99 103
 		       nprobes = length(ids)
100 104
 		       narrays = length(arrayNames)
101 105
 		       RG = new("NChannelSet",
102
-		                 R=matrix(NA, nprobes, narrays),
103
-		                 G=matrix(NA, nprobes, narrays),
104
-		                 zero=matrix(NA, nprobes, narrays),
106
+		                 R=matrix(0, nprobes, narrays),
107
+		                 G=matrix(0, nprobes, narrays),
108
+		                 zero=matrix(1, nprobes, narrays),
105 109
 				 annotation=headerInfo$Manifest[1],
106 110
 				 phenoData=pd, storage.mode="environment")
107 111
 		       featureNames(RG) = ids
... ...
@@ -117,8 +121,9 @@ readIdatFiles = function(sampleSheet=NULL,
117 121
 		       }
118 122
 	       } else {
119 123
 		       indG = match(ids, idsG)
120
-		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
121
-		       zeroG = G$Quants[indG, "NBeads"]==0
124
+                       nasG = is.na(indG)
125
+		       RG@assayData$G[!nasG,i] = G$Quants[indG[!nasG], "Mean"]
126
+		       zeroG = G$Quants[indG[!nasG], "NBeads"]==0
122 127
 	       }
123 128
 	       rm(G)
124 129
 	       gc(verbose=FALSE)
... ...
@@ -132,13 +137,16 @@ readIdatFiles = function(sampleSheet=NULL,
132 137
 		       if(sum(ids==idsR)==nprobes) {
133 138
 			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
134 139
 		               zeroR = R$Quants[ ,"NBeads"]==0
140
+                               RG@assayData$zero[,i] = zeroG | zeroR
135 141
 		       }
136 142
 	       } else {
137 143
 		       indR = match(ids, idsR)
138
-		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
139
-		       zeroR = R$Quants[indR, "NBeads"]==0
144
+	               nasR = is.na(indR)
145
+		       RG@assayData$R[!nasR,i] = R$Quants[indR[!nasR], "Mean"]
146
+		       zeroR = R$Quants[indR[!nasR], "NBeads"]==0
147
+                       RG@assayData$zero[!nasR,i] = zeroG | zeroR
140 148
 	       }
141
-	       RG@assayData$zero[,i] = zeroG | zeroR
149
+#	       RG@assayData$zero[,i] = zeroG | zeroR
142 150
 	       rm(R, zeroG, zeroR)
143 151
 	       gc(verbose=FALSE)
144 152
        }
... ...
@@ -150,7 +158,6 @@ readIdatFiles = function(sampleSheet=NULL,
150 158
 }
151 159
 
152 160
 
153
-
154 161
 getNumberOfSNPs = function(afile, path){
155 162
     fullfilename = file.path(path, afile)
156 163
     headerSection = readLines(fullfilename, n=15)
... ...
@@ -624,8 +631,8 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
624 631
   ##NOTE: We actually dont need to save S. Only for pics etc...
625 632
   ##f is the correction. we save to avoid recomputing
626 633
 
627
-  A = matrix(NA, nprobes, narrays)
628
-  B = matrix(NA, nprobes, narrays)
634
+  A = matrix(0, nprobes, narrays)
635
+  B = matrix(0, nprobes, narrays)
629 636
   zero = matrix(NA, nprobes, narrays)
630 637
   if(verbose && fitMixture){
631 638
      message("Calibrating ", narrays, " arrays.")
... ...
@@ -904,8 +911,10 @@ getProtocolData.Illumina = function(filenames, sep="_", fileExt="Grn.idat", verb
904 911
 		       warning("Chips are not of the same type.  Skipping ", basename(filenames[i]))
905 912
 		       next()
906 913
 	       }
907
-	       scanDates$ScanDate[i] = G$RunInfo[1, 1]
908
-	       scanDates$DecodeDate[i] = G$RunInfo[2, 1]
914
+               if(nrow(G$RunInfo)>=2) {
915
+   	           scanDates$ScanDate[i] = G$RunInfo[1, 1]
916
+	           scanDates$DecodeDate[i] = G$RunInfo[2, 1]
917
+               }
909 918
 	       rm(G)
910 919
 	       gc(verbose=FALSE)
911 920
        }
... ...
@@ -1226,7 +1235,7 @@ genotype.Illumina <- function(sampleSheet=NULL,
1226 1235
 			      cdfName,
1227 1236
 			      copynumber=TRUE,
1228 1237
 			      batch=NULL,
1229
-			      saveDate=TRUE,
1238
+			      saveDate=FALSE,
1230 1239
 			      stripNorm=TRUE,
1231 1240
 			      useTarget=TRUE,
1232 1241
                               quantile.method="between",                             
Browse code

fix bug in construction of NChannelSet

- thanks to Alex Baras for finding and supplying patch

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@82280 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 31/10/2013 01:45:34
Showing1 changed files
... ...
@@ -371,12 +371,18 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
371 371
   bord = match(bids, featureNames(RG)) # and here
372 372
 #  argrg = aids[rrgg]
373 373
 #  brgrg = bids[rrgg]
374
+  xyPhenoData = AnnotatedDataFrame(data=RG@phenoData@data,varMetadata=RG@phenoData@varMetadata)
375
+  levels(xyPhenoData@varMetadata$channel) = c("X","Y","zero","_ALL_")
374 376
   XY <- new("NChannelSet",
375
-	    X=matrix(0, nsnps, narrays),
376
-	    Y=matrix(0, nsnps, narrays),
377
-	    zero=matrix(0, nsnps, narrays),
378
-	    annotation=chipType, phenoData=RG@phenoData,
379
-	    protocolData=RG@protocolData, storage.mode="environment")
377
+        assayDataNew(X=matrix(0, nsnps, narrays),Y=matrix(0, nsnps, narrays),zero=matrix(0, nsnps, narrays)),
378
+        phenoData=xyPhenoData, protocolData=RG@protocolData, annotation=chipType)
379
+   storageMode(XY) = "environment"
380
+#  XY <- new("NChannelSet",
381
+#	    X=matrix(0, nsnps, narrays),
382
+#	    Y=matrix(0, nsnps, narrays),
383
+#	    zero=matrix(0, nsnps, narrays),
384
+#	    annotation=chipType, phenoData=RG@phenoData,
385
+#	    protocolData=RG@protocolData, storage.mode="environment")
380 386
   featureNames(XY) = ids
381 387
   sampleNames(XY) = sampleNames(RG)
382 388
   ## RS
... ...
@@ -1352,7 +1358,7 @@ processIDAT <- function(stratum, sampleBatches, sampleSheet=NULL,
1352 1358
 			A, B,
1353 1359
 			GT,
1354 1360
 			GTP,
1355
-			SKW, SNR, mixtureParams, is.snp) { #, outdir=".") {
1361
+			SKW, SNR, mixtureParams, is.snp) { #, outdir=".") {
1356 1362
 	message("Processing sample stratum ", stratum, " of ", length(sampleBatches))
1357 1363
 	sel <- sampleBatches[[stratum]]
1358 1364
         if(length(path)>= length(sel)) path = path[sel]
Browse code

fix conflict in description

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@80947 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 30/09/2013 14:07:17
Showing1 changed files
... ...
@@ -150,86 +150,160 @@ readIdatFiles = function(sampleSheet=NULL,
150 150
 }
151 151
 
152 152
 
153
-readGenCallOutput = function(file, path=".", cdfName,
154
-    colnames=list("SampleID"="Sample ID", "SNPID"="SNP Name", "XRaw"="X Raw", "YRaw"="Y Raw"),
155
-    type=list("SampleID"="character", "SNPID"="character", "XRaw"="integer", "YRaw"="integer"), verbose=FALSE) {
156 153
 
157
-    if(!identical(names(type), names(colnames)))
158
-       stop("The arguments 'colnames' and 'type' must have consistent names")
159
-    if(verbose)
160
-  	cat("Reading", file, "\n")
161
-    tmp=readLines(file.path(path,file),n=15)
162
-    s=c("\t",",")
163
-    a=unlist(strsplit(tmp[10][1],s[1]))
164
-    if(length(a)!=1){
165
-	sepp=s[1]
166
-      	a1=unlist(strsplit(tmp[10][1],s[1]))
154
+getNumberOfSNPs = function(afile, path){
155
+    fullfilename = file.path(path, afile)
156
+    headerSection = readLines(fullfilename, n=15)
157
+
158
+    headerLine = headerSection[10][1]
159
+    delimiterList = c(",", "\t")
160
+
161
+    headers = unlist(strsplit(headerLine, delimiterList[1]))
162
+    if (length(headers)!=1) {
163
+        delimiterIndex = 1
164
+    }
165
+    if (length(headers) == 1) {
166
+        headers = unlist(strsplit(headerLine, delimiterList[2]))
167
+        if (length(headers) != 1) {
168
+            delimiterIndex = 2
169
+        }
170
+        if (length(headers) == 1) {
171
+            stop("Input file ", fullfilename, " is not delimited by either comm(,) or tab(\\t)")
172
+        }
167 173
     }
168
-    if(length(a)==1){
169
-	sepp=s[2]
170
-	a1=unlist(strsplit(tmp[10][1],s[2]))
174
+
175
+    SNPLine = headerSection[5][1]
176
+    elements = unlist(strsplit(SNPLine, delimiterList[delimiterIndex]))
177
+    numSNP = as.integer(elements[2])
178 </