Browse code

Added new function readGenCallOutput() to extract X and Y values from GenomeStudio output

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@59151 bc3139a8-67e5-0310-9ffc-ced21a209358

unknown authored on 13/10/2011 05:28:47
Showing 1 changed files

... ...
@@ -443,6 +443,91 @@ readBPM <- function(bpmFile){
443 443
 }
444 444
 
445 445
 
446
+readGenCallOutput = function(file, path=".", cdfName, verbose=FALSE) {
447
+    if(verbose)
448
+  	  cat("Reading", file, "\n")
449
+	tmp=readLines(file.path(path,file),n=15)
450
+	s=c("\t",",")
451
+    a=unlist(strsplit(tmp[10][1],s[1]))
452
+	if(length(a)!=1){
453
+		sepp=s[1]
454
+      	a1=unlist(strsplit(tmp[10][1],s[1]))
455
+    }
456
+	if(length(a)==1){
457
+		sepp=s[2]
458
+		a1=unlist(strsplit(tmp[10][1],s[2]))
459
+    }
460
+	
461
+	b=c("Sample ID","SNP Name","Allele1 - Forward","Allele2 - Forward","GC Score","X Raw","Y Raw")
462
+	b2=c("GC Score","X Raw","Y Raw")
463
+	
464
+	m1=m=match(a1,b)
465
+	m2=match(a1,b2)
466
+	m[is.na(m)==FALSE]<-list(character(0))
467
+	m[is.na(m2)==FALSE]<-list(numeric(0))
468
+	m[is.na(m)==TRUE]<-list(NULL)
469
+	names(m)<-paste(b[m1],names(m),sep='')
470
+	
471
+    fc = file(file.path(path, file), open="r")
472
+	
473
+	dat = scan(fc, what=m, skip=10,sep=sepp)
474
+	close(fc)
475
+	
476
+	samples = unique(dat$"Sample ID")
477
+	nsamples = length(samples)
478
+	snps = unique(dat$"SNP Name")
479
+	nsnps = length(snps)
480
+	if(verbose)
481
+   	  cat("Check ordering for samples","\n")
482
+    
483
+	X = Y = zeroes = matrix(0, nsnps, nsamples)
484
+	
485
+	for(i in 1:length(samples)) {
486
+		ind = dat$"Sample ID"==samples[i]
487
+		if(sum(dat$"SNP Name"[ind]==snps)==nsnps) {
488
+		    if(verbose)
489
+	          cat(paste("Correct ordering for sample", samples[i], "\n"))
490
+			X[,i] = dat$"X Raw"[ind]
491
+			Y[,i] = dat$"Y Raw"[ind]
492
+			gc()
493
+		}
494
+		if(sum(dat$"SNP Name"[ind]==snps)!=nsnps) {
495
+		    if(verbose)
496
+			  cat("Reordering sample ", samples[i],"\n")
497
+			m=match(snps,dat$"SNP Name"[ind])
498
+			X[,i]= dat$"X Raw"[ind][m]
499
+			Y[,i]= dat$"Y Raw"[ind][m]
500
+		}
501
+	}
502
+	
503
+#	samplenamescorrect = gsub("(_R.)|(_R$)", "", samples)
504
+    zeroes=(X=="0"|Y=="0")
505
+	colnames(X) = colnames(Y) =  colnames(zeroes) = samples #namescorrect
506
+	rownames(X) = rownames(Y) = snps
507
+    
508
+	if(verbose)      
509
+      cat("Creating NChannelSet object\n")
510
+   
511
+#	if (getRversion() < "2.13.0") {
512
+#      rpath <- getRversion()
513
+# 	  outdir <- paste(path, "/illumina_vignette", sep = "")
514
+#	  ldPath(outdir)
515
+#	  dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
516
+#	}
517
+        
518
+    XY = new("NChannelSet", X = initializeBigMatrix(name = "X", nr = nrow(X), nc = ncol(X), vmode = "integer", initdata=X),
519
+			 Y = initializeBigMatrix(name = "Y", nr = nrow(X), nc = ncol(X), vmode = "integer", initdata=Y),
520
+			 zero = initializeBigMatrix(name = "zero", nr = nrow(X), nc = ncol(X), vmode = "integer", initdata=zeroes),
521
+			 annotation = cdfName, storage.mode = "environment")
522
+	sampleNames(XY)=colnames(X)
523
+	
524
+    if(verbose)
525
+      cat("Done\n")
526
+	  
527
+    XY
528
+}
529
+
530
+
446 531
 RGtoXY = function(RG, chipType, verbose=TRUE) {
447 532
 
448 533
   needToLoad <- !all(sapply(c('addressA', 'addressB', 'base'), isLoaded))