Browse code

more compact script

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SICtools@115433 bc3139a8-67e5-0310-9ffc-ced21a209358

Xiaobin Xing authored on 30/03/2016 16:07:21
Showing 5 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: SICtools
2 2
 Type: Package
3 3
 Title: Find SNV/Indel differences between two bam files with near relationship
4
-Version: 1.1.0
4
+Version: 1.1.1
5 5
 Date: 2014-12-11
6 6
 Author: Xiaobin Xing, Wu Wei
7 7
 Maintainer: Xiaobin Xing <xiaobinxing0316@gmail.com>
... ...
@@ -15,7 +15,7 @@ Description: This package is to find SNV/Indel differences between two
15 15
 License: GPL (>=2)
16 16
 LazyLoad: Yes
17 17
 Depends: R (>= 3.0.0), methods, Rsamtools (>= 1.18.1), doParallel (>= 1.0.8), Biostrings (>= 2.32.1), stringr (>= 0.6.2), matrixStats
18
-               (>= 0.10.0)
18
+               (>= 0.10.0), plyr (>= 1.8.3)
19 19
 Imports: foreach, IRanges, GenomicRanges
20 20
 Suggests: knitr, RUnit, BiocGenerics
21 21
 biocViews: Alignment, Sequencing, Coverage, SequenceMatching, QualityControl, DataImport, Software, SNP, VariantDetection
... ...
@@ -6,5 +6,6 @@ import(Biostrings)
6 6
 import(matrixStats)
7 7
 import(foreach)
8 8
 import(IRanges)
9
+import(plyr)
9 10
 importFrom(GenomicRanges, GRanges)
10 11
 export(snpDiff,indelDiff)
... ...
@@ -73,7 +73,7 @@ function(ppIndel,rowIndex,refFsa,pValueCutOff,gtDistCutOff,verbose){
73 73
 		## select two most alts
74 74
 		if(ncol(gtMtx) >= 3){  
75 75
 			
76
-			gtCountMtx <- gtMtx[,c('*',names(sort(colSums(gtMtx[,which(colnames(gtMtx)  != '*'),drop=FALSE]),
76
+			gtCountMtx <- gtMtx[,c('*',names(sort(colSums(gtMtx[,which(colnames(gtMtx)  != '*')]),
77 77
 											decreasing=TRUE)[1:2]))]     
78 78
 			
79 79
 		}else if(ncol(gtMtx) == 2){
... ...
@@ -89,7 +89,7 @@ function(ppIndel,rowIndex,refFsa,pValueCutOff,gtDistCutOff,verbose){
89 89
 		gtPvalue <- ifelse(is.na(gtPvalue),1,gtPvalue)
90 90
 		gtDist <- ifelse(is.na(gtDist),0,gtDist)
91 91
 		
92
-		if(gtPvalue < pValueCutOff & gtDist > gtDistCutOff){
92
+		if(gtPvalue <= pValueCutOff & gtDist >= gtDistCutOff){
93 93
 			
94 94
 			## format genotype to output
95 95
 			delSeqLen <- as.numeric(str_extract(colnames(gtCountMtx),'-\\d+'))
... ...
@@ -113,21 +113,23 @@ function(ppIndel,rowIndex,refFsa,pValueCutOff,gtDistCutOff,verbose){
113 113
 			if(ncol(gtCountMtx) == 3){
114 114
 				
115 115
 				## output table
116
-				tmpOut <- as.character(c(rowDat[,2:3],colnames(gtCountMtx),
116
+				tmpOut <- c(rowDat[,2:3],colnames(gtCountMtx),
117 117
 								gtCountMtx[1,1], gtCountMtx[1,2],gtCountMtx[1,3],
118 118
 								gtCountMtx[2,1], gtCountMtx[2,2],gtCountMtx[2,3],
119
-								pValue = format(gtPvalue,digits=3,scientific=TRUE),
120
-								gtDist = format(gtDist,digits=3,scientific=TRUE)))
121
-				
119
+								pValue = gtPvalue,
120
+								gtDist = gtDist)
121
+						
122 122
 			}else if(ncol(gtCountMtx) < 3){
123 123
 				
124 124
 				## output table
125
-				tmpOut <- as.character(c(rowDat[,2:3],colnames(gtCountMtx),NA,
125
+				tmpOut <- c(rowDat[,2:3],colnames(gtCountMtx),NA,
126 126
 								gtCountMtx[1,1], gtCountMtx[1,2],NA,
127 127
 								gtCountMtx[2,1], gtCountMtx[2,2],NA,
128
-								format(gtPvalue,digits=3,scientific=TRUE),
129
-								format(gtDist,digits=3,scientific=TRUE)))
128
+								pValue = gtPvalue,
129
+								gtDist = gtDist)
130 130
 			}
131
+			
132
+			return(tmpOut)
131 133
 		}
132 134
 	}
133 135
 }
... ...
@@ -2,6 +2,29 @@ indelDiff <-
2 2
 function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
3 3
 				minMapQuality = 0,nCores = 1,pValueCutOff = 0.05,gtDistCutOff = 0.1,verbose=TRUE){
4 4
 	
5
+	## for test
6
+#	library(doParallel)
7
+#	library(Rsamtools)
8
+#	library(matrixStats)
9
+#	library(plyr)
10
+#	
11
+#	bam1 <- '/g/steinmetz/xing/SICtoolsDebug/20160319/debug/ER.sorted.bam'
12
+#	bam2 <- '/g/steinmetz/xing/SICtoolsDebug/20160319/debug/d1c.0.01.bam'
13
+#	refFsa <- '/g/steinmetz/xing/SICtoolsDebug/20160319/debug/S288c.fa'
14
+#	regChr <- 'chr2'
15
+#	regStart <- 1
16
+#	regEnd <- 230218
17
+#	minBaseQuality <- 13
18
+#	minMapQuality <- 0
19
+#	nCores <- 10
20
+#	pValueCutOff <- 0.05
21
+#	gtDistCutOff <- 0.1
22
+#	verbose <- TRUE
23
+#	
24
+	
25
+	## disable dimention dropping for matrix
26
+	`[` <- function(...) base::`[`(...,drop=FALSE)
27
+	
5 28
 	## set NULL
6 29
 	rowIndex <- NULL
7 30
 	
... ...
@@ -11,12 +34,12 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
11 34
 	if(!file.exists(refFsa)){stop("ERROR: input refFsa file doesn't exist!")}
12 35
 	
13 36
 	if(regChr < 0){stop("ERROR: please input valid regChr!")}
14
-	if(as.numeric(regStart) < 0){stop("ERROR: regStart should be more than 0!")}
15
-	if(as.numeric(regEnd) < 0){stop("ERROR: regEnd should be more than 0!")}
16
-	if(as.numeric(regStart) > as.numeric(regEnd)){stop("ERROR: regStart should be smaller than regEnd!")}
37
+	regStart <- as.numeric(regStart);if(regStart < 0 ){stop("ERROR: regStart should be more than 0!")}
38
+	regEnd <- as.numeric(regEnd);if(regEnd < 0 ){stop("ERROR: regEnd should be more than 0!")}
39
+	if(regStart > regEnd){stop("ERROR: regStart should be smaller than regEnd!")}
17 40
 	
18
-	regStart <- format(as.numeric(regStart),scientific=FALSE)
19
-	regEnd <- format(as.numeric(regEnd),scientific=FALSE)
41
+	regStart <- format(regStart,scientific=FALSE)
42
+	regEnd <- format(regEnd,scientific=FALSE)
20 43
 	minBaseQuality <- as.numeric(minBaseQuality)
21 44
 	minMapQuality <- as.numeric(minMapQuality)
22 45
 	pValueCutOff <- as.numeric(pValueCutOff)
... ...
@@ -31,46 +54,35 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
31 54
 	ppIndel <- read.delim(pipe(paste(pathSICtools,' mpileup -Q ',minBaseQuality,' -q ',minMapQuality, ' -Ogf ',refFsa,' ',bam1,' ',bam2,' -r ',regChr,':',regStart,'-',regEnd,sep='')),
32 55
 			header=FALSE,colClasses = 'character',col.names=paste('V',1:12,sep=''))
33 56
 	
34
-	## remove items with too low indel hits
35
-	ppIndel <- ppIndel[which(!(unlist(lapply(ppIndel$V6,
36
-												function(x){sum(gregexpr('[+-]',x)[[1]] > 0)}))/as.numeric(ppIndel$V5) < 0.05 
37
-								& unlist(lapply(ppIndel$V10,
38
-												function(x){sum(gregexpr('[+-]',x)[[1]] > 0)}))/as.numeric(ppIndel$V9) < 0.05) 
39
-							& as.numeric(ppIndel$V5) > 0 
40
-							& as.numeric(ppIndel$V9) > 0),,drop=FALSE]
57
+	## remove items with too low indel hits	
58
+	ppIndel <- ppIndel[which(!(unlist(lapply(gregexpr('[+-]',ppIndel$V6),function(x){sum(x > 0)}),use.names=FALSE)/as.numeric(ppIndel$V5) < 0.05 & 
59
+								
60
+	                           unlist(lapply(gregexpr('[+-]',ppIndel$V10),function(x){sum(x > 0)}),use.names=FALSE)/as.numeric(ppIndel$V9) < 0.05) & 
61
+						       as.numeric(ppIndel$V5) > 0 & as.numeric(ppIndel$V9) > 0),]
41 62
 	
42
-	if(nrow(ppIndel) > 0){ 
63
+	if(nrow(ppIndel) > 0){
43 64
 		
44 65
 		## test out
45
-		testOutIndel <- foreach(rowIndex=1:nrow(ppIndel)) %dopar% 
46
-				.indelDiffFunc(ppIndel,rowIndex,refFsa,pValueCutOff,gtDistCutOff,verbose)
47
-		
48
-		## remove NULL
49
-		testOutIndel <- testOutIndel[!sapply(testOutIndel,is.null)]
66
+		testOutIndel <- ldply(1:nrow(ppIndel),function(rowIndex){.indelDiffFunc(ppIndel,rowIndex,refFsa,pValueCutOff,gtDistCutOff,verbose)},.parallel=TRUE)
50 67
 		
51
-		## combine the result
52
-		testOutIndel <- do.call(rbind,testOutIndel)
53
-		
54
-		if(!is.null(testOutIndel)){
68
+		if(nrow(testOutIndel) > 0){
55 69
 			
56
-			testIndelMtx <- testOutIndel[,-c(1,3:5),drop=FALSE]
57
-			class(testIndelMtx) <- 'numeric'
58
-			testIndelDf <- data.frame(chr=testOutIndel[,1],ref=testOutIndel[,3],alt1=testOutIndel[,4],alt2 = testOutIndel[,5],testIndelMtx,stringsAsFactors=FALSE)
70
+			colnames(testOutIndel) <- c('chr','pos','ref','altGt1','altGt2','refBam1Count','altGt1Bam1Count',
71
+					'altGt2Bam1Count','refBam2Count','altGt1Bam2Count','altGt2Bam2Count','p.value','d.value')
72
+			class(testOutIndel$pos) <- 'numeric'
73
+			class(testOutIndel$refBam1Count) <- 'numeric'
74
+			class(testOutIndel$altGt1Bam1Count) <- 'numeric'
75
+			class(testOutIndel$altGt2Bam1Count) <- 'numeric'
76
+			class(testOutIndel$refBam2Count) <- 'numeric'
77
+			class(testOutIndel$altGt1Bam2Count) <- 'numeric'
78
+			class(testOutIndel$altGt2Bam2Count) <- 'numeric'
79
+			class(testOutIndel$p.value) <- 'numeric'
80
+			class(testOutIndel$d.value) <- 'numeric'
59 81
 			
60
-			## at least one result
61
-			if(nrow(testIndelDf) > 0){
62
-				
63
-				rownames(testIndelDf) <- 1:nrow(testIndelDf)
64
-				testIndelDf <- testIndelDf[,c(1,5,2:4,6:13)]
65
-				colnames(testIndelDf) <- c('chr','pos','ref','altGt1','altGt2','refBam1Count','altGt1Bam1Count',
66
-						'altGt1Bam2Count','refBam2Count','altGt2Bam1Count','altGt2Bam2Count','p.value','d.value')
67
-				
68
-				testIndelDf <- testIndelDf[,c('chr','pos','ref','altGt1','altGt2','refBam1Count','altGt1Bam1Count','altGt2Bam1Count',
69
-								'refBam2Count','altGt1Bam2Count','altGt2Bam2Count','p.value','d.value')]
70
-				
71
-				return(testIndelDf[,,drop=FALSE])
72
-				
73
-			}
82
+			return(testOutIndel)
74 83
 		}
84
+		
85
+	}else{
86
+		NULL
75 87
 	}
76 88
 }
... ...
@@ -1,6 +1,29 @@
1 1
 snpDiff <-
2
-function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
3
-		minMapQuality = 0,nCores = 1,pValueCutOff= 0.05,baseDistCutOff = 0.1,verbose=TRUE){
2
+		function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
3
+				minMapQuality = 0,nCores = 1,pValueCutOff= 0.05,baseDistCutOff = 0.1,verbose=TRUE){
4
+	
5
+	
6
+	## for test
7
+#	library(doParallel)
8
+#	library(Rsamtools)
9
+#	library(matrixStats)
10
+#	library(plyr)
11
+#	
12
+#	bam1 <- '/g/steinmetz/xing/SICtoolsDebug/20160319/debug/ER.sorted.bam'
13
+#	bam2 <- '/g/steinmetz/xing/SICtoolsDebug/20160319/debug/d1c.0.01.bam'
14
+#	refFsa <- '/g/steinmetz/xing/SICtoolsDebug/20160319/debug/S288c.fa'
15
+#	regChr <- 'chr1'
16
+#	regStart <- 1
17
+#	regEnd <- 230218
18
+#	minBaseQuality <- 13
19
+#	minMapQuality <- 0
20
+#	nCores <- 10
21
+#	pValueCutOff <- 0.05
22
+#	baseDistCutOff <- 0.1
23
+#	verbose <- TRUE
24
+	
25
+	## disable dimention dropping for matrix
26
+	`[` <- function(...) base::`[`(...,drop=FALSE)
4 27
 	
5 28
 	## validate the parameters
6 29
 	if(!file.exists(bam1)){stop("ERROR: input bam1 file doesn't exist!")}
... ...
@@ -8,9 +31,9 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
8 31
 	if(!file.exists(refFsa)){stop("ERROR: input refFsa file doesn't exist!")}
9 32
 	
10 33
 	if(regChr < 0 ){stop("ERROR: please input valid regChr!")}
11
-	if(as.numeric(regStart) < 0 ){stop("ERROR: regStart should be more than 0!")}
12
-	if(as.numeric(regEnd) < 0 ){stop("ERROR: regEnd should be more than 0!")}
13
-	if(as.numeric(regStart) > as.numeric(regEnd)){stop("ERROR: regStart should be smaller than regEnd!")}
34
+	regStart <- as.numeric(regStart);if(regStart < 0 ){stop("ERROR: regStart should be more than 0!")}
35
+	regEnd <- as.numeric(regEnd);if(regEnd < 0 ){stop("ERROR: regEnd should be more than 0!")}
36
+	if(regStart > regEnd){stop("ERROR: regStart should be smaller than regEnd!")}
14 37
 	
15 38
 	pValueCutOff <- as.numeric(pValueCutOff)
16 39
 	baseDistCutOff <- as.numeric(baseDistCutOff) 
... ...
@@ -27,30 +50,24 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
27 50
 		countMtx <- cbind(countMtx,x[['pos']])
28 51
 		
29 52
 		## duplicated index
30
-		dupIndex <- duplicated(countMtx[,-c(5,10,11),drop=FALSE])
53
+		dupIndex <- duplicated(countMtx[,-c(5,10,11)])
31 54
 		
32 55
 		## max bam1 == max bam2 index
33
-		maxBaseIndex <- rowMaxs(countMtx[,1:4,drop=FALSE])/rowSums(countMtx[,1:4,drop=FALSE]) > 0.95 & 
34
-				rowMaxs(countMtx[,6:9,drop=FALSE])/rowSums(countMtx[,6:9,drop=FALSE]) > 0.95 & 
35
-				max.col(countMtx[,1:4,drop=FALSE]) == max.col(countMtx[,6:9,drop=FALSE])
36
-		
37
-#		maxBaseIndex <- rowMaxs(countMtx[,,drop=FALSE],cols=1:4)/rowSums(countMtx[,,drop=FALSE],cols=1:4) > 0.95 & 
38
-#				rowMaxs(countMtx[,,drop=FALSE],cols=6:9)/rowSums(countMtx[,,drop=FALSE],cols=6:9) > 0.95 & 
39
-#				colMaxs(countMtx[,,drop=FALSE],cols=1:4) == colMaxs(countMtx[,,drop=FALSE],cols=6:9)
56
+		maxBaseIndex <- rowMaxs(countMtx[,1:4])/rowSums(countMtx[,1:4]) > 0.95 & 
57
+				rowMaxs(countMtx[,6:9])/rowSums(countMtx[,6:9]) > 0.95 & 
58
+				max.col(countMtx[,1:4]) == max.col(countMtx[,6:9])
40 59
 		
41 60
 		## all 0 test
42
-		allZeroIndex <- rowSums(countMtx[,1:4,drop=FALSE]) == 0 | rowSums(countMtx[,6:9,drop=FALSE]) == 0
43
-#		allZeroIndex <- rowSums(countMtx[,,drop=FALSE],cols=1:4) == 0 | rowSums(countMtx[,,drop=FALSE],cols=6:9) == 0
44
-		
61
+		allZeroIndex <- rowSums(countMtx[,1:4]) == 0 | rowSums(countMtx[,6:9]) == 0		
45 62
 		
46 63
 		## test index
47 64
 		testIndex <- !dupIndex & !maxBaseIndex & !allZeroIndex
48 65
 		
49 66
 		if(sum(testIndex) > 0){ ## at least one candidate
50 67
 			
51
-			countMtxTest <- countMtx[testIndex,,drop=FALSE]
68
+			countMtxTest <- countMtx[testIndex,]
52 69
 			
53
-			testTmp <- lapply(1:nrow(countMtxTest),function(rowIndex){
70
+			testResultMtx <- do.call(rbind,llply(1:nrow(countMtxTest),function(rowIndex){
54 71
 						
55 72
 						rowInfo <- countMtxTest[rowIndex,]
56 73
 						
... ...
@@ -58,7 +75,7 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
58 75
 						testMtx <- matrix(rowInfo[c(1:4,6:9)],nrow=2,byrow=TRUE)
59 76
 						
60 77
 						## remove 0 columns
61
-						testMtx <- testMtx[,colSums(testMtx) > 0,drop=FALSE]
78
+						testMtx <- testMtx[,colSums(testMtx) > 0]
62 79
 						
63 80
 						## get pValue and dist
64 81
 						pValue <- fisher.test(testMtx)$p.value
... ...
@@ -69,39 +86,24 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
69 86
 						
70 87
 						if(pValue <= as.numeric(pValueCutOff) & baseDist >= as.numeric(baseDistCutOff)){
71 88
 							
72
-							as.character(c(names(x[['seqnames']]),rowInfo[11],
73
-											rowInfo[1],rowInfo[2],rowInfo[3],rowInfo[4],rowInfo[5],
74
-											rowInfo[6],rowInfo[7],rowInfo[8],rowInfo[9],rowInfo[10],
75
-											format(pValue,digits=3,scientific=TRUE),
76
-											format(baseDist,digits=3,scientific=TRUE)))
89
+							c(rowInfo[1],rowInfo[2],rowInfo[3],rowInfo[4],rowInfo[5],
90
+											rowInfo[6],rowInfo[7],rowInfo[8],rowInfo[9],rowInfo[10],rowInfo[11],
91
+											pValue,baseDist)
77 92
 						}
78
-					})
79
-			
80
-			testTmp2 <- do.call(rbind,testTmp[,drop=FALSE])
81
-			testMtx <- testTmp2[,-1,drop=FALSE]
82
-			class(testMtx) <- 'numeric'
83
-			testDf <- data.frame(chr = testTmp2[,1],testMtx,stringsAsFactors=FALSE)
84
-			
93
+					}))
94
+						
85 95
 			## for genotype duplicated positions, trace back
86
-			countMtxDup <- countMtx[!maxBaseIndex & !allZeroIndex,,drop=FALSE]
87
-			countMtxDupDf <- as.data.frame(countMtxDup[duplicated(countMtxDup[,-11,drop=FALSE]),,drop=FALSE],stringsAsFactors=FALSE)
96
+			countMtxDup <- countMtx[!maxBaseIndex & !allZeroIndex,]
97
+			countMtxDupIndex <- duplicated(countMtxDup[,-11])
88 98
 			
89
-			if(nrow(countMtxDupDf) > 0 & nrow(testDf) > 0){
90
-				
91
-				countMtxDupDfFull <- cbind(countMtxDupDf,testDf[match(apply(countMtxDupDf[,-11,drop=FALSE],1,paste,collapse=' '),apply(testDf[,3:12],1,paste,collapse=' ')),c('X12','X13')])
92
-				countMtxDupDfFull <- countMtxDupDfFull[!is.na(countMtxDupDfFull$X12),,drop=FALSE]
93
-				
94
-				if(nrow(countMtxDupDfFull) > 0){
95
-					countMtxDupDfFull[,'X14'] <- unique(testDf$chr)
96
-					countMtxDupDfFull <- countMtxDupDfFull[,c(14,11,1:10,12,13)]
97
-					colnames(countMtxDupDfFull) <- colnames(testDf)
98
-					testDf <- rbind(testDf,countMtxDupDfFull)
99
-					testDf <- testDf[order(testDf[,'X1']),]
100
-				}
99
+			if(sum(countMtxDupIndex) > 0 & !is.null(testResultMtx)){
101 100
 				
101
+				countMtxDupFull <- cbind(countMtxDup,testResultMtx[match(apply(countMtxDup[,1:10],1,paste,collapse='_'),apply(testResultMtx[,1:10],1,paste,collapse='_')),c(12,13)])
102
+				countMtxDupFull <- countMtxDupFull[!is.na(countMtxDupFull[,12]),]
103
+				testResultMtx <- unique(rbind(testResultMtx,countMtxDupFull))
102 104
 			}
103 105
 			
104
-			return(testDf)          
106
+			return(testResultMtx)          
105 107
 		}
106 108
 	}
107 109
 	
... ...
@@ -111,12 +113,8 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
111 113
 	## build bam files for pileup
112 114
 	ppFiles <- PileupFiles(c(bam1,bam2))
113 115
 	
114
-	## get region info
115
-	regStart <- as.numeric(regStart)
116
-	regEnd <- as.numeric(regEnd) + 1
117
-	
118 116
 	## split into 10M block
119
-	regSplit <- shift(as(breakInChunks(regEnd - regStart,1e7L),'IRanges'),regStart-1)
117
+	regSplit <- shift(as(breakInChunks(regEnd - regStart + 1,1e7L),'IRanges'),regStart-1)
120 118
 	
121 119
 	## for each region
122 120
 	regSplitDiffFunc <- function(splitIndex){
... ...
@@ -127,7 +125,7 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
127 125
 		}
128 126
 		
129 127
 		regSplitParam <- ApplyPileupsParam(flag = scanBamFlag(isDuplicate=FALSE),
130
-				which=GRanges(regChr, IRanges(start=start(regSplit)[splitIndex], end=end(regSplit)[splitIndex])), 
128
+				which=GRanges(regChr, IRanges(start=start(regSplit[splitIndex]), end=end(regSplit[splitIndex]))), 
131 129
 				what='seq',minDepth = 0L,minBaseQuality, minMapQuality,maxDepth = 2500L, yieldBy = 'range')
132 130
 		
133 131
 		applyPileups(ppFiles,calcInfoRange,param=regSplitParam)[[1]]
... ...
@@ -138,23 +136,21 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
138 136
 	splitIndex <- NULL
139 137
 	
140 138
 	## range out
141
-	rangeOut <- foreach(splitIndex=1:length(regSplit)) %dopar% regSplitDiffFunc(splitIndex)
142
-	
143
-	## remove NULL
144
-	rangeOut <- rangeOut[!sapply(rangeOut,is.null)]
145
-	
146
-	## combine the results
147
-	rangeOut <- do.call(rbind,rangeOut)
148
-	
149
-	if(!is.null(rangeOut)){
139
+	rangeOut <- ldply(1:length(regSplit),regSplitDiffFunc,.parallel=TRUE)
140
+		
150 141
 		if(nrow(rangeOut) > 0){
151 142
 			
143
+			colnames(rangeOut) <- c('A1','C1','G1','T1','N1','A2','C2','G2','T2','N2','pos','p.value','d.value')
144
+			rangeOut$chr <- regChr
145
+			
152 146
 			## get ref by Biostrings
153
-			rangeOut$ref <- as.character(getSeq(FaFile(refFsa),GRanges(seqnames=rangeOut[,1],IRanges(start=rangeOut[,2],width=1))))
147
+			rangeOut$ref <- as.character(getSeq(FaFile(refFsa),GRanges(seqnames=rangeOut$chr,IRanges(start=rangeOut$pos,width=1))))
148
+			rangeOut <- rangeOut[order(rangeOut$pos),]
154 149
 			
155 150
 			## format output    
156
-			rangeOut <- rangeOut[,c(1,2,15,3:14),drop=FALSE]
157
-			colnames(rangeOut) <- c('chr','pos','ref','A1','C1','G1','T1','N1','A2','C2','G2','T2','N2','p.value','d.value')
158
-			return(rangeOut[,,drop=FALSE])
159
-		}}
151
+			rangeOut <- rangeOut[,c('chr','pos','ref','A1','C1','G1','T1','N1','A2','C2','G2','T2','N2','p.value','d.value')]
152
+			return(rangeOut)
153
+		}else{
154
+			NULL
155
+		}
160 156
 }