Browse code

update doParallel;fix the bug to trace back duplicated genotype

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SICtools@112139 bc3139a8-67e5-0310-9ffc-ced21a209358

Xiaobin Xing authored on 04/01/2016 16:07:24
Showing 4 changed files

... ...
@@ -1,7 +1,6 @@
1 1
 Package: SICtools
2 2
 Type: Package
3
-Title: Find SNV/Indel differences between two bam files with near
4
-        relationship
3
+Title: Find SNV/Indel differences between two bam files with near relationship
5 4
 Version: 1.1.0
6 5
 Date: 2014-12-11
7 6
 Author: Xiaobin Xing, Wu Wei
... ...
@@ -15,11 +14,10 @@ Description: This package is to find SNV/Indel differences between two
15 14
         span no less than 2bp on both sides of indel region.
16 15
 License: GPL (>=2)
17 16
 LazyLoad: Yes
18
-Depends: R (>= 3.0.0), methods, Rsamtools (>= 1.18.1), doMC (>= 1.3.3),
19
-        Biostrings (>= 2.32.1), stringr (>= 0.6.2), matrixStats (>=
20
-        0.10.0)
17
+Depends: R (>= 3.0.0), methods, Rsamtools (>= 1.18.1), doParallel (>= 1.0.8), Biostrings (>= 2.32.1), stringr (>= 0.6.2), matrixStats
18
+               (>= 0.10.0)
21 19
 Imports: foreach, IRanges, GenomicRanges
22 20
 Suggests: knitr, RUnit, BiocGenerics
23
-biocViews: Alignment, Sequencing, Coverage, SequenceMatching,
24
-        QualityControl, DataImport, Software, SNP, VariantDetection
21
+biocViews: Alignment, Sequencing, Coverage, SequenceMatching, QualityControl, DataImport, Software, SNP, VariantDetection
25 22
 VignetteBuilder: knitr
23
+Packaged: 2016-01-04 15:11:21 UTC; xing
... ...
@@ -1,5 +1,5 @@
1 1
 import(Rsamtools)
2
-import(doMC)
2
+import(doParallel)
3 3
 import(methods)
4 4
 import(stringr)
5 5
 import(Biostrings)
... ...
@@ -23,7 +23,7 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
23 23
 	gtDistCutOff <- as.numeric(gtDistCutOff)
24 24
 	
25 25
 	## number of cores used
26
-	registerDoMC(cores=nCores)
26
+	registerDoParallel(cores=nCores)
27 27
 	
28 28
 	## get mpileupPlus result; the samtools will calculate BAQ for SNP around indel
29 29
 	pathSICtools <- system.file(package = "SICtools","etc","samtools2SIC")
... ...
@@ -18,7 +18,7 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
18 18
 	minMapQuality <- as.numeric(minMapQuality)
19 19
 	
20 20
 	## number of cores used
21
-	registerDoMC(cores=nCores)
21
+	registerDoParallel(cores=nCores)
22 22
 	
23 23
 	################################ function to call difference ##############################
24 24
 	calcInfoRange <- function(x){
... ...
@@ -34,8 +34,14 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
34 34
 				rowMaxs(countMtx[,6:9,drop=FALSE])/rowSums(countMtx[,6:9,drop=FALSE]) > 0.95 & 
35 35
 				max.col(countMtx[,1:4,drop=FALSE]) == max.col(countMtx[,6:9,drop=FALSE])
36 36
 		
37
+#		maxBaseIndex <- rowMaxs(countMtx[,,drop=FALSE],cols=1:4)/rowSums(countMtx[,,drop=FALSE],cols=1:4) > 0.95 & 
38
+#				rowMaxs(countMtx[,,drop=FALSE],cols=6:9)/rowSums(countMtx[,,drop=FALSE],cols=6:9) > 0.95 & 
39
+#				colMaxs(countMtx[,,drop=FALSE],cols=1:4) == colMaxs(countMtx[,,drop=FALSE],cols=6:9)
40
+		
37 41
 		## all 0 test
38 42
 		allZeroIndex <- rowSums(countMtx[,1:4,drop=FALSE]) == 0 | rowSums(countMtx[,6:9,drop=FALSE]) == 0
43
+#		allZeroIndex <- rowSums(countMtx[,,drop=FALSE],cols=1:4) == 0 | rowSums(countMtx[,,drop=FALSE],cols=6:9) == 0
44
+		
39 45
 		
40 46
 		## test index
41 47
 		testIndex <- !dupIndex & !maxBaseIndex & !allZeroIndex
... ...
@@ -75,6 +81,22 @@ function(bam1,bam2,refFsa,regChr,regStart,regEnd,minBaseQuality = 13,
75 81
 			testMtx <- testTmp2[,-1,drop=FALSE]
76 82
 			class(testMtx) <- 'numeric'
77 83
 			testDf <- data.frame(chr = testTmp2[,1],testMtx,stringsAsFactors=FALSE)
84
+			
85
+			## for genotype duplicated positions, trace back
86
+			countMtxDup <- countMtx[!maxBaseIndex & !allZeroIndex,]
87
+			countMtxDupDf <- as.data.frame(countMtxDup[duplicated(countMtxDup[,-11]),,drop=FALSE],stringsAsFactors=FALSE)
88
+			
89
+			if(nrow(countMtxDupDf) > 0){
90
+				
91
+				countMtxDupDfFull <- cbind(countMtxDupDf,testDf[match(apply(countMtxDupDf[,-11],1,paste,collapse=' '),apply(testDf[,3:12],1,paste,collapse=' ')),c('X12','X13')])
92
+				countMtxDupDfFull <- countMtxDupDfFull[!is.na(countMtxDupDfFull$X12),]
93
+				countMtxDupDfFull[,'X14'] <- unique(testDf$chr)
94
+				countMtxDupDfFull <- countMtxDupDfFull[,c(14,11,1:10,12,13)]
95
+				colnames(countMtxDupDfFull) <- colnames(testDf)
96
+				testDf <- rbind(testDf,countMtxDupDfFull)
97
+				testDf <- testDf[order(testDf[,'X1']),]
98
+			}
99
+			
78 100
 			return(testDf)          
79 101
 		}
80 102
 	}