git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@128662 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,140 @@ |
1 |
+ |
|
2 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
3 |
+## ## |
|
4 |
+## Project : seqTools ## |
|
5 |
+## Created : 26.August.2013 ## |
|
6 |
+## Author : W. Kaisers ## |
|
7 |
+## File : trimFastq.r ## |
|
8 |
+## Content : Functions which work on fastq and fastq and write output ## |
|
9 |
+## files: trimFastq, writeFai ## |
|
10 |
+## ## |
|
11 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
12 |
+ |
|
13 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
14 |
+## trimFastq: Trimming and discarding reads based on quality values |
|
15 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
16 |
+ |
|
17 |
+trimFastq<-function(infile, outfile="keep.fq.gz", discard="disc.fq.gz", |
|
18 |
+ qualDiscard=0, qualMask=0, fixTrimLeft=0, fixTrimRight=0, |
|
19 |
+ qualTrimLeft=0, qualTrimRight=0, qualMaskValue=78, minSeqLen=0) |
|
20 |
+{ |
|
21 |
+ if(!is.character(infile)) |
|
22 |
+ stop("'infile' must be a string.") |
|
23 |
+ |
|
24 |
+ if(length(infile) != 1) |
|
25 |
+ stop("'infile' must have length 1.") |
|
26 |
+ |
|
27 |
+ if(!file.exists(infile)) |
|
28 |
+ stop("File 'infile' not found.") |
|
29 |
+ |
|
30 |
+ if(!is.character(outfile)) |
|
31 |
+ stop("'outfile' must be character.") |
|
32 |
+ |
|
33 |
+ if(!is.character(outfile)) |
|
34 |
+ stop("'discard' (file) must be character.") |
|
35 |
+ |
|
36 |
+ if(!is.character(discard)) |
|
37 |
+ stop("'discard' must be character.") |
|
38 |
+ |
|
39 |
+ if(!is.character(discard)) |
|
40 |
+ stop("'discard' (file) must be character.") |
|
41 |
+ |
|
42 |
+ if(!is.numeric(qualDiscard)) |
|
43 |
+ stop("'qualDiscard' must be numeric.") |
|
44 |
+ |
|
45 |
+ if(length(qualDiscard)!=1) |
|
46 |
+ stop("'qualDiscard' must have length 1.") |
|
47 |
+ |
|
48 |
+ qualDiscard<-as.integer(qualDiscard) |
|
49 |
+ if( (qualDiscard < 0) || (qualDiscard > 93)) |
|
50 |
+ stop("'qualDiscard' out of range.") |
|
51 |
+ |
|
52 |
+ if(!is.numeric(qualMask)) |
|
53 |
+ stop("'qualMask' must be numeric.") |
|
54 |
+ |
|
55 |
+ if(length(qualMask) != 1) |
|
56 |
+ stop("'qualMask' must have length 1.") |
|
57 |
+ |
|
58 |
+ qualMask<-as.integer(qualMask) |
|
59 |
+ if( (qualMask < 0) || (qualMask > 93) ) |
|
60 |
+ stop("'qualMask' out of range.") |
|
61 |
+ |
|
62 |
+ if(!is.numeric(fixTrimLeft)) |
|
63 |
+ stop("'fixTrimLeft' must be numeric.") |
|
64 |
+ |
|
65 |
+ if(length(fixTrimLeft) != 1) |
|
66 |
+ stop("'fixTrimLeft' must have length 1.") |
|
67 |
+ |
|
68 |
+ if( (fixTrimLeft < 0) || (fixTrimLeft > 100) ) |
|
69 |
+ stop("'fixTrimLeft' out of range.") |
|
70 |
+ |
|
71 |
+ if(!is.numeric(fixTrimRight)) |
|
72 |
+ stop("'fixTrimRight' must be numeric.") |
|
73 |
+ |
|
74 |
+ if(length(fixTrimRight) != 1) |
|
75 |
+ stop("'fixTrimRight' must have length 1.") |
|
76 |
+ |
|
77 |
+ if( (fixTrimRight < 0) || (fixTrimRight > 100) ) |
|
78 |
+ stop("'fixTrimRight' out of range.") |
|
79 |
+ |
|
80 |
+ if(!is.numeric(qualTrimLeft)) |
|
81 |
+ stop("'qualTrimLeft' must be numeric.") |
|
82 |
+ |
|
83 |
+ if(length(qualTrimLeft)!= 1) |
|
84 |
+ stop("'qualTrimLeft' must have length 1.") |
|
85 |
+ qualTrimLeft<-as.integer(qualTrimLeft) |
|
86 |
+ |
|
87 |
+ if( (qualTrimLeft < 0) || (qualTrimLeft > 93) ) |
|
88 |
+ stop("'qualTrimLeft' out of range.") |
|
89 |
+ |
|
90 |
+ if(!is.numeric(qualTrimRight)) |
|
91 |
+ stop("'qualTrimRight' must be numeric.") |
|
92 |
+ |
|
93 |
+ if(length(qualTrimRight) != 1) |
|
94 |
+ stop("'qualTrimRight' must have length 1.") |
|
95 |
+ qualTrimRight<-as.integer(qualTrimRight) |
|
96 |
+ |
|
97 |
+ if( (qualTrimRight < 0) || (qualTrimRight > 93) ) |
|
98 |
+ stop("'qualTrimRight' out of range.") |
|
99 |
+ |
|
100 |
+ if(!is.numeric(qualMaskValue)) |
|
101 |
+ stop("'qualMaskValue' must be numeric.") |
|
102 |
+ |
|
103 |
+ if(length(qualMaskValue) != 1) |
|
104 |
+ stop("'qualMaskValue' must have length 1.") |
|
105 |
+ |
|
106 |
+ if( (qualMaskValue < 0) || (qualMaskValue > 93) ) |
|
107 |
+ stop("'qualMaskValue' out of range.") |
|
108 |
+ |
|
109 |
+ if(!is.numeric(minSeqLen)) |
|
110 |
+ stop("'minSeqLen' must be numeric.") |
|
111 |
+ |
|
112 |
+ if(length(minSeqLen) != 1) |
|
113 |
+ stop("'minSeqLen' must have length 1.") |
|
114 |
+ |
|
115 |
+ if( (minSeqLen < 0) || (minSeqLen > 200) ) |
|
116 |
+ stop("'minSeqLen' out of range.") |
|
117 |
+ |
|
118 |
+ val<-as.integer(c( |
|
119 |
+ fixTrimLeft, |
|
120 |
+ fixTrimRight, |
|
121 |
+ qualTrimLeft, |
|
122 |
+ qualTrimRight, |
|
123 |
+ qualDiscard, |
|
124 |
+ qualMask, |
|
125 |
+ qualMaskValue, |
|
126 |
+ minSeqLen |
|
127 |
+ )) |
|
128 |
+ res<-.Call("trim_fastq", infile, val, c(outfile,discard)) |
|
129 |
+ |
|
130 |
+ bm <- Sys.localeconv()[7] |
|
131 |
+ message("[trimFastq] ", format(res[1], width=11, big.mark=bm), |
|
132 |
+ " records written to outfile.") |
|
133 |
+ message("[trimFastq] ", format(res[2], width=11, big.mark=bm), |
|
134 |
+ " records written to discard.") |
|
135 |
+ return(invisible(res)) |
|
136 |
+} |
|
137 |
+ |
|
138 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
139 |
+## END OF FILE |
|
140 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |