git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@95415 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,14 @@ |
1 |
+Package: seqTools |
|
2 |
+Type: Package |
|
3 |
+Title: Analysis of nucleotide, sequence and quality content on fastq |
|
4 |
+ files. |
|
5 |
+Version: 0.99.44 |
|
6 |
+Date: 2013-10-14 |
|
7 |
+Author: Wolfgang Kaisers |
|
8 |
+Maintainer: Wolfgang Kaisers <kaisers@med.uni-duesseldorf.de> |
|
9 |
+Description: Analyze read length, phred scores and alphabet frequency and DNA k-mers on uncompressed and compressed fastq files. |
|
10 |
+biocViews: QualityControl,Sequencing |
|
11 |
+License: Artistic-2.0 |
|
12 |
+Depends: methods,utils,zlibbioc |
|
13 |
+LinkingTo: zlibbioc |
|
14 |
+Suggests: RUnit, BiocGenerics |
0 | 15 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,65 @@ |
1 |
+#exportPattern("^[[:alpha:]]+") |
|
2 |
+useDynLib(seqTools) |
|
3 |
+import(methods) |
|
4 |
+import(zlibbioc) |
|
5 |
+importFrom(utils,head) |
|
6 |
+importFrom(utils,tail) |
|
7 |
+export( |
|
8 |
+ ascii2char, |
|
9 |
+ countDnaKmers, |
|
10 |
+ countFastaKmers, |
|
11 |
+ countGenomeKmers, |
|
12 |
+ countSpliceKmers, |
|
13 |
+ char2ascii, |
|
14 |
+ kMerIndex, |
|
15 |
+ phredTable, |
|
16 |
+ fastqq, |
|
17 |
+ fastqKmerLocs, |
|
18 |
+ fastqKmerSubsetLocs, |
|
19 |
+ phredDist, |
|
20 |
+ plotPhredDist, |
|
21 |
+ plotPhredQuant, |
|
22 |
+ phredQuantiles, |
|
23 |
+ revCountDnaKmers, |
|
24 |
+ simFastqqRunTimes, |
|
25 |
+ sim_fq, |
|
26 |
+ trimFastq, |
|
27 |
+ writeFai, |
|
28 |
+ writeSimFastq, |
|
29 |
+ writeSimContFastq |
|
30 |
+) |
|
31 |
+exportMethods( |
|
32 |
+ cbDistMatrix, |
|
33 |
+ collectTime, |
|
34 |
+ collectDur, |
|
35 |
+ gcContent, |
|
36 |
+ gcContentMatrix, |
|
37 |
+ fileNames, |
|
38 |
+ getK, |
|
39 |
+ kmerCount, |
|
40 |
+ maxSeqLen, |
|
41 |
+ meltDownK, |
|
42 |
+ mergedPhred, |
|
43 |
+ mergedPhredQuantiles, |
|
44 |
+ mergeFastqq, |
|
45 |
+ nFiles, |
|
46 |
+ nNnucs, |
|
47 |
+ nReads, |
|
48 |
+ nucFreq, |
|
49 |
+ phred, |
|
50 |
+ plotGCcontent, |
|
51 |
+ plotKmerCount, |
|
52 |
+ plotMergedPhredQuant, |
|
53 |
+ plotNucCount, |
|
54 |
+ plotNucFreq, |
|
55 |
+ probeLabel, |
|
56 |
+ "probeLabel<-", |
|
57 |
+ propPhred, |
|
58 |
+ seqLen, |
|
59 |
+ seqLenCount |
|
60 |
+) |
|
61 |
+exportClasses( |
|
62 |
+ Fastqq |
|
63 |
+) |
|
64 |
+ |
|
65 |
+ |
0 | 66 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,32 @@ |
1 |
+CHANGES IN VERSION 0.99.42 |
|
2 |
+------------------------- |
|
3 |
+ |
|
4 |
+NEW FEATURES |
|
5 |
+ |
|
6 |
+ o (none) |
|
7 |
+ |
|
8 |
+SIGNIFICANT USER-VISIBLE CHANGES |
|
9 |
+ |
|
10 |
+ o (none) |
|
11 |
+ |
|
12 |
+BUG FIXES |
|
13 |
+ |
|
14 |
+ o trimFastq also includes record identifiers in output files |
|
15 |
+ (=read title), e.g. SRR014849.1 EIXKN4201CFU84 length=93. |
|
16 |
+ |
|
17 |
+ |
|
18 |
+CHANGES IN VERSION 0.99.40 |
|
19 |
+------------------------- |
|
20 |
+ |
|
21 |
+NEW FEATURES |
|
22 |
+ |
|
23 |
+ o Included this NEWS file |
|
24 |
+ |
|
25 |
+ |
|
26 |
+SIGNIFICANT USER-VISIBLE CHANGES |
|
27 |
+ |
|
28 |
+ o (none) |
|
29 |
+ |
|
30 |
+BUG FIXES |
|
31 |
+ |
|
32 |
+ o (none) |
0 | 33 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,35 @@ |
1 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
2 |
+## ## |
|
3 |
+## Project : seqTools ## |
|
4 |
+## Created : 26.August.2013 ## |
|
5 |
+## Author : W. Kaisers ## |
|
6 |
+## Version : 0.9.0 ## |
|
7 |
+## File : allClasses.r ## |
|
8 |
+## Content : Declarations for all S4 classes in package ## |
|
9 |
+## ## |
|
10 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
11 |
+ |
|
12 |
+.onUnload<-function(libpath) { library.dynam.unload("seqTools",libpath) } |
|
13 |
+ |
|
14 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
15 |
+setClass("Fastqq",representation( |
|
16 |
+ filenames="character", |
|
17 |
+ probeLabel="character", |
|
18 |
+ nFiles="integer", |
|
19 |
+ k="integer", |
|
20 |
+ maxSeqLen="integer", |
|
21 |
+ kmer="matrix", |
|
22 |
+ firstKmer="matrix", |
|
23 |
+ nReads="integer", |
|
24 |
+ nN="integer", |
|
25 |
+ seqLenCount="matrix", |
|
26 |
+ gcContent="matrix", |
|
27 |
+ nac="list", |
|
28 |
+ phred="list", |
|
29 |
+ seqLen="matrix", |
|
30 |
+ collectTime="list")) |
|
31 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
32 |
+ |
|
33 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
34 |
+## END OF FILE |
|
35 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
0 | 36 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,105 @@ |
1 |
+ |
|
2 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
3 |
+## ## |
|
4 |
+## Project : seqTools ## |
|
5 |
+## Created : 26.August.2013 ## |
|
6 |
+## Author : W. Kaisers ## |
|
7 |
+## File : allGenerics.r ## |
|
8 |
+## Content : All static variables and (not directly object related ) ## |
|
9 |
+## function declarations ## |
|
10 |
+## ## |
|
11 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
12 |
+ |
|
13 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
14 |
+## Changing Fastqq object structure |
|
15 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
16 |
+setGeneric("mergeFastqq", function(lhs, rhs) standardGeneric("mergeFastqq")) |
|
17 |
+setGeneric("meltDownK", function(object, newK) standardGeneric("meltDownK")) |
|
18 |
+ |
|
19 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
20 |
+## Preparation for Hierarchical clustering (HC) |
|
21 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
22 |
+setGeneric("cbDistMatrix", function(object, nReadNorm = max(nReads(object))) |
|
23 |
+ standardGeneric("cbDistMatrix")) |
|
24 |
+ |
|
25 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
26 |
+## Slot accessors |
|
27 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
28 |
+ |
|
29 |
+setGeneric("fileNames", function(object) standardGeneric("fileNames")) |
|
30 |
+setGeneric("collectTime", function(object) standardGeneric("collectTime")) |
|
31 |
+setGeneric("collectDur", function(object) standardGeneric("collectDur")) |
|
32 |
+setGeneric("getK", function(object) standardGeneric("getK")) |
|
33 |
+setGeneric("nFiles", function(object) standardGeneric("nFiles")) |
|
34 |
+setGeneric("nNnucs", function(object) standardGeneric("nNnucs")) |
|
35 |
+setGeneric("nReads", function(object) standardGeneric("nReads")) |
|
36 |
+setGeneric("maxSeqLen", function(object) standardGeneric("maxSeqLen")) |
|
37 |
+setGeneric("seqLenCount", function(object) standardGeneric("seqLenCount")) |
|
38 |
+setGeneric("nucFreq", function(object, i) standardGeneric("nucFreq")) |
|
39 |
+setGeneric("gcContent", function(object, i) standardGeneric("gcContent")) |
|
40 |
+ |
|
41 |
+setGeneric("gcContentMatrix", function(object) |
|
42 |
+ standardGeneric("gcContentMatrix")) |
|
43 |
+ |
|
44 |
+setGeneric("seqLen", function(object) standardGeneric("seqLen")) |
|
45 |
+setGeneric("kmerCount", function(object) standardGeneric("kmerCount")) |
|
46 |
+ |
|
47 |
+setGeneric("probeLabel", function(object, label) |
|
48 |
+ standardGeneric("probeLabel")) |
|
49 |
+ |
|
50 |
+setGeneric("probeLabel<-", function(object, value) |
|
51 |
+ standardGeneric("probeLabel<-")) |
|
52 |
+ |
|
53 |
+setGeneric("phred", function(object, i) standardGeneric("phred")) |
|
54 |
+ |
|
55 |
+ |
|
56 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
57 |
+## Retrieving Phred distribution |
|
58 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
59 |
+ |
|
60 |
+setGeneric("phredQuantiles", function(object, quantiles, i, ...) |
|
61 |
+ standardGeneric("phredQuantiles")) |
|
62 |
+ |
|
63 |
+setGeneric("plotPhredQuant", function(object, i, main, ...) |
|
64 |
+ standardGeneric("plotPhredQuant")) |
|
65 |
+ |
|
66 |
+## Global Phred content functions |
|
67 |
+setGeneric("phredDist", function(object, i) standardGeneric("phredDist")) |
|
68 |
+ |
|
69 |
+setGeneric("plotPhredDist", function(object, i, maxp=45, col, ...) |
|
70 |
+ standardGeneric("plotPhredDist")) |
|
71 |
+ |
|
72 |
+setGeneric("propPhred", function(object, greater=30, less=93) |
|
73 |
+ standardGeneric("propPhred")) |
|
74 |
+ |
|
75 |
+ |
|
76 |
+setGeneric("mergedPhred", function(object) standardGeneric("mergedPhred")) |
|
77 |
+ |
|
78 |
+setGeneric("mergedPhredQuantiles", function(object, quantiles) |
|
79 |
+ standardGeneric("mergedPhredQuantiles")) |
|
80 |
+ |
|
81 |
+setGeneric("plotMergedPhredQuant", function(object, main, ...) |
|
82 |
+ standardGeneric("plotMergedPhredQuant")) |
|
83 |
+ |
|
84 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
85 |
+## Predefined Plot functions |
|
86 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
87 |
+ |
|
88 |
+setGeneric("plotNucFreq", function(object, i, main, maxx, ...) |
|
89 |
+ standardGeneric("plotNucFreq")) |
|
90 |
+ |
|
91 |
+setGeneric("plotGCcontent", function(object,main,...) |
|
92 |
+ standardGeneric("plotGCcontent")) |
|
93 |
+ |
|
94 |
+setGeneric("plotNucCount", function(object, nucs=16, maxx, ...) |
|
95 |
+ standardGeneric("plotNucCount")) |
|
96 |
+ |
|
97 |
+setGeneric("plotKmerCount", |
|
98 |
+ function(object, index, mxey, main="K-mer count", ...) |
|
99 |
+ standardGeneric("plotKmerCount")) |
|
100 |
+ |
|
101 |
+ |
|
102 |
+ |
|
103 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
104 |
+## END OF FILE |
|
105 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
0 | 106 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,113 @@ |
1 |
+ |
|
2 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
3 |
+## ## |
|
4 |
+## Project : seqTools ## |
|
5 |
+## Created : 26.August.2013 ## |
|
6 |
+## Author : W. Kaisers ## |
|
7 |
+## File : allStatics.r ## |
|
8 |
+## Content : All static variables and (not directly object related ) ## |
|
9 |
+## function declarations ## |
|
10 |
+## ## |
|
11 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
12 |
+ |
|
13 |
+ |
|
14 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
15 |
+## Global variables: |
|
16 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
17 |
+ |
|
18 |
+strandlevels <- c("+", "-", "*") |
|
19 |
+iupac_chars <- c('a', 'c', 'g', 't', 'u', 'r', 'y', 's', 'w', 'k', 'm', 'b', |
|
20 |
+ 'd', 'h', 'v', 'n', '.', '-', '=') |
|
21 |
+ |
|
22 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
23 |
+## Although static array sizes would allow larger k (e.g. 15: 8 GB RAM), |
|
24 |
+## exponential increase in run-time restricts usability to k values in 1:12 |
|
25 |
+## 4^12 = 16.777.216 possible combinations. |
|
26 |
+## 'Hard' coded (in stat_defs.h: 15) |
|
27 |
+max_k <- 12 |
|
28 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
29 |
+ |
|
30 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
31 |
+## End global variables |
|
32 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
33 |
+ |
|
34 |
+ |
|
35 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
36 |
+## Some useful functions for work with phred's |
|
37 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
38 |
+ |
|
39 |
+char2ascii <- function(c) { strtoi(charToRaw(c), base=16L) } |
|
40 |
+ |
|
41 |
+ascii2char <- function(x, multiple=FALSE) |
|
42 |
+ { rawToChar(as.raw(x), multiple=multiple) } |
|
43 |
+ |
|
44 |
+phredTable <- function(phred=0:93) |
|
45 |
+{ |
|
46 |
+ if(!is.numeric(phred)) |
|
47 |
+ stop("phred must be numeric.") |
|
48 |
+ |
|
49 |
+ phred <- sort(unique(as.integer(phred))) |
|
50 |
+ |
|
51 |
+ if( (phred[1] < 0) || (max(phred) > 93) ) |
|
52 |
+ stop("Only phred values in 0:93 are allowed.") |
|
53 |
+ |
|
54 |
+ ascii <- phred + 33 |
|
55 |
+ return(data.frame(ascii=ascii, phred=phred, |
|
56 |
+ char=ascii2char(ascii, multiple=TRUE))) |
|
57 |
+} |
|
58 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
59 |
+ |
|
60 |
+# unexported functions, do not check for x |
|
61 |
+rel_int <- function(x) {return(.Call("rel_int", x, PACKAGE="seqTools"))} |
|
62 |
+rel_real <- function(x) {return(.Call("rel_real", x, PACKAGE="seqTools"))} |
|
63 |
+ |
|
64 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
65 |
+enlargeIntMat <- function(x, newDim){ |
|
66 |
+ if(!is.matrix(x)) |
|
67 |
+ stop("x must be matrix.") |
|
68 |
+ |
|
69 |
+ if(!is.numeric(newDim)) |
|
70 |
+ stop("newDim must be numeric.") |
|
71 |
+ |
|
72 |
+ newDim <- as.integer(newDim) |
|
73 |
+ |
|
74 |
+ if(length(newDim) != 2) |
|
75 |
+ stop("newDim must have length 2.") |
|
76 |
+ |
|
77 |
+ return(.Call("r_enlarge_int_mat", x, newDim, PACKAGE="seqTools")) |
|
78 |
+} |
|
79 |
+ |
|
80 |
+kMerIndex <- function(kMers, k=nchar(kMers)[1], base=1) |
|
81 |
+{ |
|
82 |
+ if(!is.character(kMers)) |
|
83 |
+ stop("kMers must be character.") |
|
84 |
+ |
|
85 |
+ if(!is.numeric(k)) |
|
86 |
+ stop("k must be numeric.") |
|
87 |
+ k <- as.integer(k) |
|
88 |
+ |
|
89 |
+ if(k<0 || k > max_k) |
|
90 |
+ stop("k must be in range 0, ... , ", max_k, ".") |
|
91 |
+ |
|
92 |
+ if(!all(nchar(kMers) == k)) |
|
93 |
+ stop("All kMers must have k characters!") |
|
94 |
+ |
|
95 |
+ if(!is.numeric(base)) |
|
96 |
+ stop("base must be numeric") |
|
97 |
+ |
|
98 |
+ if(length(base) > 1) |
|
99 |
+ stop("base must have length 1.") |
|
100 |
+ base<-as.integer(base) |
|
101 |
+ |
|
102 |
+ if(!(base %in% 0:1)) |
|
103 |
+ stop("base must be 0 or 1.") |
|
104 |
+ |
|
105 |
+ return(.Call("get_Kmer_Index", kMers, k, PACKAGE="seqTools") + base) |
|
106 |
+} |
|
107 |
+ |
|
108 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
109 |
+ |
|
110 |
+ |
|
111 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
112 |
+## END OF FILE |
|
113 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
0 | 114 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
1 |
+ |
|
2 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
3 |
+## ## |
|
4 |
+## Project : seqTools ## |
|
5 |
+## Created : 26.August.2013 ## |
|
6 |
+## Author : W. Kaisers ## |
|
7 |
+## File : fastaFunctions.r ## |
|
8 |
+## Content : Functions which work on fastq and fastq and write output ## |
|
9 |
+## files: writeFai, countFastaKmers ## |
|
10 |
+## ## |
|
11 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
12 |
+ |
|
13 |
+writeFai <- function(infiles, outfiles) |
|
14 |
+{ |
|
15 |
+ if(!is.character(infiles)) |
|
16 |
+ stop("infiles must be character.") |
|
17 |
+ |
|
18 |
+ if(!is.character(outfiles)) |
|
19 |
+ stop("outfiles must be character.") |
|
20 |
+ |
|
21 |
+ if(length(infiles) != length(outfiles)) |
|
22 |
+ stop("infiles and outfiles must have equal length.") |
|
23 |
+ |
|
24 |
+ if(any( !file.exists(infiles) ) ) |
|
25 |
+ stop("Some infile(s) do not exist!\n") |
|
26 |
+ |
|
27 |
+ .Call("write_fai", infiles, outfiles, PACKAGE = "seqTools") |
|
28 |
+ message("[write_fai] done.\n") |
|
29 |
+ |
|
30 |
+ return(invisible()) |
|
31 |
+} |
|
32 |
+ |
|
33 |
+ |
|
34 |
+countFastaKmers <- function(filenames, k=4) |
|
35 |
+{ |
|
36 |
+ if(!is.character(filenames)) |
|
37 |
+ stop("filenames must be character.") |
|
38 |
+ |
|
39 |
+ if(!is.numeric(k)) |
|
40 |
+ stop("k must be numeric.") |
|
41 |
+ k <- as.integer(k) |
|
42 |
+ |
|
43 |
+ if( (k < 0) || (k > max_k) ) |
|
44 |
+ stop("k must be in range 0, ... , ", max_k, ".") |
|
45 |
+ |
|
46 |
+ res <- .Call("count_fasta_Kmers", filenames, k, PACKAGE="seqTools") |
|
47 |
+ |
|
48 |
+ return(res) |
|
49 |
+} |
|
50 |
+ |
|
51 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
52 |
+## END OF FILE |
|
53 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
0 | 54 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,250 @@ |
1 |
+ |
|
2 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
3 |
+## ## |
|
4 |
+## Project : seqTools ## |
|
5 |
+## Created : 26.August.2013 ## |
|
6 |
+## Author : W. Kaisers ## |
|
7 |
+## File : kMer.r ## |
|
8 |
+## Content : Functionality for counting DNA k-mers ## |
|
9 |
+## (independent of fastq or fasta files) ## |
|
10 |
+## countKmers, countDnaKmers, revCountDnaKmers, ## |
|
11 |
+## countGenomeKmers, countSpliceKmers ## |
|
12 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
13 |
+ |
|
14 |
+ |
|
15 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
16 |
+## Counts DNA k-mers on specified regions inside single (character) sequence |
|
17 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
18 |
+countDnaKmers <- function(dna, k, start=1, width=nchar(dna) - k + 1) |
|
19 |
+{ |
|
20 |
+ if(!is.character(dna)) |
|
21 |
+ stop("'dna' must be character.") |
|
22 |
+ |
|
23 |
+ if(length(dna) != 1) |
|
24 |
+ stop("'dna' must have length 1.") |
|
25 |
+ |
|
26 |
+ if(is.numeric(start)) |
|
27 |
+ start <- as.integer(start) |
|
28 |
+ |
|
29 |
+ if(is.numeric(width)) |
|
30 |
+ width <- as.integer(width) |
|
31 |
+ |
|
32 |
+ if(length(width) == 1) |
|
33 |
+ width <- rep(width, length(start)) |
|
34 |
+ |
|
35 |
+ if(is.numeric(k)) |
|
36 |
+ k <- as.integer(k) |
|
37 |
+ |
|
38 |
+ if(length(k)!=1) |
|
39 |
+ stop("'k' must have length 1.") |
|
40 |
+ |
|
41 |
+ if(k < 1) |
|
42 |
+ stop("'k' must be positive.") |
|
43 |
+ |
|
44 |
+ if(k > max_k) |
|
45 |
+ stop("'k' must not exceed", max_k, ".") |
|
46 |
+ |
|
47 |
+ nc <- nchar(dna) |
|
48 |
+ if(k > nc) |
|
49 |
+ stop("'k' must be <= nchar(dna).") |
|
50 |
+ if(any(start + width + k > nc + 2)) |
|
51 |
+ stop("Search region exceeds string end.") |
|
52 |
+ |
|
53 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
54 |
+ ## Counts N's |
|
55 |
+ ## ToDo: Return value |
|
56 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
57 |
+ nn <- integer(length(start)) |
|
58 |
+ return(.Call("count_dna_Kmers", |
|
59 |
+ dna, start, width, k, nn, PACKAGE="seqTools")) |
|
60 |
+ |
|
61 |
+} |
|
62 |
+ |
|
63 |
+ |
|
64 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
65 |
+## Counts DNA k-mers on specified regions inside single (character) sequence |
|
66 |
+## in reverse direction |
|
67 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
68 |
+revCountDnaKmers <- function(dna, k, start, width) |
|
69 |
+{ |
|
70 |
+ if(!is.character(dna)) |
|
71 |
+ stop("'dna' must be character.") |
|
72 |
+ |
|
73 |
+ if(length(dna) != 1) |
|
74 |
+ stop("'dna' must have length 1.") |
|
75 |
+ |
|
76 |
+ if(is.numeric(start)) |
|
77 |
+ start <- as.integer(start) |
|
78 |
+ |
|
79 |
+ if(is.numeric(width)) |
|
80 |
+ width <- as.integer(width) |
|
81 |
+ |
|
82 |
+ if(length(width) == 1) |
|
83 |
+ width <- rep(width, length(start)) |
|
84 |
+ |
|
85 |
+ if(length(width) != length(start)) |
|
86 |
+ stop("'width' must have length 1 or the same length as 'start'.") |
|
87 |
+ |
|
88 |
+ if(is.numeric(k)) |
|
89 |
+ k <- as.integer(k) |
|
90 |
+ |
|
91 |
+ if(any(width + k > start)) |
|
92 |
+ stop("'width' must be <= 'start' - 'k'.") |
|
93 |
+ |
|
94 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
95 |
+ ## nn contains N counts |
|
96 |
+ ## ToDo: Add value of nn to returned object |
|
97 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
98 |
+ nn <- integer(length(start)) |
|
99 |
+ |
|
100 |
+ return(.Call("rev_count_dna_Kmers", |
|
101 |
+ dna, start, width, k, nn, PACKAGE="seqTools")) |
|
102 |
+} |
|
103 |
+ |
|
104 |
+ |
|
105 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
106 |
+## Counts DNA k-mers on specified regions inside multiple (character) sequences |
|
107 |
+## in possibly reversed direction (depending on strand) |
|
108 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
109 |
+countGenomeKmers <- function(dna, seqid, start, width, strand, k) |
|
110 |
+{ |
|
111 |
+ if(!is.character(dna)) |
|
112 |
+ stop("'dna' must be character.") |
|
113 |
+ |
|
114 |
+ if(!is.numeric(seqid)) |
|
115 |
+ seqid <- as.integer(seqid) |
|
116 |
+ rg <- range(seqid) |
|
117 |
+ |
|
118 |
+ if(rg[1] < 0) |
|
119 |
+ stop("Negative seqid's are not allowed.") |
|
120 |
+ |
|
121 |
+ if(rg[2] > length(dna)) |
|
122 |
+ stop("Out of range seqid's.") |
|
123 |
+ |
|
124 |
+ if(!is.numeric(start)) |
|
125 |
+ stop("'start' must be numeric.") |
|
126 |
+ start <- as.integer(start) |
|
127 |
+ |
|
128 |
+ if(!is.numeric(width)) |
|
129 |
+ stop("'width' must be numeric") |
|
130 |
+ width <- as.integer(width) |
|
131 |
+ |
|
132 |
+ if(is.factor(strand)) |
|
133 |
+ strand <- as.integer(strand) |
|
134 |
+ else |
|
135 |
+ { |
|
136 |
+ if(!is.numeric("strand")) |
|
137 |
+ strand <- as.integer(strand) |
|
138 |
+ } |
|
139 |
+ |
|
140 |
+ nStart <- length(start) |
|
141 |
+ if( (length(seqid) != nStart) | (length(width) != nStart) | |
|
142 |
+ (length(strand) != nStart) ) |
|
143 |
+ stop("'seqid', 'start', 'width' and 'strand' must have same length.") |
|
144 |
+ |
|
145 |
+ if(length(k) != 1) |
|
146 |
+ stop("'k' must be a single value.") |
|
147 |
+ |
|
148 |
+ if(!is.numeric(k)) |
|
149 |
+ stop("'k' must be numeric.") |
|
150 |
+ k<-as.integer(k) |
|
151 |
+ |
|
152 |
+ if(k <= 0) |
|
153 |
+ stop("'k' must be >=1") |
|
154 |
+ |
|
155 |
+ if(k > max_k) |
|
156 |
+ stop("'k' must not exceed", max_k, ".") |
|
157 |
+ |
|
158 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
159 |
+ ## Counts N's |
|
160 |
+ ## ToDo: Return value |
|
161 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
162 |
+ nn <- integer(length(start)) |
|
163 |
+ return(.Call("count_genome_Kmers", dna, seqid, start, width, |
|
164 |
+ strand, k, nn, PACKAGE="seqTools")) |
|
165 |
+} |
|
166 |
+ |
|
167 |
+ |
|
168 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
169 |
+## Counts DNA k-mers on each border of a splice-site defined by wLend and |
|
170 |
+## wRstart in range of size width |
|
171 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
172 |
+ |
|
173 |
+countSpliceKmers <- function(dna, seqid, lEnd, rStart, width, strand, k) |
|
174 |
+{ |
|
175 |
+ if(!is.character(dna)) |
|
176 |
+ stop("'dna' must be character.") |
|
177 |
+ |
|
178 |
+ if(!is.numeric(seqid)) |
|
179 |
+ stop("'seqid' must be numeric.") |
|
180 |
+ seqid <- as.integer(seqid) |
|
181 |
+ |
|
182 |
+ rg <- range(seqid) |
|
183 |
+ if(rg[1] < 0) |
|
184 |
+ stop("Negative seqid's are not allowed.") |
|
185 |
+ |
|
186 |
+ if(rg[2] > length(dna)) |
|
187 |
+ stop("Out of range seqid's.") |
|
188 |
+ |
|
189 |
+ if(!is.numeric(lEnd)) |
|
190 |
+ stop("'lEnd' must be numeric.") |
|
191 |
+ lEnd <- as.integer(lEnd) |
|
192 |
+ |
|
193 |
+ if(!is.numeric(rStart)) |
|
194 |
+ stop("'rStart' must be numeric.") |
|
195 |
+ rStart <- as.integer(rStart) |
|
196 |
+ |
|
197 |
+ if(!is.numeric(width)) |
|
198 |
+ stop("'width' must be numeric.") |
|
199 |
+ width <- as.integer(width) |
|
200 |
+ |
|
201 |
+ if(is.factor(strand)) |
|
202 |
+ strand <- as.integer(strand) |
|
203 |
+ else |
|
204 |
+ { |
|
205 |
+ if(!is.numeric("strand")) |
|
206 |
+ strand <- as.integer(strand) |
|
207 |
+ } |
|
208 |
+ |
|
209 |
+ nStart <- length(lEnd) |
|
210 |
+ if(length(seqid) != nStart | length(rStart) != nStart | |
|
211 |
+ length(width) != nStart | length(strand) != nStart) |
|
212 |
+ { |
|
213 |
+ stop(paste("'seqid', 'lEnd', 'rStart', 'width'", |
|
214 |
+ "and 'strand' must have equal length.")) |
|
215 |
+ } |
|
216 |
+ if(!is.numeric(k)) |
|
217 |
+ stop("'k' must be numeric.") |
|
218 |
+ k <- as.integer(k) |
|
219 |
+ |
|
220 |
+ if(length(k) != 1) |
|
221 |
+ stop("'k' must be a single value.") |
|
222 |
+ |
|
223 |
+ if(k > max_k) |
|
224 |
+ stop("'k' must not exceed", max_k, ".") |
|
225 |
+ |
|
226 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
227 |
+ ## Plus strand |
|
228 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
229 |
+ plus_strand<-strand == 1 |
|
230 |
+ |
|
231 |
+ if(sum(plus_strand) > 0) |
|
232 |
+ { |
|
233 |
+ if(any((lEnd[plus_strand] - width[plus_strand] - k + 1) < 0)) |
|
234 |
+ stop("lEnd must be >= width+k-1 for all +-strand coordinates") |
|
235 |
+ } |
|
236 |
+ |
|
237 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
238 |
+ ## Counts N's |
|
239 |
+ ## ToDo: Return value |
|
240 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
241 |
+ nn <- integer(length = nStart) |
|
242 |
+ |
|
243 |
+ return(.Call("count_splice_Kmers", dna, seqid, lEnd, rStart, width, |
|
244 |
+ strand, k, nn, PACKAGE="seqTools")) |
|
245 |
+} |
|
246 |
+ |
|
247 |
+ |
|
248 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
249 |
+## END OF FILE |
|
250 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
0 | 251 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1123 @@ |
1 |
+ |
|
2 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
3 |
+## ## |
|
4 |
+## Project : seqTools ## |
|
5 |
+## Created : 26.August.2013 ## |
|
6 |
+## Author : W. Kaisers ## |
|
7 |
+## Content : Doing some diagnostic and interventional tasks on fastq ## |
|
8 |
+## and fasta ## |
|
9 |
+## esp. DNA k-mer counts. ## |
|
10 |
+## Version : 0.99.34 ## |
|
11 |
+## ## |
|
12 |
+## Changelog : ## |
|
13 |
+## 26.08.13 : 0.0.1 Project created ## |
|
14 |
+## 03.09.13 : 0.0.6 C-Code valgrind tested ## |
|
15 |
+## 27.09.13 : 0.1.0 Added fastqDnaKmers ## |
|
16 |
+## 14.10.13 : 0.1.1 Added C-library for parsing gz fasta and fastq files ## |
|
17 |
+## 17.10.13 : 0.1.3 C-Wrapper for fastq working. ## |
|
18 |
+## 17.10.13 : 0.1.6 First version of R package ## |
|
19 |
+## 21.10.13 : 0.3.0 New C library for fastq parsing ## |
|
20 |
+## 28.10.13 : 0.4.0 Added fastq-loc functions ## |
|
21 |
+## 29.10.13 : 0.4.4 seqTools C-code valgrind tested. ## |
|
22 |
+## 01.11.13 : 0.5.0 Distance matrices implemented ## |
|
23 |
+## 02.11.13 : 0.5.1 First working version with clustering based on ## |
|
24 |
+## K-mers ## |
|
25 |
+## 07.11.13 : 0.5.4 countFastaKmers now resizes arrays automatically ## |
|
26 |
+## 09.11.13 : 0.6.0 count_fastq now resizes arrays automatically ## |
|
27 |
+## 11.11.13 : 0.6.5 Added fastq simulation functions ## |
|
28 |
+## 19.11.13 : 0.7.0 Added trimFastq function ## |
|
29 |
+## 30.11.13 : 0.9.0 Separated R source files ## |
|
30 |
+## 22.12.13 : 0.99.2 Added '['-operator for Fastqq class ## |
|
31 |
+## 19.01.14 : 0.99.7 Added zlib version control for correction of ## |
|
32 |
+## gzBuffer ## |
|
33 |
+## error ## |
|
34 |
+## + checks: cran win-builder + valgrind ## |
|
35 |
+## 21.05.14 : 0.99.34 Corrected error in count_fasta_Kmers ## |
|
36 |
+## which freezed function ## |
|
37 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
38 |
+ |
|
39 |
+.onUnload <- function(libpath) { library.dynam.unload("seqTools", libpath) } |
|
40 |
+ |
|
41 |
+## see: http://bioconductor.org/developers/how-to/coding-style/ |
|
42 |
+ |
|
43 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
44 |
+## Data collection functions: |
|
45 |
+## Fastqq, fastqKmerLocs, fastqKmerSubsetLocs |
|
46 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
47 |
+ |
|
48 |
+fastqq <- function(filenames, k=6, probeLabel) |
|
49 |
+{ |
|
50 |
+ k <- as.integer(k) |
|
51 |
+ |
|
52 |
+ tl <- list() |
|
53 |
+ tl$start <- Sys.time() |
|
54 |
+ filenames <- path.expand(filenames) |
|
55 |
+ |
|
56 |
+ res <- .Call("count_fastq", filenames, k, PACKAGE="seqTools") |
|
57 |
+ |
|
58 |
+ tl$end <- Sys.time() |
|
59 |
+ res@collectTime <- tl |
|
60 |
+ |
|
61 |
+ # Correct minSeqLen when empty files are counted |
|
62 |
+ if(any(res@nReads==0)) |
|
63 |
+ res@seqLen[1,res@nReads==0] <- 0 |
|
64 |
+ |
|
65 |
+ if(!missing(probeLabel)) |
|
66 |
+ { |
|
67 |
+ if(length(probeLabel) == res@nFiles) |
|
68 |
+ res@probeLabel <- as.character(probeLabel) |
|
69 |
+ else{ |
|
70 |
+ warning("[Fastqq] probeLabel and filenames must have equal length.") |
|
71 |
+ res@probeLabel <- as.character(1:res@nFiles) |
|
72 |
+ } |
|
73 |
+ }else{ |
|
74 |
+ res@probeLabel <- as.character(1:res@nFiles) |
|
75 |
+ } |
|
76 |
+ return(res) |
|
77 |
+} |
|
78 |
+ |
|
79 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
80 |
+## fastq K-mer locs |
|
81 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
82 |
+ |
|
83 |
+fastqKmerLocs <- function(filenames, k=4) |
|
84 |
+{ |
|
85 |
+ if(!is.numeric(k)) |
|
86 |
+ stop("'k' must be numeric.") |
|
87 |
+ k <- as.integer(k) |
|
88 |
+ |
|
89 |
+ if( (k < 0) || (k > max_k) ) |
|
90 |
+ stop("'k' must be in range 0, ... , 16.") |
|
91 |
+ |
|
92 |
+ return(.Call("fastq_Kmer_locs", filenames, k, PACKAGE="seqTools")) |
|
93 |
+} |
|
94 |
+ |
|
95 |
+ |
|
96 |
+fastqKmerSubsetLocs <- function(filenames, k=4, kIndex) |
|
97 |
+{ |
|
98 |
+ # Returns list with matrix elements. |
|
99 |
+ if(!is.numeric(k)) |
|
100 |
+ stop("'k' must be numeric.") |
|
101 |
+ |
|
102 |
+ k <- as.integer(k) |
|
103 |
+ |
|
104 |
+ if( (k < 0) || (k > max_k) ) |
|
105 |
+ stop("'k' must be in range 0, ... , ", max_k, ".") |
|
106 |
+ |
|
107 |
+ if(!is.numeric(kIndex)) |
|
108 |
+ stop("'kIndex' must be numeric.") |
|
109 |
+ kIndex <- as.integer(kIndex) |
|
110 |
+ |
|
111 |
+ if(any(kIndex < 0)) |
|
112 |
+ stop("No negative 'kIndex' values allowed.") |
|
113 |
+ |
|
114 |
+ if(any(kIndex > (4^k)) ) |
|
115 |
+ stop("'kIndex' out of range (>4^k).") |
|
116 |
+ |
|
117 |
+ return(.Call("fastq_KmerSubset_locs", |
|
118 |
+ filenames, k, kIndex, PACKAGE="seqTools")) |
|
119 |
+} |
|
120 |
+ |
|
121 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
122 |
+## End: Data collection functions. |
|
123 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
124 |
+ |
|
125 |
+ |
|
126 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
127 |
+## Standard slot accessor functions |
|
128 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
129 |
+ |
|
130 |
+ |
|
131 |
+setMethod("fileNames", "Fastqq", function(object) |
|
132 |
+ {return(object@filenames)}) |
|
133 |
+ |
|
134 |
+setMethod("collectTime", "Fastqq", function(object) |
|
135 |
+ {return(object@collectTime)}) |
|
136 |
+ |
|
137 |
+setMethod("collectDur", "Fastqq", function(object) { |
|
138 |
+return(as.numeric(difftime(object@collectTime$end, object@collectTime$start, |
|
139 |
+ units = "secs"))) |
|
140 |
+}) |
|
141 |
+ |
|
142 |
+setMethod("getK", "Fastqq", function(object) {return(object@k)}) |
|
143 |
+ |
|
144 |
+setMethod("nFiles", "Fastqq", function(object) {return(object@nFiles)}) |
|
145 |
+ |
|
146 |
+setMethod("nNnucs", "Fastqq", function(object) {return(object@nN)}) |
|
147 |
+ |
|
148 |
+setMethod("nReads", "Fastqq", function(object) {return(object@nReads)}) |
|
149 |
+ |
|
150 |
+setMethod("maxSeqLen", "Fastqq", function(object) {return(object@maxSeqLen)}) |
|
151 |
+ |
|
152 |
+setMethod("seqLenCount", "Fastqq", function(object) |
|
153 |
+{ |
|
154 |
+ res<-object@seqLenCount |
|
155 |
+ colnames(res) <- object@probeLabel |
|
156 |
+ return(res) |
|
157 |
+}) |
|
158 |
+ |
|
159 |
+setMethod("nucFreq", "Fastqq", function(object, i) |
|
160 |
+{ |
|
161 |
+ if(missing(i)) |
|
162 |
+ stop("Argument 'i' is not optional.") |
|
163 |
+ |
|
164 |
+ if(!is.numeric(i)) |
|
165 |
+ stop("'i' must be numeric.") |
|
166 |
+ |
|
167 |
+ i <- as.integer(i) |
|
168 |
+ if( (i < 1) || (i > object@nFiles) ) |
|
169 |
+ stop("'i' must be >0 and < nFiles.") |
|
170 |
+ |
|
171 |
+ return(object@nac[[i]]) |
|
172 |
+}) |
|
173 |
+ |
|
174 |
+setMethod("gcContent", "Fastqq", function(object, i) |
|
175 |
+{ |
|
176 |
+ if(missing(i)) |
|
177 |
+ stop("Argument 'i' is not optional.") |
|
178 |
+ |
|
179 |
+ if(!is.numeric(i)) |
|
180 |
+ stop("'i' must be numeric.") |
|
181 |
+ |
|
182 |
+ i <- as.integer(i) |
|
183 |
+ if( (i < 1) || (i > object@nFiles)) |
|
184 |
+ stop("'i' must be >0 and < nFiles.") |
|
185 |
+ |
|
186 |
+ return(object@gcContent[, i]) |
|
187 |
+}) |
|
188 |
+ |
|
189 |
+ |
|
190 |
+setMethod("gcContentMatrix", "Fastqq", function(object) |
|
191 |
+{ |
|
192 |
+ gcc <- object@gcContent |
|
193 |
+ colnames(gcc) <- object@probeLabel |
|
194 |
+ return(gcc) |
|
195 |
+}) |
|
196 |
+ |
|
197 |
+setMethod("seqLen", "Fastqq", function(object) |
|
198 |
+{ |
|
199 |
+ sql <- object@seqLen |
|
200 |
+ colnames(sql) <- object@probeLabel |
|
201 |
+ return(sql) |
|
202 |
+}) |
|
203 |
+ |
|
204 |
+setMethod("kmerCount", "Fastqq", function(object) |
|
205 |
+{ |
|
206 |
+ kmer <- object@kmer |
|
207 |
+ colnames(kmer) <- object@probeLabel |
|
208 |
+ return(kmer) |
|
209 |
+}) |
|
210 |
+ |
|
211 |
+ |
|
212 |
+setMethod("probeLabel", "Fastqq", function(object){return(object@probeLabel)}) |
|
213 |
+setReplaceMethod("probeLabel", "Fastqq", function(object, value) |
|
214 |
+{ |
|
215 |
+ if(length(value) != nFiles(object)) |
|
216 |
+ stop("'value' must have length ", nFiles(object), ".") |
|
217 |
+ |
|
218 |
+ val <- as.character(value) |
|
219 |
+ if(any(table(val)) > 1) |
|
220 |
+ { |
|
221 |
+ warning("[probeLabel <- .Fastqq] probeLabel unique suffix added.") |
|
222 |
+ val <- paste(1:nFiles(object), val, sep="_") |
|
223 |
+ } |
|
224 |
+ object@probeLabel <- val |
|
225 |
+ |
|
226 |
+ return(object) |
|
227 |
+}) |
|
228 |
+ |
|
229 |
+setMethod("phred", signature="Fastqq", definition=function(object, i) |
|
230 |
+{ |
|
231 |
+ if(missing(i)) |
|
232 |
+ stop("Argument 'i' is not optional.") |
|
233 |
+ |
|
234 |
+ if(!is.numeric(i)) |
|
235 |
+ stop("'i' must be numeric.") |
|
236 |
+ |
|
237 |
+ i <- as.integer(i) |
|
238 |
+ if( (i < 1) || (i > object@nFiles) ) |
|
239 |
+ stop("'i' must be >0 and < nFiles.") |
|
240 |
+ |
|
241 |
+ return(object@phred[[i]]) |
|
242 |
+}) |
|
243 |
+ |
|
244 |
+ |
|
245 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
246 |
+## End: Standard slot accessor functions |
|
247 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
248 |
+ |
|
249 |
+ |
|
250 |
+setMethod("show", "Fastqq", function(object) |
|
251 |
+{ |
|
252 |
+ bm <- Sys.localeconv()[7] |
|
253 |
+ w <- 20 |
|
254 |
+ r <- "right" |
|
255 |
+ cat("Class : ", format(class(object), w=w, j=r) , "\n", sep="") |
|
256 |
+ cat("nFiles : ", format(format(nFiles(object) , big.m=bm), w=w, j=r) , "\n", sep="") |
|
257 |
+ cat("maxSeqLen : ", format(format(maxSeqLen(object) , big.m=bm), w=w, j=r) , "\n", sep="") |
|
258 |
+ cat("k (Kmer len): ", format(format(getK(object) , big.m=bm), w=w, j=r) , "\n", sep="") |
|
259 |
+ cat("\n") |
|
260 |
+ cat("nReads : ", format(format(sum(as.numeric(nReads(object))) , big.m=bm), w=w, j=r) , "\n", sep="") |
|
261 |
+ cat("nr N nuc : ", format(format(sum(nNnucs(object)) , big.m=bm), w=w, j=r) , "\n", sep="") |
|
262 |
+ cat("Min seq len : ", format(format(min(seqLen(object)[1, ]), big.m=bm), w=w, j=r) , "\n", sep="") |
|
263 |
+ cat("Max seq len : ", format(format(max(seqLen(object)[2, ]), big.m=bm), w=w, j=r) , "\n", sep="") |
|
264 |
+ return(invisible()) |
|
265 |
+}) |
|
266 |
+ |
|
267 |
+ |
|
268 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
269 |
+## Phred related functions |
|
270 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
271 |
+ |
|
272 |
+setMethod("phredQuantiles", "Fastqq", function(object, quantiles, i, ...) |
|
273 |
+{ |
|
274 |
+ # + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
275 |
+ # Checking arguments |
|
276 |
+ # + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
277 |
+ |
|
278 |
+ ## Check quantiles argument |
|
279 |
+ if(missing(quantiles)) |
|
280 |
+ stop("'quantiles' argument is not optional") |
|
281 |
+ |
|
282 |
+ if(!is.numeric(quantiles)) |
|
283 |
+ stop("Quantiles must be numeric.") |
|
284 |
+ |
|
285 |
+ if(!(all(quantiles >= 0) & all(quantiles <= 1) )) |
|
286 |
+ stop("All quantiles mustbe in [0, 1]") |
|
287 |
+ |
|
288 |
+ quantiles <- sort(unique(round(quantiles, 2))) |
|
289 |
+ |
|
290 |
+ ## Check 'i' argument |
|
291 |
+ if(missing(i)) |
|
292 |
+ stop("'i' argument is not optional") |
|
293 |
+ |
|
294 |
+ if(!is.numeric(i)) |
|
295 |
+ stop("'i' must be numeric.") |
|
296 |
+ |
|
297 |
+ if(length(i) > 1) |
|
298 |
+ stop("'i' must have length 1") |
|
299 |
+ |
|
300 |
+ i <- as.integer(i) |
|
301 |
+ if( (i < 1) || (i > object@nFiles) ) |
|
302 |
+ stop("'i' must be >0 and <nFiles.") |
|
303 |
+ |
|
304 |
+ # + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
305 |
+ # Count qual values for each sequence position |
|
306 |
+ # Convert integer counts into column-wise relative values |
|
307 |
+ # Maximum counted read length |
|
308 |
+ # + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
309 |
+ vec <- 1:seqLen(object)[2, i] |
|
310 |
+ qrel <- as.data.frame(apply(object@phred[[i]][, vec], 2, rel_int)) |
|
311 |
+ names(qrel) <- vec |
|
312 |
+ |
|
313 |
+ # + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
314 |
+ # Walk through each column and extract row number |
|
315 |
+ # for given quantile values |
|
316 |
+ # + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
317 |
+ res <- .Call("get_column_quantiles", quantiles, qrel, PACKAGE="seqTools") |
|
318 |
+ return(res) |
|
319 |
+}) |
|
320 |
+ |
|
321 |
+ |
|
322 |
+setMethod("plotPhredQuant", "Fastqq", function(object, i, main, ...){ |
|
323 |
+ if(!is.numeric(i)) |
|
324 |
+ stop("'i' must be numeric.") |
|
325 |
+ |
|
326 |
+ i <- as.integer(i) |
|
327 |
+ |
|
328 |
+ if( (i < 1) || (i > object@nFiles) ) |
|
329 |
+ stop("'i' must be >0 and <nFiles.") |
|
330 |
+ |
|
331 |
+ quant <- c(0.1, 0.25, 0.5, 0.75, 0.9) |
|
332 |
+ cols <- c("#1F78B4", "#FF7F00", "#E31A1C", "#FF7F00", "#1F78B4") |
|
333 |
+ qq <- phredQuantiles(object, quant, i) |
|
334 |
+ maxQ = floor(1.2*max(qq)) |
|
335 |
+ xv <- 1:ncol(qq) |
|
336 |
+ |
|
337 |
+ if(missing(main)) |
|
338 |
+ { |
|
339 |
+ main <- paste("Position wise Phred Quantiles (", |
|
340 |
+ probeLabel(object)[i], ")", sep="") |
|
341 |
+ } |
|
342 |
+ |
|
343 |
+ plot(xv, xv, ylim=c(0, maxQ), type="n", bty="n", las=1, |
|
344 |
+ ylab = "Phred score", xlab="Sequence position", main=main, ...) |
|
345 |
+ |
|
346 |
+ lines(xv, qq[1, ], col=cols[1], lty=2) |
|
347 |
+ lines(xv, qq[2, ], col=cols[2], lty=1) |
|
348 |
+ lines(xv, qq[3, ], col=cols[3], lwd=2) |
|
349 |
+ lines(xv, qq[4, ], col=cols[4], lty=1) |
|
350 |
+ lines(xv, qq[5, ], col=cols[5], lty=2) |
|
351 |
+ |
|
352 |
+ legend("top", ncol=6, lty=c(2, 1, 1, 1, 2), |
|
353 |
+ lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5, |
|
354 |
+ legend=c("10%", "25%", "50%", "75%", "90%"), bty="n", cex=0.8) |
|
355 |
+ return(invisible()) |
|
356 |
+}) |
|
357 |
+ |
|
358 |
+ |
|
359 |
+ |
|
360 |
+ |
|
361 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
362 |
+## Global Phred content functions |
|
363 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
364 |
+setMethod("phredDist", "Fastqq", function(object, i){ |
|
365 |
+ idx <- 1:nFiles(object) |
|
366 |
+ |
|
367 |
+ if(missing(i)) |
|
368 |
+ i <- idx |
|
369 |
+ else |
|
370 |
+ { |
|
371 |
+ if(!is.numeric(i)) |
|
372 |
+ stop("'i' must be numeric.") |
|
373 |
+ |
|
374 |
+ if(!all(is.element(i, idx))) |
|
375 |
+ stop("'i' is out of range.") |
|
376 |
+ } |
|
377 |
+ |
|
378 |
+ phred <- Reduce("+", object@phred[i]) |
|
379 |
+ phred <- matrix(as.numeric(phred), nrow=nrow(phred)) |
|
380 |
+ |
|
381 |
+ phred_vals <- apply(phred, 1, sum) |
|
382 |
+ phred_dist <- phred_vals/sum(phred_vals) |
|
383 |
+ names(phred_dist) <- rownames(object@phred[[1]]) |
|
384 |
+ |
|
385 |
+ return(phred_dist) |
|
386 |
+}) |
|
387 |
+ |
|
388 |
+ |
|
389 |
+setMethod("plotPhredDist", "Fastqq", function(object, i, maxp=45, col, ...) |
|
390 |
+{ |
|
391 |
+ if(!is.numeric(maxp)) |
|
392 |
+ stop("maxp must be numeric") |
|
393 |
+ |
|
394 |
+ if(!is.integer(maxp)) |
|
395 |
+ maxp<-as.integer(maxp) |
|
396 |
+ |
|
397 |
+ if(maxp <= 0) |
|
398 |
+ stop("maxp must be >=0") |
|
399 |
+ |
|
400 |
+ if(missing(col)) |
|
401 |
+ col <- topo.colors(10)[3] |
|
402 |
+ |
|
403 |
+ phred <- phredDist(object, i) |
|
404 |
+ maxy <- ceiling(max(phred) * 5) / 5 |
|
405 |
+ x <- 1:maxp |
|
406 |
+ xmax <- 10 * (ceiling(maxp / 10)) |
|
407 |
+ |
|
408 |
+ plot(x, phred[x], ylim=c(0, maxy), xlim=c(0, xmax), type="l", lwd=2, |
|
409 |
+ col=col, yaxt="n", bty="n", xlab="Phred value", |
|
410 |
+ ylab="Content (%)", ...) |
|
411 |
+ |
|
412 |
+ ylb <- 0:(10 * maxy) / 10 |
|
413 |
+ axis(side=2, at=ylb, labels=100 * ylb, las=1) |
|
414 |
+ return(invisible()) |
|
415 |
+}) |
|
416 |
+ |
|
417 |
+ |
|
418 |
+# + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
419 |
+# Not exported: |
|
420 |
+# + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
421 |
+pd_l10 <- function(x){ x <- phredDist(x); return(sum(x[1:10]) / sum(x))} |
|
422 |
+ |
|
423 |
+ |
|
424 |
+setMethod("propPhred","Fastqq",function(object, greater=30, less=93) |
|
425 |
+{ |
|
426 |
+ if(!is.numeric(greater)) |
|
427 |
+ stop("'greater' must be numeric.") |
|
428 |
+ |
|
429 |
+ if(length(greater) != 1) |
|
430 |
+ stop("'greater' must have length 1.") |
|
431 |
+ |
|
432 |
+ if(!is.numeric(less)) |
|
433 |
+ stop("'less' must be numeric.") |
|
434 |
+ |
|
435 |
+ if(length(less) != 1) |
|
436 |
+ stop("'less must have length 1.") |
|
437 |
+ |
|
438 |
+ ## + + + + + + + + + + + + + + + + + + + + + + ## |
|
439 |
+ ## greater and less shall be used as |
|
440 |
+ ## array indices: increase greater |
|
441 |
+ ## + + + + + + + + + + + + + + + + + + + + + + ## |
|
442 |
+ greater<-as.integer(greater+1) |
|
443 |
+ less<-as.integer(less) |
|
444 |
+ |
|
445 |
+ if(greater < 1) |
|
446 |
+ stop("'greater' must be >=0.") |
|
447 |
+ |
|
448 |
+ if(less > 93) |
|
449 |
+ stop("'less' must be < 94.") |
|
450 |
+ |
|
451 |
+ if(greater >= less) |
|
452 |
+ stop("'greater' must be <= 'less'") |
|
453 |
+ |
|
454 |
+ n <- nFiles(object) |
|
455 |
+ res <- numeric(n) |
|
456 |
+ for(i in 1:n) |
|
457 |
+ { |
|
458 |
+ pd <- phredDist(object, i) |
|
459 |
+ res[i] <- sum(pd[greater:less]) |
|
460 |
+ } |
|
461 |
+ names(res) <- probeLabel(object) |
|
462 |
+ return(res) |
|
463 |
+}) |
|
464 |
+ |
|
465 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
466 |
+## End: Global Phred content functions |
|
467 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
468 |
+ |
|
469 |
+setMethod("mergedPhred", "Fastqq", function(object){ |
|
470 |
+ sql <- seqLen(object) |
|
471 |
+ maxSeqLen <- max(sql[2, ]) |
|
472 |
+ |
|
473 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
474 |
+ ## as.numeric: Sum on integer is likely to exceed |
|
475 |
+ ## maximal 32-bit integer values |
|
476 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
477 |
+ |
|
478 |
+ if(sql[2, 1] < maxSeqLen) |
|
479 |
+ { |
|
480 |
+ mp <- as.numeric(.Call("r_enlarge_int_mat", object@phred[[1]], |
|
481 |
+ c(nrow(object@phred[[1]]), maxSeqLen), PACKAGE="seqTools")) |
|
482 |
+ }else{ |
|
483 |
+ mp <- as.numeric(object@phred[[1]]) |
|
484 |
+ } |
|
485 |
+ |
|
486 |
+ |
|
487 |
+ n <- nFiles(object) |
|
488 |
+ if(n > 1) |
|
489 |
+ { |
|
490 |
+ for(i in 2:n) |
|
491 |
+ { |
|
492 |
+ if(sql[2, i] < maxSeqLen) |
|
493 |
+ { |
|
494 |
+ mp <- mp + as.numeric(.Call("r_enlarge_int_mat", |
|
495 |
+ object@phred[[i]], |
|
496 |
+ c(nrow(object@phred[[i]]), maxSeqLen), |
|
497 |
+ PACKAGE="seqTools")) |
|
498 |
+ }else{ |
|
499 |
+ mp <- mp + as.numeric(object@phred[[i]]) |
|
500 |
+ } |
|
501 |
+ } |
|
502 |
+ } |
|
503 |
+ mp <- matrix(mp, ncol = maxSeqLen) |
|
504 |
+ rownames(mp) <- rownames(object@phred[[1]]) |
|
505 |
+ colnames(mp) <- 1:maxSeqLen |
|
506 |
+ return(mp) |
|
507 |
+}) |
|
508 |
+ |
|
509 |
+setMethod("mergedPhredQuantiles", "Fastqq", function(object, quantiles) |
|
510 |
+{ |
|
511 |
+ if(!(all(quantiles >= 0) & all(quantiles <= 1)) ) |
|
512 |
+ stop("[mergedPhredQuantiles.Fastqq] all quantiles mustbe in [0, 1]") |
|
513 |
+ quantiles <- sort(unique(round(quantiles, 2))) |
|
514 |
+ |
|
515 |
+ sql <- seqLen(object) |
|
516 |
+ maxSeqLen <- max(sql[2, ]) |
|
517 |
+ |
|
518 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
519 |
+ ## Count qual values for each sequence position |
|
520 |
+ ## Convert counts into column-wise relative values |
|
521 |
+ ## Maximum counted read length |
|
522 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
523 |
+ vec <- 1:maxSeqLen |
|
524 |
+ mrg <- mergedPhred(object) |
|
525 |
+ qrel <- as.data.frame(apply(mrg[, vec], 2, rel_real)) |
|
526 |
+ names(qrel) |
|
527 |
+ names(qrel) <- vec |
|
528 |
+ |
|
529 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
530 |
+ ## Walk through each column and extract row number |
|
531 |
+ ## for given quantile values |
|
532 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
533 |
+ res <- .Call("get_column_quantiles", quantiles, qrel, PACKAGE="seqTools") |
|
534 |
+ return(res) |
|
535 |
+}) |
|
536 |
+ |
|
537 |
+ |
|
538 |
+setMethod("plotMergedPhredQuant", "Fastqq", function(object, main, ...) |
|
539 |
+{ |
|
540 |
+ quant <- c(0.1, 0.25, 0.5, 0.75, 0.9) |
|
541 |
+ cols <- c("#1F78B4", "#FF7F00", "#E31A1C", "#FF7F00", "#1F78B4") |
|
542 |
+ qq <- mergedPhredQuantiles(object, quant) |
|
543 |
+ maxQ = floor(1.2*max(qq)) |
|
544 |
+ xv <- 1:ncol(qq) |
|
545 |
+ |
|
546 |
+ if(missing(main)) |
|
547 |
+ main <- paste("Merged position wise Phred Quantiles.", sep = "") |
|
548 |
+ |
|
549 |
+ plot(xv, xv, ylim=c(0, maxQ), type="n", bty="n", las=1, |
|
550 |
+ ylab="Phred score", xlab="Sequence position", main=main, ...) |
|
551 |
+ |
|
552 |
+ |
|
553 |
+ lines(xv, qq[1, ], col=cols[1], lty=2) |
|
554 |
+ lines(xv, qq[2, ], col=cols[2], lty=1) |
|
555 |
+ lines(xv, qq[3, ], col=cols[3], lwd=2) |
|
556 |
+ lines(xv, qq[4, ], col=cols[4], lty=1) |
|
557 |
+ lines(xv, qq[5, ], col=cols[5], lty=2) |
|
558 |
+ |
|
559 |
+ legend("top", ncol=6, lty=c(2, 1, 1, 1, 2), |
|
560 |
+ lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5, |
|
561 |
+ legend=c("10%", "25%", "50%", "75%", "90%"), bty="n", cex=0.8) |
|
562 |
+ |
|
563 |
+ return(invisible()) |
|
564 |
+}) |
|
565 |
+ |
|
566 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
567 |
+## End: Phred related functions |
|
568 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
569 |
+ |
|
570 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
571 |
+## Nucleotide frequency related functions |
|
572 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
573 |
+ |
|
574 |
+ |
|
575 |
+setMethod("plotNucFreq", "Fastqq", function(object, i, main, maxx, ...) |
|
576 |
+{ |
|
577 |
+ if(!is.numeric(i)) |
|
578 |
+ stop("'i' must be numeric.") |
|
579 |
+ |
|
580 |
+ i <- as.integer(i) |
|
581 |
+ |
|
582 |
+ if( (i < 1) || (i > object@nFiles) ) |
|
583 |
+ stop("'i' must be >0 and <nFiles.") |
|
584 |
+ |
|
585 |
+ maxSeqlen <- max(seqLen(object)[2, ]) |
|
586 |
+ if(missing(maxx)) |
|
587 |
+ { |
|
588 |
+ maxx <- maxSeqlen |
|
589 |
+ } |
|
590 |
+ else |
|
591 |
+ { |
|
592 |
+ if(!is.numeric(maxx)) |
|
593 |
+ stop("'maxx' must be numeric.") |
|
594 |
+ maxx <- as.integer(maxx) |
|
595 |
+ if(maxx < 1) |
|
596 |
+ stop("'maxx' must be >0.") |
|
597 |
+ if(maxx > maxSeqlen) |
|
598 |
+ maxx <- maxSeqlen |
|
599 |
+ } |
|
600 |
+ |
|
601 |
+ # Skip extra line |
|
602 |
+ x <- 1:maxx |
|
603 |
+ nac <- object@nac[[i]][1:4, x] |
|
604 |
+ nacrel <- apply(nac, 2, rel_int) |
|
605 |
+ maxY = round(1.4 * max(nacrel), 1) |
|
606 |
+ |
|
607 |
+ # Maximum counted read length |
|
608 |
+ nacrel <- apply(nac, 2, rel_int) |
|
609 |
+ cols <- c("#1F78B4", "#33A02C", "#E31A1C", "#FF7F00") |
|
610 |
+ |
|
611 |
+ if(missing(main)) |
|
612 |
+ main <- paste("Position wise Nucleotide frequency (", |
|
613 |
+ probeLabel(object)[i], ")", sep="") |
|
614 |
+ |
|
615 |
+ plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1, |
|
616 |
+ ylab="Nucleotide fequency", xlab="sequence position", |
|
617 |
+ main=main, ...) |
|
618 |
+ |
|
619 |
+ lines(x, nacrel[1, ], col=cols[1], lwd=2) |
|
620 |
+ lines(x, nacrel[2, ], col=cols[2], lwd=2) |
|
621 |
+ lines(x, nacrel[3, ], col=cols[3], lwd=2) |
|
622 |
+ lines(x, nacrel[4, ], col=cols[4], lwd=2) |
|
623 |
+ |
|
624 |
+ legend("top", ncol=6, |
|
625 |
+ lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5, |
|
626 |
+ legend=c("A", "C", "G", "T"), bty="n", cex=0.8) |
|
627 |
+ |
|
628 |
+ return(invisible()) |
|
629 |
+}) |
|
630 |
+ |
|
631 |
+ |
|
632 |
+setMethod("plotGCcontent", "Fastqq", function(object,main,...) |
|
633 |
+{ |
|
634 |
+ cols <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", |
|
635 |
+ "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A") |
|
636 |
+ |
|
637 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
638 |
+ ## Normalize matrix to colsum = 1 |
|
639 |
+ ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + # |
|
640 |
+ |
|
641 |
+ gc <- apply(object@gcContent, 2, rel_int) |
|
642 |
+ maxY = round(1.3*max(gc), 2) |
|
643 |
+ nf <- nFiles(object) |
|
644 |
+ x <- 1:nrow(gc) |
|
645 |
+ |
|
646 |
+ if(missing(main)) |
|
647 |
+ main<-"GC content" |
|
648 |
+ |
|
649 |
+ plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1, |
|
650 |
+ ylab="Proportion of reads (%)", xlab="Relative GC content (%)", |
|
651 |
+ main=main, ...) |
|
652 |
+ |
|
653 |
+ for(i in 1:nf) |
|
654 |
+ lines(x, gc[, i], col=cols[i], lwd=2) |
|
655 |
+ |
|
656 |
+ legend("right", lwd=2, col=cols, legend=probeLabel(object), |
|
657 |
+ bty="n", cex=0.8) |
|
658 |
+ |
|
659 |
+ return(invisible()) |
|
660 |
+}) |
|
661 |
+ |
|
662 |
+ |
|
663 |
+setMethod("plotNucCount", "Fastqq", function(object, nucs=16, maxx, ...) |
|
664 |
+{ |
|
665 |
+ |
|
666 |
+ # j = 16: N, j = 2:3: gc |
|
667 |
+ if(!is.numeric(nucs)) |
|
668 |
+ stop("'nucs' must be numeric.") |
|
669 |
+ |
|
670 |
+ nucs <- as.integer(nucs) |
|
671 |
+ if(any(nucs < 1) || any(nucs > 19)) |
|
672 |
+ stop("'nucs' must be >0 and <20.") |
|
673 |
+ |
|
674 |
+ maxSeqlen <- max(seqLen(object)[2, ]) |
|
675 |
+ |
|
676 |
+ if(missing(maxx)) |
|
677 |
+ { |
|
678 |
+ maxx <- maxSeqlen |
|
679 |
+ } |
|
680 |
+ else |
|
681 |
+ { |
|
682 |
+ if(!is.numeric(maxx)) |
|
683 |
+ stop("'maxx' must be numeric.") |
|
684 |
+ |
|
685 |
+ maxx <- as.integer(maxx) |
|
686 |
+ |
|
687 |
+ if(maxx<1) |
|
688 |
+ stop("'maxx must be >0.") |
|
689 |
+ |
|
690 |
+ if(maxx > maxSeqlen) |
|
691 |
+ maxx <- maxSeqlen |
|
692 |
+ } |
|
693 |
+ |
|
694 |