git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@128664 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,9 +1,8 @@ |
1 | 1 |
Package: seqTools |
2 | 2 |
Type: Package |
3 |
-Title: Analysis of nucleotide, sequence and quality content on fastq |
|
4 |
- files. |
|
5 |
-Version: 1.9.0 |
|
6 |
-Date: 2013-10-14 |
|
3 |
+Title: Analysis of nucleotide, sequence and quality content on fastq files |
|
4 |
+Version: 1.9.1 |
|
5 |
+Date: 2017-04-13 |
|
7 | 6 |
Author: Wolfgang Kaisers |
8 | 7 |
Maintainer: Wolfgang Kaisers <kaisers@med.uni-duesseldorf.de> |
9 | 8 |
Description: Analyze read length, phred scores and alphabet frequency and DNA k-mers on uncompressed and compressed fastq files. |
... | ... |
@@ -12,3 +11,5 @@ License: Artistic-2.0 |
12 | 11 |
Depends: methods,utils,zlibbioc |
13 | 12 |
LinkingTo: zlibbioc |
14 | 13 |
Suggests: RUnit, BiocGenerics |
14 |
+Packaged: 2016-11-02 14:54:43 UTC; kaisers |
|
15 |
+NeedsCompilation: yes |
... | ... |
@@ -2,8 +2,11 @@ |
2 | 2 |
useDynLib(seqTools) |
3 | 3 |
import(methods) |
4 | 4 |
import(zlibbioc) |
5 |
-importFrom(utils,head) |
|
6 |
-importFrom(utils,tail) |
|
5 |
+importFrom(utils,"head", "tail") |
|
6 |
+importFrom("grDevices", "topo.colors") |
|
7 |
+importFrom("graphics", "axis", "legend", "lines", "plot") |
|
8 |
+importFrom("stats", "as.dist", "hclust") |
|
9 |
+ |
|
7 | 10 |
export( |
8 | 11 |
ascii2char, |
9 | 12 |
countDnaKmers, |
... | ... |
@@ -34,9 +37,11 @@ exportMethods( |
34 | 37 |
collectDur, |
35 | 38 |
gcContent, |
36 | 39 |
gcContentMatrix, |
40 |
+ getGCcontent, |
|
37 | 41 |
fileNames, |
38 | 42 |
getK, |
39 | 43 |
kmerCount, |
44 |
+ kmerSvd, |
|
40 | 45 |
maxSeqLen, |
41 | 46 |
meltDownK, |
42 | 47 |
mergedPhred, |
... | ... |
@@ -1,3 +1,20 @@ |
1 |
+CHANGES IN VERSION 1.9.1 |
|
2 |
+------------------------- |
|
3 |
+ |
|
4 |
+NEW FEATURES |
|
5 |
+ |
|
6 |
+ o (none) |
|
7 |
+ |
|
8 |
+SIGNIFICANT USER-VISIBLE CHANGES |
|
9 |
+ |
|
10 |
+ o (none) |
|
11 |
+ |
|
12 |
+BUG FIXES |
|
13 |
+ |
|
14 |
+ o Added missing entries in NAMESPACE (importFrom for grDevices, graphics and stats; exports for getGCcontent and kmerSvd) |
|
15 |
+ |
|
16 |
+ |
|
17 |
+ |
|
1 | 18 |
CHANGES IN VERSION 0.99.42 |
2 | 19 |
------------------------- |
3 | 20 |
|
... | ... |
@@ -6,13 +6,17 @@ |
6 | 6 |
\alias{gcContentMatrix-methods} |
7 | 7 |
\alias{gcContentMatrix,Fastqq-method} |
8 | 8 |
% |
9 |
+\alias{getGCcontent} |
|
10 |
+\alias{getGCcontent-methods} |
|
11 |
+\alias{getGCcontent,Fastqq-method} |
|
9 | 12 |
\title{ |
10 | 13 |
gcContentMatrix: Returns matrix with read counts for GC content. |
11 | 14 |
} |
12 | 15 |
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - % |
13 | 16 |
% Description |
14 | 17 |
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - % |
15 |
-\description{Returns a matrix with read counts.} |
|
18 |
+\description{Returns a matrix with read counts. getGCcontent returns a |
|
19 |
+numeric vector with the GC content (in percent) for each fastq file.} |
|
16 | 20 |
% |
17 | 21 |
\usage{ |
18 | 22 |
gcContentMatrix(object) |
... | ... |
@@ -23,7 +27,7 @@ gcContentMatrix(object) |
23 | 27 |
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - % |
24 | 28 |
% Details |
25 | 29 |
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - % |
26 |
-\details{The matrix contains one column for each FASTQ file. Rows labeled |
|
30 |
+\details{The matrix contains one column for each FASTQ file. Rows labeled |
|
27 | 31 |
from 0 to 100 which represents percent (\%) GC content. The matrix contains |
28 | 32 |
numbers of reads with the respective proportion of GC (the row labeled 2 contains the |
29 | 33 |
number of reads with 2\% GC content).} |
... | ... |
@@ -32,7 +36,7 @@ gcContentMatrix(object) |
32 | 36 |
% |
33 | 37 |
\references{ |
34 | 38 |
Cock PJA, Fields CJ, Goto N, Heuer ML, Rice PM |
35 |
-The sanger FASTQ file format for sequences with quality scores and |
|
39 |
+The sanger FASTQ file format for sequences with quality scores and |
|
36 | 40 |
the Solexa/Illumina FASTQ variants. |
37 | 41 |
Nucleic Acids Research 2010 Vol.38 No.6 1767-1771 |
38 | 42 |
} |
... | ... |
@@ -47,6 +51,7 @@ setwd(basedir) |
47 | 51 |
fq <- fastqq(c("g4_l101_n100.fq.gz", "g5_l101_n100.fq.gz"), k=4, |
48 | 52 |
probeLabel=c("g4","g5")) |
49 | 53 |
fqm<-gcContentMatrix(fq) |
54 |
+getGCcontent(fq) |
|
50 | 55 |
} |
51 | 56 |
\seealso{gcContent} |
52 | 57 |
\keyword{gcContentMatrix} |
... | ... |
@@ -4,24 +4,76 @@ |
4 | 4 |
## Load prerequisites |
5 | 5 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
6 | 6 |
|
7 |
-require(seqTools) |
|
7 |
+library(seqTools) |
|
8 | 8 |
|
9 | 9 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
10 | 10 |
## Initialize example data |
11 | 11 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
12 | 12 |
|
13 | 13 |
basedir<-system.file("extdata",package="seqTools") |
14 |
-load(file.path(basedir,"test_res.RData")) |
|
15 | 14 |
|
16 | 15 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
17 | 16 |
## Run tests |
18 | 17 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
19 |
-source("test_seqTools.r") |
|
18 |
+ |
|
19 |
+filename <- "test_seqTools.R" |
|
20 |
+basedir <- system.file("extdata", package = "seqTools") |
|
21 |
+load(file.path(basedir,"test_res.RData")) |
|
22 |
+ |
|
23 |
+ |
|
24 |
+ |
|
25 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
26 |
+## kmerCount.fastqq |
|
27 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
28 |
+ |
|
29 |
+fq <- fastqq(file.path(basedir, "test_l5_N.fq"), k = 2) |
|
30 |
+if(!identical(kmerCount(fq), kmer_l5_N)) |
|
31 |
+ stop("[kmerCount.fastqq] Test 1 '", filename, "' FAILED!") |
|
32 |
+ |
|
33 |
+fq<-fastqq(file.path(basedir, "test_l6.fq"), k = 2) |
|
34 |
+if(!identical(kmerCount(fq), kmer_l6)) |
|
35 |
+ stop("[kmerCount.fastqq] Test 2 '", filename, "' FAILED!") |
|
36 |
+ |
|
37 |
+fq<-fastqq(file.path(basedir, "test_l6_multi_line.fq"), k = 2) |
|
38 |
+if(!identical(kmerCount(fq), kmer_l6_ml)) |
|
39 |
+ stop("[kmerCount.fastqq] Test 3 '", filename, "' FAILED!") |
|
40 |
+ |
|
41 |
+fq<-fastqq(file.path(basedir, "test_l10_20_40.fq"),k = 2) |
|
42 |
+if(!identical(kmerCount(fq), kmer_l10_20)) |
|
43 |
+ stop("[kmerCount.fastqq] Test 4 '", filename, "' FAILED!") |
|
44 |
+ |
|
45 |
+fq<-fastqq(file.path(basedir, "test_l10_atcg.fq"), k = 2) |
|
46 |
+if(!identical(kmerCount(fq), kmer_l10_atcg)) |
|
47 |
+ stop("[kmerCount.fastqq] Test 5 '", filename, "' FAILED!") |
|
48 |
+ |
|
49 |
+fq<-fastqq(file.path(basedir, "test_l10_ATCGN.fq"), k = 2) |
|
50 |
+if(!identical(kmerCount(fq), kmer_l10_ATCGN)) |
|
51 |
+ stop("[kmerCount.fastqq] Test 6 '", filename, "' FAILED!") |
|
52 |
+ |
|
53 |
+# Counting k-mers on a Linux ('\n') and an equivalent Windows ('\r\n') |
|
54 |
+# formatted FASTQ file should give equal results |
|
55 |
+# fq<-fastqq(file.path(basedir, c("test_l4.fq", "test_win.fq")), k = 2) |
|
56 |
+# kc <- kmerCount(fq) |
|
57 |
+# if(!all(kc[,1]==kc[,2])) |
|
58 |
+# stop("[kmerCount.fastqq] test_l4: kmerCount unequal to test_win.fq") |
|
20 | 59 |
|
21 | 60 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
22 |
-## Cleanup |
|
61 |
+## ascii2char, char2ascii |
|
23 | 62 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
24 | 63 |
|
64 |
+if(!identical(ascii2char(97:101, multiple = FALSE), "abcde")) |
|
65 |
+ stop("[ascii2char] Test 1 '", filename, "' FAILED!") |
|
66 |
+ |
|
67 |
+if(!identical(ascii2char(97:101, multiple = TRUE), letters[1:5])) |
|
68 |
+ stop("[ascii2char] Test 2 '", filename, "' FAILED!") |
|
69 |
+ |
|
70 |
+if(!identical(ascii2char(char2ascii("abcde")), "abcde")) |
|
71 |
+ stop("[ascii2char] Test 3 '", filename, "' FAILED!") |
|
72 |
+ |
|
73 |
+if(!identical(char2ascii("abcde"), 97:101)) |
|
74 |
+ stop("[char2ascii] Test 1 '", filename, "' FAILED!") |
|
75 |
+ |
|
25 | 76 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
26 | 77 |
## END OF FILE |
27 | 78 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
79 |
+ |
... | ... |
@@ -1,8 +1,24 @@ |
1 | 1 |
|
2 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
3 |
+## Load prerequisites |
|
4 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
5 |
+ |
|
2 | 6 |
library(seqTools) |
3 | 7 |
|
4 |
-filename <- "test_seqTools.r" |
|
8 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
9 |
+## Initialize example data |
|
10 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
11 |
+ |
|
12 |
+basedir<-system.file("extdata",package="seqTools") |
|
13 |
+ |
|
14 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
15 |
+## Run tests |
|
16 |
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
17 |
+ |
|
18 |
+filename <- "test_seqTools.R" |
|
5 | 19 |
basedir <- system.file("extdata", package = "seqTools") |
20 |
+load(file.path(basedir,"test_res.RData")) |
|
21 |
+ |
|
6 | 22 |
|
7 | 23 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
8 | 24 |
## countDnaKmers |
... | ... |
@@ -10,7 +26,7 @@ basedir <- system.file("extdata", package = "seqTools") |
10 | 26 |
|
11 | 27 |
if(!identical(countDnaKmers("ACGT", k = 1, start = 3:1, width = 1), cdk_ACGT)) |
12 | 28 |
stop("[countDnaKmers] Test 1 '", filename, "' FAILED!") |
13 |
- |
|
29 |
+ |
|
14 | 30 |
if(!identical(countDnaKmers("ACGT", k = 1, start = 3, width = 1), cdk_ACGT_one)) |
15 | 31 |
stop("[countDnaKmers] Test 2 '", filename, "' FAILED!") |
16 | 32 |
|
... | ... |
@@ -27,49 +43,6 @@ if(!identical( |
27 | 43 |
stop("[revCountDnaKmers] Test 1 '", filename, "' FAILED!") |
28 | 44 |
|
29 | 45 |
|
30 |
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
31 |
-## kmerCount.fastqq |
|
32 |
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
33 |
- |
|
34 |
-fq <- fastqq(file.path(basedir, "test_l5_N.fq"), k = 2) |
|
35 |
-if(!identical(kmerCount(fq), kmer_l5_N)) |
|
36 |
- stop("[kmerCount.fastqq] Test 1 '", filename, "' FAILED!") |
|
37 |
- |
|
38 |
-fq<-fastqq(file.path(basedir, "test_l6.fq"), k = 2) |
|
39 |
-if(!identical(kmerCount(fq), kmer_l6)) |
|
40 |
- stop("[kmerCount.fastqq] Test 2 '", filename, "' FAILED!") |
|
41 |
- |
|
42 |
-fq<-fastqq(file.path(basedir, "test_l6_multi_line.fq"), k = 2) |
|
43 |
-if(!identical(kmerCount(fq), kmer_l6_ml)) |
|
44 |
- stop("[kmerCount.fastqq] Test 3 '", filename, "' FAILED!") |
|
45 |
- |
|
46 |
-fq<-fastqq(file.path(basedir, "test_l10_20_40.fq"),k = 2) |
|
47 |
-if(!identical(kmerCount(fq), kmer_l10_20)) |
|
48 |
- stop("[kmerCount.fastqq] Test 4 '", filename, "' FAILED!") |
|
49 |
- |
|
50 |
-fq<-fastqq(file.path(basedir, "test_l10_atcg.fq"), k = 2) |
|
51 |
-if(!identical(kmerCount(fq), kmer_l10_atcg)) |
|
52 |
- stop("[kmerCount.fastqq] Test 5 '", filename, "' FAILED!") |
|
53 |
- |
|
54 |
-fq<-fastqq(file.path(basedir, "test_l10_ATCGN.fq"), k = 2) |
|
55 |
-if(!identical(kmerCount(fq), kmer_l10_ATCGN)) |
|
56 |
- stop("[kmerCount.fastqq] Test 6 '", filename, "' FAILED!") |
|
57 |
- |
|
58 |
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
59 |
-## ascii2char, char2ascii |
|
60 |
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
|
61 |
- |
|
62 |
-if(!identical(ascii2char(97:101, multiple = FALSE), "abcde")) |
|
63 |
- stop("[ascii2char] Test 1 '", filename, "' FAILED!") |
|
64 |
- |
|
65 |
-if(!identical(ascii2char(97:101, multiple = TRUE), letters[1:5])) |
|
66 |
- stop("[ascii2char] Test 2 '", filename, "' FAILED!") |
|
67 |
- |
|
68 |
-if(!identical(ascii2char(char2ascii("abcde")), "abcde")) |
|
69 |
- stop("[ascii2char] Test 3 '", filename, "' FAILED!") |
|
70 |
- |
|
71 |
-if(!identical(char2ascii("abcde"), 97:101)) |
|
72 |
- stop("[char2ascii] Test 1 '", filename, "' FAILED!") |
|
73 | 46 |
|
74 | 47 |
## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## |
75 | 48 |
## END OF FILE |