- Make generic with methods for _BSgenome_ and _DNAStringSet_ (e.g., a FASTA file imported with `rtracklayer::import()`)
... | ... |
@@ -42,6 +42,8 @@ import(DelayedArray) |
42 | 42 |
import(BiocParallel) |
43 | 43 |
importFrom(readr, "cols", "cols_only", "col_character", "col_integer", |
44 | 44 |
"col_skip", "col_double", "col_factor", "read_tsv", "tokenizer_tsv") |
45 |
+importFrom(Biostrings, "DNAString", "vmatchPattern", "reverseComplement") |
|
46 |
+ |
|
45 | 47 |
## |
46 | 48 |
## Exporting |
47 | 49 |
## |
... | ... |
@@ -66,7 +68,8 @@ exportMethods("[", "show", |
66 | 68 |
"sampleNames", "sampleNames<-", |
67 | 69 |
"pData", "pData<-", |
68 | 70 |
"findOverlaps", "subsetByOverlaps", |
69 |
- "combine", "updateObject") |
|
71 |
+ "combine", "updateObject", |
|
72 |
+ "findCytosines") |
|
70 | 73 |
|
71 | 74 |
export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats", |
72 | 75 |
"collapseBSseq", "orderBSseq", "hasBeenSmoothed", "chrSelectBSseq", |
... | ... |
@@ -76,7 +79,8 @@ export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats", |
76 | 79 |
"read.umtab", "read.umtab2", "read.bsmooth", "read.bismark", |
77 | 80 |
"poissonGoodnessOfFit", "binomialGoodnessOfFit", |
78 | 81 |
"data.frame2GRanges", "BSseqTstat", |
79 |
- "BSseqStat") |
|
82 |
+ "BSseqStat", |
|
83 |
+ "findCytosines") |
|
80 | 84 |
|
81 | 85 |
S3method("print", "chisqGoodnessOfFit") |
82 | 86 |
S3method("plot", "chisqGoodnessOfFit") |
... | ... |
@@ -1,75 +1,56 @@ |
1 |
-# TODO: Need to update NAMESPACE |
|
2 |
-# TODO: Search both forward and reverse strands in a single search. |
|
3 |
-# TODO: Default value of seqlevels as missing or NULL? Check what other |
|
4 |
-# functions with this arg use. |
|
5 |
-findCytosines <- function(BSgenome, context = c("CG", "CA", "CC", "CT"), |
|
6 |
- seqlevels, verbose = getOption("verbose")) { |
|
7 |
- stopifnot(is(BSgenome, "BSgenome")) |
|
8 |
- context <- match.arg(context) |
|
9 |
- context <- DNAString(context) |
|
10 |
- seqinfo <- seqinfo(BSgenome) |
|
11 |
- if (missing(seqlevels)) { |
|
12 |
- seqlevels <- seqlevels(seqinfo) |
|
13 |
- } else { |
|
14 |
- seqinfo <- seqinfo[seqlevels] |
|
1 |
+# Exported generics ------------------------------------------------------------ |
|
2 |
+ |
|
3 |
+setGeneric( |
|
4 |
+ "findCytosines", |
|
5 |
+ function(x, context, seqlevels) standardGeneric("findCytosines"), |
|
6 |
+ signature = "x") |
|
7 |
+ |
|
8 |
+# Exported methods ------------------------------------------------------------- |
|
9 |
+ |
|
10 |
+setMethod( |
|
11 |
+ "findCytosines", |
|
12 |
+ "BSgenome", |
|
13 |
+ function(x, context, seqlevels = seqlevels(x)) { |
|
14 |
+ # NOTE: vmatchPattern,BSgenome-method returns a GRanges instance. |
|
15 |
+ gr <- vmatchPattern( |
|
16 |
+ pattern = context, |
|
17 |
+ subject = x, |
|
18 |
+ exclude = setdiff(seqlevels(x), seqlevels), |
|
19 |
+ fixed = "subject") |
|
20 |
+ # NOTE: Want just the position of the cytosine. |
|
21 |
+ resize(gr, width = 1L, fix = "start") |
|
22 |
+ } |
|
23 |
+) |
|
24 |
+ |
|
25 |
+# TODO: Replace with vmatchPDict() when/if this is available, where the pdict |
|
26 |
+# argument is a DNAStringSet including the original context and its |
|
27 |
+# reverse complement. |
|
28 |
+setMethod( |
|
29 |
+ "findCytosines", |
|
30 |
+ "DNAStringSet", |
|
31 |
+ function(x, context, seqlevels = seqlevels(x)) { |
|
32 |
+ context <- DNAString(context) |
|
33 |
+ x <- x[seqlevels] |
|
34 |
+ fwd_gr <- as( |
|
35 |
+ vmatchPattern( |
|
36 |
+ pattern = context, |
|
37 |
+ subject = x, |
|
38 |
+ fixed = "subject"), |
|
39 |
+ "GRanges") |
|
40 |
+ strand(fwd_gr) <- "+" |
|
41 |
+ rev_gr <- as( |
|
42 |
+ vmatchPattern( |
|
43 |
+ pattern = reverseComplement(context), |
|
44 |
+ subject = x, |
|
45 |
+ fixed = "subject"), |
|
46 |
+ "GRanges") |
|
47 |
+ strand(rev_gr) <- "-" |
|
48 |
+ gr <- c(fwd_gr, rev_gr) |
|
49 |
+ # NOTE: Want just the position of the cytosine. |
|
50 |
+ resize(gr, width = 1L, fix = "start") |
|
15 | 51 |
} |
16 |
- bsparams <- new( |
|
17 |
- "BSParams", |
|
18 |
- X = BSgenome, |
|
19 |
- FUN = matchPattern, |
|
20 |
- exclude = setdiff(seqlevels(BSgenome), seqlevels)) |
|
21 |
- current_verbose <- getOption("verbose") |
|
22 |
- options(verbose = verbose) |
|
23 |
- on.exit(options(verbose = current_verbose), add = TRUE) |
|
24 |
- fwd_strand <- IRangesList( |
|
25 |
- endoapply(bsapply(bsparams, pattern = context), as, "IRanges")) |
|
26 |
- n <- sum(as.numeric(lengths(fwd_strand))) |
|
27 |
- fwd_strand <- .FWGRanges( |
|
28 |
- seqnames = Rle( |
|
29 |
- factor(names(fwd_strand), levels = seqlevels(seqinfo)), |
|
30 |
- lengths(fwd_strand)), |
|
31 |
- ranges = .FWIRanges( |
|
32 |
- start = unlist(start(fwd_strand), use.names = FALSE), |
|
33 |
- width = 1L), |
|
34 |
- strand = Rle(strand("+"), n), |
|
35 |
- seqinfo = seqinfo, |
|
36 |
- elementMetadata = S4Vectors:::make_zero_col_DataFrame(n)) |
|
37 |
- rev_strand <- IRangesList( |
|
38 |
- endoapply( |
|
39 |
- bsapply(bsparams, pattern = reverseComplement(context)), |
|
40 |
- as, |
|
41 |
- "IRanges")) |
|
42 |
- n <- sum(as.numeric(lengths(rev_strand))) |
|
43 |
- rev_strand <- .FWGRanges( |
|
44 |
- seqnames = Rle( |
|
45 |
- factor(names(rev_strand), levels = seqlevels(seqinfo)), |
|
46 |
- lengths(rev_strand)), |
|
47 |
- ranges = .FWIRanges( |
|
48 |
- start = unlist(start(rev_strand), use.names = FALSE), |
|
49 |
- width = 1L), |
|
50 |
- strand = Rle(strand("-"), n), |
|
51 |
- seqinfo = seqinfo, |
|
52 |
- elementMetadata = S4Vectors:::make_zero_col_DataFrame(n)) |
|
53 |
- sort(c(fwd_strand, rev_strand)) |
|
54 |
-} |
|
52 |
+) |
|
55 | 53 |
|
56 |
-# TODO: findCytosines from a FASTA file (e.g., for lambda phage) |
|
57 |
-# lambda <- import("http://www.ebi.ac.uk/ena/data/view/J02459&display=fasta&download=fasta&filename=J02459.fasta") |
|
54 |
+# TODOs ------------------------------------------------------------------------ |
|
58 | 55 |
|
59 |
-# |
|
60 |
-# hg19_si <- suppressWarnings(keepStandardChromosomes( |
|
61 |
-# seqinfo(BSgenome.Hsapiens.UCSC.hg19))) |
|
62 |
-# params <- new("BSParams", |
|
63 |
-# X = BSgenome.Hsapiens.UCSC.hg19, |
|
64 |
-# FUN = matchPattern, |
|
65 |
-# exclude = setdiff(seqnames(BSgenome.Hsapiens.UCSC.hg19), |
|
66 |
-# seqnames(hg19_si))) |
|
67 |
-# options(verbose = TRUE) |
|
68 |
-# hg19_cpgs <- IRangesList( |
|
69 |
-# endoapply(bsapply(params, pattern = "CG"), as, "IRanges")) |
|
70 |
-# options(verbose = FALSE) |
|
71 |
-# hg19_cpgs <- GRanges(seqnames = Rle(names(hg19_cpgs), lengths(hg19_cpgs)), |
|
72 |
-# ranges = unlist(hg19_cpgs, use.names = FALSE), |
|
73 |
-# strand = "*", |
|
74 |
-# seqinfo = hg19_si) |
|
75 |
-# width(hg19_cpgs) <- 1L |
|
56 |
+# TODO: Default value of seqlevels isn't working. |