... | ... |
@@ -35,7 +35,8 @@ importMethodsFrom(GenomicAlignments, last) |
35 | 35 |
importFrom(GenomeInfoDb, Seqinfo) |
36 | 36 |
importFrom(GenomicRanges, GRanges) |
37 | 37 |
importFrom(GenomicAlignments, explodeCigarOps, CIGAR_OPS, explodeCigarOpLengths) |
38 |
-importFrom(Biostrings, DNAStringSet, DNA_BASES, fasta.seqlengths) |
|
38 |
+importFrom(Biostrings, DNAStringSet, DNA_BASES, fasta.seqlengths, |
|
39 |
+ readBStringSet) |
|
39 | 40 |
importFrom(RSQLite, SQLite) |
40 | 41 |
importFrom(methods, is, new, "as", "extends", "validObject") |
41 | 42 |
importFrom(rtracklayer, TwoBitFile) |
... | ... |
@@ -126,6 +127,7 @@ export( |
126 | 127 |
writeAxt, |
127 | 128 |
read.rmMask.GRanges, |
128 | 129 |
saveCNEToSQLite, |
130 |
+ read.rmskFasta, |
|
129 | 131 |
|
130 | 132 |
## ceScan.R |
131 | 133 |
|
... | ... |
@@ -1,8 +1,10 @@ |
1 | 1 |
CHANGES IN Bioc 3.4 |
2 |
+------------------------ |
|
2 | 3 |
NEW FEATURES |
3 | 4 |
o Updated CNE class for storing all the information about |
4 |
- running the pipeline |
|
5 |
- o Add read.rmMask.GRanges to read RepeatMasker .out file |
|
5 |
+ running the pipeline. |
|
6 |
+ o Add read.rmMask.GRanges to read RepeatMasker .out file. |
|
7 |
+ o Add read.rmskFasta to read soft repeat masked fasta file. |
|
6 | 8 |
o Add the distribution plot of axt alignment matches. |
7 | 9 |
o Add the distribution plot of CNE length. |
8 | 10 |
o Add the syntenicDotplot for axt alignment and GRangePairs. |
... | ... |
@@ -12,7 +14,10 @@ NEW FEATURES |
12 | 14 |
o Add parallel subAxt for Axt alignment. |
13 | 15 |
o Add the plot of genomic distribution of CNE. |
14 | 16 |
o Add the function to make bed and bigwig files of CNEs. |
15 |
- |
|
17 |
+BUG FIXES |
|
18 |
+ o Instead of an error, an empty GRangePairs is returned when no CNEs |
|
19 |
+ identified. |
|
20 |
+ |
|
16 | 21 |
CHANGES IN Bioc 3.3 |
17 | 22 |
------------------------ |
18 | 23 |
BUG FIXES |
... | ... |
@@ -146,6 +146,23 @@ read.rmMask.GRanges <- function(fn){ |
146 | 146 |
return(rmMaskGRanges) |
147 | 147 |
} |
148 | 148 |
|
149 |
+### ----------------------------------------------------------------- |
|
150 |
+### read a soft-repeatMasked fasta (repeats in lower case) and |
|
151 |
+### get the repeats regions |
|
152 |
+### Exported! |
|
153 |
+read.rmskFasta <- function(fn){ |
|
154 |
+ seq <- readBStringSet(fn) |
|
155 |
+ names(seq) <- sapply(strsplit(names(seq), " "), "[", 1) |
|
156 |
+ foo3 <- lapply(lapply(strsplit(as.character(seq),"") ,"%in%", |
|
157 |
+ c("a","c","g","t")), Rle) |
|
158 |
+ foo4 <- lapply(foo3, as, "IRanges") |
|
159 |
+ foo5 <- GRanges(seqnames=Rle(names(foo4), sapply(foo4, length)), |
|
160 |
+ ranges=IRanges(start=unlist(sapply(foo4, start)), |
|
161 |
+ end=unlist(sapply(foo4, end))), |
|
162 |
+ strand="+") |
|
163 |
+ return(foo5) |
|
164 |
+} |
|
165 |
+ |
|
149 | 166 |
### ----------------------------------------------------------------- |
150 | 167 |
### save the CNE class or GRangePairs object into a local SQLite database |
151 | 168 |
### Exported!! |
1 | 6 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,35 @@ |
1 |
+\name{read.rmskFasta} |
|
2 |
+\alias{read.rmskFasta} |
|
3 |
+\title{ |
|
4 |
+ Read a soft reoeat masked fasta |
|
5 |
+} |
|
6 |
+\description{ |
|
7 |
+ Read a soft repeat masked fasta file into a \code{GRanges}. |
|
8 |
+} |
|
9 |
+\usage{ |
|
10 |
+ read.rmskFasta(fn) |
|
11 |
+} |
|
12 |
+ |
|
13 |
+\arguments{ |
|
14 |
+ \item{fn}{ |
|
15 |
+ \code{character}(1): The filename of the soft repeat masked fasta. |
|
16 |
+ } |
|
17 |
+} |
|
18 |
+\details{ |
|
19 |
+ Only the lower case based ("a", "c", "g", "t") are considered in the |
|
20 |
+ soft repeat masked fasta. |
|
21 |
+} |
|
22 |
+\value{ |
|
23 |
+ \code{GRanges} object with coordinates of repeat masked regions. |
|
24 |
+} |
|
25 |
+\author{ |
|
26 |
+ Ge Tan |
|
27 |
+} |
|
28 |
+\seealso{ |
|
29 |
+ \code{\link{read.rmMask.GRanges}} |
|
30 |
+} |
|
31 |
+\examples{ |
|
32 |
+ fn <- file.path(system.file("extdata", package="CNEr"), |
|
33 |
+ "rmsk.fa") |
|
34 |
+ read.rmskFasta(fn) |
|
35 |
+} |
|
0 | 36 |
\ No newline at end of file |
... | ... |
@@ -102,4 +102,15 @@ test_that("test_readCNERangesFromSQLite", { |
102 | 102 |
cneGRangePairs <- readCNERangesFromSQLite(dbName=dbName, |
103 | 103 |
tableName="danRer10_hg38_45_50") |
104 | 104 |
expect_equal(length(cneGRangePairs), 3209L) |
105 |
+}) |
|
106 |
+ |
|
107 |
+test_that("test_read.rmskFasta", { |
|
108 |
+ library(GenomicRanges) |
|
109 |
+ fn <- file.path(system.file("extdata", package="CNEr"), |
|
110 |
+ "rmsk.fa") |
|
111 |
+ ans <- read.rmskFasta(fn) |
|
112 |
+ expect_equal(ans, GRanges(seqnames=c("chr1", "chr2"), |
|
113 |
+ ranges=IRanges(start=c(6,3), |
|
114 |
+ end=c(7,5)), |
|
115 |
+ strand="+")) |
|
105 | 116 |
}) |
106 | 117 |
\ No newline at end of file |