Browse code

version 1.9.40

Add read.rmskFasta function

Ge Tan authored on 20/09/2016 10:30:22
Showing 7 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: CNEr 
2
-Version: 1.9.39
3
-Date: 2016-09-12
2
+Version: 1.9.40
3
+Date: 2016-09-20
4 4
 Title: CNE Detection and Visualization
5 5
 Description: Large-scale identification and advanced visualization 
6 6
              of sets of conserved noncoding elements.
... ...
@@ -35,7 +35,8 @@ importMethodsFrom(GenomicAlignments, last)
35 35
 importFrom(GenomeInfoDb, Seqinfo)
36 36
 importFrom(GenomicRanges, GRanges)
37 37
 importFrom(GenomicAlignments, explodeCigarOps, CIGAR_OPS, explodeCigarOpLengths)
38
-importFrom(Biostrings, DNAStringSet, DNA_BASES, fasta.seqlengths)
38
+importFrom(Biostrings, DNAStringSet, DNA_BASES, fasta.seqlengths,
39
+           readBStringSet)
39 40
 importFrom(RSQLite, SQLite)
40 41
 importFrom(methods, is, new, "as", "extends", "validObject")
41 42
 importFrom(rtracklayer, TwoBitFile)
... ...
@@ -126,6 +127,7 @@ export(
126 127
   writeAxt,
127 128
   read.rmMask.GRanges,
128 129
   saveCNEToSQLite,
130
+  read.rmskFasta,
129 131
 
130 132
   ## ceScan.R
131 133
   
... ...
@@ -1,8 +1,10 @@
1 1
 CHANGES IN Bioc 3.4
2
+------------------------
2 3
 NEW FEATURES
3 4
     o Updated CNE class for storing all the information about 
4
-      running the pipeline
5
-    o Add read.rmMask.GRanges to read RepeatMasker .out file
5
+      running the pipeline.
6
+    o Add read.rmMask.GRanges to read RepeatMasker .out file.
7
+    o Add read.rmskFasta to read soft repeat masked fasta file.
6 8
     o Add the distribution plot of axt alignment matches.
7 9
     o Add the distribution plot of CNE length.
8 10
     o Add the syntenicDotplot for axt alignment and GRangePairs.
... ...
@@ -12,7 +14,10 @@ NEW FEATURES
12 14
     o Add parallel subAxt for Axt alignment.
13 15
     o Add the plot of genomic distribution of CNE.
14 16
     o Add the function to make bed and bigwig files of CNEs.
15
-
17
+BUG FIXES
18
+    o Instead of an error, an empty GRangePairs is returned when no CNEs
19
+      identified.
20
+    
16 21
 CHANGES IN Bioc 3.3
17 22
 ------------------------
18 23
 BUG FIXES
... ...
@@ -146,6 +146,23 @@ read.rmMask.GRanges <- function(fn){
146 146
   return(rmMaskGRanges)
147 147
 }
148 148
 
149
+### -----------------------------------------------------------------
150
+### read a soft-repeatMasked fasta (repeats in lower case) and 
151
+### get the repeats regions
152
+### Exported!
153
+read.rmskFasta <- function(fn){
154
+  seq <- readBStringSet(fn)
155
+  names(seq) <- sapply(strsplit(names(seq), " "), "[", 1)
156
+  foo3 <- lapply(lapply(strsplit(as.character(seq),"") ,"%in%",
157
+                       c("a","c","g","t")), Rle)
158
+  foo4 <- lapply(foo3, as, "IRanges")
159
+  foo5 <- GRanges(seqnames=Rle(names(foo4), sapply(foo4, length)),
160
+                  ranges=IRanges(start=unlist(sapply(foo4, start)),
161
+                                 end=unlist(sapply(foo4, end))),
162
+                  strand="+")
163
+  return(foo5)
164
+}
165
+
149 166
 ### -----------------------------------------------------------------
150 167
 ### save the CNE class or GRangePairs object into a local SQLite database
151 168
 ### Exported!!
152 169
new file mode 100644
... ...
@@ -0,0 +1,4 @@
1
+>chr1
2
+ACCCTgcTT
3
+>chr2
4
+GCattTT
0 5
\ No newline at end of file
1 6
new file mode 100644
... ...
@@ -0,0 +1,35 @@
1
+\name{read.rmskFasta}
2
+\alias{read.rmskFasta}
3
+\title{
4
+  Read a soft reoeat masked fasta
5
+}
6
+\description{
7
+  Read a soft repeat masked fasta file into a \code{GRanges}.
8
+}
9
+\usage{
10
+  read.rmskFasta(fn)
11
+}
12
+
13
+\arguments{
14
+  \item{fn}{
15
+    \code{character}(1): The filename of the soft repeat masked fasta.
16
+  }
17
+}
18
+\details{
19
+  Only the lower case based ("a", "c", "g", "t") are considered in the
20
+  soft repeat masked fasta.
21
+}
22
+\value{
23
+  \code{GRanges} object with coordinates of repeat masked regions.
24
+}
25
+\author{
26
+  Ge Tan
27
+}
28
+\seealso{
29
+  \code{\link{read.rmMask.GRanges}}
30
+}
31
+\examples{
32
+  fn <- file.path(system.file("extdata", package="CNEr"),
33
+                  "rmsk.fa")
34
+  read.rmskFasta(fn)
35
+}
0 36
\ No newline at end of file
... ...
@@ -102,4 +102,15 @@ test_that("test_readCNERangesFromSQLite", {
102 102
   cneGRangePairs <- readCNERangesFromSQLite(dbName=dbName, 
103 103
                                             tableName="danRer10_hg38_45_50")
104 104
   expect_equal(length(cneGRangePairs), 3209L)
105
+})
106
+
107
+test_that("test_read.rmskFasta", {
108
+  library(GenomicRanges)
109
+  fn <- file.path(system.file("extdata", package="CNEr"),
110
+                  "rmsk.fa")
111
+  ans <- read.rmskFasta(fn)
112
+  expect_equal(ans, GRanges(seqnames=c("chr1", "chr2"),
113
+                            ranges=IRanges(start=c(6,3),
114
+                                           end=c(7,5)),
115
+                            strand="+"))
105 116
 })
106 117
\ No newline at end of file