git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/CNEr@86790 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,25 @@ |
1 |
+Package: CNEr |
|
2 |
+Version: 0.99.8 |
|
3 |
+Date: 2014-02-13 |
|
4 |
+Title: CNE detection and visualization. |
|
5 |
+Description: Large-scale identification and advanced visualization of |
|
6 |
+ sets of conserved noncoding elements. |
|
7 |
+Author: Ge Tan <ge.tan09@imperial.ac.uk> |
|
8 |
+Maintainer: Ge Tan <ge.tan09@imperial.ac.uk> |
|
9 |
+Imports: Biostrings(>= 2.26.3), RSQLite(>= 0.11.4), GenomicRanges(>= |
|
10 |
+ 1.12.5), rtracklayer(>= 1.20.4), RMySQL(>= 0.9-3), XVector(>= |
|
11 |
+ 0.2.0), DBI(>= 0.2-7), GenomicAlignments(>= 0.99.17), methods, |
|
12 |
+ IRanges(>= 1.19.38) |
|
13 |
+Depends: R (>= 3.0.2) |
|
14 |
+Suggests: Gviz(>= 1.7.4), RUnit, BiocGenerics |
|
15 |
+LinkingTo: IRanges, XVector |
|
16 |
+System Requirements: blat |
|
17 |
+License: GPL-2 | file LICENSE |
|
18 |
+License_restricts_use: yes |
|
19 |
+URL: http://ancora.genereg.net/ |
|
20 |
+Type: Package |
|
21 |
+biocViews: GeneRegulation, Visualization, DataImport |
|
22 |
+NeedsCompilation: yes |
|
23 |
+LazyData: no |
|
24 |
+Collate: AllGenerics.r AllClasses.r utils.r ceScan.r plot.r |
|
25 |
+ makeGeneDbFromUCSC.r io.r |
0 | 26 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,17 @@ |
1 |
+CONTENTS AND COPYRIGHT |
|
2 |
+ |
|
3 |
+The source code under src/ucsc comes from Jim Kent's source utilities. |
|
4 |
+Any usage of this code should follow the license restriction. |
|
5 |
+ |
|
6 |
+The following lines are taken from README of Kent's utilities. |
|
7 |
+ |
|
8 |
+This directory contains the entire source tree for the |
|
9 |
+UCSC Genome Bioinformatics Group's suite of biological analysis |
|
10 |
+and web display programs as well as some of Jim Kent's own tools. |
|
11 |
+All files are copyrighted, but license is hereby granted for personal, |
|
12 |
+academic, and non-profit use. A license is also granted for the contents |
|
13 |
+of the src/lib, src/inc and src/utils and python directories for commercial |
|
14 |
+users. Commercial users should contact kent@soe.ucsc.edu for access to other |
|
15 |
+modules. Commercial users interested in the UCSC Genome Browser in particular |
|
16 |
+please see |
|
17 |
+ http://genome.ucsc.edu/license/. |
0 | 18 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,110 @@ |
1 |
+useDynLib(CNEr, .registration = TRUE) |
|
2 |
+ |
|
3 |
+### ----------------------------------------------------------------- |
|
4 |
+### Import |
|
5 |
+### ----------------------------------------------------------------- |
|
6 |
+import(IRanges) |
|
7 |
+ |
|
8 |
+### ----------------------------------------------------------------- |
|
9 |
+### Import S4 classes defined in other packages |
|
10 |
+### |
|
11 |
+#import(Biostrings) |
|
12 |
+#import(GenomicRanges) |
|
13 |
+importClassesFrom(methods, ANY, character, integer, missing) |
|
14 |
+ |
|
15 |
+### ----------------------------------------------------------------- |
|
16 |
+### Import S4 methods defined in other packages |
|
17 |
+### |
|
18 |
+importMethodsFrom(rtracklayer, import.bed) |
|
19 |
+importMethodsFrom(GenomicRanges, seqnames, seqlengths, strand) |
|
20 |
+importMethodsFrom(XVector, subseq) |
|
21 |
+importMethodsFrom(DBI, dbGetQuery, dbConnect, dbDisconnect, dbWriteTable) |
|
22 |
+importMethodsFrom(methods, initialize, show) |
|
23 |
+ |
|
24 |
+### ----------------------------------------------------------------- |
|
25 |
+### Import ordinary functions, variables in other packages |
|
26 |
+### |
|
27 |
+importFrom(GenomicRanges, GRanges, Seqinfo) |
|
28 |
+importFrom(GenomicAlignments, explodeCigarOps, CIGAR_OPS, explodeCigarOpLengths) |
|
29 |
+importFrom(Biostrings, DNAStringSet) |
|
30 |
+importFrom(RMySQL, MySQL) |
|
31 |
+importFrom(RSQLite, SQLite) |
|
32 |
+importFrom(methods, is, new) |
|
33 |
+ |
|
34 |
+### ----------------------------------------------------------------- |
|
35 |
+### Export S4 classes defined in CNEr |
|
36 |
+### |
|
37 |
+exportClasses(Axt, |
|
38 |
+ CNE) |
|
39 |
+ |
|
40 |
+### ----------------------------------------------------------------- |
|
41 |
+### Export S4 methods for generics not defined in CNEr |
|
42 |
+### |
|
43 |
+exportMethods( |
|
44 |
+ "[", |
|
45 |
+ c, |
|
46 |
+ ## class.r |
|
47 |
+ length, |
|
48 |
+ score |
|
49 |
+) |
|
50 |
+ |
|
51 |
+### ----------------------------------------------------------------- |
|
52 |
+### Export non-generic functions |
|
53 |
+### |
|
54 |
+ |
|
55 |
+export( |
|
56 |
+ ## utils.r |
|
57 |
+ reverseCigar, |
|
58 |
+ binFromCoordRange, |
|
59 |
+ binRangesFromCoordRange, |
|
60 |
+ binRestrictionString, |
|
61 |
+ readCNERangesFromSQLite, |
|
62 |
+ #queryAnnotationSQLite, |
|
63 |
+ fetchChromSizes, |
|
64 |
+ |
|
65 |
+ ## AllClasses.r |
|
66 |
+ Axt, |
|
67 |
+ CNE, |
|
68 |
+ |
|
69 |
+ # io.r |
|
70 |
+ readBed, |
|
71 |
+ readAxt, |
|
72 |
+ axtInfo, |
|
73 |
+ writeAxt, |
|
74 |
+ |
|
75 |
+ ## ceScan.r |
|
76 |
+ ceScan, |
|
77 |
+ cneMerge, |
|
78 |
+ blatCNE, |
|
79 |
+ ceScanOneStep, |
|
80 |
+ |
|
81 |
+ ## makeGeneDbFromUCSC.r |
|
82 |
+ #supportedUCSCtables, |
|
83 |
+ #queryrefGene, queryknownGene, queryensGene, |
|
84 |
+ #makeGeneDbFromUCSC, |
|
85 |
+ |
|
86 |
+ ## plot.r |
|
87 |
+ CNEDensity |
|
88 |
+) |
|
89 |
+ |
|
90 |
+### ----------------------------------------------------------------- |
|
91 |
+### Export S4 generics defined in CNEr + export corresponding methods |
|
92 |
+### |
|
93 |
+ |
|
94 |
+exportMethods( |
|
95 |
+ ## AllClasses.r |
|
96 |
+ targetRanges, queryRanges, |
|
97 |
+ targetSeqs, querySeqs, |
|
98 |
+ symCount, |
|
99 |
+ subAxt, |
|
100 |
+ assembly1, assembly2, |
|
101 |
+ CNE1, CNE2, |
|
102 |
+ thresholds, |
|
103 |
+ CNEMerged, CNERepeatsFiltered, |
|
104 |
+ |
|
105 |
+ ## utils.r |
|
106 |
+ saveCNEToSQLite, |
|
107 |
+ ## plot.r |
|
108 |
+ CNEDensity |
|
109 |
+) |
|
110 |
+ |
0 | 111 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,382 @@ |
1 |
+ |
|
2 |
+### ----------------------------------------------------------------- |
|
3 |
+### Axt class |
|
4 |
+### Exported! |
|
5 |
+setClass(Class="Axt", |
|
6 |
+ slots=c(targetRanges="GRanges", |
|
7 |
+ targetSeqs="DNAStringSet", |
|
8 |
+ queryRanges="GRanges", |
|
9 |
+ querySeqs="DNAStringSet", |
|
10 |
+ score="integer", |
|
11 |
+ symCount="integer" |
|
12 |
+ ) |
|
13 |
+ ) |
|
14 |
+ |
|
15 |
+setValidity("Axt", |
|
16 |
+ function(object){ |
|
17 |
+ if(!isConstant(c(length(object@targetRanges), |
|
18 |
+ length(object@targetSeqs), |
|
19 |
+ length(object@queryRanges), |
|
20 |
+ length(object@querySeqs), |
|
21 |
+ length(object@score), |
|
22 |
+ length(object@symCount)))) |
|
23 |
+ return("The lengths of targetRanges, targetSeqs, |
|
24 |
+ queryRanges, querySeqs, score and symCount |
|
25 |
+ must have be same!") |
|
26 |
+ if(any(object@symCount <= 0L)) |
|
27 |
+ return("Then symCount must be larger than 0!") |
|
28 |
+ return(TRUE) |
|
29 |
+ } |
|
30 |
+ ) |
|
31 |
+ |
|
32 |
+### ----------------------------------------------------------------- |
|
33 |
+### CNE class |
|
34 |
+### |
|
35 |
+setClass(Class="CNE", |
|
36 |
+ slots=c(assembly1="character", |
|
37 |
+ assembly2="character", |
|
38 |
+ thresholds="character", |
|
39 |
+ CNE1="list", |
|
40 |
+ CNE2="list", |
|
41 |
+ CNEMerged="list", |
|
42 |
+ CNERepeatsFiltered="list", |
|
43 |
+ alignMethod="character" |
|
44 |
+ ) |
|
45 |
+ ) |
|
46 |
+ |
|
47 |
+setValidity("CNE", |
|
48 |
+ function(object){ |
|
49 |
+ if(length(object@assembly1) != 1L) |
|
50 |
+ return("The name of assembly1 must be length 1!") |
|
51 |
+ if(length(object@alignMethod) != 1L) |
|
52 |
+ return("The align method must be length 1!") |
|
53 |
+ if(length(object@assembly2) != 1L) |
|
54 |
+ return("The name of assembly2 must be length 1!") |
|
55 |
+ if(!all(grepl("^\\d+_\\d+$", object@thresholds))) |
|
56 |
+ return("The thresholds must be in format of 49_50!") |
|
57 |
+ if(any(as.integer( |
|
58 |
+ sapply(strsplit(object@thresholds, "_"), "[", 2)) |
|
59 |
+ < as.integer( |
|
60 |
+ sapply(strsplit(object@thresholds, "_"), "[", 1)))) |
|
61 |
+ return("The window size cannot be smaller than identity score!") |
|
62 |
+ if(length(object@CNE1) != length(object@thresholds) || |
|
63 |
+ length(object@CNE2) != length(object@thresholds) || |
|
64 |
+ length(object@CNEMerged) != length(object@thresholds) || |
|
65 |
+ length(object@CNERepeatsFiltered) != length(object@thresholds)) |
|
66 |
+ return("The number of cne tables must be same with |
|
67 |
+ number of thresholds!") |
|
68 |
+ return(TRUE) |
|
69 |
+ } |
|
70 |
+ ) |
|
71 |
+ |
|
72 |
+ |
|
73 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
74 |
+### Axt Slot getters and setters. |
|
75 |
+### |
|
76 |
+setMethod("targetRanges", "Axt", function(x) x@targetRanges) |
|
77 |
+setMethod("targetSeqs", "Axt", function(x) x@targetSeqs) |
|
78 |
+setMethod("queryRanges", "Axt", function(x) x@queryRanges) |
|
79 |
+setMethod("querySeqs", "Axt", function(x) x@querySeqs) |
|
80 |
+setMethod("score", "Axt", function(x) x@score) |
|
81 |
+setMethod("symCount", "Axt", function(x) x@symCount) |
|
82 |
+setMethod("length", "Axt", function(x) length(targetRanges(x))) |
|
83 |
+ |
|
84 |
+### ----------------------------------------------------------------- |
|
85 |
+### CNE Slot getters and setters. |
|
86 |
+### |
|
87 |
+setMethod("assembly1", "CNE", function(x) x@assembly1) |
|
88 |
+setMethod("assembly2", "CNE", function(x) x@assembly2) |
|
89 |
+setMethod("CNE1", "CNE", function(x) x@CNE1) |
|
90 |
+setMethod("CNE2", "CNE", function(x) x@CNE2) |
|
91 |
+setMethod("thresholds", "CNE", function(x) x@thresholds) |
|
92 |
+setMethod("CNEMerged", "CNE", function(x) x@CNEMerged) |
|
93 |
+setMethod("CNERepeatsFiltered", "CNE", function(x) x@CNERepeatsFiltered) |
|
94 |
+ |
|
95 |
+### -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
96 |
+### Axt Constructor. |
|
97 |
+### |
|
98 |
+Axt <- function(targetRanges=GRanges(), targetSeqs=DNAStringSet(), |
|
99 |
+ queryRanges=GRanges(), querySeqs=DNAStringSet(), |
|
100 |
+ score=integer(), symCount=integer()){ |
|
101 |
+ new("Axt", targetRanges=targetRanges, targetSeqs=targetSeqs, |
|
102 |
+ queryRanges=queryRanges, querySeqs=querySeqs, |
|
103 |
+ score=score, symCount=symCount) |
|
104 |
+} |
|
105 |
+ |
|
106 |
+### ----------------------------------------------------------------- |
|
107 |
+### CNE constructor. |
|
108 |
+### |
|
109 |
+CNE <- function(assembly1=character(), assembly2=character(), |
|
110 |
+ thresholds=character(), |
|
111 |
+ CNE1=list(), CNE2=list(), |
|
112 |
+ CNEMerged=list(), CNERepeatsFiltered=list(), |
|
113 |
+ alignMethod=character() |
|
114 |
+ ){ |
|
115 |
+ new("CNE", assembly1=assembly1, assembly2=assembly2, |
|
116 |
+ thresholds=thresholds, CNE1=CNE1, CNE2=CNE2, |
|
117 |
+ CNEMerged=CNEMerged, CNERepeatsFiltered=CNERepeatsFiltered, |
|
118 |
+ alignMethod=alignMethod) |
|
119 |
+} |
|
120 |
+ |
|
121 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
122 |
+### Updating and cloning. |
|
123 |
+### |
|
124 |
+### An object is either 'update'd in place (usually with a replacement |
|
125 |
+### method) or 'clone'd (copied), with specified slots/fields overridden. |
|
126 |
+ |
|
127 |
+### For an object with a pure S4 slot representation, these both map to |
|
128 |
+### initialize. Reference classes will want to override 'update'. Other |
|
129 |
+### external representations need further customization. |
|
130 |
+ |
|
131 |
+setMethod("update", "Axt", |
|
132 |
+ function(object, ..., check=TRUE){ |
|
133 |
+ initialize(object, ...) |
|
134 |
+ } |
|
135 |
+ ) |
|
136 |
+ |
|
137 |
+setMethod("update", "CNE", |
|
138 |
+ function(object, ..., check=TRUE){ |
|
139 |
+ initialize(object, ...) |
|
140 |
+ } |
|
141 |
+ ) |
|
142 |
+ |
|
143 |
+setMethod("clone", "ANY", # not exported |
|
144 |
+ function(x, ...) |
|
145 |
+ { |
|
146 |
+ if (nargs() > 1L) |
|
147 |
+ initialize(x, ...) |
|
148 |
+ else |
|
149 |
+ x |
|
150 |
+ } |
|
151 |
+) |
|
152 |
+ |
|
153 |
+ |
|
154 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
155 |
+### Subsetting and combining. |
|
156 |
+### |
|
157 |
+ |
|
158 |
+setMethod("[", "Axt", |
|
159 |
+ function(x, i, ..., drop){ |
|
160 |
+ if(length(list(...)) > 0L) |
|
161 |
+ stop("invalid subsetting") |
|
162 |
+ if(missing(i)) |
|
163 |
+ return(x) |
|
164 |
+ #i = IRanges:::normalizeSingleBracketSubscript(i, x) |
|
165 |
+ ans_targetRanges <- targetRanges(x)[i] |
|
166 |
+ ans_targetSeqs <- targetSeqs(x)[i] |
|
167 |
+ ans_queryRanges <- queryRanges(x)[i] |
|
168 |
+ ans_querySeqs <- querySeqs(x)[i] |
|
169 |
+ ans_score <- score(x)[i] |
|
170 |
+ ans_symCount <- symCount(x)[i] |
|
171 |
+ clone(x, targetRanges=ans_targetRanges, targetSeqs=ans_targetSeqs, |
|
172 |
+ queryRanges=ans_queryRanges, querySeqs=ans_querySeqs, |
|
173 |
+ score=ans_score, symCount=ans_symCount) |
|
174 |
+ } |
|
175 |
+ ) |
|
176 |
+ |
|
177 |
+setMethod("c", "Axt", |
|
178 |
+ function(x, ...){ |
|
179 |
+ if(missing(x)){ |
|
180 |
+ args <- unname(list(...)) |
|
181 |
+ x <- args[[1L]] |
|
182 |
+ }else{ |
|
183 |
+ args <- unname(list(x, ...)) |
|
184 |
+ } |
|
185 |
+ if(length(args) == 1L) |
|
186 |
+ return(x) |
|
187 |
+ arg_is_null <- sapply(args, is.null) |
|
188 |
+ if(any(arg_is_null)) |
|
189 |
+ args[arg_is_null] <- NULL |
|
190 |
+ if(!all(sapply(args, is, class(x)))) |
|
191 |
+ stop("all arguments in '...' must be ", |
|
192 |
+ class(x), " objects (or NULLs)") |
|
193 |
+ new_targetRanges <- do.call(c, lapply(args, targetRanges)) |
|
194 |
+ new_targetSeqs <- do.call(c, lapply(args, targetSeqs)) |
|
195 |
+ new_queryRanges <- do.call(c, lapply(args, queryRanges)) |
|
196 |
+ new_querySeqs <- do.call(c, lapply(args, querySeqs)) |
|
197 |
+ new_score <- do.call(c, lapply(args, score)) |
|
198 |
+ new_symCount <- do.call(c, lapply(args, symCount)) |
|
199 |
+ |
|
200 |
+ initialize(x, |
|
201 |
+ targetRanges=new_targetRanges, |
|
202 |
+ targetSeqs=new_targetSeqs, |
|
203 |
+ queryRanges=new_queryRanges, |
|
204 |
+ querySeqs=new_querySeqs, |
|
205 |
+ score=new_score, |
|
206 |
+ symCount=new_symCount) |
|
207 |
+ } |
|
208 |
+ ) |
|
209 |
+ |
|
210 |
+ |
|
211 |
+setMethod("subAxt", "Axt", |
|
212 |
+## This is to fetch the Axts within the specific chrs, starts, ends |
|
213 |
+## based on target sequences. |
|
214 |
+ function(x, chr, start, end, #strand=c("+", "-", "*"), |
|
215 |
+ select=c("target", "query"), |
|
216 |
+ type=c("any", "within"), |
|
217 |
+ qSize=NULL){ |
|
218 |
+ type <- match.arg(type) |
|
219 |
+ select <- match.arg(select) |
|
220 |
+ if(select == "query"){ |
|
221 |
+ if(is.null(qSize)) |
|
222 |
+ stop("When selecting on query alignments, |
|
223 |
+ qSize must be provided.") |
|
224 |
+ if(!is(qSize, "integer")) |
|
225 |
+ stop("qSize must be an integer object.") |
|
226 |
+ } |
|
227 |
+ if(select == "target"){ |
|
228 |
+ searchGRanges = GRanges(seqnames=chr, |
|
229 |
+ ranges=IRanges(start=start, end=end), |
|
230 |
+ strand="+") |
|
231 |
+ if(length(searchGRanges) == 0) |
|
232 |
+ return(x) |
|
233 |
+ index <- queryHits(findOverlaps(targetRanges(x), |
|
234 |
+ searchGRanges, type=type, |
|
235 |
+ select="all")) |
|
236 |
+ }else if(select == "query"){ |
|
237 |
+ # first search Axts on positive strand |
|
238 |
+ searchGRanges = GRanges(seqnames=chr, |
|
239 |
+ ranges=IRanges(start=start, end=end), |
|
240 |
+ strand="+") |
|
241 |
+ if(length(searchGRanges) == 0) |
|
242 |
+ return(x) |
|
243 |
+ indexPositive <- queryHits(findOverlaps(queryRanges(x), |
|
244 |
+ searchGRanges, type=type, |
|
245 |
+ select="all")) |
|
246 |
+ # then search Axts on negative strand. |
|
247 |
+ # we need to prepare the searchGRanges on negative strand. |
|
248 |
+ searchGRanges <- GRanges(seqnames=chr, |
|
249 |
+ ranges=IRanges(start=qSize-end+1, |
|
250 |
+ end=qSize-start+1), |
|
251 |
+ strand="-") |
|
252 |
+ indexNegative <- queryHits(findOverlaps(queryRanges(x), |
|
253 |
+ searchGRanges, type=type, |
|
254 |
+ select="all")) |
|
255 |
+ index <- sort(c(indexPositive, indexNegative)) |
|
256 |
+ }else{ |
|
257 |
+ stop("Wrong select!") |
|
258 |
+ } |
|
259 |
+ return(x[index]) |
|
260 |
+ } |
|
261 |
+ ) |
|
262 |
+ |
|
263 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
264 |
+### "show" method. |
|
265 |
+### |
|
266 |
+### 'x' must be an XString or MaskedXString object. |
|
267 |
+toSeqSnippet <- function(x, width) |
|
268 |
+{ |
|
269 |
+ if (width < 7L) |
|
270 |
+ width <- 7L |
|
271 |
+ seqlen <- length(x) |
|
272 |
+ if (seqlen <= width) { |
|
273 |
+ as.character(x) |
|
274 |
+ } else { |
|
275 |
+ w1 <- (width - 2) %/% 2 |
|
276 |
+ w2 <- (width - 3) %/% 2 |
|
277 |
+ paste(as.character(subseq(x, start=1, width=w1)), |
|
278 |
+ "...", |
|
279 |
+ as.character(subseq(x, end=seqlen, width=w2)), |
|
280 |
+ sep="") |
|
281 |
+ } |
|
282 |
+} |
|
283 |
+ |
|
284 |
+ |
|
285 |
+.axt.show_frame_line <- function(x, i, iW, tNameW, tStartW, tEndW, |
|
286 |
+ qNameW, qStartW, qEndW, scoreW){ |
|
287 |
+ cat(format(i, width=iW, justify="right"), " ", |
|
288 |
+ format(as.character(seqnames(targetRanges(x)[i])), |
|
289 |
+ width=tNameW, justify="right"), " ", |
|
290 |
+ format(start(targetRanges(x)[i]), width=tStartW, justify="right"), " ", |
|
291 |
+ format(end(targetRanges(x)[i]), width=tEndW, justify="right"), " ", |
|
292 |
+ format(as.character(seqnames(queryRanges(x)[i])), |
|
293 |
+ width=qNameW, justify="right"), " ", |
|
294 |
+ format(start(queryRanges(x)[i]), width=qStartW, justify="right"), " ", |
|
295 |
+ format(end(queryRanges(x)[i]), width=qEndW, justify="right"), " ", |
|
296 |
+ format(as.character(strand(queryRanges(x))[i]), |
|
297 |
+ width=1, justify="right"), " ", |
|
298 |
+ format(score(x)[i], width=scoreW, justify="right"), " ", |
|
299 |
+ sep="" |
|
300 |
+ ) |
|
301 |
+ cat("\n") |
|
302 |
+ snippetWidth <- getOption("width") |
|
303 |
+ seq_snippet <- toSeqSnippet(targetSeqs(x)[[i]], snippetWidth) |
|
304 |
+ cat(seq_snippet) |
|
305 |
+ cat("\n") |
|
306 |
+ seq_snippet <- toSeqSnippet(querySeqs(x)[[i]], snippetWidth) |
|
307 |
+ cat(seq_snippet) |
|
308 |
+ cat("\n") |
|
309 |
+} |
|
310 |
+ |
|
311 |
+showAxt <- function(x, margin="", half_nrow=5L){ |
|
312 |
+ lx <- length(x) |
|
313 |
+ if(is.null((head_nrow = getOption("showHeadLines")))) |
|
314 |
+ head_nrow = half_nrow |
|
315 |
+ if(is.null((tail_nrow = getOption("showTailLines")))) |
|
316 |
+ tail_nrow = half_nrow |
|
317 |
+ iW = nchar(as.character(lx)) |
|
318 |
+ if(lx < (2*half_nrow+1L) | (lx < (head_nrow+tail_nrow+1L))) { |
|
319 |
+ tNameW <- max(nchar(as.character(seqnames(targetRanges(x))))) |
|
320 |
+ tStartW <- max(nchar(as.character(start(targetRanges(x))))) |
|
321 |
+ tEndW <- max(nchar(as.character(end(targetRanges(x))))) |
|
322 |
+ qNameW <- max(nchar(as.character(seqnames(queryRanges(x))))) |
|
323 |
+ qStartW <- max(nchar(as.character(start(queryRanges(x))))) |
|
324 |
+ qEndW <- max(nchar(as.character(end(queryRanges(x))))) |
|
325 |
+ scoreW <- max(nchar(as.character(score(x)))) |
|
326 |
+ for(i in seq_len(lx)) |
|
327 |
+ .axt.show_frame_line(x, i, iW, tNameW, tStartW, tEndW, |
|
328 |
+ qNameW, qStartW, qEndW, scoreW) |
|
329 |
+ }else{ |
|
330 |
+ tNameW <- max(nchar(as.character(seqnames(targetRanges(x) |
|
331 |
+ [c(1:head_nrow, (lx-tail_nrow+1L):lx)])))) |
|
332 |
+ tStartW <- max(nchar(as.character(start(targetRanges(x) |
|
333 |
+ [c(1:head_nrow, (lx-tail_nrow+1L):lx)])))) |
|
334 |
+ tEndW <- max(nchar(as.character(end(targetRanges(x) |
|
335 |
+ [c(1:head_nrow, (lx-tail_nrow+1L):lx)])))) |
|
336 |
+ qNameW <- max(nchar(as.character(seqnames(queryRanges(x) |
|
337 |
+ [c(1:head_nrow, (lx-tail_nrow+1L):lx)])))) |
|
338 |
+ qStartW <- max(nchar(as.character(start(queryRanges(x) |
|
339 |
+ [c(1:head_nrow, (lx-tail_nrow+1L):lx)])))) |
|
340 |
+ qEndW <- max(nchar(as.character(end(queryRanges(x) |
|
341 |
+ [c(1:head_nrow, (lx-tail_nrow+1L):lx)])))) |
|
342 |
+ scoreW <- max(nchar(as.character(score(x) |
|
343 |
+ [c(1:head_nrow, (lx-tail_nrow+1L):lx)]))) |
|
344 |
+ if(head_nrow > 0){ |
|
345 |
+ for(i in 1:head_nrow) |
|
346 |
+ .axt.show_frame_line(x, i, iW, tNameW, tStartW, tEndW, |
|
347 |
+ qNameW, qStartW, qEndW, scoreW) |
|
348 |
+ } |
|
349 |
+ cat(format("...", width=iW, justify="right"), |
|
350 |
+ format("...", width=tNameW, justify="right"), |
|
351 |
+ format("...", width=tStartW, justify="right"), |
|
352 |
+ format("...", width=tEndW, justify="right"), |
|
353 |
+ format("...", width=qNameW, justify="right"), |
|
354 |
+ format("...", width=qStartW, justify="right"), |
|
355 |
+ format("...", width=qEndW, justify="right"), |
|
356 |
+ format("...", width=scoreW, justify="right") |
|
357 |
+ ) |
|
358 |
+ cat("\n") |
|
359 |
+ if(tail_nrow > 0){ |
|
360 |
+ for(i in (lx-tail_nrow+1L):lx) |
|
361 |
+ .axt.show_frame_line(x, i, iW, tNameW, tStartW, tEndW, |
|
362 |
+ qNameW, qStartW, qEndW, scoreW) |
|
363 |
+ } |
|
364 |
+ } |
|
365 |
+} |
|
366 |
+ #out = makePrettyMatrixForCompactPrintingAxt(x, .makeNakedMatFromAxt) |
|
367 |
+ #if(nrow(out) != 0L) |
|
368 |
+ # rownames(out) = paste0(margin, rownames(out)) |
|
369 |
+ # print(out, quote=FALSE, right=TRUE) |
|
370 |
+ |
|
371 |
+setMethod("show", "Axt", |
|
372 |
+ function(object){ |
|
373 |
+ lx <- length(object) |
|
374 |
+ cat(" A ", class(object), " with ", length(object), " ", |
|
375 |
+ ifelse(lx == 1L, "alignment pair", "alignment pairs"), |
|
376 |
+ ":\n", sep="") |
|
377 |
+ if(lx != 0){ |
|
378 |
+ showAxt(object, margin=" ") |
|
379 |
+ } |
|
380 |
+ } |
|
381 |
+) |
|
382 |
+ |
0 | 383 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,49 @@ |
1 |
+ |
|
2 |
+### ----------------------------------------------------------------- |
|
3 |
+### Axt object related |
|
4 |
+### |
|
5 |
+setGeneric("targetRanges", function(x) standardGeneric("targetRanges")) |
|
6 |
+setGeneric("targetSeqs", function(x) standardGeneric("targetSeqs")) |
|
7 |
+setGeneric("queryRanges", function(x) standardGeneric("queryRanges")) |
|
8 |
+setGeneric("querySeqs", function(x) standardGeneric("querySeqs")) |
|
9 |
+setGeneric("symCount", function(x) standardGeneric("symCount")) |
|
10 |
+setGeneric("subAxt", function(x, chr, start, end, #strand=c("+", "-", "*"), |
|
11 |
+ select=c("target", "query"), |
|
12 |
+ type=c("any", "within"), |
|
13 |
+ qSize=NULL) |
|
14 |
+ standardGeneric("subAxt") |
|
15 |
+ ) |
|
16 |
+ |
|
17 |
+### ----------------------------------------------------------------- |
|
18 |
+### CNE class related |
|
19 |
+### |
|
20 |
+setGeneric("assembly1", function(x) standardGeneric("assembly1")) |
|
21 |
+setGeneric("assembly2", function(x) standardGeneric("assembly2")) |
|
22 |
+setGeneric("CNE1", function(x) standardGeneric("CNE1")) |
|
23 |
+setGeneric("CNE2", function(x) standardGeneric("CNE2")) |
|
24 |
+setGeneric("thresholds", function(x) standardGeneric("thresholds")) |
|
25 |
+setGeneric("CNEMerged", function(x) standardGeneric("CNEMerged")) |
|
26 |
+setGeneric("CNERepeatsFiltered", function(x) |
|
27 |
+ standardGeneric("CNERepeatsFiltered")) |
|
28 |
+ |
|
29 |
+setGeneric("saveCNEToSQLite", |
|
30 |
+ function(CNE, dbName, tableName, overwrite=FALSE) |
|
31 |
+ standardGeneric("saveCNEToSQLite")) |
|
32 |
+setGeneric("CNEDensity", |
|
33 |
+ function(dbName, tableName, assembly1, assembly2, threshold, |
|
34 |
+ chr, start, end, windowSize, minLength=NULL) |
|
35 |
+ standardGeneric("CNEDensity")) |
|
36 |
+ |
|
37 |
+### ----------------------------------------------------------------- |
|
38 |
+### general |
|
39 |
+### Not Exported! |
|
40 |
+setGeneric("clone", function(x, ...) standardGeneric("clone")) |
|
41 |
+ |
|
42 |
+### ----------------------------------------------------------------- |
|
43 |
+### ceScan |
|
44 |
+### Exported! |
|
45 |
+setGeneric("ceScan", |
|
46 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50") |
|
47 |
+ standardGeneric("ceScan")) |
|
48 |
+ |
|
49 |
+ |
0 | 50 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,382 @@ |
1 |
+ |
|
2 |
+### ----------------------------------------------------------------- |
|
3 |
+### The main function for scanning the axts and get the CNEs |
|
4 |
+### Not exported! |
|
5 |
+ceScanR <- function(axts, tFilter=NULL, qFilter=NULL, qSizes=NULL, |
|
6 |
+ thresholds=c("49_50")){ |
|
7 |
+ ## Here the returned tStart and qStart are 1-based coordinates. |
|
8 |
+ ## Of course ends are also 1-based. |
|
9 |
+ if(!is.null(qFilter)) |
|
10 |
+ if(is.null(qSizes) || !is(qSizes, "Seqinfo")) |
|
11 |
+ stop("qSizes must exist and be a Seqinfo object when qFilter exists") |
|
12 |
+ |
|
13 |
+ winSize <- as.integer(sapply(strsplit(thresholds, "_"), "[", 2)) |
|
14 |
+ minScore <- as.integer(sapply(strsplit(thresholds, "_"), "[", 1)) |
|
15 |
+ resFiles <- tempfile(pattern=paste(minScore, winSize, "ceScan", sep="-"), |
|
16 |
+ tmpdir=tempdir(), fileext="") |
|
17 |
+ ## How stupid I am... |
|
18 |
+ if(is.null(tFilter) && is.null(qFilter)){ |
|
19 |
+ .Call2("myCeScan", NULL, NULL, NULL, |
|
20 |
+ NULL, NULL, NULL, |
|
21 |
+ NULL, NULL, |
|
22 |
+ as.character(seqnames(targetRanges(axts))), |
|
23 |
+ start(targetRanges(axts)), end(targetRanges(axts)), |
|
24 |
+ as.character(strand(targetRanges(axts))), |
|
25 |
+ as.character(targetSeqs(axts)), |
|
26 |
+ as.character(seqnames(queryRanges(axts))), |
|
27 |
+ start(queryRanges(axts)), end(queryRanges(axts)), |
|
28 |
+ as.character(strand(queryRanges(axts))), |
|
29 |
+ as.character(querySeqs(axts)), |
|
30 |
+ score(axts), symCount(axts), winSize, minScore, |
|
31 |
+ as.character(resFiles), |
|
32 |
+ PACKAGE="CNEr") |
|
33 |
+ }else if(is.null(tFilter) && !is.null(qFilter)){ |
|
34 |
+ .Call2("myCeScan", NULL, NULL, NULL, |
|
35 |
+ as.character(seqnames(qFilter)), start(qFilter), end(qFilter), |
|
36 |
+ as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)), |
|
37 |
+ as.character(seqnames(targetRanges(axts))), |
|
38 |
+ start(targetRanges(axts)), end(targetRanges(axts)), |
|
39 |
+ as.character(strand(targetRanges(axts))), |
|
40 |
+ as.character(targetSeqs(axts)), |
|
41 |
+ as.character(seqnames(queryRanges(axts))), |
|
42 |
+ start(queryRanges(axts)), end(queryRanges(axts)), |
|
43 |
+ as.character(strand(queryRanges(axts))), |
|
44 |
+ as.character(querySeqs(axts)), |
|
45 |
+ score(axts), symCount(axts), winSize, minScore, |
|
46 |
+ as.character(resFiles), |
|
47 |
+ PACKAGE="CNEr") |
|
48 |
+ }else if(!is.null(tFilter) && is.null(qFilter)){ |
|
49 |
+ .Call2("myCeScan", as.character(seqnames(tFilter)), |
|
50 |
+ start(tFilter), end(tFilter), |
|
51 |
+ NULL, NULL, NULL, |
|
52 |
+ NULL, NULL, |
|
53 |
+ as.character(seqnames(targetRanges(axts))), |
|
54 |
+ start(targetRanges(axts)), end(targetRanges(axts)), |
|
55 |
+ as.character(strand(targetRanges(axts))), |
|
56 |
+ as.character(targetSeqs(axts)), |
|
57 |
+ as.character(seqnames(queryRanges(axts))), |
|
58 |
+ start(queryRanges(axts)), end(queryRanges(axts)), |
|
59 |
+ as.character(strand(queryRanges(axts))), |
|
60 |
+ as.character(querySeqs(axts)), |
|
61 |
+ score(axts), symCount(axts), winSize, minScore, |
|
62 |
+ as.character(resFiles), |
|
63 |
+ PACKAGE="CNEr") |
|
64 |
+ }else{ |
|
65 |
+ .Call2("myCeScan", as.character(seqnames(tFilter)), |
|
66 |
+ start(tFilter), end(tFilter), |
|
67 |
+ as.character(seqnames(qFilter)), start(qFilter), end(qFilter), |
|
68 |
+ as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)), |
|
69 |
+ as.character(seqnames(targetRanges(axts))), |
|
70 |
+ start(targetRanges(axts)), end(targetRanges(axts)), |
|
71 |
+ as.character(strand(targetRanges(axts))), |
|
72 |
+ as.character(targetSeqs(axts)), |
|
73 |
+ as.character(seqnames(queryRanges(axts))), |
|
74 |
+ start(queryRanges(axts)), end(queryRanges(axts)), |
|
75 |
+ as.character(strand(queryRanges(axts))), |
|
76 |
+ as.character(querySeqs(axts)), |
|
77 |
+ score(axts), symCount(axts), winSize, minScore, |
|
78 |
+ as.character(resFiles), |
|
79 |
+ PACKAGE="CNEr") |
|
80 |
+ } |
|
81 |
+ CNE <- lapply(resFiles, |
|
82 |
+ function(x){ |
|
83 |
+ res <- read.table(x, header=FALSE, sep="\t", as.is=TRUE) |
|
84 |
+ colnames(res) <- c("tName", "tStart", "tEnd", "qName", "qStart", |
|
85 |
+ "qEnd", "strand", "score", "cigar") |
|
86 |
+ return(res)}) |
|
87 |
+ names(CNE) <- paste(minScore, winSize, sep="_") |
|
88 |
+ unlink(resFiles) |
|
89 |
+ return(CNE) |
|
90 |
+} |
|
91 |
+ |
|
92 |
+ |
|
93 |
+### ----------------------------------------------------------------- |
|
94 |
+### Another main function for CNEs identification, |
|
95 |
+### but it takes the axt files and bed files as input |
|
96 |
+### Not exported! |
|
97 |
+ceScanFile <- function(axtFiles, tFilterFile=NULL, qFilterFile=NULL, |
|
98 |
+ qSizes=NULL, |
|
99 |
+ thresholds=c("49_50")){ |
|
100 |
+ ## Here the returned tStart and qStart are 1-based coordinates. |
|
101 |
+ ## Of course ends are also 1-based. |
|
102 |
+ if(!is.null(qFilterFile)) |
|
103 |
+ if(is.null(qSizes) || !is(qSizes, "Seqinfo")) |
|
104 |
+ stop("qSizes must exist and be a Seqinfo object when qFilter exists") |
|
105 |
+ winSize <- as.integer(sapply(strsplit(thresholds, "_"), "[", 2)) |
|
106 |
+ minScore <- as.integer(sapply(strsplit(thresholds, "_"), "[", 1)) |
|
107 |
+ resFiles <- tempfile(pattern=paste(minScore, winSize, "ceScan", sep="-"), |
|
108 |
+ tmpdir=tempdir(), fileext="") |
|
109 |
+ .Call2("myCeScanFile", axtFiles, tFilterFile, qFilterFile, |
|
110 |
+ as.character(seqnames(qSizes)), as.character(seqlengths(qSizes)), |
|
111 |
+ winSize, minScore, |
|
112 |
+ resFiles, PACKAGE="CNEr") |
|
113 |
+ CNE <- lapply(resFiles, |
|
114 |
+ function(x){ |
|
115 |
+ res <- read.table(x, header=FALSE, sep="\t", as.is=TRUE) |
|
116 |
+ colnames(res) <- c("tName", "tStart", "tEnd", "qName", |
|
117 |
+ "qStart", "qEnd", "strand", "score", "cigar") |
|
118 |
+ return(res)}) |
|
119 |
+ names(CNE) <- paste(minScore, winSize, sep="_") |
|
120 |
+ unlink(resFiles) |
|
121 |
+ return(CNE) |
|
122 |
+} |
|
123 |
+ |
|
124 |
+### ----------------------------------------------------------------- |
|
125 |
+### The S4 methods for ceScan |
|
126 |
+### Exported! |
|
127 |
+setMethod("ceScan", signature(axts="Axt", tFilter="GRanges", qFilter="GRanges", |
|
128 |
+ qSizes="Seqinfo"), |
|
129 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
130 |
+ ceScanR(axts, tFilter=tFilter, qFilter=qFilter, |
|
131 |
+ qSizes=qSizes, thresholds=thresholds) |
|
132 |
+ } |
|
133 |
+ ) |
|
134 |
+setMethod("ceScan", signature(axts="Axt", tFilter="missing", qFilter="GRanges", |
|
135 |
+ qSizes="Seqinfo"), |
|
136 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
137 |
+ ceScanR(axts, tFilter=NULL, qFilter=qFilter, |
|
138 |
+ qSizes=qSizes, thresholds=thresholds) |
|
139 |
+ } |
|
140 |
+ ) |
|
141 |
+setMethod("ceScan", signature(axts="Axt", tFilter="missing", qFilter="missing", |
|
142 |
+ qSizes="missing"), |
|
143 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
144 |
+ ceScanR(axts, tFilter=NULL, qFilter=NULL, |
|
145 |
+ qSizes=NULL, thresholds=thresholds) |
|
146 |
+ } |
|
147 |
+ ) |
|
148 |
+setMethod("ceScan", signature(axts="Axt", tFilter="GRanges", qFilter="missing", |
|
149 |
+ qSizes="missing"), |
|
150 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
151 |
+ ceScanR(axts, tFilter=tFilter, qFilter=NULL, |
|
152 |
+ qSizes=NULL, thresholds=thresholds) |
|
153 |
+ } |
|
154 |
+ ) |
|
155 |
+setMethod("ceScan", signature(axts="character", tFilter="character", |
|
156 |
+ qFilter="character", qSizes="Seqinfo"), |
|
157 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
158 |
+ ceScanFile(axtFiles=axts, tFilterFile=tFilter, qFilterFile=qFilter, |
|
159 |
+ qSizes=qSizes, thresholds=thresholds) |
|
160 |
+ } |
|
161 |
+ ) |
|
162 |
+setMethod("ceScan", signature(axts="character", tFilter="missing", |
|
163 |
+ qFilter="character", qSizes="Seqinfo"), |
|
164 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
165 |
+ ceScanFile(axtFiles=axts, tFilterFile=NULL, qFilterFile=qFilter, |
|
166 |
+ qSizes=qSizes, thresholds=thresholds) |
|
167 |
+ } |
|
168 |
+ ) |
|
169 |
+setMethod("ceScan", signature(axts="character", tFilter="missing", |
|
170 |
+ qFilter="missing", qSizes="missing"), |
|
171 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
172 |
+ ceScanFile(axtFiles=axts, tFilterFile=NULL, qFilterFile=NULL, |
|
173 |
+ qSizes=NULL, thresholds=thresholds) |
|
174 |
+ } |
|
175 |
+ ) |
|
176 |
+setMethod("ceScan", signature(axts="character", tFilter="character", |
|
177 |
+ qFilter="missing", qSizes="missing"), |
|
178 |
+ function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){ |
|
179 |
+ ceScanFile(axtFiles=axts, tFilterFile=tFilter, qFilterFile=NULL, |
|
180 |
+ qSizes=NULL, thresholds=thresholds) |
|
181 |
+ } |
|
182 |
+ ) |
|
183 |
+ |
|
184 |
+### ----------------------------------------------------------------- |
|
185 |
+### Merge two side cnes |
|
186 |
+### Exported! |
|
187 |
+cneMerge <- function(cne1, cne2){ |
|
188 |
+ # In this function, cne's start is 1-based coordinates. ends are 1-based too. |
|
189 |
+ # Although in cne1 and cne2, query strand can be negative, |
|
190 |
+ # but the coordinate is already on positive strand. |
|
191 |
+ ## first reverse the cne2's cigar |
|
192 |
+ ## cne2 = transform(cne2, cigar=chartr("DI", "ID", cigar)) |
|
193 |
+ cne2$cigar <- chartr("DI", "ID", cne2$cigar) |
|
194 |
+ if(any(cne2$strand == "-")){ |
|
195 |
+ #cne2[cne2$strand=="-", ] = transform(subset(cne2, strand=="-"), |
|
196 |
+ # cigar=reverseCigar(cigar)) |
|
197 |
+ cne2[cne2$strand == "-", "cigar"] <- |
|
198 |
+ reverseCigar(cne2[cne2$strand == "-", "cigar"]) |
|
199 |
+ } |
|
200 |
+ colnames(cne2) <- c("qName", "qStart", "qEnd", "tName", |
|
201 |
+ "tStart", "tEnd", "strand", "score", "cigar") |
|
202 |
+ cne1T <- GRanges(seqnames=cne1$tName, |
|
203 |
+ ranges=IRanges(start=cne1$tStart, end=cne1$tEnd), |
|
204 |
+ strand="+") |
|
205 |
+ cne1Q <- GRanges(seqnames=cne1$qName, |
|
206 |
+ ranges=IRanges(start=cne1$qStart, end=cne1$qEnd), |
|
207 |
+ strand="+") |
|
208 |
+ cne2T <- GRanges(seqnames=cne2$tName, |
|
209 |
+ ranges=IRanges(start=cne2$tStart, end=cne2$tEnd), |
|
210 |
+ strand="+") |
|
211 |
+ cne2Q <- GRanges(seqnames=cne2$qName, |
|
212 |
+ ranges=IRanges(start=cne2$qStart, end=cne2$qEnd), |
|
213 |
+ strand="+") |
|
214 |
+ cneT <- c(cne1T, cne2T) |
|
215 |
+ cneQ <- c(cne1Q, cne2Q) |
|
216 |
+ # Here, I just removed the CNEs which are within another big CNEs. |
|
217 |
+ # In very rare cases(1 in 100000), some cnes may just connect |
|
218 |
+ # and need to merge them. Needs to be done in the future (perhaps not easy to be done in R). |
|
219 |
+ cneT_overlap <- findOverlaps(cneT, type="within", |
|
220 |
+ ignoreSelf=TRUE, ignoreRedundant=TRUE) |
|
221 |
+ #cneT_overlap1 = findOverlaps(cneT, type="equal", |
|
222 |
+ #ignoreSelf=TRUE, ignoreRedundant=TRUE) |
|
223 |
+ #cneT_overlap2 = findOverlaps(cneT, type="any", |
|
224 |
+ #ignoreSelf=TRUE, ignoreRedundant=TRUE) |
|
225 |
+ cneQ_overlap <- findOverlaps(cneQ, type="within", |
|
226 |
+ ignoreSelf=TRUE, ignoreRedundant=TRUE) |
|
227 |
+ #cneQ_overlap1 = findOverlaps(cneQ, type="equal", |
|
228 |
+ #ignoreSelf=TRUE, ignoreRedundant=TRUE) |
|
229 |
+ #cneQ_overlap2 = findOverlaps(cneQ, type="any", |
|
230 |
+ #ignoreSelf=TRUE, ignoreRedundant=TRUE) |
|
231 |
+ redundance <- IRanges::intersect(cneT_overlap, cneQ_overlap) |
|
232 |
+ #any_overlap = intersect(cneT_overlap2, cneQ_overlap2) |
|
233 |
+ #foo = setdiff(any_overlap, redundance) |
|
234 |
+ #paste(subjectHits(foo), queryHits(foo), sep=",") |
|
235 |
+ # %in% paste(queryHits(redundance), subjectHits(redundance), sep=",") |
|
236 |
+ res <- rbind(cne1, cne2)[-queryHits(redundance), ] |
|
237 |
+ # After the merge, we'd better name them as 1 and 2 |
|
238 |
+ # rather than the tName and qName. Use the names in mysql cne db. |
|
239 |
+ colnames(res) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", |
|
240 |
+ "strand", "similarity", "cigar") |
|
241 |
+ return(res) |
|
242 |
+} |
|
243 |
+ |
|
244 |
+#cutoffs1 = 4 |
|
245 |
+#cutoffs2 = 8 |
|
246 |
+ |
|
247 |
+blatCNE <- function(CNE, winSize, cutoffs1, cutoffs2, |
|
248 |
+ assembly1Twobit, assembly2Twobit, |
|
249 |
+ blatOptions=NULL, cutIdentity=90, |
|
250 |
+ tmpDir=tempdir(), blatBinary="blat"){ |
|
251 |
+ # In this function, the input CNE's start and end are 1-based coordinates. |
|
252 |
+ blatOptionsALL <- list("DEF_BLAT_OPT_WSLO"= |
|
253 |
+ "-tileSize=9 -minScore=24 -repMatch=16384", |
|
254 |
+ "DEF_BLAT_OPT_WSMID"= |
|
255 |
+ "-tileSize=10 -minScore=28 -repMatch=4096", |
|
256 |
+ "DEF_BLAT_OPT_WSHI"= |
|
257 |
+ "-tileSize=11 -minScore=30 -repMatch=1024") |
|
258 |
+ if(!is(winSize, "integer")) |
|
259 |
+ stop("winSize must be an integer!") |
|
260 |
+ if(is.null(blatOptions)){ |
|
261 |
+ if(winSize > 45L) |
|
262 |
+ blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSHI"]] |
|
263 |
+ else if(winSize > 35L) |
|
264 |
+ blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSMID"]] |
|
265 |
+ else |
|
266 |
+ blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSLO"]] |
|
267 |
+ } |
|
268 |
+ if(cutIdentity > 100 || cutIdentity < 0) |
|
269 |
+ stop("cutIdentity must be between 0 and 100!") |
|
270 |
+ if(!is(cutoffs1, "integer")) |
|
271 |
+ stop("cutoffs1 must be an integer!") |
|
272 |
+ if(!is(cutoffs2, "integer")) |
|
273 |
+ stop("cutoffs2 must be an integer!") |
|
274 |
+ |
|
275 |
+ .run_blat <- function(cne, cutIdentity, whichAssembly, |
|
276 |
+ assemblyTwobit, blatBinary, blatOptions, tmpDir){ |
|
277 |
+ temp_cne <- tempfile(pattern="cne-", tmpdir=tmpDir) |
|
278 |
+ temp_psl <- tempfile(pattern="psl-", tmpdir=tmpDir) |
|
279 |
+ # For Blat, the start is 0-based and end is 1-based. |
|
280 |
+ # So make cne's coordinates to comply with it. |
|
281 |
+ if(whichAssembly == 1){ |
|
282 |
+ cne <- paste0(assemblyTwobit, ":", cne[,"chr1"], |
|
283 |
+ ":", cne[,"start1"]-1, "-", cne[,"end1"]) |
|
284 |
+ }else{ |
|
285 |
+ cne <- paste0(assemblyTwobit, ":", cne[,"chr2"], |
|
286 |
+ ":", cne[,"start2"]-1, "-", cne[,"end2"]) |
|
287 |
+ } |
|
288 |
+ cne <- unique(cne) |
|
289 |
+ writeLines(cne, con=temp_cne) |
|
290 |
+ cmd <- paste0(blatBinary, " ", blatOptions, " ", |
|
291 |
+ "-minIdentity=", cutIdentity, |
|
292 |
+ " ", assemblyTwobit, " ", temp_cne, " ", temp_psl) |
|
293 |
+ my.system(cmd) |
|
294 |
+ unlink(temp_cne) |
|
295 |
+ return(temp_psl) |
|
296 |
+ } |
|
297 |
+ psl1Fn <- .run_blat(CNE, cutIdentity, 1, assembly1Twobit, |
|
298 |
+ blatBinary, blatOptions, tmpDir) |
|
299 |
+ psl2Fn <- .run_blat(CNE, cutIdentity, 2, assembly2Twobit, |
|
300 |
+ blatBinary, blatOptions, tmpDir) |
|
301 |
+ psl1 <- read.table(psl1Fn, header=FALSE, sep="\t", skip=5, as.is=TRUE) |
|
302 |
+ psl2 <- read.table(psl2Fn, header=FALSE, sep="\t", skip=5, as.is=TRUE) |
|
303 |
+ colnames(psl1) <- colnames(psl2) <- |
|
304 |
+ c("matches", "misMatches", "repMatches", "nCount", |
|
305 |
+ "qNumInsert", "qBaseInsert", "tNumInsert", "tBaseInsert", |
|
306 |
+ "strand", "qName", "qSize", "qStart", "qEnd", "tName", |
|
307 |
+ "tSize", "tStart", "tEnd", "blockCount", "blockSizes", |
|
308 |
+ "qStarts", "tStarts") |
|
309 |
+ ##psl1 = subset(psl1, matches/qSize >= cutIdentity/100) |
|
310 |
+ ##for some reason, subset will cause error duing the Bioconductor build |
|
311 |
+ psl1 <- psl1[psl1$matches / psl1$qSize >= cutIdentity/100, ] |
|
312 |
+ ##psl2 = subset(psl2, matches/qSize >= cutIdentity/100) |
|
313 |
+ psl2 <- psl2[psl2$matches / psl2$qSize >= cutIdentity/100, ] |
|
314 |
+ psl1 <- table(psl1[ , "qName"]) |
|
315 |
+ psl2 <- table(psl2[ , "qName"]) |
|
316 |
+ CNEtNameIndex <- unname(psl1[paste0(CNE[ ,1], ":", CNE[ ,2]-1, "-", |
|
317 |
+ CNE[ ,3])]) |
|
318 |
+ CNEtNameIndex[is.na(CNEtNameIndex)] <- 0 |
|
319 |
+ CNEqNameIndex <- unname(psl2[paste0(CNE[ ,4], ":", CNE[ ,5]-1, "-", |
|
320 |
+ CNE[ ,6])]) |
|
321 |
+ CNEqNameIndex[is.na(CNEqNameIndex)] <- 0 |
|
322 |
+ CNE <- CNE[CNEtNameIndex <= cutoffs1 & CNEqNameIndex <= cutoffs2, ] |
|
323 |
+ return(CNE) |
|
324 |
+ # Here, the CNE's starts and ends are still 1-based. |
|
325 |
+} |
|
326 |
+ |
|
327 |
+ |
|
328 |
+ceScanOneStep <- function(axt1, filter1=NULL, sizes1, assembly1, twoBit1, |
|
329 |
+ axt2, filter2=NULL, sizes2, assembly2, twoBit2, |
|
330 |
+ thresholds=c("49_50"), |
|
331 |
+ blatBinary="blat", |
|
332 |
+ blatCutoff1, blatCutoff2 |
|
333 |
+ ){ |
|
334 |
+ if(grepl("_", assembly1) || grepl("_", assembly2)) |
|
335 |
+ stop("The assembly name must not contain \"_\"") |
|
336 |
+ if(assembly1 < assembly2){ |
|
337 |
+ .ceScanSwap(axt1=axt1, filter1=filter1, sizes1=sizes1, assembly1=assembly1, |
|
338 |
+ twoBit1=twoBit1, axt2=axt2, filter2=filter2, sizes2=sizes2, |
|
339 |
+ assembly2=assembly2, twoBit2=twoBit2, thresholds=thresholds, |
|
340 |
+ blatBinary=blatBinary, blatCutoff1=blatCutoff1, |
|
341 |
+ blatCutoff2=blatCutoff2) |
|
342 |
+ }else{ |
|
343 |
+ .ceScanSwap(axt1=axt2, filter1=filter2, sizes1=sizes2, assembly1=assembly2, |
|
344 |
+ twoBit1=twoBit2, axt2=axt1, filter2=filter1, |
|
345 |
+ sizes2=sizes1, assembly2=assembly1, twoBit2=twoBit1, |
|
346 |
+ thresholds=thresholds, blatBinary=blatBinary, |
|
347 |
+ blatCutoff1=blatCutoff2, blatCutoff2=blatCutoff1) |
|
348 |
+ } |
|
349 |
+} |
|
350 |
+ |
|
351 |
+.ceScanSwap <- function(axt1, filter1=NULL, sizes1, assembly1, twoBit1, |
|
352 |
+ axt2, filter2=NULL, sizes2, assembly2, twoBit2, |
|
353 |
+ thresholds=c("49_50"), |
|
354 |
+ blatBinary="blat", |
|
355 |
+ blatCutoff1, blatCutoff2 |
|
356 |
+ ){ |
|
357 |
+ ## In this function, we make sure "assembly1" is smaller than "assembly2". |
|
358 |
+ ## danRer7 is smaller than hg19. |
|
359 |
+ ## This is just for easier database storage table name: "danRer7_hg19_49_50" |
|
360 |
+ CNE1 <- ceScan(axt1, filter1, filter2, sizes2, thresholds) |
|
361 |
+ CNE2 <- ceScan(axt2, filter2, filter1, sizes1, thresholds) |
|
362 |
+ CNEMerged <- mapply(cneMerge, CNE1, CNE2, SIMPLIFY=FALSE) |
|
363 |
+ names(CNEMerged) = paste(assembly1, assembly2, thresholds, sep="_") |
|
364 |
+ CNEBlated <- list() |
|
365 |
+ for(i in 1:length(CNEMerged)){ |
|
366 |
+ CNEBlated[[names(CNEMerged)[i]]] <- |
|
367 |
+ blatCNE(CNEMerged[[i]], as.integer(sub(".+_.+_\\d+_", "", |
|
368 |
+ names(CNEMerged)[i])), |
|
369 |
+ cutoffs1=blatCutoff1, cutoffs2=blatCutoff2, |
|
370 |
+ assembly1Twobit=twoBit1, assembly2Twobit=twoBit2, |
|
371 |
+ blatBinary=blatBinary) |
|
372 |
+ } |
|
373 |
+ ans <- CNE(assembly1=assembly1, assembly2=assembly2, |
|
374 |
+ thresholds=thresholds, |
|
375 |
+ CNE1=CNE1, CNE2=CNE2, CNEMerged=CNEMerged, |
|
376 |
+ CNERepeatsFiltered=CNEBlated, |
|
377 |
+ alignMethod=blatBinary) |
|
378 |
+ return(ans) |
|
379 |
+} |
|
380 |
+ |
|
381 |
+ |
|
382 |
+ |
0 | 383 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,84 @@ |
1 |
+### ----------------------------------------------------------------- |
|
2 |
+### read the bed file into GRanges. |
|
3 |
+### |
|
4 |
+readBedR <- function(bedFile){ |
|
5 |
+## This GRanges is in 1-based. |
|
6 |
+ bed <- import.bed(bedFile, asRangedData=FALSE) |
|
7 |
+ strand(bed) <- "+" |
|
8 |
+ bed <- reduce(bed) |
|
9 |
+} |
|
10 |
+ |
|
11 |
+### ----------------------------------------------------------------- |
|
12 |
+### read the bed file (with only 3 columns) into GRanges. |
|
13 |
+### Exported! |
|
14 |
+readBed <- function(bedFile=NULL){ |
|
15 |
+## This GRanges have the different coordinates system |
|
16 |
+## with the original bed file. i.e. with 1-based start end coordinates. |
|
17 |
+ if(is.null(bedFile)){ |
|
18 |
+ return(NULL) |
|
19 |
+ } |
|
20 |
+ if(!file.exists(bedFile)){ |
|
21 |
+ stop("No such file ", bedFile) |
|
22 |
+ } |
|
23 |
+ bed <- .Call2("myReadBed", bedFile, PACKAGE="CNEr") |
|
24 |
+ bed <- GRanges(seqnames=Rle(bed[[1]]), |
|
25 |
+ ranges=IRanges(start=bed[[2]], end=bed[[3]]), |
|
26 |
+ strand=factor("+")) |
|
27 |
+ return(bed) |
|
28 |
+} |
|
29 |
+ |
|
30 |
+### ----------------------------------------------------------------- |
|
31 |
+### read the axt files into an axt object. |
|
32 |
+### Exported! |
|
33 |
+readAxt <- function(axtFiles){ |
|
34 |
+ # Read axt files into R axt object. |
|
35 |
+ # The coordinates are 1-based for start and end. |
|
36 |
+ index_noexists <- !file.exists(axtFiles) |
|
37 |
+ if(any(index_noexists)){ |
|
38 |
+ stop("No such file ", paste(axtFiles[index_noexists], sep=" ")) |
|
39 |
+ } |
|
40 |
+ myAxt <- .Call2("myReadAxt", axtFiles, PACKAGE="CNEr") |
|
41 |
+ axts <- Axt(targetRanges=GRanges(seqnames=Rle(myAxt[[1]]), |
|
42 |
+ ranges=IRanges(start=myAxt[[2]], |
|
43 |
+ end=myAxt[[3]]), |
|
44 |
+ strand=Rle(myAxt[[4]])), |
|
45 |
+ targetSeqs=DNAStringSet(myAxt[[5]]), |
|
46 |
+ queryRanges=GRanges(seqnames=Rle(myAxt[[6]]), |
|
47 |
+ ranges=IRanges(start=myAxt[[7]], |
|
48 |
+ end=myAxt[[8]]), |
|
49 |
+ strand=Rle(myAxt[[9]])), |
|
50 |
+ querySeqs=DNAStringSet(myAxt[[10]]), |
|
51 |
+ score=myAxt[[11]], |
|
52 |
+ symCount=myAxt[[12]] |
|
53 |
+ ) |
|
54 |
+ return(axts) |
|
55 |
+} |
|
56 |
+ |
|
57 |
+### ----------------------------------------------------------------- |
|
58 |
+### read the axt files and return the widths of all the alignments |
|
59 |
+### Exported! |
|
60 |
+axtInfo <- function(axtFiles){ |
|
61 |
+ ans <- .Call2("axt_info", axtFiles, PACKAGE="CNEr") |
|
62 |
+ return(ans) |
|
63 |
+} |
|
64 |
+ |
|
65 |
+### ----------------------------------------------------------------- |
|
66 |
+### write the Axt object to an axt file |
|
67 |
+### Exported! |
|
68 |
+writeAxt <- function(axt, con){ |
|
69 |
+ firstLine <- paste(0:(length(axt)-1), seqnames(targetRanges(axt)), |
|
70 |
+ start(targetRanges(axt)), end(targetRanges(axt)), |
|
71 |
+ seqnames(queryRanges(axt)), |
|
72 |
+ start(queryRanges(axt)), end(queryRanges(axt)), |
|
73 |
+ strand(queryRanges(axt)), score(axt) |
|
74 |
+ ) |
|
75 |
+ secondLine <- targetSeqs(axt) |
|
76 |
+ thirdLine <- querySeqs(axt) |
|
77 |
+ wholeLines <- paste(firstLine, as.character(targetSeqs(axt)), |
|
78 |
+ as.character(querySeqs(axt)), |
|
79 |
+ "", sep="\n") |
|
80 |
+ writeLines(wholeLines, con) |
|
81 |
+} |
|
82 |
+ |
|
83 |
+ |
|
84 |
+ |
0 | 85 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,122 @@ |
1 |
+## This is used to download the annotation from UCSC and |
|
2 |
+### incooperate it for the tracks display |
|
3 |
+ |
|
4 |
+#.SUPPORTED_UCSC_TABLES = c( |
|
5 |
+# ## tablename (unique key) track subtrack auxiliary tablename |
|
6 |
+# "knownGene", "UCSC Genes", NA, |
|
7 |
+# "refGene", "RefSeq Genes", NA, |
|
8 |
+# "ensGene", "Ensembl Genes", NA |
|
9 |
+# ) |
|
10 |
+ |
|
11 |
+.SUPPORTED_UCSC_TABLES = list( |
|
12 |
+ "knownGene" = c("knownGene", "kgXref"), |
|
13 |
+ "refGene" = c("refGene"), |
|
14 |
+ "ensGene" = c("ensGene", "ensemblToGeneName") |
|
15 |
+ ) |
|
16 |
+ |
|
17 |
+supportedUCSCtables = function(){ |
|
18 |
+ .SUPPORTED_UCSC_TABLES |
|
19 |
+} |
|
20 |
+ |
|
21 |
+queryrefGene = function(con){ |
|
22 |
+ query = "SELECT distinct name, name2, chrom, strand, exonStarts, exonEnds |
|
23 |
+ FROM refGene |
|
24 |
+ ORDER BY name2, name" |
|
25 |
+ ans = dbGetQuery(con, query) |
|
26 |
+ # process the ans into one exon per line |
|
27 |
+ exonStarts = strsplit(ans$exonStarts, ",") |
|
28 |
+ exonEnds = strsplit(ans$exonEnds, ",") |
|
29 |
+ stopifnot(all(sapply(exonStarts, length) == sapply(exonEnds, length))) |
|
30 |
+ repNum = sapply(exonStarts, length) |
|
31 |
+ res = data.frame(chromosome=rep(ans$chrom, repNum), |
|
32 |
+ start=as.integer(unlist(exonStarts))+1, |
|
33 |
+ end=as.integer(unlist(exonEnds)), |
|
34 |
+ strand=rep(ans$strand, repNum), |
|
35 |
+ gene=rep(ans$name2, repNum), |
|
36 |
+ transcript=rep(ans$name, repNum), |
|
37 |
+ symbol=rep(ans$name2, repNum)) |
|
38 |
+ return(res) |
|
39 |
+} |
|
40 |
+ |
|
41 |
+queryknownGene = function(con){ |
|
42 |
+ query = "SELECT distinct kgID, geneSymbol, chrom, strand, exonStarts, exonEnds |
|
43 |
+ FROM knownGene, kgXref WHERE knownGene.name=kgXref.kgID |
|
44 |
+ ORDER BY geneSymbol, kgID" |
|
45 |
+ ans = dbGetQuery(con, query) |
|
46 |
+ # process the ans into one exon per line |
|
47 |
+ exonStarts = strsplit(ans$exonStarts, ",") |
|
48 |
+ exonEnds = strsplit(ans$exonEnds, ",") |
|
49 |
+ stopifnot(all(sapply(exonStarts, length) == sapply(exonEnds, length))) |
|
50 |
+ repNum = sapply(exonStarts, length) |
|
51 |
+ res = data.frame(chromosome=rep(ans$chrom, repNum), |
|
52 |
+ start=as.integer(unlist(exonStarts))+1, |
|
53 |
+ # The internal ucsc database use the 0-based start, |
|
54 |
+ # 1-based end. We only use 1-based. |
|
55 |
+ end=as.integer(unlist(exonEnds)), |
|
56 |
+ strand=rep(ans$strand, repNum), |
|
57 |
+ gene=rep(ans$geneSymbol, repNum), |
|
58 |
+ transcript=rep(ans$kgID, repNum), |
|
59 |
+ symbol=rep(ans$geneSymbol, repNum)) |
|
60 |
+ return(res) |
|
61 |
+} |
|
62 |
+ |
|
63 |
+queryensGene = function(con){ |
|
64 |
+ query = "SELECT distinct chrom, strand, exonStarts, exonEnds, |
|
65 |
+ ensGene.name2, ensGene.name, ensemblToGeneName.value |
|
66 |
+ FROM ensGene, ensemblToGeneName WHERE ensGene.name=ensemblToGeneName.name |
|
67 |
+ ORDER BY ensGene.name, ensemblToGeneName.value" |
|
68 |
+ ans = dbGetQuery(con, query) |
|
69 |
+ # process the ans into one exon per line |
|
70 |
+ exonStarts = strsplit(ans$exonStarts, ",") |
|
71 |
+ exonEnds = strsplit(ans$exonEnds, ",") |
|
72 |
+ stopifnot(all(sapply(exonStarts, length) == sapply(exonEnds, length))) |
|
73 |
+ repNum = sapply(exonStarts, length) |
|
74 |
+ res = data.frame(chromosome=rep(ans$chrom, repNum), |
|
75 |
+ start=as.integer(unlist(exonStarts))+1, |
|
76 |
+ end=as.integer(unlist(exonEnds)), |
|
77 |
+ strand=rep(ans$strand, repNum), |
|
78 |
+ gene=rep(ans$name2, repNum), |
|
79 |
+ transcript=rep(ans$name, repNum), |
|
80 |
+ symbol=rep(ans$value, repNum)) |
|
81 |
+ return(res) |
|
82 |
+} |
|
83 |
+ |
|
84 |
+makeGeneDbFromUCSC = function(genome="hg19", |
|
85 |
+ tablename="refGene", |
|
86 |
+ host="genome-mysql.cse.ucsc.edu", |
|
87 |
+ user="genome", |
|
88 |
+ password=NULL, |
|
89 |
+ dbnameSQLite="geneAnnotation.sqlite", |
|
90 |
+ tablenameSQLite=paste(genome, tablename, sep="_"), |
|
91 |
+ overwrite=FALSE |
|
92 |
+ ){ |
|
93 |
+ if(!isSingleString(genome)) |
|
94 |
+ stop("'genome' must be a single string") |
|
95 |
+ if(!isSingleString(tablename)) |
|
96 |
+ stop("'tablename' must be a single string") |
|
97 |
+ if(!tablename %in% names(.SUPPORTED_UCSC_TABLES)) |
|
98 |
+ stop("table \"", tablename, "\" is not supported") |
|
99 |
+ if(!isSingleString(host)) |
|
100 |
+ stop("'url' must be a single string") |
|
101 |
+ con = dbConnect(MySQL(), user=user, password=password, |
|
102 |
+ dbname=genome, host=host) |
|
103 |
+ tableNames = .SUPPORTED_UCSC_TABLES[[tablename]] |
|
104 |
+ message("Download the ", tablename, " table ... ") |
|
105 |
+ ans = switch(tablename, |
|
106 |
+ "refGene"=queryrefGene(con), |
|
107 |
+ "knownGene"=queryknownGene(con), |
|
108 |
+ "ensGene"=queryensGene(con) |
|
109 |
+ ) |
|
110 |
+ dbDisconnect(con) |
|
111 |
+ # add the bin column |
|
112 |
+ ans$bin = binFromCoordRange(ans$start, ans$end) |
|
113 |
+ # reorder the columns, not necessary |
|
114 |
+ ans = ans[ ,c("bin","chromosome","start","end","strand", |
|
115 |
+ "gene", "transcript","symbol")] |
|
116 |
+ con = dbConnect(SQLite(), dbname=dbnameSQLite) |
|
117 |
+ dbWriteTable(con, tablenameSQLite, ans, overwrite=overwrite) |
|
118 |
+ dbDisconnect(con) |
|
119 |
+} |
|
120 |
+ |
|
121 |
+ |
|
122 |
+ |
0 | 123 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,200 @@ |
1 |
+### ----------------------------------------------------------------- |
|
2 |
+### Fetch the CNE coordinates from SQL and compute the densities |
|
3 |
+### Exported! |
|
4 |
+setMethod("CNEDensity", |
|
5 |
+ signature(tableName="character", assembly1="character", |
|
6 |
+ assembly2="missing", threshold="missing"), |
|
7 |
+ function(dbName, tableName, assembly1, assembly2, threshold, |
|
8 |
+ chr, start, end, windowSize, minLength=NULL){ |
|
9 |
+ if(!grepl("^.+_.+_\\d+_\\d+$", tableName)) |
|
10 |
+ stop("The tableName should be in the format danRer7_hg19_49_50.") |
|
11 |
+ assemblyNames <- strsplit(tableName, "_")[[1]][1:2] |
|
12 |
+ stopifnot(assembly1 %in% assemblyNames) |
|
13 |
+ if(which(assembly1 == assemblyNames) == 1){ |
|
14 |
+ whichAssembly = "L" |
|
15 |
+ }else{ |
|
16 |
+ whichAssembly = "R" |
|
17 |
+ } |
|
18 |
+ .CNEDensityInternal(dbName=dbName, tableName=tableName, |
|
19 |
+ whichAssembly=whichAssembly, |
|
20 |
+ chr=chr, start=start, end=end, |
|
21 |
+ windowSize=windowSize, minLength=minLength) |
|
22 |
+ } |
|
23 |
+ ) |
|
24 |
+setMethod("CNEDensity", signature(tableName="missing", assembly1="character", |
|
25 |
+ assembly2="character", threshold="character"), |
|
26 |
+ function(dbName, tableName, assembly1, assembly2, threshold, |
|
27 |
+ chr, start, end, windowSize, minLength=NULL){ |
|
28 |
+ stopifnot(length(threshold) == 1L) |
|
29 |
+ builtTableName = paste(paste(sort(c(assembly1, assembly2)), |
|
30 |
+ sep="_", collapse="_"), threshold, |
|
31 |
+ sep="_", collapse="_") |
|
32 |
+ if(which(assembly1 == sort(c(assembly1, assembly2))) == 1){ |
|
33 |
+ whichAssembly = "L" |
|
34 |
+ }else{ |
|
35 |
+ whichAssembly = "R" |
|
36 |
+ } |
|
37 |
+ .CNEDensityInternal(dbName=dbName, tableName=builtTableName, |
|
38 |
+ whichAssembly=whichAssembly, |
|
39 |
+ chr=chr, start=start, end=end, |
|
40 |
+ windowSize=windowSize, minLength=minLength) |
|
41 |
+ } |
|
42 |
+ ) |
|
43 |
+ |
|
44 |
+ |
|
45 |
+ |
|
46 |
+.CNEDensityInternal <- function(dbName, tableName, whichAssembly=c("L","R"), |
|
47 |
+ chr, start, end, windowSize, |
|
48 |
+ minLength=NULL){ |
|
49 |
+ nrGraphs <- 1 |
|
50 |
+ CNEstart <- start |
|
51 |
+ CNEend <- end |
|
52 |
+ # This is the pipeline of doing the density plot |
|
53 |
+ # The windowSize is in kb. |
|
54 |
+ whichAssembly <- match.arg(whichAssembly) |
|
55 |
+ if(!is(windowSize, "integer")) |
|
56 |
+ stop("windowSize must be an integer!") |
|
57 |
+ windowSize <- windowSize * 1000 |
|
58 |
+ CNElength <- CNEend - CNEstart + 1 |
|
59 |
+ pixel_width <- 2048 |
|
60 |
+ if(CNElength <= pixel_width) { |
|
61 |
+ step_size <- 1 |
|
62 |
+ }else{ |
|
63 |
+ step_size <- as.integer(CNElength/pixel_width) |
|
64 |
+ if(step_size > windowSize/10) |
|
65 |
+ step_size <- windowSize/10 |
|
66 |
+ while(windowSize %% step_size){ |
|
67 |
+ step_size <- step_size - 1 |
|
68 |
+ } |
|
69 |
+ } |
|
70 |
+ # make things easier |
|
71 |
+ if(windowSize %% 2 == 0) |
|
72 |
+ windowSize <- windowSize - 1L |
|
73 |
+ context_start <- as.integer(max(CNEstart - (windowSize-1L)/2, 1)) |
|
74 |
+ context_end <- as.integer(CNEend + (windowSize-1)/2) |
|
75 |
+ #win_nr_steps = windowSize / step_size |
|
76 |
+ #context_start = CNEstart - as.integer(((win_nr_steps-1)*step_size)/2+0.5) |
|
77 |
+ #if(context_start < 1) |
|
78 |
+ # context_start = 1 |
|
79 |
+ #context_end = CNEend + |
|
80 |
+ # as.integer(((win_nr_steps-1)*step_size)/2+step_size+0.5) |
|
81 |
+ ranges <- readCNERangesFromSQLite(dbName, tableName, chr, |
|
82 |
+ context_start, context_end, |
|
83 |
+ whichAssembly, minLength) |
|
84 |
+ # Implement get_cne_ranges_in_region_partitioned_by_other_chr later!!! |
|
85 |
+ ranges <- reduce(ranges) |
|
86 |
+ covAll <- coverage(ranges, width=context_end) |
|
87 |
+ runMeanAll <- runmean(covAll, k=windowSize, "constant") |
|
88 |
+ resStart <- max(CNEstart, (windowSize-1)/2+1) |
|
89 |
+ resEnd <- min(CNEend, context_end-(windowSize-1)/2) |
|
90 |
+ resCoords <- seq(resStart, resEnd, by=step_size) |
|
91 |
+ if(nrGraphs == 1){ |
|
92 |
+ runMeanRes <- runMeanAll[resCoords]*100 |
|
93 |
+ res <- cbind(resCoords, as.numeric(runMeanRes)) |
|
94 |
+ colnames(res) <- c("coordinates", "y") |
|
95 |
+ }else{ |
|
96 |
+ runMeanRes <- lapply(runMeanAll, "[", resCoords) |
|
97 |
+ runMeanRes <- lapply(runMeanRes, "*", 100) |
|
98 |
+ res <- list() |
|
99 |
+ for(i in 1:length(runMeanRes)){ |
|
100 |
+ res[[names(runMeanRes)[i]]] <- cbind(resCoords, |
|
101 |
+ as.numeric(runMeanRes[[i]])) |
|
102 |
+ colnames(res[[names(runMeanRes)[i]]]) <- c("coordinates", "y") |
|
103 |
+ } |
|
104 |
+ } |
|
105 |
+ return(res) |
|
106 |
+} |
|
107 |
+ |
|
108 |
+ |
|
109 |
+#calc_window_scores = function(CNEstart, CNEend, ranges, |
|
110 |
+ # win_nr_steps, step_size){ |
|
111 |
+# ## Here the starts and ends are 1-based. |
|
112 |
+# CNElength = CNEend - CNEstart + 1 |
|
113 |
+# win_size = win_nr_steps * step_size |
|
114 |
+# offsetBlk = as.integer(((win_nr_steps-1)*step_size)/2+0.5) |
|
115 |
+# context_start = CNEstart - offsetBlk |
|
116 |
+# if(context_start < 1) |
|
117 |
+# context_start = 1 |
|
118 |
+# context_end = CNEend + offsetBlk |
|
119 |
+# context_size = context_end - context_start + 1 |
|
120 |
+# #nr_blocks = as.integer(context_size/step_size) + |
|
121 |
+ # ifelse(context_size%%step_size, 1, 0) |
|
122 |
+# #blk_scores = numeric(ifelse(nr_blocks>win_nr_steps, |
|
123 |
+ # nr_blocks, win_nr_steps+1)) |
|
124 |
+# |
|
125 |
+# covAll = coverage(ranges, width=context_end) |
|
126 |
+# |
|
127 |
+# #runMeanAll = runmean(covAll, k=windowSize, "constant") |
|
128 |
+# #resStart = max(CNEstart, (windowSize-1)/2+1) |
|
129 |
+# #resEnd = min(CNEend, computeEnd-(windowSize-1)/2) |
|
130 |
+# #height = runMeanAll[resStart:resEnd]*100 |
|
131 |
+#} |
|
132 |
+ |
|
133 |
+#listToPlot = list(a=res, b=res) |
|
134 |
+ |
|
135 |
+#plotCNE = function(listToPlot, horizonscale=2, nbands=3){ |
|
136 |
+# mergedDf = as.data.frame(do.call(rbind, listToPlot)) |
|
137 |
+# mergedDf$grouping = rep(names(listToPlot), sapply(listToPlot, nrow)) |
|
138 |
+# mergedDf = mergedDf[ ,c("coordinates", "grouping", "y")] |
|
139 |
+# p = horizon.panel.ggplot(mergedDf, horizonscale=horizonscale, nbands=nbands) |
|
140 |
+# #if(!is.null(file)){ |
|
141 |
+# # postscript(file=file) |
|
142 |
+# # on.exit(dev.off()) |
|
143 |
+# #} |
|
144 |
+# return(p) |
|
145 |
+#} |
|
146 |
+ |
|
147 |
+#horizon.panel.ggplot = function(mergedDf, horizonscale=2, |
|
148 |
+# nbands=3, my.title="fun"){ |
|
149 |
+# #require(ggplot2) |
|
150 |
+# #require(reshape2) |
|
151 |
+# origin = 0 |
|
152 |
+# #require(RColorBrewer) |
|
153 |
+# #col.brew = brewer.pal(name="RdBu",n=10) |
|
154 |
+# #col.brew = c("#67001F", "#B2182B", "#D6604D", |
|
155 |
+# #"#F4A582", "#FDDBC7", "#D1E5F0", |
|
156 |
+# #"#92C5DE", "#4393C3", "#2166AC", "#053061") |
|
157 |
+# col.brew = c("yellow", "orange", "red", "chartreuse", "blue") |
|
158 |
+# colnames(mergedDf) = c("coordinates", "grouping", "y") |
|
159 |
+# for(i in 1:nbands){ |
|
160 |
+# #do positive |
|
161 |
+# mergedDf[ ,paste("ypos",i,sep="")] = |
|