... | ... |
@@ -2,9 +2,9 @@ Package: ModCon |
2 | 2 |
Type: Package |
3 | 3 |
Title: ModCon |
4 | 4 |
Version: 0.99.0 |
5 |
-Authors@R: c(person("Johannes", "Ptok", role = c("aut", "cre"), email = "Johannes.Ptok@posteo.de"), person("Gene", "Yeo", role = c("ctb"), email = "geneyeo@ucsd.edu", comment = "first author for perl script to calculate MaxEntScan score of splice sites.")) |
|
6 |
-Description: Proposing a nucleotide sequence surrounding for splice donors to either activate or repress donor usage. |
|
7 |
-License: file LICENSE |
|
5 |
+Authors@R: person("Johannes", "Ptok", role = c("aut", "cre"), email = "Johannes.Ptok@posteo.de") |
|
6 |
+Description: Collection of functions to calculate a nucleotide sequence surrounding for splice donor sites that either activates or represses donor usage. The proposed alternative nucleotide sequence encodes the same amino acids and can be applied, e.g., in reporter systems to silence or activate cryptic splice donor sites. |
|
7 |
+License: GPL-3 |
|
8 | 8 |
Encoding: UTF-8 |
9 | 9 |
LazyData: true |
10 | 10 |
VignetteBuilder: knitr |
... | ... |
@@ -13,10 +13,11 @@ Depends: |
13 | 13 |
parallel, |
14 | 14 |
utils, |
15 | 15 |
stats, |
16 |
- R (>= 3.6) |
|
16 |
+ R (>= 4.1) |
|
17 | 17 |
Suggests: |
18 | 18 |
testthat, |
19 |
- knitr |
|
19 |
+ knitr, |
|
20 |
+ rmarkdown |
|
20 | 21 |
selectBestAndMean: Installation of any version of Perl is needed (e.g. strawberry perl). |
21 | 22 |
biocViews: FunctionalGenomics, AlternativeSplicing |
22 | 23 |
URL: https://github.com/caggtaagtat/ModCon |
79 | 79 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,416 @@ |
1 |
+# Load the packages used by the app (ggplot2 renders the plots) |
|
2 |
+library("ggplot2") |
|
3 |
+library("shinycssloaders") |
|
4 |
+library("shiny") |
|
5 |
+library("shinyFiles") |
|
6 |
+library("seqinr") |
|
7 |
+library("Biostrings") |
|
8 |
+library("BSgenome") |
|
9 |
+library("VarCon") |
|
10 |
+ |
|
11 |
# The transcript table is read inside server() from the path stored in
# path3$pth3. The former top-level call readRDS("exampleTransCoord") discarded
# its result, so its only possible effect was to abort app start-up when no
# file of that name existed in the working directory; it is therefore omitted.
|
12 |
+ |
|
13 |
+########################################### |
|
14 |
+## Server and UI code |
|
15 |
+ |
|
16 |
+## Server script: handles inputs and renders outputs |
|
17 |
## Shiny server logic: loads the reference data, looks up the sequence around
## the requested variation and renders the text and plot outputs.
##
## @param input   Shiny input list. Reads `transcriptID`, `variation`,
##                `ntWindow`, `plot2_brush` and the three file-chooser buttons
##                (`filechoose`, `filechoose2`, `filechoose3`).
## @param output  Shiny output list. Assigns `sum_text`, `sum_text2`,
##                `sum_text22`, `plot`, `plot2`, `plot_zoom`, `plot2_zoom` and
##                `plot2_text`.
## @param session Shiny session; the app is stopped when the session ends.
server <- function(input, output, session) {

  ## Stop the app once the browser session is closed.
  session$onSessionEnded(function() {
    stopApp()
  })

  ## ---- Helpers ------------------------------------------------------------

  ## Lower-cased file extension of `filePath`. With `ignoreGz = TRUE` a
  ## trailing ".gz" is removed first, so "genome.fa.gz" yields "fa".
  fileExtension <- function(filePath, ignoreGz = FALSE) {
    if (ignoreGz) {
      filePath <- sub("\\.gz$", "", filePath, ignore.case = TRUE)
    }
    tolower(tools::file_ext(filePath))
  }

  ## Open the native file dialog. file.choose() signals an error when the
  ## dialog is cancelled; an uncaught error inside an observer would end the
  ## session (and, through onSessionEnded, the app), so NULL is returned
  ## instead and the previously selected path is kept.
  chooseFileOrNull <- function() {
    tryCatch(file.choose(), error = function(e) NULL)
  }

  ## Bar plot of the per-nucleotide `endhex` values with the nucleotide
  ## letters printed underneath the bars.
  ## `lowerPad` is subtracted from the smallest score to get the lower y limit,
  ## `labelOffset` places the letters below the smallest score.
  ## The argument keeps the name `durchzahl` on purpose: aes(x = durchzahl)
  ## is resolved exactly as in the original code (data column first, then the
  ## enclosing environment).
  hzeiBarPlot <- function(durchzahl, lowerPad, labelOffset) {
    ggplot(durchzahl, aes(x = durchzahl, y = endhex, fill = Sequence)) +
      scale_y_continuous(
        name = "Hexplorer score",
        breaks = c(seq(-75, 0, 5), seq(2, 34, 2)),
        limits = c(
          min(durchzahl$endhex) - lowerPad,
          max(c(durchzahl$hbs, durchzahl$endhex)) + 1
        )
      ) +
      scale_fill_manual(values = c("#56B4E9", "#000000")) +
      geom_bar(stat = "identity", position = "dodge") +
      xlab("Sequence") +
      ylab("Hexplorer score") +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
      ) +
      annotate(
        "text",
        label = substr(
          durchzahl$seq9[durchzahl$Sequence == "sequence of interest"], 6, 6
        ),
        x = seq_len(nrow(durchzahl)),
        y = min(durchzahl$endhex - labelOffset),
        size = 3,
        colour = "black"
      )
  }

  ## ---- Reactive file paths --------------------------------------------------

  ## Reference genome; defaults to the example shipped with VarCon.
  path <- reactiveValues(
    pth = system.file("extdata", "referenceDnaStringSet.fa", package = "VarCon")
  )

  ## Optional gene-to-transcript table.
  ## NOTE(review): the default points at a FASTA file from Biostrings although
  ## the path is later parsed with read.csv(sep = ";") - confirm this
  ## placeholder is intended.
  path2 <- reactiveValues(
    pth2 = system.file("extdata", "fastaEx.fa", package = "Biostrings")
  )

  ## Transcript table; defaults to the example shipped with VarCon.
  path3 <- reactiveValues(
    pth3 = system.file("extdata", "exampleTransCoord", package = "VarCon")
  )

  observeEvent(input$filechoose, {
    chosen <- chooseFileOrNull()
    if (!is.null(chosen)) {
      path$pth <- chosen
    }
  })

  observeEvent(input$filechoose2, {
    chosen <- chooseFileOrNull()
    if (!is.null(chosen)) {
      path2$pth2 <- chosen
    }
  })

  observeEvent(input$filechoose3, {
    chosen <- chooseFileOrNull()
    if (!is.null(chosen)) {
      path3$pth3 <- chosen
    }
  })

  ## ---- Reference data -------------------------------------------------------

  ## Reference genome as a DNAStringSet. FASTA files (".fa"/".fasta",
  ## optionally gzipped) are parsed and their sequence names are cut at the
  ## first space; any other file is handed to load() and is expected to
  ## provide an object called `referenceDnaStringSet`.
  uploadReferenceDNA <- eventReactive(path$pth, {
    if (fileExtension(path$pth, ignoreGz = TRUE) %in% c("fa", "fasta")) {
      referenceDnaStringSet <- readDNAStringSet(
        path$pth, format = "fasta", use.names = TRUE
      )
      names(referenceDnaStringSet) <- sub(
        " .*", "", names(referenceDnaStringSet)
      )
    } else {
      load(path$pth)
    }

    referenceDnaStringSet
  })

  ## Transcript table: ";"-separated CSV, otherwise read with readRDS().
  ## Human transcript tables are available e.g. from
  ## https://github.com/caggtaagtat/VarConTables
  uploadTranscriptTable <- eventReactive(path3$pth3, {
    if (fileExtension(path3$pth3) == "csv") {
      read.csv(path3$pth3, sep = ";")
    } else {
      readRDS(path3$pth3)
    }
  })

  ## Sequence information around the requested variation. Shared by all
  ## outputs below so the table is read and the genome queried once per
  ## change of inputs instead of once per output.
  seqInfo <- reactive({
    gene2transcript <- read.csv(path2$pth2, sep = ";", stringsAsFactors = FALSE)
    referenceDnaStringSet <- uploadReferenceDNA()
    transCoord <- uploadTranscriptTable()

    getSeqInfoFromVariation(
      referenceDnaStringSet, input$transcriptID, input$variation,
      ntWindow = input$ntWindow, transCoord,
      gene2transcript = gene2transcript
    )
  })

  ## ---- Upload status messages ---------------------------------------------

  ## Report when loading of the reference genome is complete.
  ## A genome FASTA can be downloaded e.g. from
  ## ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/
  ## dna/Homo_sapiens.GRCh38.dna.toplevel.fa.gz
  output$sum_text2 <- renderUI({
    uploadReferenceDNA()
    HTML("Upload of reference genome completed...")
  })

  ## Report when loading of the transcript table is complete.
  output$sum_text22 <- renderUI({
    uploadTranscriptTable()
    HTML("Upload of transcript table completed...")
  })

  ## ---- Sequence summary -----------------------------------------------------

  ## Summarise the retrieved reference and variant sequence. The parts are
  ## joined with paste(sep = "<br/>") before being wrapped in HTML(): HTML()
  ## has no `sep` argument and would join its arguments with spaces.
  output$sum_text <- renderUI({
    res <- seqInfo()

    HTML(paste(
      paste0(
        "For the given annotation ", res$funcAnnotation,
        " within transcript ", res$transcript,
        " following sequence was found around the chromosomal coordinate ",
        res$genomicCoordinate, " on chromosome ", res$chromosome, " :"
      ),
      "",
      paste0("Ref Seq: ", res$sequence),
      "",
      paste0("Ref+vari:", res$altSeq),
      sep = "<br/>"
    ))
  })

  ## ---- Plots ----------------------------------------------------------------

  ## Range selected with the brush on `plot2`; NULL shows the full range.
  ranges2 <- reactiveValues(x = NULL)

  ## HZEI bar plot of the reference sequence.
  output$plot <- renderPlot({
    durchzahl <- calculateHZEIperNT(seqInfo()$sequence)
    durchzahl$Sequence <- "sequence of interest"

    hzeiBarPlot(durchzahl, lowerPad = 5, labelOffset = 8)
  })

  ## HEXplorer plot on which the zoom area can be marked.
  output$plot2 <- renderPlot({
    generateHEXplorerPlot(seqInfo(), input$ntWindow)
  })

  ## Zoomed-in version of the HZEI bar plot.
  output$plot_zoom <- renderPlot({
    durchzahl <- calculateHZEIperNT(seqInfo()$sequence)
    durchzahl$Sequence <- "sequence of interest"

    hzeiBarPlot(durchzahl, lowerPad = 6, labelOffset = 4) +
      coord_cartesian(xlim = ranges2$x, expand = FALSE)
  })

  ## Zoomed-in version of the HEXplorer plot.
  output$plot2_zoom <- renderPlot({
    generateHEXplorerPlot(seqInfo(), input$ntWindow) +
      coord_cartesian(xlim = ranges2$x, expand = FALSE)
  })

  ## Track the brush on `plot2` and store the selected x range.
  observe({
    brush <- input$plot2_brush
    if (!is.null(brush)) {
      ranges2$x <- c(brush$xmin, brush$xmax)
    } else {
      ranges2$x <- NULL
    }
  })

  ## Report the summed difference between the `endhex` values of the variant
  ## and the reference sequence.
  output$plot2_text <- renderUI({
    res <- seqInfo()

    durchzahl <- calculateHZEIperNT(res$sequence)
    durchzahl2 <- calculateHZEIperNT(res$altSeq)

    durchzahl$mut_hex <- durchzahl2$endhex
    durchzahl$diff_hex <- durchzahl$mut_hex - durchzahl$endhex

    totalDiff <- sum(durchzahl$diff_hex)
    pre <- if (totalDiff > 0) "+" else ""

    HTML(paste0(
      "The difference in the HEXplorer Score integral amounts to ",
      paste0(pre, totalDiff), " in total."
    ))
  })
}
|
265 |
+ |
|
266 |
+#User Interface Script |
|
267 |
+ |
|
268 |
## User interface: a title, a short introduction and four tabs (reference data
## upload, query entry, plots, manual). Each tab is built as a named local and
## assembled at the end; only `ui` is created in the calling environment.
ui <- local({

  ## Tab 1: buttons for choosing the reference files plus status messages.
  uploadTab <- tabPanel(
    "Upload reference data",
    fluidRow(
      column(
        4,
        h4("Fasta reference genome"),
        actionButton("filechoose", label = "Select FASTA file")
      ),
      column(
        4,
        h4("Transcript table"),
        actionButton("filechoose3", label = "Select transcript table")
      ),
      column(
        3,
        h4("Optional: gene/transcript table"),
        actionButton("filechoose2", label = "Select gene2transcript table")
      )
    ),
    fluidRow(
      column(4, withSpinner(htmlOutput("sum_text2"), type = 6)),
      column(4, withSpinner(htmlOutput("sum_text22"), type = 6))
    )
  )

  ## Tab 2: transcript, variation and window size, followed by the summary.
  queryTab <- tabPanel(
    "Retrieve sequence around SNV",
    fluidRow(
      column(
        3,
        h4("Please enter the required information"),
        helpText(
          "Please enter the transcript of interrest",
          "an the annotation of the functional variation."
        )
      ),
      column(
        3,
        textInput(
          "transcriptID",
          label = h4("Transcript ID (ENSEMBL)"),
          value = "ENST00000544455"
        )
      ),
      column(
        3,
        textInput(
          "variation",
          label = h4("Functional variation"),
          value = "c.516+21A>T"
        )
      ),
      column(
        3,
        numericInput(
          "ntWindow",
          label = h4("Seq x nt up/downstream"),
          value = 20, min = 5, max = 150
        )
      )
    ),
    br(),
    fluidRow(
      column(12, withSpinner(htmlOutput("sum_text"), type = 6))
    ),
    br()
  )

  ## Tab 3: brushable overview plot, zoomed plot and score difference text.
  plotTab <- tabPanel(
    "Impact splice site strength and SREs",
    br(),
    br(),
    withSpinner(
      plotOutput(
        "plot2",
        height = 200,
        brush = brushOpts(id = "plot2_brush", resetOnNew = TRUE)
      ),
      type = 6
    ),
    h4("Zoomed in plot:"),
    plotOutput("plot2_zoom", height = 200),
    br(),
    htmlOutput("plot2_text")
  )

  ## Tab 4: usage instructions.
  manualTab <- tabPanel(
    "Manual",
    h3("1. Upload reference genome fasta file"),
    "First, please upload the fasta file (or zipped fasta.gz) of the reference genome sequence.",
    br(),
    "Potentially required data for the reference genome GRCh37 and GRCh38 is availible in the directory of this application.",
    br(),
    "If needed, the required file can be downloaded from the Ensembl ftp server ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz",
    br(),
    br(),
    h3("2. Select genome assembly"),
    "Next, select whether the uploaded genome reference file originated from assembly GRCh37 or GRCh38.",
    "The respective transcript table, holding the genomic exon coordinates will be selected.",
    br(),
    br(),
    h3("3. Select gene to transcript conversion table (optional)"),
    "Select a csv-table holding gene names and gene transcripts which shall be used synonymously during the querries.",
    br(),
    br(),
    h3("4. Data entry"),
    "On the next panel, now enter the respective transcript name (or gene name) and the single nucleotide variation of interest.",
    "The sequence variations can either refer to the nucleotide positions within the coding sequence or genomic coordiantes.",
    "Example variations: c.142+2A>T or g.12746124G>A",
    br(),
    br()
  )

  ## Page layout: headline, introduction, then the tab set.
  fluidPage(
    titlePanel("VarCon: Retrieve genomic sequence around sequence variation"),
    "VarCon retrieves the surrounding genomic sequence of a stated sequence variation and visualizes potential changes in sequence elements important for splicing. Please first upload the fasta file of the respective reference genome sequence. Loading and processing of the data will take up to 2 minutes.",
    br(),
    br(),
    tabsetPanel(
      type = "tabs",
      uploadTab,
      queryTab,
      plotTab,
      manualTab
    )
  )
})
|
414 |
+ |
|
415 |
+ |
|
416 |
+shinyApp(ui = ui, server = server) |
2 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,153 @@ |
1 |
+>pseudoChromosome1 |
|
2 |
+AGTACGTAGTCGCTGCTGCTACGGGCGCTAGCTAGTACGTCACGACGTAGATGCTAGCTGACTAAAC |
|
3 |
+>pseudoChromosome2 |
|
4 |
+ATCGAGCCAACTAAGTAACAGTAACACTGCAAAACAGGGGATACTGCTGTAGACTATAGTGGCCGCGAGAATGGAAGGAA |
|
5 |
+TATGGAAGCGAATCCCGTCGATCCGCCGTCGCGTGACGCTTTATCCCCAAGCCCTCATATCCTCAGTTTGGGCGTTTACC |
|
6 |
+ACGTCGGGCCACAAGTGGCAAAGTCCATAGTAGTGCCGACAACCTTGCCTCAGCTCGCGCTTAAGGGTAATACATGGCGA |
|
7 |
+ATCTGACGGGCTGTCGTGTAATGACAGCGACACATCTTTGACAATCGCATGAGGTCCTAACGATAACAGTCCCAAGACAT |
|
8 |
+ACAGGTTAGCGAGCGACGCGGGCTCAGAGTCTCGTCACGTGCGGCGAAAAGTTATTTTAAATCGGTGTCTTGTACGGGGG |
|
9 |
+TGCTAAACTTAGTCCTCCAATTGACGCGTACGGCTAGCATGACTTAAATCTACTTGTGTAGTGGATTTTGAATAGGTCCC |
|
10 |
+ACCTCTGCCATGAGCTATAGGTGCCAGCATGATTTAACTGGGTCCAATAGAAGGTAAAAAGCCAAATTGTGAAACACAAA |
|
11 |
+GTAACACGGCTTGGCCCAACGCTGGGAACAGCGTACGTACAAAGTCCTCTCATCAAGCCCATCCTGGGGGGGCAGACCAT |
|
12 |
+CACGAATCTCCAAGGGTCAACAATATCGGCCAGTGGTTTAAGGGAGAAGTTCTACACACGTAGCTTAAATCGAATAAGGG |
|
13 |
+TAGCTTACCCGCAGATGATGCGACGCCCGCGTCCCTACCAAATTCCGACACTCAGTAAGAATTCATAGAGTGAGGCCTGC |
|
14 |
+GTTCGGAGATGAAGTACCGCACCCCCTTTCCTGGAGCAGTAGGGCTTGAACGTAATATGTCATTTGGAACAAGTACTCTG |
|
15 |
+TCGAGATTAGCCGTTGCCAAAAACCCCAAGGCGTGACAACAACCGTTTCGCGATGGACAAGTAGACACGGCGACTCGCTA |
|
16 |
+ACCCAACCCAGCCGCCAGTGCCGAGCGCCCGGTCTGTTCCGATCCTCATTGTTCGGAAGGGGCATCGAGTTTTTTTAGGA |
|
17 |
+ACTCAGCGGCAGCCCGAACAAGTGTTCCTTTTGTGTTGCTTGATAGACCAGCTACCGTCGTTGTCTGAAGGGTTATTGAC |
|
18 |
+CGACATAATACGTTAAGGCAAAACGAGCTCTTCGCTGAGGGAAGGGTAGCGGGGATAAGCGTCCCAGAGCAATAACACGA |
|
19 |
+TAGCGTATTAAAGCCCACAAGTGATCCCATCTACACAAGTATTCTTGTTTCAATTCATATGACAAGGGTCTGACCTTATT |
|
20 |
+ACTGCTTGCAATTAGCAATCCGGCTAGCGGGTGCCATCTAGTCACGCTTTTATCCATTGAAGACTTTGGGAGGTTGTAAG |
|
21 |
+CTCCACATTCATTGCCGACGTAGATTTTCTTGAAGGCGGCGCTAATTAAACAGGCACTCGTCACGGGTGTTTGGAATGGT |
|
22 |
+CGTCAACGGCGCGTCACCTTCGTGGTCGCAAAACAAACCAAGTTTTGTTGTGTTTCTAGTGTACCCCTCGTTGACGCTCT |
|
23 |
+TAATATTTTTTCCAGTCCGCATAGAGGTGATGACGTAGGGGTAACGCTGAAGTCCACGAGAAAAGTTCCTTAGTAGCTTG |
|
24 |
+CGCTCAACTCGAGTAACTATTACCCTGGGGTATTTACTCTAGTGTGGAATGATCGCAGTCTGGGTGACTCTAAAAAACGA |
|
25 |
+CGCACTTCGTAACGGGTGTCGTCACAGCAAGGTGCGTATGAACATCGTGATACGTAATGTTCGAAGGCCGGATTAGCGCT |
|
26 |
+TTGTGAGATTTGGAGACATCTCAGTCGTACTTGGAGTATAAGGAAGGATGTTGACTCACTCGCACAAGGTGTGACTGATT |
|
27 |
+TGTTGGGGGGGGGATATGTACCGCAAAGGTTTGCGGCCATAGCTCTATGGGCTTCGGAGATAGTAAGCAGAGGGCTTTTG |
|
28 |
+AATGACATGATAGCTAACACCGGTTATTAAATCTAGCAGCCTCGAATTAGACTGAAGCAGTTGAGCACCCCTTCCTTCGG |
|
29 |
+GCAACTTCGGATCCTAAACGCTCTTTGTCGATATTCTACAGCCTGATACCCAAGTGGAGCGATATCGGCCGCGACCAGCT |
|
30 |
+AGCACCAGATTAGCAAAGGGCACCGAGTGACTAATGCTCTAAGTGAAATGTATGAAACCGCCCAATAGGCCTGCTACTAT |
|
31 |
+ATGCAGCGTCCTTGGTCTATATGGTATACGTCAAATATCAGCCCCAAACCCACTGGTCAGCTGATGCGGCTCTGATTGCA |
|
32 |
+TTCCAGTGTTTTCGACTGTGGTGATTCCTGCCAGGAACCGCGGATCTGACGAGATCCGGCTCGTGAAGTCAAATCCAACC |
|
33 |
+GGGTGCCCAATCCCTGCTACGTCGCACAGGGAATCAAGATAGCGCTTCAGGCCGCCCCTTGTCAAGTAGGGGCGGCCCAC |
|
34 |
+CTACATCTTTGTCCCTGGATACCCGATCAGTGTACACTAGAGGAGATAGGTCTCGGAACCGTCGATATCAGCGGTTTCTG |
|
35 |
+AATCGTGGGAATTACTGAGTCCTCATCGTGCAGGATATTAGCTATTGTATTAACTCAAGGGTCCTCAACACGCCTCAGCT |
|
36 |
+CCTAAACTCAGTTGTGACCTCGCCTAGGGACTCAGTTATCTTAGACCTTTCAGAGCAGCATATGACGGCTGCCTGTGTAT |
|
37 |
+TCAGCAGGATAGATTTCTGGTGTCAGTCGAAGTCCGGCGATCTTACGTAACGCAAGGGTCCCTTAGCTACCCCTTCATCC |
|
38 |
+GATACACCATCCCCGATATGTGGGGTCAGAGCTCGCACTATTGCTATGGTTCAGTGGTTTACACTGTTGTTATGTTTTTA |
|
39 |
+TACCAGCGGAATTACCGAATCTCGAACTTACTAACTTGTTTGTCTCCCTCCCACATCGGCCTCCAGATCTGTGGATGCTA |
|
40 |
+CACATTAAATCAGACAGCCCTATTTTCCATCGACAAAGACTGATAAGAGACAGGTACGCCTTCGCGTCGAACTCGGCTGG |
|
41 |
+ACGTAGGGAAAGTAATCGCCGACCTAACTCAATTGGCCGATGGATAGCGCAGCAATTAGCTTACCTACTGACTGTGAAAT |
|
42 |
+AGAATTGGCGACGTTATTTCCCGAGAAGAGTACAGGTGATTTGGAATTACTGCTACAATCCGTAAGAGCCAAGTGGGGAA |
|
43 |
+GTCGGGTCCCGTGTGAACCTTATACGCCTGCTGCGACAGGAAATGGCAAAAACCGTGTCTCGACGTAGTATAGCTCGCAT |
|
44 |
+GCGGGAGTGTGTTTTAGTACAGACTATTCGCTTATAAAGTGACTCCCAAACCAACAGACCTGGGTCAAGTATATCTCCAT |
|
45 |
+GAACGGGATCAACAAGACGTCTTCGCAATGCAAACGTCGCAGTTCGTCAAAGACGTAGAGGCCGTCATTAAGTCTTCTAA |
|
46 |
+AGATGTTACCCAGATCTGCGGCTGATCGGACTCACGGAACATCGGAAAACTAGTGCGCGCTTCTGTATGGTACGTATATC |
|
47 |
+TAACCGGAAAAGCGCATGCAAGGGGCTAACCAACAGTGTCCAGAGTTTAAAAAGTGAGTAGAAATCCGTGCGGGTTCAGA |
|
48 |
+CAACGCAAGATGACCAACGATAAGCCGAAGGTCACCGGCAATCGACATATAGTTCGCAAACTCTAGATGGCCCATGTCAA |
|
49 |
+TCGGTCATGTTACAAAGCCAGCTGGCTATGTCTGCTAAGTTTCATCGATATGGAGCCGGCTCAGGGAGCGCCAGCCGGTC |
|
50 |
+CTGCAAACTCTATTTTACACAATAAGGTTAGTGCACCGCCGACTGGAAACTATGATATTTGCCGAACACACGTAACGTTC |
|
51 |
+TACCGAGAGCTACGCAGTTGATACCTCACCCTTATGGCACCAATGTAACTCTTTCACTAAACATTCAGCCTAGGCGGCGT |
|
52 |
+GTACTCTTTCCCTTGTAGGCTCTTTAACCAAAAAGTGGTGTTACGGAGGGGTCTCGTTCACCACATATCTAGGTTTCACA |
|
53 |
+CTAGGCAGACATGTCTGCAGACTTGCCTGACGGGCACTTGTTACGCCCCACGACAACGTCACTTGCATTCTTGCGAATAT |
|
54 |
+CTGTTCTGGGTGGGCCGGTCCAGCTAACTCATACTGTAACGCAAGATGGGGCACCGGGTGCATCTCCTTTTAGCGAGGGA |
|
55 |
+AGATGCAATTAGTGAACGCCCTTCGTTTCAAGTTGCGCTATCAGAACGTTGACTTACGTTATATTCAACCTCAAACAATT |
|
56 |
+TAAACATGATAAAATCTTGTGCAGAGAATGCTGCACGCGTCCAGATCCTTAACTGAGCAAGCTTATTCTTGTCATGTCGG |
|
57 |
+TAGTGTCCCAGCGCACCGCTGATTAGATGGATGCAGAAAGAATTGCGATGGCCACCGTACGCCTGTACCGACCGCTCCGA |
|
58 |
+AACTGAATGCCTAGTAAACGACCTACCAAGCCGCATCATAGCACGAAATAGCGCGGTACAACTACTAGGGAGGCGCCCCG |
|
59 |
+TGATGAACACGTTTTGCAAAATAATTGCCCCTCACAAGTACTTGCCAAATGTCTATAGGTTTCCATGTGGGGTATGTTCA |
|
60 |
+GTTCCCTATATGGTCAATAGTAAACTTGCGACTCTATAGGAAGAAGGCTAAAATACTGAATTGGTAGTCGATTACTCCAA |
|
61 |
+TCCCCTAACCAATTGAATTCACAGCTTTAGAGGAACACCTACGCTTCCACTATTATTTGCGGATGAATGTGATACGGTTG |
|
62 |
+GGCCTCCACACATGCTCAATCTGGGGATGAAGAAACCGCTTGCTCGCTCCGCTCTATCGTGAAGTACCATTACAACATCG |
|
63 |
+GGAAGGAGCCCCTTGCTTTGGTCCGCATACTTGGGCTATTTACGCAATGGGAGGCCTAATGGACGGGCATGGGCGAAGGA |
|
64 |
+CCCTCACGTGTGGATGCAGGCGTCCCAATGGCCACAACGAAGATGACGCATCTATAATTCTTGCCGTTGGGGTAGCATCG |
|
65 |
+CTTCGCACGCACCACCCTGACTATCCGCACTGGTGAAGACGTTCGACTCCCGGTTCGATTCCAGTTTGCCGTTTGCGTCC |
|
66 |
+GTGCCCCTTGACTTGCTTAGCCCTCATTGGGACACAAAGTTACCGACCCCAAAGCGTAGTTAGCTGTCTGTTAGTCCAGT |
|
67 |
+CGGCTGTGGACCACCGGGAAATCTTTGCCGGTCCAACGCGTCGACAATAAAGGTATAGTAGAAGGGATGTTTACGGCGTC |
|
68 |
+GACTCCTACCCTATTTGCAACGATCCGAAGCCGTTTACTCGCCGCCGCTCCCCGTTCCAGTCTCAAGGGTGTTGCATTGT |
|
69 |
+TCAGGTATATTTCCCCTTCGTTAAGGCGGGATCGATTCAGATGAAACTTAAGAAACCGTGGCACATGCGACTACGGCTGA |
|
70 |
+AGCCGAGCGAGCAACTACTGTAGGATCCTCTTGGCACTCTATTCCCGCGGCGCGGGGCTTGGCAAGAATTCGAAATGGAG |
|
71 |
+GTCGCATAAGTAGGAACGGTCGGTCAACCCACCGTACGCCTATGCATATAGATAATCCCGCGTGTTTTTGGGGTGGCATA |
|
72 |
+CTCCTTCGTCTACGGGCTTCTATACATATGGGTTGCTTAAGGTGTAGCAGAAGCTTAGTGGTTTGCGTAGTAATGCGGCT |
|
73 |
+GAATTTCACCTACACTATTGGGCGGGGGCCCCTACGCTAAGACTCTTTTACATCTACTTCCAAGCCAGTATGAAGATAGA |
|
74 |
+GGCCCCCCTCTACCGTCACATAGCAGTCTGGTGGTTATGGATTTGCGGCTATGTGCTAGAGATCGTCTTGAGATCCACGA |
|
75 |
+ACCCCGACCTTCAAAGGGTGGCCCTTCAGGCCGTCTTTACCTACATCACCAACCAAACGCATGGACGGCGAGCTATGCAG |
|
76 |
+GTCCTGCTGTCCACATCTCGTAGCTAACACACCCGTCTCGGGGAGAAGTCAATGCTTACATGACTGAGGCCGAAAAAGAC |
|
77 |
+CAATCTGCGAGTGTTGAATTCTATATGCGCCTTATCCCGGGTACCGTCGCACACCCCCGAATTGTGCAGCACAATGAGCA |
|
78 |
+ATGTTCAGCTTTCCTGCCGCGAAGTATAGTTCCGGGTTTGTTTACGTTTGGGCCGCAAATATTTTTACGATTCGGACGGG |
|
79 |
+CCGTCGTGAACCTTCCCATGGGGTGCACCATCTCTTGAGTTACTTGCGAGATCGACAGGTCGTAACGGACGTTTCGATAC |
|
80 |
+CAGTCCGATTTGAGGTGGAGACGAGCAATCCTAGGGCCTTGCATACTGTCAGCAAGTAGCAGTGATCAACTGCTGGGCCT |
|
81 |
+CATTCAATTAAGGCCCATACCGCATCCTTGTCCACCGATGCGCTAAGACCTTTATTCCACCTAAAGACATGTCGCGTTTC |
|
82 |
+GGTCCGCCGTAACGTCTATGATCGGTCTTTCTGTTACCACTCATTTCTGCCTCGTACCGGAGGCGGTCGGCATTAGGAAA |
|
83 |
+AGCGTCGAGTGGGCCCAGAATCCCGAGATCTTTTGTGAGCCAATACTACACCGCCCCGATCAATCATGCAAAAGATTTGA |
|
84 |
+CTTAGATATATCGGAAGCCCTTCAAGAGCTAAATTTATACAATCGCCCAAAGGCTTTAGGCAATCCGGAGCACTTGCCGA |
|
85 |
+AACTCATAGTTCATGAAAATAAGAAATGAACCATTAAGGATCATGTGTTGTAGCTGGTTTACCGTTCACTTAGGAGTTTC |
|
86 |
+ATCGGCTGTCGTCGATGAGTCTCTATTCGGCATAGCCAGCATTCTGCCAGGGTTAAGGGCAATCATTGTCCATAGGCACG |
|
87 |
+AACCCTCTTGCCGGTCAATCAGTTTTGGAAAATGTAACCCGTGTGGATTATCTACCCAGGGCTCCATCAGAATCGGCTGG |
|
88 |
+ACCAGTTGACGTGGACCTGGACTTTAGATCGCCACAGTGGTAAAGGGTGTGAGGGTATGGCATCAGAGCTACTGGCTGTC |
|
89 |
+GGAGAAATCGTCGGGTGGCAAAGGTAAATATACATTTATGTGTAGACGTAGTTTCGTACTGATCGAACAAGGGAGGGCGA |
|
90 |
+ACCGTGGTGATTGGCTAGCTTACGCCTTTCTGCAAACGATAATCCGGTTAGCGTCTGGCAATCGCGCCCACAGTCAGACA |
|
91 |
+TCCCCGATTACAAGCCGGATCCTTGCAGGATAGCATTCGAGCAACACGACAGTAGCTGCTAACGGTCACGGGTAAGGCGA |
|
92 |
+CAACCTTCTCCCCCTTACTAGTTGCAAGTCAGTTTGTAGATCGGCATACGGCCAGCCTTCCGCCAACAGCTTTTGGATTA |
|
93 |
+AAGCCGTGTCGAGGTAATCTTTCCGACGATTCGGTCAATAAGATTCTCATACGAGTGGGTAAGGGTTATTGATCTCACCT |
|
94 |
+TGCCCACCGAATAGTTGTTCCCATGTGGTACATTCGTGATTAGGATTCTATCTAAGTATTTTGGCCAAATTGACAGCGGC |
|
95 |
+TGTGACCACTGCACGGCAAATTGACCACCCTAGCAAACGGGTAGGCGACAGCTTTCTTCGTGTAGATCATACGACAGGGT |
|
96 |
+ACAAGTATAGAAAAGACTTGAGTTAGTTGAGCTTCTAACTGAGCGATAGTCGGTCGCATGCCTCTATCTGTAGCCCTTTG |
|
97 |
+TAGGATAAGGCCGCTGTTACCTGGAGCCGGAAAGCACTGTGTACTTGCCCGTCTCGCTGCCATTCCACCTGTTGGTTAGA |
|
98 |
+CGTTCAGTCTGTGGGGCTCCAGCCTGGTCATAAATCTTCACATTTAACAGATCCATCTCGGCGTCGAGTCCGTAGTTCTG |
|
99 |
+AGGCACCACGGGCACGAATGCACCTAATACTCAACTGATCCGTGTGACACGTCCGAATTTTGATCTACTAATTGGTCAAA |
|
100 |
+AGGTCTGTGGTAGCAATTCAGACGGGAAACAACCCAACTATCACCTAAGCCTCAGCAAGTAGCGGACATCGCTCATTTGA |
|
101 |
+AAAATCCGATGTCACTGCCATCCGTGCCCACCAGAAGCCCCCTGAAATCTGATGCCCCCCCGCAGTCAACTGGGGTTAGC |
|
102 |
+GGGATCGCATATAGTTGGAGCGCTTACTCCCCTCGCTTACATGAGTGGACTGTTGGTCGCCTTGCAAACGTGTGTTGAAG |
|
103 |
+GATACAGAGTGATAAGATTCGAGGGTGGGCGATCAGAGACTACAGCTAGTCGACACCCACAACCTGCGACTCTATAGGAG |
|
104 |
+GGTGTACGCACTTATGTCTTTTTTTCAGTCCGCAACTTCGGCGTACAACAGAAGTATGCAAGAGATACCGTTTCGCACCT |
|
105 |
+CCAGTCCTCAGGACTGCGTGGGGCGTCTATGCCGACGTGACGTTTCCAGTGCCCTGATGGCCCTTATGGTGCCCTAGTGA |
|
106 |
+GCAGTGCTTGAATAACAGTGAAGTATCCAGCGGCACGTGGTAACTCGGTAGTGTAGCCAGGAGATCGGTAACAGGGCATT |
|
107 |
+CCCGGAAAGACTAGTATAACGTAGAGACATTAAAATAGCTTAGTACCTGGAATTGCGGAGAAACTGGCCTCACGGCTAGG |
|
108 |
+CGCATTCGCCAACGAATAATGTTAGGATACCAGGTGCACTAACGGTGCCTCCACTAAAACTCCAATGAAAAAGTTGTGCG |
|
109 |
+AACCTATTAGCTGATTTAGGAAGAACGCCTAGCGCCTACTCATATTACCCTATACACATCCTATTTTCCTTCTCCCTTCG |
|
110 |
+AATGTTCTAAGTCGCAAACATACCATCTGAAACTTTGTCGCCGGTTACTAAGCTACGTGAGGCGGATATTGGACAAGTAG |
|
111 |
+GCAGCATACTACGGTTAGACGGCTTATGTTTACGTCTAGGTGGCGATGAAAAATCATCGATCTCCTTTGCAAACGCGACG |
|
112 |
+CCTATTTCTGAAAAGCTGCTCCAATAAGTTCAGTGGTTTCGGTGAGCACGCGGGGGTTCAGGGTAAATTCATTTGAGATT |
|
113 |
+ATACAGAGGGCGATTACCCAAGACCGTATTTGATTTATGGCCACCGGTCATTCACATGCTCCTTAGTCTGGTGGCGGCGG |
|
114 |
+AGGAATATATCGAACACGGTTAAATGAAGCATCCATTGATCTCCTGCCCGCAGATCTGGGAGGTGACACGGACAATCGCC |
|
115 |
+CGGGGTATACTCCCATATTGACACCTTGTAACGTGTGCTGTGGACGCGCAAGAGTAGCAGGCGATAGCAACATACCTTGC |
|
116 |
+ACTACGGTTCCCTAGAAAGGACACTGCGCGGTGATAGTTGGTTCCTATTGCTCAAAGAAGCCCGTTTAATGCGCTTGGGT |
|
117 |
+ACGTACTATAACGGAAGAATCCAAGAGTCTATTGCTAAGATTTCAATGCTAATGCCGTGGTATTTGCTTAATACCGTTGG |
|
118 |
+CAAGACCGTGGGCTGCGCGCTTAGTAGGGTGGGAGGTATGAGTCCACCAACGTCGAACCCGACCCTGTGCTATGTCGCAT |
|
119 |
+CTGATCATCGACAACTTGTTGATATTGCCCTGTTTACCCAGCAATAGATTACTCTAGCTGAGGGTTAGAGGGTTTTAGTC |
|
120 |
+ATTCGGAAGTAGTTTGGGAGGGTGAGATATCTAATTGGAGGTTAATACTTACGACCGCCCAAAGTCGCTGTCCCCACATC |
|
121 |
+ATATCAAAGAGGCCACGGCATTTGCAGTGATGTAAGTTCAATAGAATGAGGCCCAGGGCCAGGCGATAACTTTAATGTCG |
|
122 |
+GGCGAAGCACGTGCCCATCGGGTCCCCGCGCCACAGCCTTATTGTTTCTCTACACTCTCGCTGCTCCGACTCGCGAGGAT |
|
123 |
+AGTGATCGCTAGTGATCGGCAATGAATATATGTCGGAGGAAAACTAACTTAATTTCGCCATGAGACTTAGGAGGACCGGA |
|
124 |
+GACCCCTCGTCATTGTTCAAAAGTGCACGCTCAGGATCCGCAGGATCCCTTGGGGTGCATGCGTGTGATTTCCTGTGCTT |
|
125 |
+CGTAAGCAGACTACTTCTCCCCAGGCACATACGCATCTCCAACTTGGTCCAGTGGAAGCCGCGACCCGTTTCTCAAAACT |
|
126 |
+TACCATTAATCCCTCCCAGCATCCGTGGGTCTCCGGGCTCCCCAGTTTAGGAGCACTAGGTTGAACGAAAAGTGGACATA |
|
127 |
+GTGGGGGCCATCTCGATGAAGGCGAACGGTGCATCACGAATCATGACATCAGAATTAGGGACGAGCCTCGCGAACTACAT |
|
128 |
+AACTTGCAATCCCCGAACAGATGACAGGCCAGACTCGGCGTTTTTACCTGCAATATCACATAGTGGTGCGTTGAACTCAC |
|
129 |
+TCTAACGGGGGCTATCAAGTTTCGTTCCCGGTACCTGTAACACGTAAGCTAGGTCCGGCGTTAACCTTGACGGGGAGGTT |
|
130 |
+CAATAGGACCAGCTCGCTCGATTCACCTCACCAACTTATTGCAATACTGAGGTCCGAACTCCGGGCGATAAGATAAACAC |
|
131 |
+TGCCCGAACCCACCTGGACAACACAACAGCTTCACCCCAGGCTGGTTACGAATCACTGGGTTAGTTGCGGTTTCTCTCGA |
|
132 |
+CCTCAGTACGAACCTGTTAGTAGCGGCAATCACGCACAGCTTATTCCTGCGCTATGTCTTCCCTCCTCTTGCCGCGCGGG |
|
133 |
+CCGCTGACGGTAGACCCGTGCGTGCCTGCCAAGCTAATGAGACTCCAGAACGGTCCACCTAGTGTGCCAAGTGCAATCGG |
|
134 |
+AATAGCTGCTCTGTATCTCCAAAGAAGACGCCTGTTTGTTCACCTCCACATGATTATGCGACACTAATGTCGCACAAATG |
|
135 |
+CTGCGCTTGACGTAAGTCCGGTTAGCTTCCTTAAGTTCTTAAGACCCACGTAATCTACTTTTTGTCGTGCCCCGGGGAGT |
|
136 |
+TTGGCAAGGTGGGTGGTGACTGGACTGGCGAAGCAGACGCGTTCCCTCGGAGGCGCGTAACCGTCCTCGCCAGTACGTCA |
|
137 |
+AATACGAACGGATTATTTTTCTTTTTGTGTCAGTTAGGTGAAAAAAACGACATTCAGGCATTGCATGAGGACCACTTACA |
|
138 |
+GCTACCCGCCCGATTGAGTGTAACGAAGTCGTAAAGGCGATAGCGATGCGCTGCCTCGTTTGGAATCAGGAGGACTCAAG |
|
139 |
+GAGTAGAGCCACCCTGAGGAGTGCAGCCTGCGACATTACGCGGTAGGACAAAAGGAACGTACTATTATTTTAGTTGAGTT |
|
140 |
+AGGTCAGGCTGCTCTGAACATTGAGTAATGGCTGGGTCCGCATTAGGGTTTTGAGTTGAAGATGCAACCGGGGTCGTAAT |
|
141 |
+TAAGTTGCAGTACATGTGAATCGTTAACGCATAAGCTAGTTAGCTCCTACAGACCTTCGGAGTGCTCTCTCTTCCACCGC |
|
142 |
+CATCGTGTCGCCCCCGGTTATCAGTGACGACTAGTTTCGGTCTAGTACCCCATCAAACCGTACCTAGCTGTTTAGCGTAG |
|
143 |
+ACTGTCTGGGGGAGTCTGTGCTGTGTCGGGTCGCGCCCTTCTAACAGCACATTTCTCTATACACCAAAGTCAAGCCGAGG |
|
144 |
+CTACCGTCCTTCCTTTGTCGCGCTTCTCCCGACCCACTGTTTGGCTCCTGAATTGGTGACGAGCAAAAAAGGTAATCTGT |
|
145 |
+TAGCCGTGGACCGCATTTTGCCCCGCTTCATACAATCATTAAGTTGGGAGACAGTAGTGATGGAAATTCCGTCCGAATCA |
|
146 |
+TATGGATACAAATAAGATCGTGATACGGGTGGACTCGTCTTTAGTCCGATTAACTTAAAGCATCGAGCAAATATGTCGTC |
|
147 |
+CGGGCAATCGTATCCGTGAAACACGTTTCCCATGTAAAAAACGATCTCGCCAAGTCTCATACTCCTGTGCCCACAGGACT |
|
148 |
+GTCTGGCTCCGCTCCTGTTCAACCGTTCTCTGATGACCGTTGCTTCACTTCAGCGACGCAGTAGTTCAAGTGTGTGCGAG |
|
149 |
+GAAATTACGTGAATTGCCGCATGACCCTGGACTCATCTTGCGAAGAAATTTATGGTTCCCGGGAAGAAAATTTAACAGGG |
|
150 |
+GACTACGTTACCAGGGCCACTCATGTCTTACGAGTTTATAAATTAAAGTTCGTACTATTTATATTTCAAGGCGCGGGGTC |
|
151 |
+GCGCCGGCTTGATATTTGTTCACTGAAGCGGCTGATGCTGCGTCGCCTCAGCTTACAAGTCGGCGCAAGAGAGCCTGTCC |
|
152 |
+TGTAAAACTGCTTCTTAACTCTGGTGTTGAAACAGTAGACCAACGGTTCCTCGTCCGTTTCCATCTCTGTTATCCTGGAA |
|
153 |
+GATTGATATGGTTAGTGACAGTTAATGGTGAAGGTTCTTGCGCTACGTCTTGCCAGTAAGACCTATCGTAAAACTGGACG |
... | ... |
@@ -71,48 +71,7 @@ sub makemaxentscores{ |
71 | 71 |
} |
72 | 72 |
return @metables; |
73 | 73 |
} |
74 |
-sub makewmmscores{ |
|
75 |
- my $dir = "/bionet/geneyeo_essentials/MaxEntropy/webserver/splicemodels/"; |
|
76 |
- my @list = ('me1s0acc1','me1s0acc2','me1s0acc3','me1s0acc4', |
|
77 |
- 'me1s0acc5','me1s0acc6','me1s0acc7','me1s0acc8','me1s0acc9'); |
|
78 |
- my @metables; |
|
79 |
- my $num = 0 ; |
|
80 |
- foreach my $file (@list) { |
|
81 |
- my $n = 0; |
|
82 |
- open (SCOREF,"<".$dir.$file) || die "Can't open $file!\n"; |
|
83 |
- while(<SCOREF>) { |
|
84 |
- chomp; |
|
85 |
- $_=~ s/\s//; |
|
86 |
- $metables[$num]{$n} = $_; |
|
87 |
- $n++; |
|
88 |
- } |
|
89 |
- close(SCOREF); |
|
90 |
- #print STDERR $file."\t".$num."\t".$n."\n"; |
|
91 |
- $num++; |
|
92 |
- } |
|
93 |
- return @metables; |
|
94 |
-} |
|
95 |
-sub makemmscores{ |
|
96 |
- my $dir = "/bionet/geneyeo_essentials/MaxEntropy/webserver/splicemodels/"; |
|
97 |
- my @list = ('me2s0acc1','me2s0acc2','me2s0acc3','me2s0acc4', |
|
98 |
- 'me2s0acc5','me2s0acc6','me2s0acc7','me2s0acc8','me2s0acc9'); |
|
99 |
- my @metables; |
|
100 |
- my $num = 0 ; |
|
101 |
- foreach my $file (@list) { |
|
102 |
- my $n = 0; |
|
103 |
- open (SCOREF,"<".$dir.$file) || die "Can't open $file!\n"; |
|
104 |
- while(<SCOREF>) { |
|
105 |
- chomp; |
|
106 |
- $_=~ s/\s//; |
|
107 |
- $metables[$num]{$n} = $_; |
|
108 |
- $n++; |
|
109 |
- } |
|
110 |
- close(SCOREF); |
|
111 |
- #print STDERR $file."\t".$num."\t".$n."\n"; |
|
112 |
- $num++; |
|
113 |
- } |
|
114 |
- return @metables; |
|
115 |
-} |
|
74 |
+ |
|
116 | 75 |
sub maxentscore{ |
117 | 76 |
my $seq = shift; |
118 | 77 |
my $table_ref = shift; |
... | ... |
@@ -3,7 +3,7 @@ |
3 | 3 |
\title{ModCon} |
4 | 4 |
\usage{ |
5 | 5 |
ModCon(cds, sdSeqStartPosition, upChangeCodonsIn=16, downChangeCodonsIn=16, |
6 |
-optimizeContext="optimalContext", sdMaximalHBS=10, acMaximalMaxent=4, optiRate=100, |
|
6 |
+optimizeContext=TRUE, sdMaximalHBS=10, acMaximalMaxent=4, optiRate=100, |
|
7 | 7 |
nGenerations=30, parentSize=300, startParentSize=1000, bestRate=40, |
8 | 8 |
semiLuckyRate=20, luckyRate=5, mutationRate=1e-04, nCores=-1) |
9 | 9 |
} |
10 | 10 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,18 @@ |
1 |
+\docType{data} |
|
2 |
+\name{cds} |
|
3 |
+\alias{cds} |
|
4 |
+\title{CDS of firefly luciferase} |
|
5 |
+\format{character string |
|
6 |
+} |
|
7 |
+\usage{ |
|
8 |
+cds |
|
9 |
+} |
|
10 |
+\description{ |
|
11 |
+Character string of the nucleotide sequence encoding the firefly luciferase. |
|
12 |
+} |
|
13 |
+\examples{ |
|
14 |
+ cds |
|
15 |
+} |
|
16 |
+\keyword{datasets} |
|
17 |
+ |
|
18 |
+ |
... | ... |
@@ -13,8 +13,7 @@ vignette: > |
13 | 13 |
```{r setup, echo=FALSE, results="hide"} |
14 | 14 |
knitr::opts_chunk$set(tidy = FALSE, |
15 | 15 |
cache = FALSE, |
16 |
- dev = "png", |
|
17 |
- message = FALSE, error = FALSE, warning = TRUE) |
|
16 |
+ dev = "png") |
|
18 | 17 |
``` |
19 | 18 |
|
20 | 19 |
|
... | ... |
@@ -91,21 +90,31 @@ cds <- paste0('ATGGAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTCTATCCGCTG', |
91 | 90 |
|
92 | 91 |
## Executing ModCon to increase the splice site HEXplorer weight of |
93 | 92 |
## the splice donor at position 103 |
94 |
-cdsSSHWincreased <- ModCon(cds, 103) |
|
93 |
+cdsSSHWincreased <- ModCon(cds, 103, nCores=1) |
|
95 | 94 |
cdsSSHWincreased |
96 | 95 |
|
97 | 96 |
``` |
98 | 97 |
|
99 |
-To achive the minimal SSHW, the ModCon function parameter `modconMode` |
|
100 |
-has to be set to `suboptimalContext`. |
|
98 |
+The resulting character string holds the alternative nucleotide sequence |
|
99 |
+with an increased SSHW for the index splice donor site at position 103. |
|
100 |
+The new CDS encodes the same amino acid sequence as before. |
|
101 |
+ |
|
102 |
+ |
|
103 |
+To achieve the minimal SSHW, the ModCon function parameter `optimizeContext` |
|
104 |
+has to be set to `FALSE`. |
|
101 | 105 |
|
102 | 106 |
```{r, eval=TRUE} |
103 | 107 |
## Executing ModCon to decrease the splice site HEXplorer weigth of |
104 | 108 |
## the splice donor at position 103 |
105 |
-cdsSSHWdecreased <- ModCon(cds, 103, modconMode='suboptimalContext') |
|
109 |
+cdsSSHWdecreased <- ModCon(cds, 103, optimizeContext=FALSE, nCores=1) |
|
106 | 110 |
cdsSSHWdecreased |
107 | 111 |
``` |
108 | 112 |
|
113 |
+The resulting character string holds the alternative nucleotide sequence |
|
114 |
+with a decreased SSHW for the index splice donor site at position 103. |
|
115 |
+Again, the new CDS encodes the same amino acid sequence as before. |
|
116 |
+ |
|
117 |
+ |
|
109 | 118 |
The extent of SSHW minimization and maximization can alternatively be |
110 | 119 |
limited to e.g. 60% of the maximum or minimum setting the `optiRate` |
111 | 120 |
to 60. The progress is omitted per generation (not shown in this vignette). |
... | ... |
@@ -113,12 +122,22 @@ to 60. The progress is omitted per generation (not shown in this vignette). |
113 | 122 |
```{r, eval=TRUE} |
114 | 123 |
## Executing ModCon to increase the splice site HEXplorer weight of |
115 | 124 |
## the splice donor at position 103 to around 60% of the maximum |
116 |
-suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, optiRate=60)) |
|
117 |
-suppressMessages(cdsSSHWdecreased <- ModCon(cds, 103, optiRate=60, modconMode='suboptimalContext')) |
|
125 |
+suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, optiRate=60, nCores=1)) |
|
126 |
+suppressMessages(cdsSSHWdecreased <- ModCon(cds, 103, optiRate=60, optimizeContext=FALSE, nCores=1)) |
|
118 | 127 |
cdsSSHWincreased |
119 | 128 |
cdsSSHWdecreased |
120 | 129 |
``` |
121 | 130 |
|
131 |
+The resulting character strings hold the alternative nucleotide sequences |
|
132 |
+with either an increased or decreased SSHW for the index splice donor site |
|
133 |
+at position 103. With setting the parameter `optiRate` to 60, the SSHW |
|
134 |
+increase and SSHW decrease were only performed to reach around 60% of |
|
135 |
+the highest or lowest SSHW possible. |
|
136 |
+Again, the new coding sequences encode the same amino acid sequence as |
|
137 |
+the original CDS. |
|
138 |
+ |
|
139 |
+ |
|
140 |
+ |
|
122 | 141 |
Changing the `optiRate` parameter of the `ModCon` function from the |
123 | 142 |
default value 100 triggers usage of the genetic algorithm, instead of |
124 | 143 |
the sliding window approach. Most parameters of the genetic algorithm |
... | ... |
@@ -128,8 +147,8 @@ adjusted with the respective `ModCon` function parameter. |
128 | 147 |
```{r, eval=TRUE} |
129 | 148 |
## Executing ModCon to increase the splice site HEXplorer weight of |
130 | 149 |
## the splice donor at position 103 to around 60% of the maximum |
131 |
-suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, modconMode='optimalContext', |
|
132 |
- sdMaximalHBS=10, acMaximalMaxent=4, optiRate=50, |
|
150 |
+suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, |
|
151 |
+ sdMaximalHBS=10, acMaximalMaxent=4, optiRate=60, |
|
133 | 152 |
nGenerations=5, parentSize=200, startParentSize=800, |
134 | 153 |
bestRate=50, semiLuckyRate=10, luckyRate=5, |
135 | 154 |
mutationRate=1e-03, nCores=1)) |
... | ... |
@@ -137,6 +156,12 @@ suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, modconMode='optimalContext |
137 | 156 |
cdsSSHWincreased |
138 | 157 |
``` |
139 | 158 |
|
159 |
+As with the sliding window approach, the resulting character string holds |
|
160 |
+the alternative nucleotide sequence with an increased SSHW for the |
|
161 |
+index splice donor site at position 103. The new CDS encodes the |
|
162 |
+same amino acid sequence as before. |
|
163 |
+ |
|
164 |
+ |
|
140 | 165 |
The size of the sequence surroundings can be set using the parameters |
141 | 166 |
`upChangeCodonsIn` and `downChangeCodonsIn`, which define the number of |
142 | 167 |
codons to be adjusted around the splice site for SSHW adjustment (default=16). |
... | ... |
@@ -144,10 +169,16 @@ codons to be adjusted around the splice site for SSHW adjustment (default=16). |
144 | 169 |
```{r, eval=TRUE} |
145 | 170 |
## Executing ModCon to decrease the splice site HEXplorer weight of |
146 | 171 |
## the splice donor at position 103 |
147 |
-cdsSSHWdecreased <- ModCon(cds, 103, downChangeCodonsIn=20, upChangeCodonsIn=21) |
|
172 |
+cdsSSHWdecreased <- ModCon(cds, 103, downChangeCodonsIn=20, upChangeCodonsIn=21, nCores=1) |
|
148 | 173 |
cdsSSHWdecreased |
149 | 174 |
``` |
150 | 175 |
|
176 |
+As with the sliding window approach, the resulting character string holds |
|
177 |
+the alternative nucleotide sequence with a decreased SSHW for the |
|
178 |
+index splice donor site at position 103. The new CDS encodes the |
|
179 |
+same amino acid sequence as before. |
|
180 |
+ |
|
181 |
+ |
|
151 | 182 |
The **ModCon** package additionally holds functions to increase or decrease |
152 | 183 |
the intrinsic strength (Hbond score) of a specific splice donor site while |
153 | 184 |
keeping the underlying encoded amino acid sequences the same. |
... | ... |
@@ -160,6 +191,10 @@ cdsHBondUp <- increaseGTsiteStrength(cds, 103) |
160 | 191 |
cdsHBondDown |
161 | 192 |
``` |
162 | 193 |
|
194 |
+`cdsHBondDown` holds a coding sequence that encodes the same amino acid sequence as the input CDS. However, the function aims to decrease the intrinsic strength of the splice donor sequence at the stated index position within the CDS. |
|
195 |
+`cdsHBondUp` holds a coding sequence that encodes the same amino acid sequence as the input CDS. However, the function aims to increase the intrinsic strength of the splice donor sequence at the stated index position within the CDS. |
|
196 |
+ |
|
197 |
+ |
|
163 | 198 |
Integrated functions also include functions to decrease the intrinsic strength |
164 | 199 |
of every splice donor or acceptor within a coding sequence, while considering |
165 | 200 |
whether the overall HEXplorer profile should be increased or decreased of the |
... | ... |
@@ -177,6 +212,8 @@ cdsSDlow |
177 | 212 |
cdsSAlow |
178 | 213 |
``` |
179 | 214 |
|
215 |
+`cdsSDlow` holds a coding sequence that encodes the same amino acid sequence as the input CDS. However, for every potential splice donor sequence within the CDS that exceeds an Hbond score threshold (`maxhbs`), the function aims to degrade its intrinsic strength as much as possible. If, additionally, potential splice acceptor sites should be degraded in their intrinsic strength, the function `degradeSAs` will reduce the number of potentially relevant splice acceptor sites within the CDS. |
|
216 |
+`cdsSAlow` holds a coding sequence that encodes the same amino acid sequence as the input CDS. However, for every potential splice donor sequence within the CDS that exceeds an Hbond score threshold (`maxhbs`), and every potential splice acceptor site that exceeds a certain MaxEntScan score threshold, the function aims to degrade its intrinsic strength as much as possible. |
|
180 | 217 |
|
181 | 218 |
# Session info |
182 | 219 |
|