Browse code

make package ready for upload

caggtaagtat authored on 19/01/2021 12:54:10
Showing 14 changed files

... ...
@@ -2,9 +2,9 @@ Package: ModCon
2 2
 Type: Package
3 3
 Title: ModCon
4 4
 Version: 0.99.0
5
-Authors@R: c(person("Johannes", "Ptok", role = c("aut", "cre"), email = "Johannes.Ptok@posteo.de"), person("Gene", "Yeo", role = c("ctb"), email = "geneyeo@ucsd.edu", comment = "first author for perl script to calculate MaxEntScan score of splice sites."))
6
-Description: Proposing a nucleotide sequence surrounding for splice donors to either activate or repress donor usage.
7
-License: file LICENSE
5
+Authors@R: person("Johannes", "Ptok", role = c("aut", "cre"), email = "Johannes.Ptok@posteo.de")
6
+Description: Collection of functions to design the nucleotide sequence surrounding a splice donor site in order to either activate or repress donor usage. The proposed alternative nucleotide sequence encodes the same amino acids and could be applied, e.g., in reporter systems to silence or activate cryptic splice donor sites.
7
+License: GPL-3
8 8
 Encoding: UTF-8
9 9
 LazyData: true
10 10
 VignetteBuilder: knitr
... ...
@@ -13,10 +13,11 @@ Depends:
13 13
     parallel,
14 14
     utils,
15 15
     stats,
16
-    R (>= 3.6)
16
+    R (>= 4.1)
17 17
 Suggests: 
18 18
     testthat,
19
-    knitr
19
+    knitr,
20
+    rmarkdown
20 21
 selectBestAndMean: Installation of any version of Perl is needed (e.g. strawberry perl).
21 22
 biocViews: FunctionalGenomics, AlternativeSplicing
22 23
 URL: https://github.com/caggtaagtat/ModCon
... ...
@@ -63,6 +63,6 @@ calculateMaxEntScanScore <- function(seqVector, ssType) {
63 63
 }
64 64
 
65 65
 
66
-# use_data(Codons, hbg, hex, hex2, cds, overwrite = TRUE)
66
+# use_data(Codons, hbg, hex,  cds, overwrite = TRUE)
67 67
 
68 68
 
69 69
new file mode 100644
70 70
Binary files /dev/null and b/data/Codons.rda differ
71 71
new file mode 100644
72 72
Binary files /dev/null and b/data/cds.rda differ
73 73
new file mode 100644
74 74
Binary files /dev/null and b/data/hbg.rda differ
75 75
new file mode 100644
76 76
Binary files /dev/null and b/data/hex.rda differ
77 77
new file mode 100644
78 78
Binary files /dev/null and b/data/hex2.rda differ
79 79
new file mode 100644
... ...
@@ -0,0 +1,416 @@
1
+#Load ggplot package to render the plots
2
+library("ggplot2")
3
+library("shinycssloaders")
4
+library("shiny")
5
+library("shinyFiles")
6
+library("seqinr")
7
+library("Biostrings")
8
+library("BSgenome")
9
+library("VarCon")
10
+
11
+readRDS("exampleTransCoord")
12
+
13
+###########################################
14
+## Server and UI code
15
+
16
+## Server input and output script
17
+server <- function(input, output, session) {
18
+  
19
+  ## close the R session when app closes
20
+  session$onSessionEnded(function() {
21
+    stopApp()
22
+  })
23
+  
24
+  uploadReferenceDNA <- eventReactive(path$pth,{
25
+    
26
+    testFASTA <- strsplit(path$pth,"\\.")[[1]]
27
+    if(testFASTA[length(testFASTA)] %in% c("fa","fasta")){
28
+      referenceDnaStringSet2 <- readDNAStringSet(path$pth, format="fasta",use.names=TRUE)
29
+      ref_names <- as.character(lapply(names(referenceDnaStringSet2),
30
+                                       function(x){ strsplit(x, " ")[[1]][[1]]}))
31
+      names(referenceDnaStringSet2) <- ref_names
32
+      referenceDnaStringSet <- referenceDnaStringSet2
33
+    }else{
34
+      load(path$pth)
35
+    }   
36
+    
37
+    referenceDnaStringSet
38
+  }) 
39
+  
40
+  
41
+  
42
+  uploadTranscriptTable <- eventReactive(path3$pth3,{
43
+    
44
+    ## Get human transcript tables e.g. from https://github.com/caggtaagtat/VarConTables
45
+    testCSV <- strsplit(path3$pth3,"\\.")[[1]]
46
+    if(testCSV[length(testCSV)] == "csv"){
47
+      transCoord <- read.csv(path3$pth3, sep=";")
48
+    }else{ transCoord <- readRDS(path3$pth3)}
49
+    
50
+    transCoord
51
+  }) 
52
+  
53
+  ## Report when upload of reference genome is complete
54
+  output$sum_text2 <- renderUI({
55
+    
56
+    ## Genome fasta to download e.g. from 
57
+    ## ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/
58
+    ## dna/Homo_sapiens.GRCh38.dna.toplevel.fa.gz
59
+    
60
+    test <- uploadReferenceDNA()
61
+    HTML("Upload of reference genome completed...")
62
+    
63
+  })
64
+  
65
+  ## Report when upload of transcript table is complete
66
+  output$sum_text22 <- renderUI({
67
+    
68
+    test2 <- uploadTranscriptTable()
69
+    HTML("Upload of transcript table completed...")
70
+    
71
+  })
72
+  
73
+  
74
+  
75
+  ## Generate the text summarizing the sequence retrieved around the stated variation
76
+  output$sum_text <- renderUI({
77
+    
78
+    gene2transcript <- read.csv(path2$pth2, sep=";", stringsAsFactors=FALSE)
79
+    referenceDnaStringSet <- uploadReferenceDNA()
80
+    transCoord <- uploadTranscriptTable()
81
+    
82
+    #Get information about the SNV
83
+    res <-  getSeqInfoFromVariation(referenceDnaStringSet, input$transcriptID, input$variation,
84
+                                    ntWindow= input$ntWindow, transCoord, gene2transcript=gene2transcript)
85
+    
86
+    #Sum up the info
87
+    HTML(paste0("For the given annotation  ",res$funcAnnotation,
88
+                      " within transcript ", res$transcript,
89
+                      " following sequence was found around the chromosomal coordinate ",
90
+                      res$genomicCoordinate," on chromosome ",res$chromosome, " :"),
91
+               "",paste0("Ref Seq: ",res$sequence),"", 
92
+               paste0("Ref+vari:",res$altSeq) , sep="<br/>")
93
+    
94
+  })
95
+  
96
+  ## Generate the plot where you can mark the area to zoom in
97
+  output$plot <- renderPlot({
98
+    
99
+    gene2transcript <- read.csv(path2$pth2, sep=";", stringsAsFactors=FALSE)
100
+    referenceDnaStringSet <- uploadReferenceDNA()
101
+    transCoord <- uploadTranscriptTable()
102
+    
103
+    ## Retrieve information from genome
104
+    res <-  getSeqInfoFromVariation(referenceDnaStringSet, input$transcriptID,
105
+                                    input$variation, ntWindow=input$ntWindow, transCoord,
106
+                                    gene2transcript=gene2transcript)
107
+    
108
+    ## Calculate HZEI values
109
+    durchzahl <-  calculateHZEIperNT(res$sequence)
110
+    
111
+    durchzahl$Sequence <- "sequence of interest"
112
+    
113
+    plot <- ggplot(durchzahl, aes(x = durchzahl, y = endhex, fill=Sequence )) +scale_y_continuous(name="Hexplorer score",breaks=c(seq(-75,0,5),seq(2,34,2)),limits=c(min(durchzahl$endhex)-5,max(c(durchzahl$hbs,durchzahl$endhex))+1) )+
114
+      scale_fill_manual(values=c("#56B4E9", "#000000"))+
115
+      geom_bar(stat='identity', position = "dodge")+ xlab("Sequence")+ylab("Hexplorer score")+
116
+      theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank())+
117
+      annotate("text", label =substr(durchzahl$seq9[durchzahl$Sequence=="sequence of interest"],6,6 ) , x= 1:((nrow(durchzahl))), y = min(durchzahl$endhex-8), size = 3, colour = "black")
118
+    
119
+    plot
120
+    
121
+  })
122
+  
123
+  ## Generate the plot where you can mark the area to zoom in
124
+  output$plot2 <- renderPlot({
125
+    
126
+    gene2transcript <- read.csv(path2$pth2, sep=";", stringsAsFactors=FALSE)
127
+    referenceDnaStringSet <- uploadReferenceDNA()
128
+    transCoord <- uploadTranscriptTable()
129
+    
130
+    ## Retrieve information from genome
131
+    res <-  getSeqInfoFromVariation(referenceDnaStringSet, input$transcriptID, 
132
+                                    input$variation, ntWindow=input$ntWindow, transCoord,
133
+                                    gene2transcript=gene2transcript)
134
+    
135
+    generateHEXplorerPlot(res,input$ntWindow)
136
+    
137
+  })
138
+  
139
+  ## Create reactive value ranges, for the zooming plot
140
+  ranges2 <- reactiveValues(x = NULL)
141
+  
142
+  ## Generate the plot, where you can see a zoomed-in version of the plot above
143
+  output$plot_zoom <- renderPlot({
144
+    
145
+    gene2transcript <- read.csv(path2$pth2, sep=";", stringsAsFactors=FALSE)
146
+    referenceDnaStringSet <- uploadReferenceDNA()
147
+    transCoord <- uploadTranscriptTable()
148
+    
149
+    
150
+    ## Retrieve information from genome
151
+    res <-  getSeqInfoFromVariation(referenceDnaStringSet, input$transcriptID,
152
+                                    input$variation, ntWindow=input$ntWindow,
153
+                                    transCoord,gene2transcript=gene2transcript)
154
+    
155
+    
156
+    ## Calculate HZEI values
157
+    durchzahl <-  calculateHZEIperNT(res$sequence)
158
+    
159
+    durchzahl$Sequence <- "sequence of interest"
160
+    
161
+    plot <- ggplot(durchzahl, aes(x = durchzahl, y = endhex, fill=Sequence )) +scale_y_continuous(name="Hexplorer score",breaks=c(seq(-75,0,5),seq(2,34,2)),limits=c(min(durchzahl$endhex)-6,max(c(durchzahl$hbs,durchzahl$endhex))+1) )+
162
+      scale_fill_manual(values=c("#56B4E9", "#000000"))+
163
+      geom_bar(stat='identity', position = "dodge")+ xlab("Sequence")+ylab("Hexplorer score")+
164
+      theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank())+
165
+      annotate("text", label =substr(durchzahl$seq9[durchzahl$Sequence=="sequence of interest"],6,6 ) , x= 1:((nrow(durchzahl))), y = min(durchzahl$endhex-4), size = 3, colour = "black")+
166
+      coord_cartesian(xlim = ranges2$x, expand = FALSE)
167
+    plot
168
+    
169
+    
170
+    
171
+  })
172
+  
173
+  ## Generate the plot, where you can see a zoomed-in version of the plot above
174
+  output$plot2_zoom <- renderPlot({
175
+    
176
+    gene2transcript <- read.csv(path2$pth2, sep=";", stringsAsFactors=FALSE)
177
+    referenceDnaStringSet <- uploadReferenceDNA()
178
+    transCoord <- uploadTranscriptTable()
179
+    
180
+    
181
+    ## Retrieve information from genome
182
+    res <-  getSeqInfoFromVariation(referenceDnaStringSet, input$transcriptID, 
183
+                                    input$variation, ntWindow=input$ntWindow, transCoord,
184
+                                    gene2transcript=gene2transcript)
185
+    
186
+    results_plot <- generateHEXplorerPlot(res,input$ntWindow)
187
+    results_plot+coord_cartesian(xlim = ranges2$x, expand = FALSE)
188
+    
189
+  })
190
+  
191
+  ## Create a function which keeps checking on the input with the mouse
192
+  observe({
193
+    brush <- input$plot2_brush
194
+    if (!is.null(brush)) {
195
+      ranges2$x <- c(brush$xmin, brush$xmax)
196
+      
197
+    } else {
198
+      ranges2$x <- NULL
199
+    }
200
+  })
201
+  
202
+  ## Generate the text for describing the difference between the Hexplorer Scores of both sequences
203
+  output$plot2_text <- renderUI({
204
+    
205
+    gene2transcript <- read.csv(path2$pth2, sep=";", stringsAsFactors=FALSE)
206
+    referenceDnaStringSet <- uploadReferenceDNA()
207
+    transCoord <- uploadTranscriptTable()
208
+    
209
+    ## Retrieve information from genome
210
+    res <-  getSeqInfoFromVariation(referenceDnaStringSet, input$transcriptID,
211
+                                    input$variation, ntWindow=input$ntWindow,
212
+                                    transCoord,gene2transcript=gene2transcript)
213
+    
214
+    ## Calculate HZEI values
215
+    durchzahl <-  calculateHZEIperNT(res$sequence)
216
+    
217
+    durchzahl$Sequence <- "reference"
218
+    
219
+    durchzahl2 <-  calculateHZEIperNT(res$altSeq)
220
+    durchzahl2$Sequence <- "alternative"
221
+    
222
+    durchzahl$mut_hex  <- durchzahl2$endhex
223
+    durchzahl$diff_hex <- durchzahl$mut_hex- durchzahl$endhex
224
+    
225
+    pre <- ""
226
+    if(sum(durchzahl$diff_hex) > 0) pre <- "+"
227
+    
228
+    ## Return the Difference in the score
229
+    HTML(paste0("The difference in the HEXplorer Score integral amounts to ", paste0(pre, sum(durchzahl$diff_hex)), " in total."))
230
+    
231
+  })
232
+  
233
+  
234
+  ## Define reactive paths
235
+  path <- reactiveValues(
236
+    pth= system.file("extdata", "referenceDnaStringSet.fa", package="VarCon")
237
+  )
238
+  
239
+  
240
+  path2 <- reactiveValues(
241
+    pth2= system.file("extdata", "fastaEx.fa", package="Biostrings")
242
+  )
243
+  
244
+  path3 <- reactiveValues(
245
+    pth3= system.file("extdata", "exampleTransCoord", package="VarCon")
246
+  )
247
+  
248
+  
249
+  
250
+  observeEvent(input$filechoose,{
251
+    path$pth <- file.choose()
252
+  })
253
+  
254
+  observeEvent(input$filechoose2,{
255
+    path2$pth2 <- file.choose()
256
+  })
257
+  
258
+  observeEvent(input$filechoose3,{
259
+    path3$pth3 <- file.choose()
260
+  })
261
+  
262
+  
263
+  
264
+}
265
+
266
+#User Interface Script
267
+
268
+ui <- fluidPage(
269
+  
270
+  ## Type Headline
271
+  titlePanel("VarCon: Retrieve genomic sequence around sequence variation"),
272
+  
273
+  "VarCon retrieves the surrounding genomic sequence of a stated sequence variation and visualizes potential changes in sequence elements important for splicing. Please first upload the fasta file of the respective reference genome sequence. Loading and processing of the data will take up to 2 minutes.",
274
+  
275
+  br(),
276
+  br(),
277
+  
278
+  ## Organize the interface into separate tabs
279
+  tabsetPanel(type = "tabs",
280
+              
281
+              tabPanel("Upload reference data",
282
+                       
283
+                       
284
+                       fluidRow(
285
+                         
286
+                         column(4,  h4("Fasta reference genome"),
287
+                                actionButton("filechoose",label = "Select FASTA file")
288
+                                
289
+                                
290
+                         ),
291
+                         
292
+                         
293
+                         column(4, h4("Transcript table"),
294
+                                actionButton("filechoose3",label = "Select transcript table")),
295
+                         
296
+                         column(3,  h4("Optional: gene/transcript table"),
297
+                                actionButton("filechoose2",label = "Select gene2transcript table")
298
+                                
299
+                                
300
+                         )
301
+                         
302
+                       ),
303
+                       
304
+                       
305
+                       fluidRow(
306
+                         
307
+                         column(4,   withSpinner(htmlOutput("sum_text2"), type=6)),
308
+                         column(4,   withSpinner(htmlOutput("sum_text22"), type=6))
309
+                         
310
+                         
311
+                         
312
+                       )
313
+                       
314
+                       
315
+                       
316
+              ),
317
+              
318
+              tabPanel("Retrieve sequence around SNV",
319
+                       
320
+                       
321
+                       fluidRow(
322
+                         
323
+                         column(3,
324
+                                h4("Please enter the required information"),
325
+                                helpText("Please enter the transcript of interrest",
326
+                                         "an the annotation of the functional variation."
327
+                                )),
328
+                         
329
+                         
330
+                         column(3, textInput("transcriptID", label = h4("Transcript ID (ENSEMBL)"),value= "ENST00000544455")),
331
+                         
332
+                         column(3, textInput("variation", label = h4("Functional variation"),value= "c.516+21A>T")),
333
+                         
334
+                         column(3, numericInput("ntWindow", label = h4("Seq x nt up/downstream"), value= 20, min=5, max=150))
335
+                         
336
+                         
337
+                         
338
+                       ),
339
+                       br(),
340
+                       
341
+                       
342
+                       fluidRow(
343
+                         
344
+                         column(12, withSpinner(htmlOutput("sum_text"), type=6))
345
+                         
346
+                         
347
+                       ),
348
+                       
349
+                       br()
350
+                       
351
+                       
352
+                       
353
+                       
354
+                       
355
+                       
356
+              ),
357
+              
358
+              tabPanel("Impact splice site strength and SREs",
359
+                       
360
+                       br(),
361
+                       
362
+                       br(),
363
+                       
364
+                       withSpinner(plotOutput("plot2", height = 200,
365
+                                              brush = brushOpts(
366
+                                                id = "plot2_brush",
367
+                                                resetOnNew = TRUE
368
+                                              )), type =6),
369
+                       
370
+                       h4("Zoomed in plot:"),
371
+                       plotOutput("plot2_zoom", height = 200),
372
+                       
373
+                       br(),
374
+                       
375
+                       
376
+                       htmlOutput("plot2_text")
377
+                       
378
+                       
379
+              ),
380
+              
381
+              tabPanel("Manual",
382
+                       h3("1. Upload reference genome fasta file"),
383
+                       "First, please upload the fasta file (or zipped fasta.gz) of the reference genome sequence.",
384
+                       br(),
385
+                       "Potentially required data for the reference genome GRCh37 and GRCh38 is availible in the directory of this application.",
386
+                       br(),
387
+                       "If needed, the required file can be downloaded from the Ensembl ftp server ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz",
388
+                       br(),
389
+                       br(),
390
+                       
391
+                       h3("2. Select genome assembly"), 
392
+                       "Next, select whether the uploaded genome reference file originated from assembly GRCh37 or GRCh38.",
393
+                       "The respective transcript table, holding the genomic exon coordinates will be selected.",
394
+                       br(),
395
+                       br(),
396
+                       
397
+                       h3("3. Select gene to transcript conversion table (optional)"), 
398
+                       "Select a csv-table holding gene names and gene transcripts which shall be used synonymously during the querries.",
399
+                       br(),
400
+                       br(),
401
+                       
402
+                       h3("4. Data entry"), 
403
+                       "On the next panel, now enter the respective transcript name (or gene name) and the single nucleotide variation of interest.",
404
+                       "The sequence variations can either refer to the nucleotide positions within the coding sequence or genomic coordiantes.",
405
+                       "Example variations: c.142+2A>T  or g.12746124G>A",
406
+                       br(),
407
+                       br()
408
+                       
409
+              )
410
+              
411
+  )
412
+  
413
+)
414
+
415
+
416
+shinyApp(ui = ui, server = server)
0 417
deleted file mode 100644
... ...
@@ -1 +0,0 @@
1
-TGTCTTTTTCTGTGTGGCAGTGG
2 0
new file mode 100644
... ...
@@ -0,0 +1,153 @@
1
+>pseudoChromosome1
2
+AGTACGTAGTCGCTGCTGCTACGGGCGCTAGCTAGTACGTCACGACGTAGATGCTAGCTGACTAAAC
3
+>pseudoChromosome2
4
+ATCGAGCCAACTAAGTAACAGTAACACTGCAAAACAGGGGATACTGCTGTAGACTATAGTGGCCGCGAGAATGGAAGGAA
5
+TATGGAAGCGAATCCCGTCGATCCGCCGTCGCGTGACGCTTTATCCCCAAGCCCTCATATCCTCAGTTTGGGCGTTTACC
6
+ACGTCGGGCCACAAGTGGCAAAGTCCATAGTAGTGCCGACAACCTTGCCTCAGCTCGCGCTTAAGGGTAATACATGGCGA
7
+ATCTGACGGGCTGTCGTGTAATGACAGCGACACATCTTTGACAATCGCATGAGGTCCTAACGATAACAGTCCCAAGACAT
8
+ACAGGTTAGCGAGCGACGCGGGCTCAGAGTCTCGTCACGTGCGGCGAAAAGTTATTTTAAATCGGTGTCTTGTACGGGGG
9
+TGCTAAACTTAGTCCTCCAATTGACGCGTACGGCTAGCATGACTTAAATCTACTTGTGTAGTGGATTTTGAATAGGTCCC
10
+ACCTCTGCCATGAGCTATAGGTGCCAGCATGATTTAACTGGGTCCAATAGAAGGTAAAAAGCCAAATTGTGAAACACAAA
11
+GTAACACGGCTTGGCCCAACGCTGGGAACAGCGTACGTACAAAGTCCTCTCATCAAGCCCATCCTGGGGGGGCAGACCAT
12
+CACGAATCTCCAAGGGTCAACAATATCGGCCAGTGGTTTAAGGGAGAAGTTCTACACACGTAGCTTAAATCGAATAAGGG
13
+TAGCTTACCCGCAGATGATGCGACGCCCGCGTCCCTACCAAATTCCGACACTCAGTAAGAATTCATAGAGTGAGGCCTGC
14
+GTTCGGAGATGAAGTACCGCACCCCCTTTCCTGGAGCAGTAGGGCTTGAACGTAATATGTCATTTGGAACAAGTACTCTG
15
+TCGAGATTAGCCGTTGCCAAAAACCCCAAGGCGTGACAACAACCGTTTCGCGATGGACAAGTAGACACGGCGACTCGCTA
16
+ACCCAACCCAGCCGCCAGTGCCGAGCGCCCGGTCTGTTCCGATCCTCATTGTTCGGAAGGGGCATCGAGTTTTTTTAGGA
17
+ACTCAGCGGCAGCCCGAACAAGTGTTCCTTTTGTGTTGCTTGATAGACCAGCTACCGTCGTTGTCTGAAGGGTTATTGAC
18
+CGACATAATACGTTAAGGCAAAACGAGCTCTTCGCTGAGGGAAGGGTAGCGGGGATAAGCGTCCCAGAGCAATAACACGA
19
+TAGCGTATTAAAGCCCACAAGTGATCCCATCTACACAAGTATTCTTGTTTCAATTCATATGACAAGGGTCTGACCTTATT
20
+ACTGCTTGCAATTAGCAATCCGGCTAGCGGGTGCCATCTAGTCACGCTTTTATCCATTGAAGACTTTGGGAGGTTGTAAG
21
+CTCCACATTCATTGCCGACGTAGATTTTCTTGAAGGCGGCGCTAATTAAACAGGCACTCGTCACGGGTGTTTGGAATGGT
22
+CGTCAACGGCGCGTCACCTTCGTGGTCGCAAAACAAACCAAGTTTTGTTGTGTTTCTAGTGTACCCCTCGTTGACGCTCT
23
+TAATATTTTTTCCAGTCCGCATAGAGGTGATGACGTAGGGGTAACGCTGAAGTCCACGAGAAAAGTTCCTTAGTAGCTTG
24
+CGCTCAACTCGAGTAACTATTACCCTGGGGTATTTACTCTAGTGTGGAATGATCGCAGTCTGGGTGACTCTAAAAAACGA
25
+CGCACTTCGTAACGGGTGTCGTCACAGCAAGGTGCGTATGAACATCGTGATACGTAATGTTCGAAGGCCGGATTAGCGCT
26
+TTGTGAGATTTGGAGACATCTCAGTCGTACTTGGAGTATAAGGAAGGATGTTGACTCACTCGCACAAGGTGTGACTGATT
27
+TGTTGGGGGGGGGATATGTACCGCAAAGGTTTGCGGCCATAGCTCTATGGGCTTCGGAGATAGTAAGCAGAGGGCTTTTG
28
+AATGACATGATAGCTAACACCGGTTATTAAATCTAGCAGCCTCGAATTAGACTGAAGCAGTTGAGCACCCCTTCCTTCGG
29
+GCAACTTCGGATCCTAAACGCTCTTTGTCGATATTCTACAGCCTGATACCCAAGTGGAGCGATATCGGCCGCGACCAGCT
30
+AGCACCAGATTAGCAAAGGGCACCGAGTGACTAATGCTCTAAGTGAAATGTATGAAACCGCCCAATAGGCCTGCTACTAT
31
+ATGCAGCGTCCTTGGTCTATATGGTATACGTCAAATATCAGCCCCAAACCCACTGGTCAGCTGATGCGGCTCTGATTGCA
32
+TTCCAGTGTTTTCGACTGTGGTGATTCCTGCCAGGAACCGCGGATCTGACGAGATCCGGCTCGTGAAGTCAAATCCAACC
33
+GGGTGCCCAATCCCTGCTACGTCGCACAGGGAATCAAGATAGCGCTTCAGGCCGCCCCTTGTCAAGTAGGGGCGGCCCAC
34
+CTACATCTTTGTCCCTGGATACCCGATCAGTGTACACTAGAGGAGATAGGTCTCGGAACCGTCGATATCAGCGGTTTCTG
35
+AATCGTGGGAATTACTGAGTCCTCATCGTGCAGGATATTAGCTATTGTATTAACTCAAGGGTCCTCAACACGCCTCAGCT
36
+CCTAAACTCAGTTGTGACCTCGCCTAGGGACTCAGTTATCTTAGACCTTTCAGAGCAGCATATGACGGCTGCCTGTGTAT
37
+TCAGCAGGATAGATTTCTGGTGTCAGTCGAAGTCCGGCGATCTTACGTAACGCAAGGGTCCCTTAGCTACCCCTTCATCC
38
+GATACACCATCCCCGATATGTGGGGTCAGAGCTCGCACTATTGCTATGGTTCAGTGGTTTACACTGTTGTTATGTTTTTA
39
+TACCAGCGGAATTACCGAATCTCGAACTTACTAACTTGTTTGTCTCCCTCCCACATCGGCCTCCAGATCTGTGGATGCTA
40
+CACATTAAATCAGACAGCCCTATTTTCCATCGACAAAGACTGATAAGAGACAGGTACGCCTTCGCGTCGAACTCGGCTGG
41
+ACGTAGGGAAAGTAATCGCCGACCTAACTCAATTGGCCGATGGATAGCGCAGCAATTAGCTTACCTACTGACTGTGAAAT
42
+AGAATTGGCGACGTTATTTCCCGAGAAGAGTACAGGTGATTTGGAATTACTGCTACAATCCGTAAGAGCCAAGTGGGGAA
43
+GTCGGGTCCCGTGTGAACCTTATACGCCTGCTGCGACAGGAAATGGCAAAAACCGTGTCTCGACGTAGTATAGCTCGCAT
44
+GCGGGAGTGTGTTTTAGTACAGACTATTCGCTTATAAAGTGACTCCCAAACCAACAGACCTGGGTCAAGTATATCTCCAT
45
+GAACGGGATCAACAAGACGTCTTCGCAATGCAAACGTCGCAGTTCGTCAAAGACGTAGAGGCCGTCATTAAGTCTTCTAA
46
+AGATGTTACCCAGATCTGCGGCTGATCGGACTCACGGAACATCGGAAAACTAGTGCGCGCTTCTGTATGGTACGTATATC
47
+TAACCGGAAAAGCGCATGCAAGGGGCTAACCAACAGTGTCCAGAGTTTAAAAAGTGAGTAGAAATCCGTGCGGGTTCAGA
48
+CAACGCAAGATGACCAACGATAAGCCGAAGGTCACCGGCAATCGACATATAGTTCGCAAACTCTAGATGGCCCATGTCAA
49
+TCGGTCATGTTACAAAGCCAGCTGGCTATGTCTGCTAAGTTTCATCGATATGGAGCCGGCTCAGGGAGCGCCAGCCGGTC
50
+CTGCAAACTCTATTTTACACAATAAGGTTAGTGCACCGCCGACTGGAAACTATGATATTTGCCGAACACACGTAACGTTC
51
+TACCGAGAGCTACGCAGTTGATACCTCACCCTTATGGCACCAATGTAACTCTTTCACTAAACATTCAGCCTAGGCGGCGT
52
+GTACTCTTTCCCTTGTAGGCTCTTTAACCAAAAAGTGGTGTTACGGAGGGGTCTCGTTCACCACATATCTAGGTTTCACA
53
+CTAGGCAGACATGTCTGCAGACTTGCCTGACGGGCACTTGTTACGCCCCACGACAACGTCACTTGCATTCTTGCGAATAT
54
+CTGTTCTGGGTGGGCCGGTCCAGCTAACTCATACTGTAACGCAAGATGGGGCACCGGGTGCATCTCCTTTTAGCGAGGGA
55
+AGATGCAATTAGTGAACGCCCTTCGTTTCAAGTTGCGCTATCAGAACGTTGACTTACGTTATATTCAACCTCAAACAATT
56
+TAAACATGATAAAATCTTGTGCAGAGAATGCTGCACGCGTCCAGATCCTTAACTGAGCAAGCTTATTCTTGTCATGTCGG
57
+TAGTGTCCCAGCGCACCGCTGATTAGATGGATGCAGAAAGAATTGCGATGGCCACCGTACGCCTGTACCGACCGCTCCGA
58
+AACTGAATGCCTAGTAAACGACCTACCAAGCCGCATCATAGCACGAAATAGCGCGGTACAACTACTAGGGAGGCGCCCCG
59
+TGATGAACACGTTTTGCAAAATAATTGCCCCTCACAAGTACTTGCCAAATGTCTATAGGTTTCCATGTGGGGTATGTTCA
60
+GTTCCCTATATGGTCAATAGTAAACTTGCGACTCTATAGGAAGAAGGCTAAAATACTGAATTGGTAGTCGATTACTCCAA
61
+TCCCCTAACCAATTGAATTCACAGCTTTAGAGGAACACCTACGCTTCCACTATTATTTGCGGATGAATGTGATACGGTTG
62
+GGCCTCCACACATGCTCAATCTGGGGATGAAGAAACCGCTTGCTCGCTCCGCTCTATCGTGAAGTACCATTACAACATCG
63
+GGAAGGAGCCCCTTGCTTTGGTCCGCATACTTGGGCTATTTACGCAATGGGAGGCCTAATGGACGGGCATGGGCGAAGGA
64
+CCCTCACGTGTGGATGCAGGCGTCCCAATGGCCACAACGAAGATGACGCATCTATAATTCTTGCCGTTGGGGTAGCATCG
65
+CTTCGCACGCACCACCCTGACTATCCGCACTGGTGAAGACGTTCGACTCCCGGTTCGATTCCAGTTTGCCGTTTGCGTCC
66
+GTGCCCCTTGACTTGCTTAGCCCTCATTGGGACACAAAGTTACCGACCCCAAAGCGTAGTTAGCTGTCTGTTAGTCCAGT
67
+CGGCTGTGGACCACCGGGAAATCTTTGCCGGTCCAACGCGTCGACAATAAAGGTATAGTAGAAGGGATGTTTACGGCGTC
68
+GACTCCTACCCTATTTGCAACGATCCGAAGCCGTTTACTCGCCGCCGCTCCCCGTTCCAGTCTCAAGGGTGTTGCATTGT
69
+TCAGGTATATTTCCCCTTCGTTAAGGCGGGATCGATTCAGATGAAACTTAAGAAACCGTGGCACATGCGACTACGGCTGA
70
+AGCCGAGCGAGCAACTACTGTAGGATCCTCTTGGCACTCTATTCCCGCGGCGCGGGGCTTGGCAAGAATTCGAAATGGAG
71
+GTCGCATAAGTAGGAACGGTCGGTCAACCCACCGTACGCCTATGCATATAGATAATCCCGCGTGTTTTTGGGGTGGCATA
72
+CTCCTTCGTCTACGGGCTTCTATACATATGGGTTGCTTAAGGTGTAGCAGAAGCTTAGTGGTTTGCGTAGTAATGCGGCT
73
+GAATTTCACCTACACTATTGGGCGGGGGCCCCTACGCTAAGACTCTTTTACATCTACTTCCAAGCCAGTATGAAGATAGA
74
+GGCCCCCCTCTACCGTCACATAGCAGTCTGGTGGTTATGGATTTGCGGCTATGTGCTAGAGATCGTCTTGAGATCCACGA
75
+ACCCCGACCTTCAAAGGGTGGCCCTTCAGGCCGTCTTTACCTACATCACCAACCAAACGCATGGACGGCGAGCTATGCAG
76
+GTCCTGCTGTCCACATCTCGTAGCTAACACACCCGTCTCGGGGAGAAGTCAATGCTTACATGACTGAGGCCGAAAAAGAC
77
+CAATCTGCGAGTGTTGAATTCTATATGCGCCTTATCCCGGGTACCGTCGCACACCCCCGAATTGTGCAGCACAATGAGCA
78
+ATGTTCAGCTTTCCTGCCGCGAAGTATAGTTCCGGGTTTGTTTACGTTTGGGCCGCAAATATTTTTACGATTCGGACGGG
79
+CCGTCGTGAACCTTCCCATGGGGTGCACCATCTCTTGAGTTACTTGCGAGATCGACAGGTCGTAACGGACGTTTCGATAC
80
+CAGTCCGATTTGAGGTGGAGACGAGCAATCCTAGGGCCTTGCATACTGTCAGCAAGTAGCAGTGATCAACTGCTGGGCCT
81
+CATTCAATTAAGGCCCATACCGCATCCTTGTCCACCGATGCGCTAAGACCTTTATTCCACCTAAAGACATGTCGCGTTTC
82
+GGTCCGCCGTAACGTCTATGATCGGTCTTTCTGTTACCACTCATTTCTGCCTCGTACCGGAGGCGGTCGGCATTAGGAAA
83
+AGCGTCGAGTGGGCCCAGAATCCCGAGATCTTTTGTGAGCCAATACTACACCGCCCCGATCAATCATGCAAAAGATTTGA
84
+CTTAGATATATCGGAAGCCCTTCAAGAGCTAAATTTATACAATCGCCCAAAGGCTTTAGGCAATCCGGAGCACTTGCCGA
85
+AACTCATAGTTCATGAAAATAAGAAATGAACCATTAAGGATCATGTGTTGTAGCTGGTTTACCGTTCACTTAGGAGTTTC
86
+ATCGGCTGTCGTCGATGAGTCTCTATTCGGCATAGCCAGCATTCTGCCAGGGTTAAGGGCAATCATTGTCCATAGGCACG
87
+AACCCTCTTGCCGGTCAATCAGTTTTGGAAAATGTAACCCGTGTGGATTATCTACCCAGGGCTCCATCAGAATCGGCTGG
88
+ACCAGTTGACGTGGACCTGGACTTTAGATCGCCACAGTGGTAAAGGGTGTGAGGGTATGGCATCAGAGCTACTGGCTGTC
89
+GGAGAAATCGTCGGGTGGCAAAGGTAAATATACATTTATGTGTAGACGTAGTTTCGTACTGATCGAACAAGGGAGGGCGA
90
+ACCGTGGTGATTGGCTAGCTTACGCCTTTCTGCAAACGATAATCCGGTTAGCGTCTGGCAATCGCGCCCACAGTCAGACA
91
+TCCCCGATTACAAGCCGGATCCTTGCAGGATAGCATTCGAGCAACACGACAGTAGCTGCTAACGGTCACGGGTAAGGCGA
92
+CAACCTTCTCCCCCTTACTAGTTGCAAGTCAGTTTGTAGATCGGCATACGGCCAGCCTTCCGCCAACAGCTTTTGGATTA
93
+AAGCCGTGTCGAGGTAATCTTTCCGACGATTCGGTCAATAAGATTCTCATACGAGTGGGTAAGGGTTATTGATCTCACCT
94
+TGCCCACCGAATAGTTGTTCCCATGTGGTACATTCGTGATTAGGATTCTATCTAAGTATTTTGGCCAAATTGACAGCGGC
95
+TGTGACCACTGCACGGCAAATTGACCACCCTAGCAAACGGGTAGGCGACAGCTTTCTTCGTGTAGATCATACGACAGGGT
96
+ACAAGTATAGAAAAGACTTGAGTTAGTTGAGCTTCTAACTGAGCGATAGTCGGTCGCATGCCTCTATCTGTAGCCCTTTG
97
+TAGGATAAGGCCGCTGTTACCTGGAGCCGGAAAGCACTGTGTACTTGCCCGTCTCGCTGCCATTCCACCTGTTGGTTAGA
98
+CGTTCAGTCTGTGGGGCTCCAGCCTGGTCATAAATCTTCACATTTAACAGATCCATCTCGGCGTCGAGTCCGTAGTTCTG
99
+AGGCACCACGGGCACGAATGCACCTAATACTCAACTGATCCGTGTGACACGTCCGAATTTTGATCTACTAATTGGTCAAA
100
+AGGTCTGTGGTAGCAATTCAGACGGGAAACAACCCAACTATCACCTAAGCCTCAGCAAGTAGCGGACATCGCTCATTTGA
101
+AAAATCCGATGTCACTGCCATCCGTGCCCACCAGAAGCCCCCTGAAATCTGATGCCCCCCCGCAGTCAACTGGGGTTAGC
102
+GGGATCGCATATAGTTGGAGCGCTTACTCCCCTCGCTTACATGAGTGGACTGTTGGTCGCCTTGCAAACGTGTGTTGAAG
103
+GATACAGAGTGATAAGATTCGAGGGTGGGCGATCAGAGACTACAGCTAGTCGACACCCACAACCTGCGACTCTATAGGAG
104
+GGTGTACGCACTTATGTCTTTTTTTCAGTCCGCAACTTCGGCGTACAACAGAAGTATGCAAGAGATACCGTTTCGCACCT
105
+CCAGTCCTCAGGACTGCGTGGGGCGTCTATGCCGACGTGACGTTTCCAGTGCCCTGATGGCCCTTATGGTGCCCTAGTGA
106
+GCAGTGCTTGAATAACAGTGAAGTATCCAGCGGCACGTGGTAACTCGGTAGTGTAGCCAGGAGATCGGTAACAGGGCATT
107
+CCCGGAAAGACTAGTATAACGTAGAGACATTAAAATAGCTTAGTACCTGGAATTGCGGAGAAACTGGCCTCACGGCTAGG
108
+CGCATTCGCCAACGAATAATGTTAGGATACCAGGTGCACTAACGGTGCCTCCACTAAAACTCCAATGAAAAAGTTGTGCG
109
+AACCTATTAGCTGATTTAGGAAGAACGCCTAGCGCCTACTCATATTACCCTATACACATCCTATTTTCCTTCTCCCTTCG
110
+AATGTTCTAAGTCGCAAACATACCATCTGAAACTTTGTCGCCGGTTACTAAGCTACGTGAGGCGGATATTGGACAAGTAG
111
+GCAGCATACTACGGTTAGACGGCTTATGTTTACGTCTAGGTGGCGATGAAAAATCATCGATCTCCTTTGCAAACGCGACG
112
+CCTATTTCTGAAAAGCTGCTCCAATAAGTTCAGTGGTTTCGGTGAGCACGCGGGGGTTCAGGGTAAATTCATTTGAGATT
113
+ATACAGAGGGCGATTACCCAAGACCGTATTTGATTTATGGCCACCGGTCATTCACATGCTCCTTAGTCTGGTGGCGGCGG
114
+AGGAATATATCGAACACGGTTAAATGAAGCATCCATTGATCTCCTGCCCGCAGATCTGGGAGGTGACACGGACAATCGCC
115
+CGGGGTATACTCCCATATTGACACCTTGTAACGTGTGCTGTGGACGCGCAAGAGTAGCAGGCGATAGCAACATACCTTGC
116
+ACTACGGTTCCCTAGAAAGGACACTGCGCGGTGATAGTTGGTTCCTATTGCTCAAAGAAGCCCGTTTAATGCGCTTGGGT
117
+ACGTACTATAACGGAAGAATCCAAGAGTCTATTGCTAAGATTTCAATGCTAATGCCGTGGTATTTGCTTAATACCGTTGG
118
+CAAGACCGTGGGCTGCGCGCTTAGTAGGGTGGGAGGTATGAGTCCACCAACGTCGAACCCGACCCTGTGCTATGTCGCAT
119
+CTGATCATCGACAACTTGTTGATATTGCCCTGTTTACCCAGCAATAGATTACTCTAGCTGAGGGTTAGAGGGTTTTAGTC
120
+ATTCGGAAGTAGTTTGGGAGGGTGAGATATCTAATTGGAGGTTAATACTTACGACCGCCCAAAGTCGCTGTCCCCACATC
121
+ATATCAAAGAGGCCACGGCATTTGCAGTGATGTAAGTTCAATAGAATGAGGCCCAGGGCCAGGCGATAACTTTAATGTCG
122
+GGCGAAGCACGTGCCCATCGGGTCCCCGCGCCACAGCCTTATTGTTTCTCTACACTCTCGCTGCTCCGACTCGCGAGGAT
123
+AGTGATCGCTAGTGATCGGCAATGAATATATGTCGGAGGAAAACTAACTTAATTTCGCCATGAGACTTAGGAGGACCGGA
124
+GACCCCTCGTCATTGTTCAAAAGTGCACGCTCAGGATCCGCAGGATCCCTTGGGGTGCATGCGTGTGATTTCCTGTGCTT
125
+CGTAAGCAGACTACTTCTCCCCAGGCACATACGCATCTCCAACTTGGTCCAGTGGAAGCCGCGACCCGTTTCTCAAAACT
126
+TACCATTAATCCCTCCCAGCATCCGTGGGTCTCCGGGCTCCCCAGTTTAGGAGCACTAGGTTGAACGAAAAGTGGACATA
127
+GTGGGGGCCATCTCGATGAAGGCGAACGGTGCATCACGAATCATGACATCAGAATTAGGGACGAGCCTCGCGAACTACAT
128
+AACTTGCAATCCCCGAACAGATGACAGGCCAGACTCGGCGTTTTTACCTGCAATATCACATAGTGGTGCGTTGAACTCAC
129
+TCTAACGGGGGCTATCAAGTTTCGTTCCCGGTACCTGTAACACGTAAGCTAGGTCCGGCGTTAACCTTGACGGGGAGGTT
130
+CAATAGGACCAGCTCGCTCGATTCACCTCACCAACTTATTGCAATACTGAGGTCCGAACTCCGGGCGATAAGATAAACAC
131
+TGCCCGAACCCACCTGGACAACACAACAGCTTCACCCCAGGCTGGTTACGAATCACTGGGTTAGTTGCGGTTTCTCTCGA
132
+CCTCAGTACGAACCTGTTAGTAGCGGCAATCACGCACAGCTTATTCCTGCGCTATGTCTTCCCTCCTCTTGCCGCGCGGG
133
+CCGCTGACGGTAGACCCGTGCGTGCCTGCCAAGCTAATGAGACTCCAGAACGGTCCACCTAGTGTGCCAAGTGCAATCGG
134
+AATAGCTGCTCTGTATCTCCAAAGAAGACGCCTGTTTGTTCACCTCCACATGATTATGCGACACTAATGTCGCACAAATG
135
+CTGCGCTTGACGTAAGTCCGGTTAGCTTCCTTAAGTTCTTAAGACCCACGTAATCTACTTTTTGTCGTGCCCCGGGGAGT
136
+TTGGCAAGGTGGGTGGTGACTGGACTGGCGAAGCAGACGCGTTCCCTCGGAGGCGCGTAACCGTCCTCGCCAGTACGTCA
137
+AATACGAACGGATTATTTTTCTTTTTGTGTCAGTTAGGTGAAAAAAACGACATTCAGGCATTGCATGAGGACCACTTACA
138
+GCTACCCGCCCGATTGAGTGTAACGAAGTCGTAAAGGCGATAGCGATGCGCTGCCTCGTTTGGAATCAGGAGGACTCAAG
139
+GAGTAGAGCCACCCTGAGGAGTGCAGCCTGCGACATTACGCGGTAGGACAAAAGGAACGTACTATTATTTTAGTTGAGTT
140
+AGGTCAGGCTGCTCTGAACATTGAGTAATGGCTGGGTCCGCATTAGGGTTTTGAGTTGAAGATGCAACCGGGGTCGTAAT
141
+TAAGTTGCAGTACATGTGAATCGTTAACGCATAAGCTAGTTAGCTCCTACAGACCTTCGGAGTGCTCTCTCTTCCACCGC
142
+CATCGTGTCGCCCCCGGTTATCAGTGACGACTAGTTTCGGTCTAGTACCCCATCAAACCGTACCTAGCTGTTTAGCGTAG
143
+ACTGTCTGGGGGAGTCTGTGCTGTGTCGGGTCGCGCCCTTCTAACAGCACATTTCTCTATACACCAAAGTCAAGCCGAGG
144
+CTACCGTCCTTCCTTTGTCGCGCTTCTCCCGACCCACTGTTTGGCTCCTGAATTGGTGACGAGCAAAAAAGGTAATCTGT
145
+TAGCCGTGGACCGCATTTTGCCCCGCTTCATACAATCATTAAGTTGGGAGACAGTAGTGATGGAAATTCCGTCCGAATCA
146
+TATGGATACAAATAAGATCGTGATACGGGTGGACTCGTCTTTAGTCCGATTAACTTAAAGCATCGAGCAAATATGTCGTC
147
+CGGGCAATCGTATCCGTGAAACACGTTTCCCATGTAAAAAACGATCTCGCCAAGTCTCATACTCCTGTGCCCACAGGACT
148
+GTCTGGCTCCGCTCCTGTTCAACCGTTCTCTGATGACCGTTGCTTCACTTCAGCGACGCAGTAGTTCAAGTGTGTGCGAG
149
+GAAATTACGTGAATTGCCGCATGACCCTGGACTCATCTTGCGAAGAAATTTATGGTTCCCGGGAAGAAAATTTAACAGGG
150
+GACTACGTTACCAGGGCCACTCATGTCTTACGAGTTTATAAATTAAAGTTCGTACTATTTATATTTCAAGGCGCGGGGTC
151
+GCGCCGGCTTGATATTTGTTCACTGAAGCGGCTGATGCTGCGTCGCCTCAGCTTACAAGTCGGCGCAAGAGAGCCTGTCC
152
+TGTAAAACTGCTTCTTAACTCTGGTGTTGAAACAGTAGACCAACGGTTCCTCGTCCGTTTCCATCTCTGTTATCCTGGAA
153
+GATTGATATGGTTAGTGACAGTTAATGGTGAAGGTTCTTGCGCTACGTCTTGCCAGTAAGACCTATCGTAAAACTGGACG
... ...
@@ -71,48 +71,7 @@ sub makemaxentscores{
71 71
     }
72 72
     return @metables;
73 73
 }
74
-sub makewmmscores{
75
-    my $dir = "/bionet/geneyeo_essentials/MaxEntropy/webserver/splicemodels/";
76
-    my @list = ('me1s0acc1','me1s0acc2','me1s0acc3','me1s0acc4',
77
-		'me1s0acc5','me1s0acc6','me1s0acc7','me1s0acc8','me1s0acc9');
78
-    my @metables;
79
-    my $num = 0 ;
80
-    foreach my $file (@list) {
81
-	my $n = 0;
82
-	open (SCOREF,"<".$dir.$file) || die "Can't open $file!\n";
83
-	while(<SCOREF>) {
84
-	    chomp;
85
-	    $_=~ s/\s//;
86
-	    $metables[$num]{$n} = $_;
87
-	    $n++;
88
-	}
89
-	close(SCOREF);
90
-	#print STDERR $file."\t".$num."\t".$n."\n";
91
-	$num++;
92
-    }
93
-    return @metables;
94
-}
95
-sub makemmscores{
96
-    my $dir = "/bionet/geneyeo_essentials/MaxEntropy/webserver/splicemodels/";
97
-    my @list = ('me2s0acc1','me2s0acc2','me2s0acc3','me2s0acc4',
98
-		'me2s0acc5','me2s0acc6','me2s0acc7','me2s0acc8','me2s0acc9');
99
-    my @metables;
100
-    my $num = 0 ;
101
-    foreach my $file (@list) {
102
-	my $n = 0;
103
-	open (SCOREF,"<".$dir.$file) || die "Can't open $file!\n";
104
-	while(<SCOREF>) {
105
-	    chomp;
106
-	    $_=~ s/\s//;
107
-	    $metables[$num]{$n} = $_;
108
-	    $n++;
109
-	}
110
-	close(SCOREF);
111
-	#print STDERR $file."\t".$num."\t".$n."\n";
112
-	$num++;
113
-    }
114
-    return @metables;
115
-}
74
+
116 75
 sub maxentscore{
117 76
     my $seq = shift;
118 77
     my $table_ref = shift;
... ...
@@ -3,7 +3,7 @@
3 3
 \title{ModCon}
4 4
 \usage{
5 5
 ModCon(cds, sdSeqStartPosition, upChangeCodonsIn=16, downChangeCodonsIn=16,
6
-optimizeContext="optimalContext", sdMaximalHBS=10, acMaximalMaxent=4, optiRate=100,
6
+optimizeContext=TRUE, sdMaximalHBS=10, acMaximalMaxent=4, optiRate=100,
7 7
 nGenerations=30, parentSize=300, startParentSize=1000, bestRate=40,
8 8
 semiLuckyRate=20, luckyRate=5, mutationRate=1e-04, nCores=-1)
9 9
 }
10 10
new file mode 100644
... ...
@@ -0,0 +1,18 @@
1
+\docType{data}
2
+\name{cds}
3
+\alias{cds}
4
+\title{CDS of firefly luciferase}
5
+\format{character string
6
+}
7
+\usage{
8
+cds
9
+}
10
+\description{
11
+Character string of the nucleotide sequence encoding the firefly luciferase.
12
+}
13
+\examples{
14
+ cds
15
+}
16
+\keyword{datasets}
17
+
18
+
... ...
@@ -13,8 +13,7 @@ vignette: >
13 13
 ```{r setup, echo=FALSE, results="hide"}
14 14
 knitr::opts_chunk$set(tidy = FALSE,
15 15
                       cache = FALSE,
16
-                      dev = "png",
17
-                      message = FALSE, error = FALSE, warning = TRUE)
16
+                      dev = "png")
18 17
 ```	
19 18
 
20 19
 
... ...
@@ -91,21 +90,31 @@ cds <- paste0('ATGGAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTCTATCCGCTG',
91 90
 
92 91
 ## Executing ModCon to increase the splice site HEXplorer weight of 
93 92
 ## the splice donor at position 103
94
-cdsSSHWincreased <- ModCon(cds, 103)
93
+cdsSSHWincreased <- ModCon(cds, 103, nCores=1)
95 94
 cdsSSHWincreased
96 95
 
97 96
 ```
98 97
 
99
-To achive the minimal SSHW, the ModCon function parameter `modconMode`
100
-has to be set to `suboptimalContext`.
98
+The resulting character string holds the alternative nucleotide sequence
99
+with an increased SSHW for the index splice donor site at position 103. 
100
+The new CDS encodes the same amino acid sequence as before.
101
+
102
+
103
+To achieve the minimal SSHW, the ModCon function parameter `optimizeContext`
104
+has to be set to `FALSE`.
101 105
 
102 106
 ```{r, eval=TRUE}
103 107
 ## Executing ModCon to decrease the splice site HEXplorer weight of 
104 108
 ## the splice donor at position 103
105
-cdsSSHWdecreased <- ModCon(cds, 103, modconMode='suboptimalContext')
109
+cdsSSHWdecreased <- ModCon(cds, 103, optimizeContext=FALSE, nCores=1)
106 110
 cdsSSHWdecreased
107 111
 ```
108 112
 
113
+The resulting character string holds the alternative nucleotide sequence
114
+with a decreased SSHW for the index splice donor site at position 103. 
115
+Again, the new CDS encodes the same amino acid sequence as before.
116
+
117
+
109 118
 The extent of SSHW minimization and maximization can alternatively be 
110 119
 limited to e.g. 60% of the maximum or minimum setting the `optiRate` 
111 120
 to 60. The progress is omitted per generation (not shown in this vignette).
... ...
@@ -113,12 +122,22 @@ to 60. The progress is omitted per generation (not shown in this vignette).
113 122
 ```{r, eval=TRUE}
114 123
 ## Executing ModCon to increase the splice site HEXplorer weight of 
115 124
 ## the splice donor at position 103 to around 60% of the maximum
116
-suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, optiRate=60))
117
-suppressMessages(cdsSSHWdecreased <- ModCon(cds, 103, optiRate=60, modconMode='suboptimalContext'))
125
+suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, optiRate=60, nCores=1))
126
+suppressMessages(cdsSSHWdecreased <- ModCon(cds, 103, optiRate=60, optimizeContext=FALSE, nCores=1))
118 127
 cdsSSHWincreased
119 128
 cdsSSHWdecreased
120 129
 ```
121 130
 
131
+The resulting character strings hold the alternative nucleotide sequences 
132
+with either an increased or decreased SSHW for the index splice donor site
133
+at position 103. By setting the parameter `optiRate` to 60, the SSHW 
133
+increase and SSHW decrease were only performed to reach around 60% of 
135
+the highest or lowest SSHW possible.
136
+Again, the new coding sequences encode the same amino acid sequence as 
137
+the original CDS.
138
+
139
+
140
+
122 141
 Changing the `optiRate` parameter of the `ModCon` function from the
123 142
 default value 100 triggers usage of the genetic algorithm, instead of
124 143
 the sliding window approach. Most parameters of the genetic algorithm
... ...
@@ -128,8 +147,8 @@ adjusted with the respective `ModCon` function parameter.
128 147
 ```{r, eval=TRUE}
129 148
 ## Executing ModCon to increase the splice site HEXplorer weight of 
130 149
 ## the splice donor at position 103 to around 60% of the maximum
131
-suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, modconMode='optimalContext',
132
-                                sdMaximalHBS=10, acMaximalMaxent=4, optiRate=50,
150
+suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, 
151
+                                sdMaximalHBS=10, acMaximalMaxent=4, optiRate=60,
133 152
                                 nGenerations=5, parentSize=200, startParentSize=800,
134 153
                                 bestRate=50, semiLuckyRate=10, luckyRate=5,
135 154
                                 mutationRate=1e-03, nCores=1))
... ...
@@ -137,6 +156,12 @@ suppressMessages(cdsSSHWincreased <- ModCon(cds, 103, modconMode='optimalContext
137 156
 cdsSSHWincreased
138 157
 ```
139 158
 
159
+As with the sliding window approach, the resulting character string holds
160
+the alternative nucleotide sequence with an increased SSHW for the 
161
+index splice donor site at position 103. The new CDS encodes the 
162
+same amino acid sequence as before.
163
+
164
+
140 165
 The size of the sequence surroundings can be set using the parameters
141 166
 `upChangeCodonsIn` and `downChangeCodonsIn`, which define the number of
142 167
 codons to be adjusted around the splice site for SSHW adjustment (default=16).
... ...
@@ -144,10 +169,16 @@ codons to be adjusted around the splice site for SSHW adjustment (default=16).
144 169
 ```{r, eval=TRUE}
145 170
 ## Executing ModCon to decrease the splice site HEXplorer weight of 
146 171
 ## the splice donor at position 103
147
-cdsSSHWdecreased <- ModCon(cds, 103, downChangeCodonsIn=20, upChangeCodonsIn=21)
172
+cdsSSHWdecreased <- ModCon(cds, 103, downChangeCodonsIn=20, upChangeCodonsIn=21, nCores=1)
148 173
 cdsSSHWdecreased
149 174
 ```
150 175
 
176
+As with the sliding window approach, the resulting character string holds
177
+the alternative nucleotide sequence with a decreased SSHW for the 
178
+index splice donor site at position 103. The new CDS encodes the 
179
+same amino acid sequence as before.
180
+
181
+
151 182
 The **ModCon** package additionally holds functions to increase or decrease
152 183
 the intrinsic strength (Hbond score) of a specific splice donor site while 
153 184
 keeping the underlying encoded amino acid sequences the same. 
... ...
@@ -160,6 +191,10 @@ cdsHBondUp <- increaseGTsiteStrength(cds, 103)
160 191
 cdsHBondDown
161 192
 ```
162 193
 
194
+`cdsHBondDown` states a coding sequence, encoding the same amino acid sequence as the input CDS. However, a splice donor sequence at the stated index position within the CDS will be aimed to be decreased in its intrinsic strength.
195
+`cdsHBondUp` states a coding sequence, encoding the same amino acid sequence as the input CDS. However, a splice donor sequence at the stated index position within the CDS will be aimed to be increased in its intrinsic strength.
196
+
197
+
163 198
 Integrated functions also include functions to decrease the intrinsic strength
164 199
 of every splice donor or acceptor within a coding sequence, while considering 
165 200
 whether the overall HEXplorer profile should be increased or decreased of the 
... ...
@@ -177,6 +212,8 @@ cdsSDlow
177 212
 cdsSAlow
178 213
 ```
179 214
 
215
+`cdsSDlow` states a coding sequence, encoding the same amino acid sequence as the input CDS. However, every potential splice donor sequence within the CDS, which exceeds a Hbond score threshold (`maxhbs`) will be aimed to be degraded in its intrinsic strength as much as possible. If, additionally, potential splice acceptor sites should be degraded in their intrinsic strength, the following function `degradeSAs` will reduce the number of potential relevant splice acceptor sites within the CDS.
216
+`cdsSAlow` states a coding sequence, encoding the same amino acid sequence as the input CDS. However, every potential splice donor sequence within the CDS, which exceeds a Hbond score threshold (`maxhbs`) and every potential splice acceptor site, which exceeds a certain MaxEntScan score threshold, will be aimed to be degraded in its intrinsic strength as much as possible.
180 217
 
181 218
 # Session info
182 219