Browse code

Prepare for reimport

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@128651 bc3139a8-67e5-0310-9ffc-ced21a209358

Wolfgang Kaisers authored on 13/04/2017 15:54:01
Showing1 changed files

1 1
deleted file mode 100644
... ...
@@ -1,250 +0,0 @@
1
-
2
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
3
-##                                                                           ##
4
-##  Project   :   seqTools                                                   ##
5
-##  Created   :   26.August.2013                                             ##
6
-##  Author    :   W. Kaisers                                                 ##
7
-##  File      :   kMer.r                                                     ##
8
-##  Content   :   Functionality for counting DNA k-mers                      ##
9
-##                (independent of fastq or fasta files)                      ##
10
-##                countKmers, countDnaKmers, revCountDnaKmers,               ##
11
-##                countGenomeKmers, countSpliceKmers                         ##
12
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
13
-
14
-
15
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
16
-## Counts DNA k-mers on specified regions inside single (character) sequence
17
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
18
-countDnaKmers <- function(dna, k, start=1, width=nchar(dna) - k + 1)
19
-{
20
-    if(!is.character(dna))
21
-        stop("'dna' must be character.")
22
-    
23
-    if(length(dna) != 1)
24
-        stop("'dna' must have length 1.")
25
-    
26
-    if(is.numeric(start))
27
-        start <- as.integer(start)
28
-    
29
-    if(is.numeric(width))
30
-        width <- as.integer(width)
31
-    
32
-    if(length(width) == 1)
33
-        width <- rep(width, length(start))
34
-    
35
-    if(is.numeric(k))
36
-        k <- as.integer(k)
37
-    
38
-    if(length(k)!=1)
39
-        stop("'k' must have length 1.")
40
-    
41
-    if(k < 1)
42
-        stop("'k' must be positive.")
43
-    
44
-    if(k > max_k)
45
-        stop("'k' must not exceed", max_k, ".")
46
-  
47
-    nc <- nchar(dna)
48
-    if(k > nc)
49
-        stop("'k' must be <= nchar(dna).")
50
-    if(any(start + width + k > nc + 2))
51
-        stop("Search region exceeds string end.")
52
-  
53
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
54
-    ## Counts N's
55
-    ## ToDo: Return value
56
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
57
-    nn <- integer(length(start))
58
-    return(.Call("count_dna_Kmers", 
59
-                            dna, start, width, k, nn, PACKAGE="seqTools"))
60
-    
61
-}
62
-
63
-
64
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
65
-## Counts DNA k-mers on specified regions inside single (character) sequence 
66
-## in reverse direction
67
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
68
-revCountDnaKmers <- function(dna, k, start, width)
69
-{
70
-    if(!is.character(dna))
71
-        stop("'dna' must be character.")
72
-    
73
-    if(length(dna) != 1)
74
-        stop("'dna' must have length 1.")
75
-    
76
-    if(is.numeric(start))
77
-        start <- as.integer(start)
78
-    
79
-    if(is.numeric(width))
80
-        width <- as.integer(width)
81
-    
82
-    if(length(width) == 1)
83
-        width <- rep(width, length(start))
84
-    
85
-    if(length(width) != length(start))
86
-        stop("'width' must have length 1 or the same length as 'start'.")
87
-    
88
-    if(is.numeric(k))
89
-        k <- as.integer(k)
90
-    
91
-    if(any(width + k > start))
92
-        stop("'width' must be <=  'start' - 'k'.")
93
-    
94
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
95
-    ## nn contains N counts
96
-    ## ToDo: Add value of nn to returned object
97
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
98
-    nn <- integer(length(start))
99
-    
100
-    return(.Call("rev_count_dna_Kmers",
101
-                            dna, start, width, k, nn, PACKAGE="seqTools"))
102
-}
103
-
104
-
105
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
106
-## Counts DNA k-mers on specified regions inside multiple (character) sequences
107
-## in possibly reversed direction (depending on strand)
108
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
109
-countGenomeKmers <- function(dna, seqid, start, width, strand, k)
110
-{
111
-    if(!is.character(dna))
112
-        stop("'dna' must be character.")
113
-    
114
-    if(!is.numeric(seqid))
115
-        seqid <- as.integer(seqid)
116
-    rg <- range(seqid)
117
-    
118
-    if(rg[1] < 0)
119
-        stop("Negative seqid's are not allowed.")
120
-    
121
-    if(rg[2] > length(dna))
122
-        stop("Out of range seqid's.")
123
-    
124
-    if(!is.numeric(start))
125
-        stop("'start' must be numeric.")
126
-    start <- as.integer(start)
127
-    
128
-    if(!is.numeric(width))
129
-        stop("'width' must be numeric")
130
-    width <- as.integer(width)
131
-    
132
-    if(is.factor(strand))
133
-        strand <- as.integer(strand)
134
-    else
135
-    {
136
-        if(!is.numeric("strand"))
137
-            strand <- as.integer(strand)        
138
-    }
139
-    
140
-    nStart <- length(start)
141
-    if( (length(seqid) != nStart) | (length(width) != nStart) |
142
-                (length(strand) != nStart) )
143
-        stop("'seqid', 'start', 'width' and 'strand' must have same length.")
144
-    
145
-    if(length(k) != 1)
146
-        stop("'k' must be a single value.")
147
-    
148
-    if(!is.numeric(k))
149
-        stop("'k' must be numeric.")
150
-    k<-as.integer(k)
151
-    
152
-    if(k <= 0)
153
-        stop("'k' must be >=1")
154
-    
155
-    if(k > max_k)
156
-        stop("'k' must not exceed", max_k, ".")
157
-    
158
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
159
-    ## Counts N's
160
-    ## ToDo: Return value
161
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
162
-    nn <- integer(length(start))
163
-    return(.Call("count_genome_Kmers", dna, seqid, start, width,
164
-                            strand, k, nn, PACKAGE="seqTools"))
165
-}
166
-
167
-
168
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
169
-## Counts DNA k-mers on each border of a splice-site defined by wLend and 
170
-## wRstart in range of size width
171
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
172
-
173
-countSpliceKmers <- function(dna, seqid, lEnd, rStart, width, strand, k)
174
-{
175
-    if(!is.character(dna))
176
-        stop("'dna' must be character.")
177
-    
178
-    if(!is.numeric(seqid))
179
-        stop("'seqid' must be numeric.")
180
-    seqid <- as.integer(seqid)
181
-    
182
-    rg <- range(seqid)
183
-    if(rg[1] < 0)
184
-        stop("Negative seqid's are not allowed.")
185
-    
186
-    if(rg[2] > length(dna))
187
-        stop("Out of range seqid's.")
188
-    
189
-    if(!is.numeric(lEnd))
190
-        stop("'lEnd' must be numeric.")
191
-    lEnd <- as.integer(lEnd)
192
-    
193
-    if(!is.numeric(rStart))
194
-        stop("'rStart' must be numeric.")
195
-    rStart <- as.integer(rStart)
196
-    
197
-    if(!is.numeric(width))
198
-        stop("'width' must be numeric.")
199
-    width <- as.integer(width)
200
-    
201
-    if(is.factor(strand))
202
-        strand <- as.integer(strand)
203
-    else
204
-    {
205
-        if(!is.numeric("strand"))
206
-            strand <- as.integer(strand)        
207
-    }
208
-    
209
-    nStart <- length(lEnd)
210
-    if(length(seqid) != nStart | length(rStart) != nStart | 
211
-             length(width) != nStart | length(strand) != nStart)
212
-    {
213
-        stop(paste("'seqid', 'lEnd', 'rStart', 'width'",
214
-                            "and 'strand' must have equal length."))
215
-    }
216
-    if(!is.numeric(k))
217
-        stop("'k' must be numeric.")
218
-    k <- as.integer(k)
219
-    
220
-    if(length(k) != 1)
221
-        stop("'k' must be a single value.")
222
-    
223
-    if(k > max_k)
224
-        stop("'k' must not exceed", max_k, ".")
225
-    
226
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
227
-    ## Plus strand
228
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
229
-    plus_strand<-strand == 1
230
-    
231
-    if(sum(plus_strand) > 0)
232
-    {
233
-        if(any((lEnd[plus_strand] - width[plus_strand] - k + 1) < 0))
234
-            stop("lEnd must be >= width+k-1 for all +-strand coordinates")
235
-    }
236
-    
237
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
238
-    ## Counts N's
239
-    ## ToDo: Return value
240
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
241
-    nn <- integer(length = nStart)
242
-    
243
-    return(.Call("count_splice_Kmers", dna, seqid, lEnd, rStart, width, 
244
-                            strand, k, nn, PACKAGE="seqTools")) 
245
-}
246
-
247
-
248
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
249
-## END OF FILE
250
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##