Browse code

Reimport with .R suffix

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@128659 bc3139a8-67e5-0310-9ffc-ced21a209358

Wolfgang Kaisers authored on 13/04/2017 15:56:26
Showing1 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,250 @@
1
+
2
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
3
+##                                                                           ##
4
+##  Project   :   seqTools                                                   ##
5
+##  Created   :   26.August.2013                                             ##
6
+##  Author    :   W. Kaisers                                                 ##
7
+##  File      :   kMer.r                                                     ##
8
+##  Content   :   Functionality for counting DNA k-mers                      ##
9
+##                (independent of fastq or fasta files)                      ##
10
+##                countKmers, countDnaKmers, revCountDnaKmers,               ##
11
+##                countGenomeKmers, countSpliceKmers                         ##
12
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
13
+
14
+
15
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
16
+## Counts DNA k-mers on specified regions inside single (character) sequence
17
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
18
+countDnaKmers <- function(dna, k, start=1, width=nchar(dna) - k + 1)
19
+{
20
+    if(!is.character(dna))
21
+        stop("'dna' must be character.")
22
+    
23
+    if(length(dna) != 1)
24
+        stop("'dna' must have length 1.")
25
+    
26
+    if(is.numeric(start))
27
+        start <- as.integer(start)
28
+    
29
+    if(is.numeric(width))
30
+        width <- as.integer(width)
31
+    
32
+    if(length(width) == 1)
33
+        width <- rep(width, length(start))
34
+    
35
+    if(is.numeric(k))
36
+        k <- as.integer(k)
37
+    
38
+    if(length(k)!=1)
39
+        stop("'k' must have length 1.")
40
+    
41
+    if(k < 1)
42
+        stop("'k' must be positive.")
43
+    
44
+    if(k > max_k)
45
+        stop("'k' must not exceed", max_k, ".")
46
+  
47
+    nc <- nchar(dna)
48
+    if(k > nc)
49
+        stop("'k' must be <= nchar(dna).")
50
+    if(any(start + width + k > nc + 2))
51
+        stop("Search region exceeds string end.")
52
+  
53
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
54
+    ## Counts N's
55
+    ## ToDo: Return value
56
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
57
+    nn <- integer(length(start))
58
+    return(.Call("count_dna_Kmers", 
59
+                            dna, start, width, k, nn, PACKAGE="seqTools"))
60
+    
61
+}
62
+
63
+
64
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
65
+## Counts DNA k-mers on specified regions inside single (character) sequence 
66
+## in reverse direction
67
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
68
+revCountDnaKmers <- function(dna, k, start, width)
69
+{
70
+    if(!is.character(dna))
71
+        stop("'dna' must be character.")
72
+    
73
+    if(length(dna) != 1)
74
+        stop("'dna' must have length 1.")
75
+    
76
+    if(is.numeric(start))
77
+        start <- as.integer(start)
78
+    
79
+    if(is.numeric(width))
80
+        width <- as.integer(width)
81
+    
82
+    if(length(width) == 1)
83
+        width <- rep(width, length(start))
84
+    
85
+    if(length(width) != length(start))
86
+        stop("'width' must have length 1 or the same length as 'start'.")
87
+    
88
+    if(is.numeric(k))
89
+        k <- as.integer(k)
90
+    
91
+    if(any(width + k > start))
92
+        stop("'width' must be <=  'start' - 'k'.")
93
+    
94
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
95
+    ## nn contains N counts
96
+    ## ToDo: Add value of nn to returned object
97
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
98
+    nn <- integer(length(start))
99
+    
100
+    return(.Call("rev_count_dna_Kmers",
101
+                            dna, start, width, k, nn, PACKAGE="seqTools"))
102
+}
103
+
104
+
105
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
106
+## Counts DNA k-mers on specified regions inside multiple (character) sequences
107
+## in possibly reversed direction (depending on strand)
108
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
109
+countGenomeKmers <- function(dna, seqid, start, width, strand, k)
110
+{
111
+    if(!is.character(dna))
112
+        stop("'dna' must be character.")
113
+    
114
+    if(!is.numeric(seqid))
115
+        seqid <- as.integer(seqid)
116
+    rg <- range(seqid)
117
+    
118
+    if(rg[1] < 0)
119
+        stop("Negative seqid's are not allowed.")
120
+    
121
+    if(rg[2] > length(dna))
122
+        stop("Out of range seqid's.")
123
+    
124
+    if(!is.numeric(start))
125
+        stop("'start' must be numeric.")
126
+    start <- as.integer(start)
127
+    
128
+    if(!is.numeric(width))
129
+        stop("'width' must be numeric")
130
+    width <- as.integer(width)
131
+    
132
+    if(is.factor(strand))
133
+        strand <- as.integer(strand)
134
+    else
135
+    {
136
+        if(!is.numeric("strand"))
137
+            strand <- as.integer(strand)        
138
+    }
139
+    
140
+    nStart <- length(start)
141
+    if( (length(seqid) != nStart) | (length(width) != nStart) |
142
+                (length(strand) != nStart) )
143
+        stop("'seqid', 'start', 'width' and 'strand' must have same length.")
144
+    
145
+    if(length(k) != 1)
146
+        stop("'k' must be a single value.")
147
+    
148
+    if(!is.numeric(k))
149
+        stop("'k' must be numeric.")
150
+    k<-as.integer(k)
151
+    
152
+    if(k <= 0)
153
+        stop("'k' must be >=1")
154
+    
155
+    if(k > max_k)
156
+        stop("'k' must not exceed", max_k, ".")
157
+    
158
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
159
+    ## Counts N's
160
+    ## ToDo: Return value
161
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
162
+    nn <- integer(length(start))
163
+    return(.Call("count_genome_Kmers", dna, seqid, start, width,
164
+                            strand, k, nn, PACKAGE="seqTools"))
165
+}
166
+
167
+
168
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
169
+## Counts DNA k-mers on each border of a splice-site defined by wLend and 
170
+## wRstart in range of size width
171
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
172
+
173
+countSpliceKmers <- function(dna, seqid, lEnd, rStart, width, strand, k)
174
+{
175
+    if(!is.character(dna))
176
+        stop("'dna' must be character.")
177
+    
178
+    if(!is.numeric(seqid))
179
+        stop("'seqid' must be numeric.")
180
+    seqid <- as.integer(seqid)
181
+    
182
+    rg <- range(seqid)
183
+    if(rg[1] < 0)
184
+        stop("Negative seqid's are not allowed.")
185
+    
186
+    if(rg[2] > length(dna))
187
+        stop("Out of range seqid's.")
188
+    
189
+    if(!is.numeric(lEnd))
190
+        stop("'lEnd' must be numeric.")
191
+    lEnd <- as.integer(lEnd)
192
+    
193
+    if(!is.numeric(rStart))
194
+        stop("'rStart' must be numeric.")
195
+    rStart <- as.integer(rStart)
196
+    
197
+    if(!is.numeric(width))
198
+        stop("'width' must be numeric.")
199
+    width <- as.integer(width)
200
+    
201
+    if(is.factor(strand))
202
+        strand <- as.integer(strand)
203
+    else
204
+    {
205
+        if(!is.numeric("strand"))
206
+            strand <- as.integer(strand)        
207
+    }
208
+    
209
+    nStart <- length(lEnd)
210
+    if(length(seqid) != nStart | length(rStart) != nStart | 
211
+             length(width) != nStart | length(strand) != nStart)
212
+    {
213
+        stop(paste("'seqid', 'lEnd', 'rStart', 'width'",
214
+                            "and 'strand' must have equal length."))
215
+    }
216
+    if(!is.numeric(k))
217
+        stop("'k' must be numeric.")
218
+    k <- as.integer(k)
219
+    
220
+    if(length(k) != 1)
221
+        stop("'k' must be a single value.")
222
+    
223
+    if(k > max_k)
224
+        stop("'k' must not exceed", max_k, ".")
225
+    
226
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
227
+    ## Plus strand
228
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
229
+    plus_strand<-strand == 1
230
+    
231
+    if(sum(plus_strand) > 0)
232
+    {
233
+        if(any((lEnd[plus_strand] - width[plus_strand] - k + 1) < 0))
234
+            stop("lEnd must be >= width+k-1 for all +-strand coordinates")
235
+    }
236
+    
237
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
238
+    ## Counts N's
239
+    ## ToDo: Return value
240
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
241
+    nn <- integer(length = nStart)
242
+    
243
+    return(.Call("count_splice_Kmers", dna, seqid, lEnd, rStart, width, 
244
+                            strand, k, nn, PACKAGE="seqTools")) 
245
+}
246
+
247
+
248
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
249
+## END OF FILE
250
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##