Browse code

Prepare for reimport

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@128652 bc3139a8-67e5-0310-9ffc-ced21a209358

Wolfgang Kaisers authored on 13/04/2017 15:54:55
Showing1 changed files

1 1
deleted file mode 100644
... ...
@@ -1,1123 +0,0 @@
1
-
2
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
3
-##                                                                           ##
4
-##  Project   :   seqTools                                                   ##
5
-##  Created   :   26.August.2013                                             ##
6
-##  Author    :   W. Kaisers                                                 ##
7
-##  Content   :   Doing some diagnostic and interventional tasks on fastq    ##
8
-##                and fasta                                                  ##
9
-##                esp. DNA k-mer counts.                                     ##
10
-##  Version   :   0.99.34                                                    ##
11
-##                                                                           ##
12
-##  Changelog :                                                              ##
13
-##  26.08.13  :   0.0.1 Project created                                      ##
14
-##  03.09.13  :   0.0.6 C-Code valgrind tested                               ##
15
-##  27.09.13  :   0.1.0 Added fastqDnaKmers                                  ##
16
-##  14.10.13  :   0.1.1 Added C-library for parsing gz fasta and fastq files ##
17
-##  17.10.13  :   0.1.3 C-Wrapper for fastq working.                         ##
18
-##  17.10.13  :   0.1.6 First version of R package                           ##
19
-##  21.10.13  :   0.3.0 New C library for fastq parsing                      ##
20
-##  28.10.13  :   0.4.0 Added fastq-loc functions                            ##
21
-##  29.10.13  :   0.4.4 seqTools C-code valgrind tested.                     ##
22
-##  01.11.13  :   0.5.0 Distance matrices implemented                        ##
23
-##  02.11.13  :   0.5.1 First working version with clustering based on       ##
24
-##                      K-mers                                               ##
25
-##  07.11.13  :   0.5.4 countFastaKmers now resizes arrays automatically     ##
26
-##  09.11.13  :   0.6.0 count_fastq now resizes arrays automatically         ##
27
-##  11.11.13  :   0.6.5 Added fastq simulation functions                     ##
28
-##  19.11.13  :   0.7.0 Added trimFastq function                             ##
29
-##  30.11.13  :   0.9.0 Separated R source files                             ##
30
-##  22.12.13  :   0.99.2 Added '['-operator for Fastqq class                 ##
31
-##  19.01.14  :   0.99.7 Added zlib version control for correction of        ##
32
-##                       gzBuffer                                            ##
33
-##                       error                                               ##
34
-##                        + checks: cran win-builder + valgrind              ##
35
-##  21.05.14  :   0.99.34 Corrected error in count_fasta_Kmers               ##
36
-##                        which freezed function                             ##
37
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
38
-
39
-.onUnload <- function(libpath) { library.dynam.unload("seqTools", libpath) }
40
-
41
-## see: http://bioconductor.org/developers/how-to/coding-style/
42
-
43
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
44
-## Data collection functions:
45
-## Fastqq,  fastqKmerLocs,  fastqKmerSubsetLocs
46
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
47
-
48
-fastqq <- function(filenames, k=6, probeLabel)
49
-{
50
-    k <- as.integer(k)
51
-    
52
-    tl <- list()
53
-    tl$start <- Sys.time()
54
-    filenames <- path.expand(filenames)
55
-    
56
-    res <- .Call("count_fastq", filenames, k, PACKAGE="seqTools")
57
-    
58
-    tl$end <- Sys.time()
59
-    res@collectTime <- tl
60
-    
61
-    # Correct minSeqLen when empty files are counted
62
-    if(any(res@nReads==0))
63
-        res@seqLen[1,res@nReads==0] <- 0
64
-    
65
-    if(!missing(probeLabel))
66
-    {
67
-        if(length(probeLabel) == res@nFiles)
68
-            res@probeLabel <- as.character(probeLabel) 
69
-        else{
70
-            warning("[Fastqq] probeLabel and filenames must have equal length.")
71
-            res@probeLabel <- as.character(1:res@nFiles)
72
-        }
73
-    }else{
74
-        res@probeLabel <- as.character(1:res@nFiles)
75
-    }
76
-    return(res)
77
-}
78
-
79
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
80
-## fastq K-mer locs
81
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
82
-
83
-fastqKmerLocs <- function(filenames, k=4)
84
-{
85
-    if(!is.numeric(k))
86
-        stop("'k' must be numeric.")
87
-    k <- as.integer(k)
88
-    
89
-    if( (k < 0) || (k > max_k) )
90
-        stop("'k' must be in range 0, ... , 16.")
91
-    
92
-    return(.Call("fastq_Kmer_locs", filenames, k, PACKAGE="seqTools"))
93
-}
94
-
95
-
96
-fastqKmerSubsetLocs <- function(filenames, k=4, kIndex)
97
-{
98
-    # Returns list with matrix elements.
99
-    if(!is.numeric(k))
100
-        stop("'k' must be numeric.")
101
-    
102
-    k <- as.integer(k)
103
-    
104
-    if( (k < 0) || (k > max_k) )
105
-        stop("'k' must be in range 0, ... , ", max_k, ".")
106
-    
107
-    if(!is.numeric(kIndex))
108
-        stop("'kIndex' must be numeric.")
109
-    kIndex <- as.integer(kIndex)
110
-    
111
-    if(any(kIndex < 0))
112
-        stop("No negative 'kIndex' values allowed.")
113
-    
114
-    if(any(kIndex > (4^k)) )
115
-        stop("'kIndex' out of range (>4^k).")
116
-    
117
-    return(.Call("fastq_KmerSubset_locs",
118
-                                filenames, k, kIndex, PACKAGE="seqTools"))
119
-}
120
-
121
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
122
-## End: Data collection functions.
123
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
124
-
125
-
126
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
127
-## Standard slot accessor functions
128
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
129
-
130
-
131
-setMethod("fileNames", "Fastqq", function(object)
132
-                                            {return(object@filenames)})
133
-
134
-setMethod("collectTime", "Fastqq", function(object)
135
-                                            {return(object@collectTime)})
136
-
137
-setMethod("collectDur", "Fastqq", function(object) {
138
-return(as.numeric(difftime(object@collectTime$end, object@collectTime$start,
139
-                                                    units = "secs")))
140
-})
141
-
142
-setMethod("getK", "Fastqq", function(object) {return(object@k)})
143
-
144
-setMethod("nFiles", "Fastqq", function(object) {return(object@nFiles)})
145
-
146
-setMethod("nNnucs", "Fastqq", function(object) {return(object@nN)})
147
-
148
-setMethod("nReads", "Fastqq", function(object) {return(object@nReads)})
149
-
150
-setMethod("maxSeqLen", "Fastqq", function(object) {return(object@maxSeqLen)})
151
-
152
-setMethod("seqLenCount", "Fastqq", function(object)
153
-{
154
-    res<-object@seqLenCount
155
-    colnames(res) <- object@probeLabel
156
-    return(res)
157
-})
158
-
159
-setMethod("nucFreq", "Fastqq", function(object, i)
160
-{
161
-    if(missing(i))
162
-        stop("Argument 'i' is not optional.")
163
-    
164
-    if(!is.numeric(i))
165
-        stop("'i' must be numeric.")
166
-    
167
-    i <- as.integer(i)
168
-    if( (i < 1) || (i > object@nFiles) )
169
-        stop("'i' must be >0 and < nFiles.")
170
-    
171
-    return(object@nac[[i]])
172
-})
173
-
174
-setMethod("gcContent", "Fastqq", function(object, i)
175
-{
176
-    if(missing(i))
177
-        stop("Argument 'i' is not optional.")
178
-    
179
-    if(!is.numeric(i))
180
-        stop("'i' must be numeric.")
181
-    
182
-    i <- as.integer(i)
183
-    if( (i < 1) || (i > object@nFiles))
184
-        stop("'i' must be >0 and < nFiles.")
185
-    
186
-    return(object@gcContent[, i])
187
-})
188
-
189
-
190
-setMethod("gcContentMatrix", "Fastqq", function(object)
191
-{
192
-    gcc <- object@gcContent
193
-    colnames(gcc) <- object@probeLabel
194
-    return(gcc)
195
-})
196
-
197
-setMethod("seqLen", "Fastqq", function(object)
198
-{
199
-    sql <- object@seqLen
200
-    colnames(sql) <- object@probeLabel
201
-    return(sql)
202
-})
203
-
204
-setMethod("kmerCount", "Fastqq", function(object)
205
-{
206
-    kmer <- object@kmer
207
-    colnames(kmer) <- object@probeLabel
208
-    return(kmer)
209
-})
210
-
211
-
212
-setMethod("probeLabel", "Fastqq", function(object){return(object@probeLabel)})
213
-setReplaceMethod("probeLabel", "Fastqq", function(object, value)
214
-{
215
-    if(length(value) != nFiles(object))
216
-        stop("'value' must have length ", nFiles(object), ".")
217
-    
218
-    val <- as.character(value)
219
-    if(any(table(val)) > 1)
220
-    {
221
-        warning("[probeLabel <- .Fastqq] probeLabel unique suffix added.")
222
-        val <- paste(1:nFiles(object), val, sep="_")
223
-    }
224
-    object@probeLabel <- val
225
-    
226
-    return(object)
227
-})
228
-
229
-setMethod("phred", signature="Fastqq", definition=function(object, i)
230
-{
231
-    if(missing(i))
232
-        stop("Argument 'i' is not optional.")
233
-    
234
-    if(!is.numeric(i))
235
-        stop("'i' must be numeric.")
236
-    
237
-    i <- as.integer(i)
238
-    if( (i < 1) || (i > object@nFiles) )
239
-        stop("'i' must be >0 and < nFiles.")
240
-    
241
-    return(object@phred[[i]])
242
-})
243
-
244
-
245
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
246
-## End: Standard slot accessor functions
247
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
248
-
249
-
250
-setMethod("show", "Fastqq", function(object)
251
-{
252
-    bm <- Sys.localeconv()[7]
253
-    w <- 20
254
-    r <- "right"
255
-    cat("Class       : ", format(class(object), w=w, j=r)                              , "\n", sep="")
256
-    cat("nFiles      : ", format(format(nFiles(object)          , big.m=bm), w=w, j=r) , "\n", sep="")
257
-    cat("maxSeqLen   : ", format(format(maxSeqLen(object)       , big.m=bm), w=w, j=r) , "\n", sep="")
258
-    cat("k (Kmer len): ", format(format(getK(object)            , big.m=bm), w=w, j=r) , "\n", sep="")
259
-    cat("\n")
260
-    cat("nReads      : ", format(format(sum(as.numeric(nReads(object)))    , big.m=bm), w=w, j=r)   , "\n", sep="")
261
-    cat("nr  N   nuc : ", format(format(sum(nNnucs(object))     , big.m=bm), w=w, j=r) , "\n", sep="")
262
-    cat("Min seq len : ", format(format(min(seqLen(object)[1, ]), big.m=bm), w=w, j=r) , "\n", sep="")
263
-    cat("Max seq len : ", format(format(max(seqLen(object)[2, ]), big.m=bm), w=w, j=r) , "\n", sep="")
264
-    return(invisible())
265
-})
266
-
267
-
268
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
269
-## Phred related functions
270
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
271
-
272
-setMethod("phredQuantiles", "Fastqq", function(object, quantiles, i, ...)
273
-{
274
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
275
-    # Checking arguments
276
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
277
-    
278
-    ## Check quantiles argument
279
-    if(missing(quantiles))
280
-        stop("'quantiles' argument is not optional")
281
-    
282
-    if(!is.numeric(quantiles))
283
-        stop("Quantiles must be numeric.")
284
-    
285
-    if(!(all(quantiles >= 0) & all(quantiles <= 1) ))
286
-        stop("All quantiles mustbe in [0, 1]")
287
-    
288
-    quantiles <- sort(unique(round(quantiles, 2)))
289
-    
290
-    ## Check 'i' argument
291
-    if(missing(i))
292
-        stop("'i' argument is not optional")
293
-    
294
-    if(!is.numeric(i))
295
-        stop("'i' must be numeric.")
296
-    
297
-    if(length(i) > 1)
298
-        stop("'i' must have length 1")
299
-    
300
-    i <- as.integer(i)
301
-    if( (i < 1) || (i > object@nFiles) )
302
-            stop("'i' must be >0 and <nFiles.")
303
-
304
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
305
-    # Count qual values for each sequence position
306
-    # Convert integer counts into column-wise relative values
307
-    # Maximum counted read length
308
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
309
-    vec <- 1:seqLen(object)[2, i]
310
-    qrel <- as.data.frame(apply(object@phred[[i]][, vec], 2, rel_int))
311
-    names(qrel) <- vec
312
-    
313
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
314
-    # Walk through each column and extract row number
315
-    # for given quantile values
316
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
317
-    res <- .Call("get_column_quantiles", quantiles, qrel, PACKAGE="seqTools")
318
-    return(res)
319
-})
320
-
321
-
322
-setMethod("plotPhredQuant", "Fastqq", function(object, i, main, ...){
323
-    if(!is.numeric(i))
324
-        stop("'i' must be numeric.")
325
-    
326
-    i <- as.integer(i)
327
-    
328
-    if( (i < 1) || (i > object@nFiles) )
329
-        stop("'i' must be >0 and <nFiles.")
330
-    
331
-    quant <- c(0.1, 0.25, 0.5, 0.75, 0.9)
332
-    cols <- c("#1F78B4", "#FF7F00", "#E31A1C", "#FF7F00", "#1F78B4") 
333
-    qq <- phredQuantiles(object, quant, i)
334
-    maxQ = floor(1.2*max(qq))
335
-    xv <- 1:ncol(qq)
336
-    
337
-    if(missing(main))
338
-    {
339
-        main <- paste("Position wise Phred Quantiles (", 
340
-                                probeLabel(object)[i], ")", sep="")
341
-    }
342
-
343
-    plot(xv, xv, ylim=c(0, maxQ), type="n", bty="n", las=1,
344
-        ylab = "Phred score", xlab="Sequence position", main=main, ...)
345
-     
346
-    lines(xv, qq[1, ], col=cols[1], lty=2)
347
-    lines(xv, qq[2, ], col=cols[2], lty=1)
348
-    lines(xv, qq[3, ], col=cols[3], lwd=2)
349
-    lines(xv, qq[4, ], col=cols[4], lty=1)
350
-    lines(xv, qq[5, ], col=cols[5], lty=2)
351
-    
352
-    legend("top", ncol=6, lty=c(2, 1, 1, 1, 2),
353
-            lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5,
354
-            legend=c("10%", "25%", "50%", "75%", "90%"), bty="n", cex=0.8)
355
-    return(invisible())
356
-})
357
-
358
-
359
-
360
-
361
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
362
-## Global Phred content functions
363
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
364
-setMethod("phredDist", "Fastqq", function(object, i){
365
-    idx <- 1:nFiles(object)
366
-    
367
-    if(missing(i))
368
-        i <- idx
369
-    else
370
-    {
371
-        if(!is.numeric(i))
372
-            stop("'i' must be numeric.")
373
-        
374
-        if(!all(is.element(i, idx)))
375
-            stop("'i' is out of range.")
376
-    }
377
-    
378
-    phred <- Reduce("+", object@phred[i])
379
-    phred <- matrix(as.numeric(phred), nrow=nrow(phred))
380
-    
381
-    phred_vals <- apply(phred, 1, sum)
382
-    phred_dist <- phred_vals/sum(phred_vals)
383
-    names(phred_dist) <- rownames(object@phred[[1]])
384
-    
385
-    return(phred_dist)    
386
-})
387
-
388
-
389
-setMethod("plotPhredDist", "Fastqq", function(object, i, maxp=45, col, ...)
390
-{
391
-    if(!is.numeric(maxp))
392
-        stop("maxp must be numeric")
393
-    
394
-    if(!is.integer(maxp))
395
-        maxp<-as.integer(maxp)
396
-    
397
-    if(maxp <= 0)
398
-        stop("maxp must be >=0")
399
-    
400
-    if(missing(col))
401
-        col <- topo.colors(10)[3]
402
-    
403
-    phred <- phredDist(object, i)
404
-    maxy <- ceiling(max(phred) * 5) / 5
405
-    x <- 1:maxp
406
-    xmax <- 10 * (ceiling(maxp / 10))
407
-    
408
-    plot(x, phred[x], ylim=c(0, maxy), xlim=c(0, xmax), type="l", lwd=2,
409
-            col=col, yaxt="n", bty="n", xlab="Phred value",
410
-            ylab="Content (%)", ...)
411
-    
412
-    ylb <- 0:(10 * maxy) / 10
413
-    axis(side=2, at=ylb, labels=100 * ylb, las=1)
414
-    return(invisible())
415
-})
416
-
417
-
418
-# + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
419
-# Not exported:
420
-# + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
421
-pd_l10 <- function(x){ x <- phredDist(x); return(sum(x[1:10]) / sum(x))}
422
-
423
-
424
-setMethod("propPhred","Fastqq",function(object, greater=30, less=93)
425
-{
426
-    if(!is.numeric(greater))
427
-        stop("'greater' must be numeric.")
428
-    
429
-    if(length(greater) != 1)
430
-        stop("'greater' must have length 1.")
431
-    
432
-    if(!is.numeric(less))
433
-        stop("'less' must be numeric.")
434
-    
435
-    if(length(less) != 1)
436
-        stop("'less must have length 1.")
437
-    
438
-    ## + + + + + + + + + + + + + + + + + + + + + + ##
439
-    ## greater and less shall be used as
440
-    ## array indices: increase greater
441
-    ## + + + + + + + + + + + + + + + + + + + + + + ##
442
-    greater<-as.integer(greater+1)
443
-    less<-as.integer(less)
444
-    
445
-    if(greater < 1)
446
-        stop("'greater' must be >=0.")
447
-    
448
-    if(less > 93)
449
-        stop("'less' must be < 94.")
450
-    
451
-    if(greater >= less)
452
-        stop("'greater' must be <= 'less'")
453
-    
454
-    n <- nFiles(object)
455
-    res <- numeric(n)
456
-    for(i in 1:n)
457
-    {
458
-        pd <- phredDist(object, i)
459
-        res[i] <- sum(pd[greater:less])
460
-    }
461
-    names(res) <- probeLabel(object)
462
-    return(res)
463
-})
464
-
465
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
466
-## End: Global Phred content functions
467
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
468
-
469
-setMethod("mergedPhred", "Fastqq", function(object){
470
-    sql <- seqLen(object)
471
-    maxSeqLen <- max(sql[2, ])
472
-    
473
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
474
-    ## as.numeric: Sum on integer is likely to exceed 
475
-    ##                 maximal 32-bit integer  values
476
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
477
-    
478
-    if(sql[2, 1] < maxSeqLen)
479
-    {
480
-        mp <- as.numeric(.Call("r_enlarge_int_mat", object@phred[[1]], 
481
-                c(nrow(object@phred[[1]]), maxSeqLen), PACKAGE="seqTools"))
482
-    }else{
483
-        mp <- as.numeric(object@phred[[1]])
484
-    }
485
-    
486
-    
487
-    n <- nFiles(object)
488
-    if(n > 1)
489
-    {
490
-        for(i in 2:n)
491
-        {
492
-            if(sql[2, i] < maxSeqLen)  
493
-            {
494
-                mp <- mp + as.numeric(.Call("r_enlarge_int_mat",
495
-                            object@phred[[i]],
496
-                            c(nrow(object@phred[[i]]), maxSeqLen), 
497
-                            PACKAGE="seqTools"))
498
-            }else{
499
-                mp <- mp + as.numeric(object@phred[[i]])
500
-            }
501
-        }
502
-    }
503
-    mp <- matrix(mp, ncol = maxSeqLen)
504
-    rownames(mp) <- rownames(object@phred[[1]])
505
-    colnames(mp) <- 1:maxSeqLen
506
-    return(mp)
507
-})
508
-
509
-setMethod("mergedPhredQuantiles", "Fastqq", function(object, quantiles)
510
-{
511
-    if(!(all(quantiles >= 0) & all(quantiles <= 1)) )
512
-        stop("[mergedPhredQuantiles.Fastqq] all quantiles mustbe in [0, 1]")
513
-    quantiles <- sort(unique(round(quantiles, 2)))
514
-  
515
-    sql <- seqLen(object)
516
-    maxSeqLen <- max(sql[2, ])
517
-    
518
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
519
-    ## Count qual values for each sequence position
520
-    ## Convert counts into column-wise relative values
521
-    ## Maximum counted read length
522
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
523
-    vec <- 1:maxSeqLen
524
-    mrg <- mergedPhred(object)
525
-    qrel <- as.data.frame(apply(mrg[, vec], 2, rel_real))
526
-    names(qrel)
527
-    names(qrel) <- vec
528
-    
529
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
530
-    ## Walk through each column and extract row number
531
-    ## for given quantile values
532
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
533
-    res <- .Call("get_column_quantiles", quantiles, qrel, PACKAGE="seqTools")
534
-    return(res)
535
-})
536
-
537
-
538
-setMethod("plotMergedPhredQuant", "Fastqq", function(object, main, ...)
539
-{
540
-    quant <- c(0.1, 0.25, 0.5, 0.75, 0.9)
541
-    cols <- c("#1F78B4", "#FF7F00", "#E31A1C", "#FF7F00", "#1F78B4")
542
-    qq <- mergedPhredQuantiles(object, quant)
543
-    maxQ = floor(1.2*max(qq))
544
-    xv <- 1:ncol(qq)
545
-    
546
-    if(missing(main))
547
-        main <- paste("Merged position wise Phred Quantiles.", sep = "")
548
-    
549
-    plot(xv, xv, ylim=c(0, maxQ), type="n", bty="n", las=1,
550
-        ylab="Phred score", xlab="Sequence position", main=main, ...)
551
-    
552
-    
553
-    lines(xv, qq[1, ], col=cols[1], lty=2)
554
-    lines(xv, qq[2, ], col=cols[2], lty=1)
555
-    lines(xv, qq[3, ], col=cols[3], lwd=2)
556
-    lines(xv, qq[4, ], col=cols[4], lty=1)
557
-    lines(xv, qq[5, ], col=cols[5], lty=2)
558
-    
559
-    legend("top", ncol=6, lty=c(2, 1, 1, 1, 2), 
560
-            lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5, 
561
-            legend=c("10%", "25%", "50%", "75%", "90%"), bty="n", cex=0.8)
562
-    
563
-    return(invisible()) 
564
-})
565
-
566
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
567
-## End: Phred related functions
568
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
569
-
570
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
571
-## Nucleotide frequency related functions
572
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
573
-
574
-
575
-setMethod("plotNucFreq", "Fastqq", function(object, i, main, maxx, ...)
576
-{
577
-    if(!is.numeric(i))
578
-        stop("'i' must be numeric.")
579
-    
580
-    i <- as.integer(i)
581
-    
582
-    if( (i < 1) || (i > object@nFiles) )
583
-        stop("'i' must be >0 and <nFiles.")
584
-    
585
-    maxSeqlen <- max(seqLen(object)[2, ])
586
-    if(missing(maxx))
587
-    {
588
-        maxx <- maxSeqlen
589
-    }
590
-    else
591
-    {
592
-        if(!is.numeric(maxx))
593
-            stop("'maxx' must be numeric.")
594
-        maxx <- as.integer(maxx)
595
-        if(maxx < 1)
596
-            stop("'maxx' must be >0.")
597
-        if(maxx > maxSeqlen)
598
-            maxx <- maxSeqlen
599
-    }
600
-    
601
-    # Skip extra line
602
-    x <- 1:maxx
603
-    nac <- object@nac[[i]][1:4, x]
604
-    nacrel <- apply(nac, 2, rel_int)
605
-    maxY = round(1.4 * max(nacrel), 1)
606
-    
607
-    # Maximum counted read length
608
-    nacrel <- apply(nac, 2, rel_int)
609
-    cols <- c("#1F78B4", "#33A02C", "#E31A1C", "#FF7F00")
610
-    
611
-    if(missing(main))
612
-        main <- paste("Position wise Nucleotide frequency (",
613
-                                probeLabel(object)[i], ")", sep="")
614
-  
615
-    plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
616
-                ylab="Nucleotide fequency", xlab="sequence position",
617
-                main=main, ...)
618
- 
619
-    lines(x, nacrel[1, ], col=cols[1], lwd=2)
620
-    lines(x, nacrel[2, ], col=cols[2], lwd=2)
621
-    lines(x, nacrel[3, ], col=cols[3], lwd=2)
622
-    lines(x, nacrel[4, ], col=cols[4], lwd=2)
623
- 
624
-    legend("top", ncol=6, 
625
-            lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5, 
626
-            legend=c("A", "C", "G", "T"), bty="n", cex=0.8)
627
-    
628
-    return(invisible())
629
-})
630
-
631
-
632
-setMethod("plotGCcontent", "Fastqq", function(object,main,...)
633
-{
634
-    cols <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
635
-            "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A")
636
-    
637
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
638
-    ## Normalize matrix to colsum = 1
639
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
640
-    
641
-    gc <- apply(object@gcContent, 2, rel_int)
642
-    maxY = round(1.3*max(gc), 2)
643
-    nf <- nFiles(object)
644
-    x <- 1:nrow(gc)
645
-    
646
-    if(missing(main))
647
-        main<-"GC content"
648
-    
649
-    plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
650
-        ylab="Proportion of reads (%)", xlab="Relative GC content (%)", 
651
-        main=main, ...) 
652
-  
653
-    for(i in 1:nf)
654
-        lines(x, gc[, i], col=cols[i], lwd=2)
655
-  
656
-    legend("right", lwd=2, col=cols, legend=probeLabel(object),
657
-                                                bty="n", cex=0.8)
658
-  
659
-    return(invisible())
660
-})
661
-
662
-
663
-setMethod("plotNucCount", "Fastqq", function(object, nucs=16, maxx, ...)
664
-{
665
-  
666
-    # j = 16: N,    j = 2:3: gc
667
-    if(!is.numeric(nucs))
668
-        stop("'nucs' must be numeric.")
669
-    
670
-    nucs <- as.integer(nucs)
671
-    if(any(nucs < 1) || any(nucs > 19))
672
-        stop("'nucs' must be >0 and <20.")
673
-
674
-    maxSeqlen <- max(seqLen(object)[2, ])
675
-    
676
-    if(missing(maxx))
677
-    {
678
-        maxx <- maxSeqlen
679
-    }
680
-    else
681
-    {
682
-        if(!is.numeric(maxx))
683
-            stop("'maxx' must be numeric.")
684
-        
685
-        maxx <- as.integer(maxx)
686
-        
687
-        if(maxx<1)
688
-            stop("'maxx must be >0.")
689
-        
690
-        if(maxx > maxSeqlen)
691
-            maxx <- maxSeqlen
692
-    }
693
-    
694
-    n <- nFiles(object)
695
-    fvec <- 1:n
696
-    
697
-    i <- 1
698
-    
699
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
700
-    ## Skip extra line
701
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
702
-    x <- 1:maxx
703
-    nac <- object@nac[[i]][, x]
704
-    nacrel <- apply(nac, 2, rel_int)
705
-    
706
-    if(length(nucs) == 1){
707
-        dfr <- data.frame(a = nacrel[nucs, ])
708
-    }else{
709
-        dfr <- data.frame(a = apply(nacrel[nucs, ], 2, sum))
710
-    }
711
-    
712
-    if(n > 1)
713
-    {
714
-        for(i in 2:n)
715
-        {
716
-            nac <- object@nac[[i]][, x]
717
-            nacrel <- apply(nac, 2, rel_int)
718
-            if(length(nucs) == 1){
719
-                dfr[, i] <- nacrel[nucs, ]
720
-            }else{
721
-                dfr[, i] <- apply(nacrel[nucs, ], 2, sum)
722
-            }
723
-        }        
724
-    }
725
-    
726
-    maxY = round(1.4 * max(dfr), 3)
727
-    cols <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
728
-                    "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A")
729
-    
730
-    nv <- paste(iupac_chars[nucs], collapse = "")
731
-    
732
-    plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
733
-        ylab="Nucleotide fequency", xlab="sequence position",
734
-        main=paste("Position wise Nucleotide frequency:  '",
735
-                                            nv, "'", sep=""))
736
-  
737
-    for(i in fvec)
738
-        lines(x, dfr[, i], col=cols[i %% 10], lwd=2)
739
-    
740
-    legend("top", ncol=6, 
741
-        lwd=c(1, 1, 2, 1, 1), col=cols[fvec %% 10], xjust=0.5,
742
-        legend=probeLabel(object), bty="n", cex=0.8)
743
-    
744
-    return(invisible())
745
-})
746
-
747
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
748
-## End: Nucleotide frequency related functions
749
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
750
-
751
-
752
-
753
-setMethod("plotKmerCount", "Fastqq",
754
-                    function(object, index, mxey, main="K-mer count", ...)
755
-{
756
-    n <- nFiles(object)
757
-    if(missing(index))
758
-    {
759
-        index <- 1:n
760
-    }else{
761
-        if(!is.numeric(index))
762
-            stop("'index' must be numeric.")
763
-        
764
-        index <- sort(unique(as.integer(index)))
765
-        
766
-        if(any(index < 0) || any(index > n))
767
-                stop("'index' must be in 1, .., ", n, " ( = nFiles).")
768
-    }
769
-    
770
-    if(!missing(mxey))
771
-    {
772
-        if(!is.numeric(mxey))
773
-            stop("'mxey' must be numeric.")
774
-        mxey <- as.integer(mxey)
775
-        
776
-        if(mxey<1)
777
-            stop("'mxey' must be positive.")
778
-    }
779
-    
780
-    cols <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
781
-                "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A")  
782
-    pk <- 6
783
-    if(object@k <= pk)
784
-    {
785
-        pk <- object@k
786
-        x <- 1:(4^pk)
787
-        
788
-        if(missing(mxey))
789
-            lg2y <- floor(log2(1.2 * (max(object@kmer))))
790
-        else
791
-            lg2y <- mxey
792
-        
793
-        maxY <- 2^lg2y
794
-
795
-        
796
-        plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
797
-             ylab="K-mer count", xlab="K-mer index", main=main,
798
-                                                axes=FALSE, ...)
799
-    
800
-    for(i in index)
801
-        lines(x, object@kmer[, i], col=cols[i], lwd=2)
802
-    }
803
-    else
804
-    {
805
-        x <- 1:(4^pk)
806
-        melt_factor <- as.integer(4^(object@k - pk))
807
-        
808
-        y_factor <- max(.Call("melt_vector", object@kmer[, 1], melt_factor,
809
-                        PACKAGE="seqTools")) / max(object@kmer[, 1])
810
-    
811
-        if(missing(mxey))
812
-            lg2y <- floor(log2(1.2 * (max(object@kmer)) * y_factor))
813
-        else
814
-            lg2y <- mxey
815
-        maxY <- 2^lg2y
816
-    
817
-        plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
818
-            ylab="K-mer count", xlab="K-mer index", 
819
-                            main=main, axes=FALSE, ...)
820
-    
821
-        for(i in index)
822
-        {
823
-            lines(x, .Call("melt_vector", object@kmer[, i], melt_factor,
824
-                        PACKAGE="seqTools"), col=cols[i], lwd=2)
825
-        }
826
-    }
827
-    axis(side=1, at=0:4 * 4^(pk - 1), labels=c("A", "C", "G", "T", ""))
828
-  
829
-    axis(side=2, at=c(0, maxY),
830
-                    labels=c(0, paste("2^", lg2y, sep="")), las=1)
831
-  
832
-    legend("right", lwd=2, col=cols[index], 
833
-                legend=probeLabel(object)[index], bty="n", cex=0.8)
834
-  
835
-    return(invisible())
836
-})
837
-
838
-
839
-
840
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
841
-## Merging and melting Fastqq objects
842
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
843
-
844
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
845
-setMethod("mergeFastqq", "Fastqq", function(lhs, rhs)
846
-{
847
-    
848
-    res <- new("Fastqq")
849
-    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
850
-    ## Simple concatenations
851
-    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
852
-    
853
-    res@filenames <- c(lhs@filenames, rhs@filenames)
854
-    res@nFiles <- lhs@nFiles+rhs@nFiles
855
-    res@nReads <- c(lhs@nReads, rhs@nReads)
856
-    res@nN <- c(lhs@nN, rhs@nN)
857
-    res@seqLenCount <- cbind(lhs@seqLenCount, rhs@seqLenCount)
858
-    res@gcContent <- cbind(gcContentMatrix(lhs), gcContentMatrix(rhs))
859
-    res@seqLen <- cbind(lhs@seqLen, rhs@seqLen)
860
-    
861
-    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
862
-    ## Singularize probeLabel
863
-    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
864
-    
865
-    res@probeLabel <- c(lhs@probeLabel, rhs@probeLabel)  
866
-    if(any(table(res@probeLabel) > 1))
867
-    {
868
-        message("[mergeFastqq] Singularizing probeLabel (new suffix).")
869
-        res@probeLabel <- paste(1:res@nFiles, res@probeLabel, sep="_")
870
-    }
871
-    
872
-    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
873
-    ## Eventually resize arrays when lhs and rhs have different maxSeqLen
874
-    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
875
-    if(lhs@maxSeqLen > rhs@maxSeqLen)
876
-    {
877
-        message("[mergeFastqq] Resizing rhs.")
878
-        msl <- lhs@maxSeqLen
879
-        
880
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
881
-        ## nac
882
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
883
-        
884
-        new_dim <- as.integer(c(nrow(rhs@nac), msl))
885
-        rhs_nac <- .Call("r_enlarge_int_mat", rhs@nac, new_dim, 
886
-                                                    PACKAGE="seqTools")
887
-        res@nac <- c(lhs@nac, rhs_nac)
888
-        
889
-        # seqLenCount
890
-        new_dim <- as.integer(c(msl, rhs@nFiles))
891
-        rhs_seqLenCount <- .Call("r_enlarge_int_mat", 
892
-                                rhs@seqLenCount, new_dim, PACKAGE="seqTools")
893
-        
894
-        res@seqLenCount <- c(lhs@seqLenCount, rhs_seqLenCount)
895
-        
896
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
897
-        ## phred
898
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
899
-        
900
-        new_dim <- as.integer(nrow(rhs@phred), msl)
901
-        rhs_phred_list <- list()
902
-        
903
-        for(i in 1:nFiles(rhs)){
904
-                rhs_phred_list[[i]] <- .Call("r_enlarge_int_mat", 
905
-                                rhs@phred[[i]], new_dim, PACKAGE="seqTools")
906
-        }
907
-        
908
-        res@phred <- c(lhs@phred, rhs_phred_list)
909
-        res@maxSeqLen <- lhs@maxSeqLen
910
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
911
-        
912
-    } else if(rhs@maxSeqLen > lhs@maxSeqLen)
913
-    {
914
-        
915
-        message("[mergeFastqq] Resizing lhs.")
916
-        msl <- rhs@maxSeqLen
917
-        
918
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
919
-        ## nac
920
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
921
-        
922
-        new_dim <- as.integer(c(nrow(lhs@nac), msl))
923
-        
924
-        lhs_nac <- .Call("r_enlarge_int_mat",
925
-                                lhs@nac, new_dim, PACKAGE="seqTools")
926
-        
927
-        res@nac <- c(rhs@nac, lhs_nac)
928
-        
929
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
930
-        ## seqLenCount
931
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
932
-        
933
-        new_dim <- as.integer(c(msl, lhs@nFiles))
934
-        
935
-        lhs_seqLenCount <- .Call("r_enlarge_int_mat",
936
-                    lhs@seqLenCount, new_dim, PACKAGE="seqTools")
937
-        
938
-        res@seqLenCount <- c(rhs@seqLenCount, lhs_seqLenCount)
939
-        
940
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
941
-        ## phred
942
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
943
-        
944
-        new_dim <- as.integer(nrow(lhs@phred), msl)
945
-        lhs_phred_list <- list()
946
-        
947
-        for(i in 1:nFiles(lhs))
948
-        {
949
-            lhs_phred_list[[i]] <- .Call("r_enlarge_int_mat", 
950
-                    lhs@phred[[i]], new_dim, PACKAGE="seqTools")
951
-        }
952
-        res@phred <- c(rhs@phred, lhs_phred_list)
953
-        res@maxSeqLen <- rhs@maxSeqLen
954
-        
955
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
956
-        
957
-    } else { 
958
-        
959
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
960
-        ## rhs@maxSeqLen == lhs@maxSeqLen
961
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
962
-        
963
-        res@maxSeqLen = lhs@maxSeqLen
964
-        res@nac <- c(lhs@nac, rhs@nac)
965
-        res@phred <- c(lhs@phred, rhs@phred)
966
-        
967
-        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
968
-        
969
-    }
970
-    
971
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
972
-    ## Eventually melt down k-mer count matrix
973
-    ## when lhs and rhs have different k
974
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
975
-    res@k <- pmin(lhs@k, rhs@k)
976
-    
977
-    kml <- kmerCount(lhs)
978
-    if(lhs@k > res@k)
979
-    {
980
-        kml <- .Call("melt_kmer_matrix",
981
-                    kml, c(lhs@k, res@k), PACKAGE="seqTools")
982
-    }
983
-    
984
-    kmr <- kmerCount(rhs)
985
-    if(rhs@k > res@k)
986
-    {
987
-        kmr <- .Call("melt_kmer_matrix",
988
-                    kmr, c(rhs@k, res@k), PACKAGE="seqTools")
989
-    }
990
-    res@kmer <- cbind(kml, kmr)
991
-    
992
-    fkml <- lhs@firstKmer
993
-    if(lhs@k > res@k)
994
-    {
995
-        fkml <- .Call("melt_kmer_matrix",
996
-                    fkml, c(lhs@k, res@k), PACKAGE="seqTools")
997
-    }
998
-    fkmr <- rhs@firstKmer
999
-    
1000
-    if(rhs@k > res@k)
1001
-    {
1002
-        fkmr <- .Call("melt_kmer_matrix",
1003
-                    fkmr, c(rhs@k, res@k), PACKAGE="seqTools")
1004
-    }
1005
-    res@firstKmer <- cbind(fkml, fkmr)
1006
-    
1007
-    return(res)
1008
-})
1009
-
1010
-
1011
-setMethod("meltDownK", "Fastqq", function(object, newK)
1012
-{
1013
-  
1014
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1015
-    if(!is.numeric(newK))
1016
-        stop("'newK' must be numeric")
1017
-    
1018
-    newK <- as.integer(newK)
1019
-    
1020
-    if(length(newK) != 1)
1021
-        stop("'newK' must have length 1.")
1022
-    
1023
-    if(newK < 1 || newK >=  getK(object))
1024
-        stop("'newK' must be >= 1 and <= ", getK(object), ".")
1025
-    
1026
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1027
-    res <- new("Fastqq")
1028
-    res@filenames <- object@filenames
1029
-    res@nFiles <- object@nFiles
1030
-    res@k <- newK
1031
-    res@maxSeqLen <- object@maxSeqLen
1032
-    
1033
-    res@kmer <- .Call("melt_kmer_matrix",
1034
-        object@kmer, c(getK(object), newK), PACKAGE="seqTools")
1035
-    
1036
-    res@firstKmer <- .Call("melt_kmer_matrix",
1037
-        object@firstKmer, c(getK(object), newK), PACKAGE="seqTools")
1038
-    
1039
-    res@nReads <- object@nReads
1040
-    res@nN <- object@nN
1041
-    res@seqLenCount <- object@seqLenCount
1042
-    res@gcContent <- object@gcContent
1043
-    res@nac <- object@nac
1044
-    res@phred <- object@phred
1045
-    res@seqLen <- object@seqLen
1046
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
1047
-  
1048
-    return(res)
1049
-})
1050
-
1051
-
1052
-listMelt <- function(x, oldK, newK)
1053
-{
1054
-    f <- function(x) .Call("melt_kmer_matrix", x, 
1055
-            as.integer(c(oldK, newK)), PACKAGE="seqTools")
1056
-  
1057
-    return(lapply(x, f))
1058
-}
1059
-
1060
-
1061
-
1062
-setMethod("[", "Fastqq", function(x, i, j, drop="missing")
1063
-{
1064
-  
1065
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
1066
-    res <- new("Fastqq")
1067
-    res@filenames <- x@filenames[i]
1068
-    res@probeLabel <- x@probeLabel[i]
1069
-    res@nFiles <- length(i)
1070
-    res@k <- x@k
1071
-    res@seqLen <- x@seqLen[, i, drop=FALSE]
1072
-    res@maxSeqLen <- max(res@seqLen[2, ])
1073
-    res@kmer <- x@kmer[, i, drop=FALSE]
1074
-    res@firstKmer <- x@firstKmer[, i, drop=FALSE]
1075
-    res@nN <- x@nN[i]
1076
-    res@seqLenCount <- x@seqLenCount[, i, drop=FALSE]
1077
-    res@gcContent <- x@gcContent[, i, drop=FALSE]
1078
-    res@nac <- x@nac[i]
1079
-    res@phred <- x@phred[i]
1080
-    res@nReads <- x@nReads[i]
1081
-    res@collectTime <- x@collectTime
1082
-    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
1083
-    
1084
-    return(res)
1085
-})
1086
-
1087
-
1088
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1089
-## Calculation of distance matrix based on Canberra distance
1090
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1091
-
1092
-setMethod("cbDistMatrix", "Fastqq", 
1093
-        function(object, nReadNorm=max(nReads(object)))
1094
-{
1095
-    if(!is.numeric(nReadNorm))
1096
-        stop("'nReadNorm' must be numeric.")
1097
-    nReadNorm <- as.integer(nReadNorm)
1098
-    
1099
-    if(nReadNorm < max(nReads(object)))
1100
-        stop("'nReadNorm' must be greater than all nRead.")
1101
-    
1102
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1103
-    ## Column-wise normalizing read counts (by upscaling) 
1104
-    ## so that column sums become nearly equal in order to
1105
-    ## compensate sequencing depth artifacts in 
1106
-    ## Canberra distance values
1107
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1108
-    
1109
-    scale <- nReadNorm/nReads(object)
1110
-    
1111
-    scaled <- .Call("scale_kmer_matrix",
1112
-            kmerCount(object), scale, PACKAGE="seqTools")
1113
-    
1114
-    colnames(scaled) <- object@probeLabel
1115
-    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1116
-  
1117
-    return(.Call("cb_distance_matrix", scaled, PACKAGE="seqTools"))
1118
-})
1119
-
1120
-
1121
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1122
-## END OF FILE
1123
-## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##