Browse code

Reimport with .R suffix

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqTools@128660 bc3139a8-67e5-0310-9ffc-ced21a209358

Wolfgang Kaisers authored on 13/04/2017 15:56:37
Showing1 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,1144 @@
1
+
2
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
3
+##                                                                           ##
4
+##  Project   :   seqTools                                                   ##
5
+##  Created   :   26.August.2013                                             ##
6
+##  Author    :   W. Kaisers                                                 ##
7
+##  Content   :   Doing some diagnostic and interventional tasks on fastq    ##
8
+##                and fasta                                                  ##
9
+##                esp. DNA k-mer counts.                                     ##
10
+##  Version   :   0.99.34                                                    ##
11
+##                                                                           ##
12
+##  Changelog :                                                              ##
13
+##  26.08.13  :   0.0.1 Project created                                      ##
14
+##  03.09.13  :   0.0.6 C-Code valgrind tested                               ##
15
+##  27.09.13  :   0.1.0 Added fastqDnaKmers                                  ##
16
+##  14.10.13  :   0.1.1 Added C-library for parsing gz fasta and fastq files ##
17
+##  17.10.13  :   0.1.3 C-Wrapper for fastq working.                         ##
18
+##  17.10.13  :   0.1.6 First version of R package                           ##
19
+##  21.10.13  :   0.3.0 New C library for fastq parsing                      ##
20
+##  28.10.13  :   0.4.0 Added fastq-loc functions                            ##
21
+##  29.10.13  :   0.4.4 seqTools C-code valgrind tested.                     ##
22
+##  01.11.13  :   0.5.0 Distance matrices implemented                        ##
23
+##  02.11.13  :   0.5.1 First working version with clustering based on       ##
24
+##                      K-mers                                               ##
25
+##  07.11.13  :   0.5.4 countFastaKmers now resizes arrays automatically     ##
26
+##  09.11.13  :   0.6.0 count_fastq now resizes arrays automatically         ##
27
+##  11.11.13  :   0.6.5 Added fastq simulation functions                     ##
28
+##  19.11.13  :   0.7.0 Added trimFastq function                             ##
29
+##  30.11.13  :   0.9.0 Separated R source files                             ##
30
+##  22.12.13  :   0.99.2 Added '['-operator for Fastqq class                 ##
31
+##  19.01.14  :   0.99.7 Added zlib version control for correction of        ##
32
+##                       gzBuffer                                            ##
33
+##                       error                                               ##
34
+##                        + checks: cran win-builder + valgrind              ##
35
+##  21.05.14  :   0.99.34 Corrected error in count_fasta_Kmers               ##
36
+##                        which freezed function                             ##
37
+##  07.10.15  :   1.2.4  Added kmerSvd function                              ##
38
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
39
+
40
+.onUnload <- function(libpath) { library.dynam.unload("seqTools", libpath) }
41
+
42
+## see: http://bioconductor.org/developers/how-to/coding-style/
43
+
44
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
45
+## Data collection functions:
46
+## Fastqq,  fastqKmerLocs,  fastqKmerSubsetLocs
47
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
48
+
49
+fastqq <- function(filenames, k=6, probeLabel)
50
+{
51
+    k <- as.integer(k)
52
+    
53
+    tl <- list()
54
+    tl$start <- Sys.time()
55
+    filenames <- path.expand(filenames)
56
+    
57
+    res <- .Call("count_fastq", filenames, k, PACKAGE="seqTools")
58
+    
59
+    tl$end <- Sys.time()
60
+    res@collectTime <- tl
61
+    
62
+    # Correct minSeqLen when empty files are counted
63
+    if(any(res@nReads==0))
64
+        res@seqLen[1,res@nReads==0] <- 0
65
+    
66
+    if(!missing(probeLabel))
67
+    {
68
+        if(length(probeLabel) == res@nFiles)
69
+            res@probeLabel <- as.character(probeLabel) 
70
+        else{
71
+            warning("[Fastqq] probeLabel and filenames must have equal length.")
72
+            res@probeLabel <- as.character(1:res@nFiles)
73
+        }
74
+    }else{
75
+        res@probeLabel <- as.character(1:res@nFiles)
76
+    }
77
+    return(res)
78
+}
79
+
80
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
81
+## fastq K-mer locs
82
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
83
+
84
+fastqKmerLocs <- function(filenames, k=4)
85
+{
86
+    if(!is.numeric(k))
87
+        stop("'k' must be numeric.")
88
+    k <- as.integer(k)
89
+    
90
+    if( (k < 0) || (k > max_k) )
91
+        stop("'k' must be in range 0, ... , 16.")
92
+    
93
+    return(.Call("fastq_Kmer_locs", filenames, k, PACKAGE="seqTools"))
94
+}
95
+
96
+
97
+fastqKmerSubsetLocs <- function(filenames, k=4, kIndex)
98
+{
99
+    # Returns list with matrix elements.
100
+    if(!is.numeric(k))
101
+        stop("'k' must be numeric.")
102
+    
103
+    k <- as.integer(k)
104
+    
105
+    if( (k < 0) || (k > max_k) )
106
+        stop("'k' must be in range 0, ... , ", max_k, ".")
107
+    
108
+    if(!is.numeric(kIndex))
109
+        stop("'kIndex' must be numeric.")
110
+    kIndex <- as.integer(kIndex)
111
+    
112
+    if(any(kIndex < 0))
113
+        stop("No negative 'kIndex' values allowed.")
114
+    
115
+    if(any(kIndex > (4^k)) )
116
+        stop("'kIndex' out of range (>4^k).")
117
+    
118
+    return(.Call("fastq_KmerSubset_locs",
119
+                                filenames, k, kIndex, PACKAGE="seqTools"))
120
+}
121
+
122
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
123
+## End: Data collection functions.
124
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
125
+
126
+
127
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
128
+## Standard slot accessor functions
129
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
130
+
131
+
132
+setMethod("fileNames", "Fastqq", function(object)
133
+                                            {return(object@filenames)})
134
+
135
+setMethod("collectTime", "Fastqq", function(object)
136
+                                            {return(object@collectTime)})
137
+
138
+setMethod("collectDur", "Fastqq", function(object) {
139
+return(as.numeric(difftime(object@collectTime$end, object@collectTime$start,
140
+                                                    units = "secs")))
141
+})
142
+
143
+setMethod("getK", "Fastqq", function(object) {return(object@k)})
144
+
145
+setMethod("nFiles", "Fastqq", function(object) {return(object@nFiles)})
146
+
147
+setMethod("nNnucs", "Fastqq", function(object) {return(object@nN)})
148
+
149
+setMethod("nReads", "Fastqq", function(object) {return(object@nReads)})
150
+
151
+setMethod("maxSeqLen", "Fastqq", function(object) {return(object@maxSeqLen)})
152
+
153
+setMethod("seqLenCount", "Fastqq", function(object)
154
+{
155
+    res<-object@seqLenCount
156
+    colnames(res) <- object@probeLabel
157
+    return(res)
158
+})
159
+
160
+setMethod("nucFreq", "Fastqq", function(object, i)
161
+{
162
+    if(missing(i))
163
+        stop("Argument 'i' is not optional.")
164
+    
165
+    if(!is.numeric(i))
166
+        stop("'i' must be numeric.")
167
+    
168
+    i <- as.integer(i)
169
+    if( (i < 1) || (i > object@nFiles) )
170
+        stop("'i' must be >0 and < nFiles.")
171
+    
172
+    return(object@nac[[i]])
173
+})
174
+
175
+setMethod("gcContent", "Fastqq", function(object, i)
176
+{
177
+    if(missing(i))
178
+        stop("Argument 'i' is not optional.")
179
+    
180
+    if(!is.numeric(i))
181
+        stop("'i' must be numeric.")
182
+    
183
+    i <- as.integer(i)
184
+    if( (i < 1) || (i > object@nFiles))
185
+        stop("'i' must be >0 and < nFiles.")
186
+    
187
+    return(object@gcContent[, i])
188
+})
189
+
190
+
191
+setMethod("gcContentMatrix", "Fastqq", function(object)
192
+{
193
+    gcc <- object@gcContent
194
+    colnames(gcc) <- object@probeLabel
195
+    return(gcc)
196
+})
197
+
198
+setMethod("seqLen", "Fastqq", function(object)
199
+{
200
+    sql <- object@seqLen
201
+    colnames(sql) <- object@probeLabel
202
+    return(sql)
203
+})
204
+
205
+setMethod("kmerCount", "Fastqq", function(object)
206
+{
207
+    kmer <- object@kmer
208
+    colnames(kmer) <- object@probeLabel
209
+    return(kmer)
210
+})
211
+
212
+
213
+setMethod("probeLabel", "Fastqq", function(object){return(object@probeLabel)})
214
+setReplaceMethod("probeLabel", "Fastqq", function(object, value)
215
+{
216
+    if(length(value) != nFiles(object))
217
+        stop("'value' must have length ", nFiles(object), ".")
218
+    
219
+    val <- as.character(value)
220
+    if(any(table(val)) > 1)
221
+    {
222
+        warning("[probeLabel <- .Fastqq] probeLabel unique suffix added.")
223
+        val <- paste(1:nFiles(object), val, sep="_")
224
+    }
225
+    object@probeLabel <- val
226
+    
227
+    return(object)
228
+})
229
+
230
+setMethod("phred", signature="Fastqq", definition=function(object, i)
231
+{
232
+    if(missing(i))
233
+        stop("Argument 'i' is not optional.")
234
+    
235
+    if(!is.numeric(i))
236
+        stop("'i' must be numeric.")
237
+    
238
+    i <- as.integer(i)
239
+    if( (i < 1) || (i > object@nFiles) )
240
+        stop("'i' must be >0 and < nFiles.")
241
+    
242
+    return(object@phred[[i]])
243
+})
244
+
245
+
246
+setMethod("kmerSvd", "Fastqq", function(object)
247
+{
248
+    # x = Fastqq
249
+    km <- t(log(object@kmer + 1))
250
+    # Column wise centerin
251
+    kmc <- apply(km, 2, function(x) {return(scale(x, center=TRUE, scale=FALSE))})
252
+    ksv <- svd(kmc)
253
+    # Extract Left singular vectors and Singular values
254
+    return(list(lsv=ksv$u, sv=ksv$d))
255
+})
256
+
257
+
258
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
259
+## End: Standard slot accessor functions
260
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
261
+
262
+
263
+setMethod("show", "Fastqq", function(object)
264
+{
265
+    bm <- Sys.localeconv()[7]
266
+    w <- 20
267
+    r <- "right"
268
+    cat("Class       : ", format(class(object), w=w, j=r)                              , "\n", sep="")
269
+    cat("nFiles      : ", format(format(nFiles(object)          , big.m=bm), w=w, j=r) , "\n", sep="")
270
+    cat("maxSeqLen   : ", format(format(maxSeqLen(object)       , big.m=bm), w=w, j=r) , "\n", sep="")
271
+    cat("k (Kmer len): ", format(format(getK(object)            , big.m=bm), w=w, j=r) , "\n", sep="")
272
+    cat("\n")
273
+    cat("nReads      : ", format(format(sum(as.numeric(nReads(object)))    , big.m=bm), w=w, j=r)   , "\n", sep="")
274
+    cat("nr  N   nuc : ", format(format(sum(nNnucs(object))     , big.m=bm), w=w, j=r) , "\n", sep="")
275
+    cat("Min seq len : ", format(format(min(seqLen(object)[1, ]), big.m=bm), w=w, j=r) , "\n", sep="")
276
+    cat("Max seq len : ", format(format(max(seqLen(object)[2, ]), big.m=bm), w=w, j=r) , "\n", sep="")
277
+    return(invisible())
278
+})
279
+
280
+
281
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
282
+## Phred related functions
283
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
284
+
285
+setMethod("phredQuantiles", "Fastqq", function(object, quantiles, i, ...)
286
+{
287
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
288
+    # Checking arguments
289
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
290
+    
291
+    ## Check quantiles argument
292
+    if(missing(quantiles))
293
+        stop("'quantiles' argument is not optional")
294
+    
295
+    if(!is.numeric(quantiles))
296
+        stop("Quantiles must be numeric.")
297
+    
298
+    if(!(all(quantiles >= 0) & all(quantiles <= 1) ))
299
+        stop("All quantiles mustbe in [0, 1]")
300
+    
301
+    quantiles <- sort(unique(round(quantiles, 2)))
302
+    
303
+    ## Check 'i' argument
304
+    if(missing(i))
305
+        stop("'i' argument is not optional")
306
+    
307
+    if(!is.numeric(i))
308
+        stop("'i' must be numeric.")
309
+    
310
+    if(length(i) > 1)
311
+        stop("'i' must have length 1")
312
+    
313
+    i <- as.integer(i)
314
+    if( (i < 1) || (i > object@nFiles) )
315
+            stop("'i' must be >0 and <nFiles.")
316
+
317
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
318
+    # Count qual values for each sequence position
319
+    # Convert integer counts into column-wise relative values
320
+    # Maximum counted read length
321
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
322
+    vec <- 1:seqLen(object)[2, i]
323
+    qrel <- as.data.frame(apply(object@phred[[i]][, vec], 2, rel_int))
324
+    names(qrel) <- vec
325
+    
326
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
327
+    # Walk through each column and extract row number
328
+    # for given quantile values
329
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
330
+    res <- .Call("get_column_quantiles", quantiles, qrel, PACKAGE="seqTools")
331
+    return(res)
332
+})
333
+
334
+
335
+setMethod("plotPhredQuant", "Fastqq", function(object, i, main, ...){
336
+    if(!is.numeric(i))
337
+        stop("'i' must be numeric.")
338
+    
339
+    i <- as.integer(i)
340
+    
341
+    if( (i < 1) || (i > object@nFiles) )
342
+        stop("'i' must be >0 and <nFiles.")
343
+    
344
+    quant <- c(0.1, 0.25, 0.5, 0.75, 0.9)
345
+    cols <- c("#1F78B4", "#FF7F00", "#E31A1C", "#FF7F00", "#1F78B4") 
346
+    qq <- phredQuantiles(object, quant, i)
347
+    maxQ = floor(1.2*max(qq))
348
+    xv <- 1:ncol(qq)
349
+    
350
+    if(missing(main))
351
+    {
352
+        main <- paste("Position wise Phred Quantiles (", 
353
+                                probeLabel(object)[i], ")", sep="")
354
+    }
355
+
356
+    plot(xv, xv, ylim=c(0, maxQ), type="n", bty="n", las=1,
357
+        ylab = "Phred score", xlab="Sequence position", main=main, ...)
358
+     
359
+    lines(xv, qq[1, ], col=cols[1], lty=2)
360
+    lines(xv, qq[2, ], col=cols[2], lty=1)
361
+    lines(xv, qq[3, ], col=cols[3], lwd=2)
362
+    lines(xv, qq[4, ], col=cols[4], lty=1)
363
+    lines(xv, qq[5, ], col=cols[5], lty=2)
364
+    
365
+    legend("top", ncol=6, lty=c(2, 1, 1, 1, 2),
366
+            lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5,
367
+            legend=c("10%", "25%", "50%", "75%", "90%"), bty="n", cex=0.8)
368
+    return(invisible())
369
+})
370
+
371
+
372
+
373
+
374
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
375
+## Global Phred content functions
376
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
377
+setMethod("phredDist", "Fastqq", function(object, i){
378
+    idx <- 1:nFiles(object)
379
+    
380
+    if(missing(i))
381
+        i <- idx
382
+    else
383
+    {
384
+        if(!is.numeric(i))
385
+            stop("'i' must be numeric.")
386
+        
387
+        if(!all(is.element(i, idx)))
388
+            stop("'i' is out of range.")
389
+    }
390
+    
391
+    phred <- Reduce("+", object@phred[i])
392
+    phred <- matrix(as.numeric(phred), nrow=nrow(phred))
393
+    
394
+    phred_vals <- apply(phred, 1, sum)
395
+    phred_dist <- phred_vals/sum(phred_vals)
396
+    names(phred_dist) <- rownames(object@phred[[1]])
397
+    
398
+    return(phred_dist)    
399
+})
400
+
401
+
402
+setMethod("plotPhredDist", "Fastqq", function(object, i, maxp=45, col, ...)
403
+{
404
+    if(!is.numeric(maxp))
405
+        stop("maxp must be numeric")
406
+    
407
+    if(!is.integer(maxp))
408
+        maxp<-as.integer(maxp)
409
+    
410
+    if(maxp <= 0)
411
+        stop("maxp must be >=0")
412
+    
413
+    if(missing(col))
414
+        col <- topo.colors(10)[3]
415
+    
416
+    phred <- phredDist(object, i)
417
+    maxy <- ceiling(max(phred) * 5) / 5
418
+    x <- 1:maxp
419
+    xmax <- 10 * (ceiling(maxp / 10))
420
+    
421
+    plot(x, phred[x], ylim=c(0, maxy), xlim=c(0, xmax), type="l", lwd=2,
422
+            col=col, yaxt="n", bty="n", xlab="Phred value",
423
+            ylab="Content (%)", ...)
424
+    
425
+    ylb <- 0:(10 * maxy) / 10
426
+    axis(side=2, at=ylb, labels=100 * ylb, las=1)
427
+    return(invisible())
428
+})
429
+
430
+
431
+# + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
432
+# Not exported:
433
+# + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
434
+pd_l10 <- function(x){ x <- phredDist(x); return(sum(x[1:10]) / sum(x))}
435
+
436
+
437
+setMethod("propPhred","Fastqq",function(object, greater=30, less=93)
438
+{
439
+    if(!is.numeric(greater))
440
+        stop("'greater' must be numeric.")
441
+    
442
+    if(length(greater) != 1)
443
+        stop("'greater' must have length 1.")
444
+    
445
+    if(!is.numeric(less))
446
+        stop("'less' must be numeric.")
447
+    
448
+    if(length(less) != 1)
449
+        stop("'less must have length 1.")
450
+    
451
+    ## + + + + + + + + + + + + + + + + + + + + + + ##
452
+    ## greater and less shall be used as
453
+    ## array indices: increase greater
454
+    ## + + + + + + + + + + + + + + + + + + + + + + ##
455
+    greater<-as.integer(greater+1)
456
+    less<-as.integer(less)
457
+    
458
+    if(greater < 1)
459
+        stop("'greater' must be >=0.")
460
+    
461
+    if(less > 93)
462
+        stop("'less' must be < 94.")
463
+    
464
+    if(greater >= less)
465
+        stop("'greater' must be <= 'less'")
466
+    
467
+    n <- nFiles(object)
468
+    res <- numeric(n)
469
+    for(i in 1:n)
470
+    {
471
+        pd <- phredDist(object, i)
472
+        res[i] <- sum(pd[greater:less])
473
+    }
474
+    names(res) <- probeLabel(object)
475
+    return(res)
476
+})
477
+
478
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
479
+## End: Global Phred content functions
480
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
481
+
482
+setMethod("mergedPhred", "Fastqq", function(object){
483
+    sql <- seqLen(object)
484
+    maxSeqLen <- max(sql[2, ])
485
+    
486
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
487
+    ## as.numeric: Sum on integer is likely to exceed 
488
+    ##                 maximal 32-bit integer  values
489
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
490
+    
491
+    if(sql[2, 1] < maxSeqLen)
492
+    {
493
+        mp <- as.numeric(.Call("r_enlarge_int_mat", object@phred[[1]], 
494
+                c(nrow(object@phred[[1]]), maxSeqLen), PACKAGE="seqTools"))
495
+    }else{
496
+        mp <- as.numeric(object@phred[[1]])
497
+    }
498
+    
499
+    
500
+    n <- nFiles(object)
501
+    if(n > 1)
502
+    {
503
+        for(i in 2:n)
504
+        {
505
+            if(sql[2, i] < maxSeqLen)  
506
+            {
507
+                mp <- mp + as.numeric(.Call("r_enlarge_int_mat",
508
+                            object@phred[[i]],
509
+                            c(nrow(object@phred[[i]]), maxSeqLen), 
510
+                            PACKAGE="seqTools"))
511
+            }else{
512
+                mp <- mp + as.numeric(object@phred[[i]])
513
+            }
514
+        }
515
+    }
516
+    mp <- matrix(mp, ncol = maxSeqLen)
517
+    rownames(mp) <- rownames(object@phred[[1]])
518
+    colnames(mp) <- 1:maxSeqLen
519
+    return(mp)
520
+})
521
+
522
+setMethod("mergedPhredQuantiles", "Fastqq", function(object, quantiles)
523
+{
524
+    if(!(all(quantiles >= 0) & all(quantiles <= 1)) )
525
+        stop("[mergedPhredQuantiles.Fastqq] all quantiles mustbe in [0, 1]")
526
+    quantiles <- sort(unique(round(quantiles, 2)))
527
+  
528
+    sql <- seqLen(object)
529
+    maxSeqLen <- max(sql[2, ])
530
+    
531
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
532
+    ## Count qual values for each sequence position
533
+    ## Convert counts into column-wise relative values
534
+    ## Maximum counted read length
535
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
536
+    vec <- 1:maxSeqLen
537
+    mrg <- mergedPhred(object)
538
+    qrel <- as.data.frame(apply(mrg[, vec], 2, rel_real))
539
+    names(qrel)
540
+    names(qrel) <- vec
541
+    
542
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
543
+    ## Walk through each column and extract row number
544
+    ## for given quantile values
545
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
546
+    res <- .Call("get_column_quantiles", quantiles, qrel, PACKAGE="seqTools")
547
+    return(res)
548
+})
549
+
550
+
551
+setMethod("plotMergedPhredQuant", "Fastqq", function(object, main, ...)
552
+{
553
+    quant <- c(0.1, 0.25, 0.5, 0.75, 0.9)
554
+    cols <- c("#1F78B4", "#FF7F00", "#E31A1C", "#FF7F00", "#1F78B4")
555
+    qq <- mergedPhredQuantiles(object, quant)
556
+    maxQ = floor(1.2*max(qq))
557
+    xv <- 1:ncol(qq)
558
+    
559
+    if(missing(main))
560
+        main <- paste("Merged position wise Phred Quantiles.", sep = "")
561
+    
562
+    plot(xv, xv, ylim=c(0, maxQ), type="n", bty="n", las=1,
563
+        ylab="Phred score", xlab="Sequence position", main=main, ...)
564
+    
565
+    
566
+    lines(xv, qq[1, ], col=cols[1], lty=2)
567
+    lines(xv, qq[2, ], col=cols[2], lty=1)
568
+    lines(xv, qq[3, ], col=cols[3], lwd=2)
569
+    lines(xv, qq[4, ], col=cols[4], lty=1)
570
+    lines(xv, qq[5, ], col=cols[5], lty=2)
571
+    
572
+    legend("top", ncol=6, lty=c(2, 1, 1, 1, 2), 
573
+            lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5, 
574
+            legend=c("10%", "25%", "50%", "75%", "90%"), bty="n", cex=0.8)
575
+    
576
+    return(invisible()) 
577
+})
578
+
579
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
580
+## End: Phred related functions
581
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
582
+
583
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
584
+## Nucleotide frequency related functions
585
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
586
+
587
+
588
+setMethod("plotNucFreq", "Fastqq", function(object, i, main, maxx, ...)
589
+{
590
+    if(!is.numeric(i))
591
+        stop("'i' must be numeric.")
592
+    
593
+    i <- as.integer(i)
594
+    
595
+    if( (i < 1) || (i > object@nFiles) )
596
+        stop("'i' must be >0 and <nFiles.")
597
+    
598
+    maxSeqlen <- max(seqLen(object)[2, ])
599
+    if(missing(maxx))
600
+    {
601
+        maxx <- maxSeqlen
602
+    }
603
+    else
604
+    {
605
+        if(!is.numeric(maxx))
606
+            stop("'maxx' must be numeric.")
607
+        maxx <- as.integer(maxx)
608
+        if(maxx < 1)
609
+            stop("'maxx' must be >0.")
610
+        if(maxx > maxSeqlen)
611
+            maxx <- maxSeqlen
612
+    }
613
+    
614
+    # Skip extra line
615
+    x <- 1:maxx
616
+    nac <- object@nac[[i]][1:4, x]
617
+    nacrel <- apply(nac, 2, rel_int)
618
+    maxY = round(1.4 * max(nacrel), 1)
619
+    
620
+    # Maximum counted read length
621
+    nacrel <- apply(nac, 2, rel_int)
622
+    cols <- c("#1F78B4", "#33A02C", "#E31A1C", "#FF7F00")
623
+    
624
+    if(missing(main))
625
+        main <- paste("Position wise Nucleotide frequency (",
626
+                                probeLabel(object)[i], ")", sep="")
627
+  
628
+    plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
629
+                ylab="Nucleotide fequency", xlab="sequence position",
630
+                main=main, ...)
631
+ 
632
+    lines(x, nacrel[1, ], col=cols[1], lwd=2)
633
+    lines(x, nacrel[2, ], col=cols[2], lwd=2)
634
+    lines(x, nacrel[3, ], col=cols[3], lwd=2)
635
+    lines(x, nacrel[4, ], col=cols[4], lwd=2)
636
+ 
637
+    legend("top", ncol=6, 
638
+            lwd=c(1, 1, 2, 1, 1), col=cols, xjust=0.5, 
639
+            legend=c("A", "C", "G", "T"), bty="n", cex=0.8)
640
+    
641
+    return(invisible())
642
+})
643
+
644
+
645
+setMethod("plotGCcontent", "Fastqq", function(object,main,...)
646
+{
647
+    cols <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
648
+            "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A")
649
+    
650
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
651
+    ## Normalize matrix to colsum = 1
652
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
653
+    
654
+    gc <- apply(object@gcContent, 2, rel_int)
655
+    maxY = round(1.3*max(gc), 2)
656
+    nf <- nFiles(object)
657
+    x <- 1:nrow(gc)
658
+    
659
+    if(missing(main))
660
+        main<-"GC content"
661
+    
662
+    plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
663
+        ylab="Proportion of reads (%)", xlab="Relative GC content (%)", 
664
+        main=main, ...) 
665
+  
666
+    for(i in 1:nf)
667
+        lines(x, gc[, i], col=cols[i], lwd=2)
668
+  
669
+    legend("right", lwd=2, col=cols, legend=probeLabel(object),
670
+                                                bty="n", cex=0.8)
671
+  
672
+    return(invisible())
673
+})
674
+
675
+
676
+setMethod("getGCcontent", "Fastqq", function(object){
677
+    f <- function(x) { return(sum(as.numeric(x) * 0:100) / sum(as.numeric(x)))}
678
+    n <- length(object@filenames)
679
+    gc <- apply(object@gcContent, 2, f)
680
+    names(gc) <- probeLabel(object)
681
+    return(gc)
682
+})
683
+
684
+setMethod("plotNucCount", "Fastqq", function(object, nucs=16, maxx, ...)
685
+{
686
+  
687
+    # j = 16: N,    j = 2:3: gc
688
+    if(!is.numeric(nucs))
689
+        stop("'nucs' must be numeric.")
690
+    
691
+    nucs <- as.integer(nucs)
692
+    if(any(nucs < 1) || any(nucs > 19))
693
+        stop("'nucs' must be >0 and <20.")
694
+
695
+    maxSeqlen <- max(seqLen(object)[2, ])
696
+    
697
+    if(missing(maxx))
698
+    {
699
+        maxx <- maxSeqlen
700
+    }
701
+    else
702
+    {
703
+        if(!is.numeric(maxx))
704
+            stop("'maxx' must be numeric.")
705
+        
706
+        maxx <- as.integer(maxx)
707
+        
708
+        if(maxx<1)
709
+            stop("'maxx must be >0.")
710
+        
711
+        if(maxx > maxSeqlen)
712
+            maxx <- maxSeqlen
713
+    }
714
+    
715
+    n <- nFiles(object)
716
+    fvec <- 1:n
717
+    
718
+    i <- 1
719
+    
720
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
721
+    ## Skip extra line
722
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
723
+    x <- 1:maxx
724
+    nac <- object@nac[[i]][, x]
725
+    nacrel <- apply(nac, 2, rel_int)
726
+    
727
+    if(length(nucs) == 1){
728
+        dfr <- data.frame(a = nacrel[nucs, ])
729
+    }else{
730
+        dfr <- data.frame(a = apply(nacrel[nucs, ], 2, sum))
731
+    }
732
+    
733
+    if(n > 1)
734
+    {
735
+        for(i in 2:n)
736
+        {
737
+            nac <- object@nac[[i]][, x]
738
+            nacrel <- apply(nac, 2, rel_int)
739
+            if(length(nucs) == 1){
740
+                dfr[, i] <- nacrel[nucs, ]
741
+            }else{
742
+                dfr[, i] <- apply(nacrel[nucs, ], 2, sum)
743
+            }
744
+        }        
745
+    }
746
+    
747
+    maxY = round(1.4 * max(dfr), 3)
748
+    cols <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
749
+                    "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A")
750
+    
751
+    nv <- paste(iupac_chars[nucs], collapse = "")
752
+    
753
+    plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
754
+        ylab="Nucleotide fequency", xlab="sequence position",
755
+        main=paste("Position wise Nucleotide frequency:  '",
756
+                                            nv, "'", sep=""))
757
+  
758
+    for(i in fvec)
759
+        lines(x, dfr[, i], col=cols[i %% 10], lwd=2)
760
+    
761
+    legend("top", ncol=6, 
762
+        lwd=c(1, 1, 2, 1, 1), col=cols[fvec %% 10], xjust=0.5,
763
+        legend=probeLabel(object), bty="n", cex=0.8)
764
+    
765
+    return(invisible())
766
+})
767
+
768
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
769
+## End: Nucleotide frequency related functions
770
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
771
+
772
+
773
+
774
+setMethod("plotKmerCount", "Fastqq",
775
+                    function(object, index, mxey, main="K-mer count", ...)
776
+{
777
+    n <- nFiles(object)
778
+    if(missing(index))
779
+    {
780
+        index <- 1:n
781
+    }else{
782
+        if(!is.numeric(index))
783
+            stop("'index' must be numeric.")
784
+        
785
+        index <- sort(unique(as.integer(index)))
786
+        
787
+        if(any(index < 0) || any(index > n))
788
+                stop("'index' must be in 1, .., ", n, " ( = nFiles).")
789
+    }
790
+    
791
+    if(!missing(mxey))
792
+    {
793
+        if(!is.numeric(mxey))
794
+            stop("'mxey' must be numeric.")
795
+        mxey <- as.integer(mxey)
796
+        
797
+        if(mxey<1)
798
+            stop("'mxey' must be positive.")
799
+    }
800
+    
801
+    cols <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
802
+                "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A")  
803
+    pk <- 6
804
+    if(object@k <= pk)
805
+    {
806
+        pk <- object@k
807
+        x <- 1:(4^pk)
808
+        
809
+        if(missing(mxey))
810
+            lg2y <- floor(log2(1.2 * (max(object@kmer))))
811
+        else
812
+            lg2y <- mxey
813
+        
814
+        maxY <- 2^lg2y
815
+
816
+        
817
+        plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
818
+             ylab="K-mer count", xlab="K-mer index", main=main,
819
+                                                axes=FALSE, ...)
820
+    
821
+    for(i in index)
822
+        lines(x, object@kmer[, i], col=cols[i], lwd=2)
823
+    }
824
+    else
825
+    {
826
+        x <- 1:(4^pk)
827
+        melt_factor <- as.integer(4^(object@k - pk))
828
+        
829
+        y_factor <- max(.Call("melt_vector", object@kmer[, 1], melt_factor,
830
+                        PACKAGE="seqTools")) / max(object@kmer[, 1])
831
+    
832
+        if(missing(mxey))
833
+            lg2y <- floor(log2(1.2 * (max(object@kmer)) * y_factor))
834
+        else
835
+            lg2y <- mxey
836
+        maxY <- 2^lg2y
837
+    
838
+        plot(x, x, ylim=c(0, maxY), type="n", bty="n", las=1,
839
+            ylab="K-mer count", xlab="K-mer index", 
840
+                            main=main, axes=FALSE, ...)
841
+    
842
+        for(i in index)
843
+        {
844
+            lines(x, .Call("melt_vector", object@kmer[, i], melt_factor,
845
+                        PACKAGE="seqTools"), col=cols[i], lwd=2)
846
+        }
847
+    }
848
+    axis(side=1, at=0:4 * 4^(pk - 1), labels=c("A", "C", "G", "T", ""))
849
+  
850
+    axis(side=2, at=c(0, maxY),
851
+                    labels=c(0, paste("2^", lg2y, sep="")), las=1)
852
+  
853
+    legend("right", lwd=2, col=cols[index], 
854
+                legend=probeLabel(object)[index], bty="n", cex=0.8)
855
+  
856
+    return(invisible())
857
+})
858
+
859
+
860
+
861
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
862
+## Merging and melting Fastqq objects
863
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
864
+
865
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
866
+setMethod("mergeFastqq", "Fastqq", function(lhs, rhs)
867
+{
868
+    
869
+    res <- new("Fastqq")
870
+    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
871
+    ## Simple concatenations
872
+    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
873
+    
874
+    res@filenames <- c(lhs@filenames, rhs@filenames)
875
+    res@nFiles <- lhs@nFiles+rhs@nFiles
876
+    res@nReads <- c(lhs@nReads, rhs@nReads)
877
+    res@nN <- c(lhs@nN, rhs@nN)
878
+    res@seqLenCount <- cbind(lhs@seqLenCount, rhs@seqLenCount)
879
+    res@gcContent <- cbind(gcContentMatrix(lhs), gcContentMatrix(rhs))
880
+    res@seqLen <- cbind(lhs@seqLen, rhs@seqLen)
881
+    
882
+    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
883
+    ## Singularize probeLabel
884
+    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
885
+    
886
+    res@probeLabel <- c(lhs@probeLabel, rhs@probeLabel)  
887
+    if(any(table(res@probeLabel) > 1))
888
+    {
889
+        message("[mergeFastqq] Singularizing probeLabel (new suffix).")
890
+        res@probeLabel <- paste(1:res@nFiles, res@probeLabel, sep="_")
891
+    }
892
+    
893
+    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
894
+    ## Eventually resize arrays when lhs and rhs have different maxSeqLen
895
+    ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ##
896
+    if(lhs@maxSeqLen > rhs@maxSeqLen)
897
+    {
898
+        message("[mergeFastqq] Resizing rhs.")
899
+        msl <- lhs@maxSeqLen
900
+        
901
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
902
+        ## nac
903
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
904
+        
905
+        new_dim <- as.integer(c(nrow(rhs@nac), msl))
906
+        rhs_nac <- .Call("r_enlarge_int_mat", rhs@nac, new_dim, 
907
+                                                    PACKAGE="seqTools")
908
+        res@nac <- c(lhs@nac, rhs_nac)
909
+        
910
+        # seqLenCount
911
+        new_dim <- as.integer(c(msl, rhs@nFiles))
912
+        rhs_seqLenCount <- .Call("r_enlarge_int_mat", 
913
+                                rhs@seqLenCount, new_dim, PACKAGE="seqTools")
914
+        
915
+        res@seqLenCount <- c(lhs@seqLenCount, rhs_seqLenCount)
916
+        
917
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
918
+        ## phred
919
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
920
+        
921
+        new_dim <- as.integer(nrow(rhs@phred), msl)
922
+        rhs_phred_list <- list()
923
+        
924
+        for(i in 1:nFiles(rhs)){
925
+                rhs_phred_list[[i]] <- .Call("r_enlarge_int_mat", 
926
+                                rhs@phred[[i]], new_dim, PACKAGE="seqTools")
927
+        }
928
+        
929
+        res@phred <- c(lhs@phred, rhs_phred_list)
930
+        res@maxSeqLen <- lhs@maxSeqLen
931
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
932
+        
933
+    } else if(rhs@maxSeqLen > lhs@maxSeqLen)
934
+    {
935
+        
936
+        message("[mergeFastqq] Resizing lhs.")
937
+        msl <- rhs@maxSeqLen
938
+        
939
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
940
+        ## nac
941
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
942
+        
943
+        new_dim <- as.integer(c(nrow(lhs@nac), msl))
944
+        
945
+        lhs_nac <- .Call("r_enlarge_int_mat",
946
+                                lhs@nac, new_dim, PACKAGE="seqTools")
947
+        
948
+        res@nac <- c(rhs@nac, lhs_nac)
949
+        
950
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
951
+        ## seqLenCount
952
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
953
+        
954
+        new_dim <- as.integer(c(msl, lhs@nFiles))
955
+        
956
+        lhs_seqLenCount <- .Call("r_enlarge_int_mat",
957
+                    lhs@seqLenCount, new_dim, PACKAGE="seqTools")
958
+        
959
+        res@seqLenCount <- c(rhs@seqLenCount, lhs_seqLenCount)
960
+        
961
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
962
+        ## phred
963
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
964
+        
965
+        new_dim <- as.integer(nrow(lhs@phred), msl)
966
+        lhs_phred_list <- list()
967
+        
968
+        for(i in 1:nFiles(lhs))
969
+        {
970
+            lhs_phred_list[[i]] <- .Call("r_enlarge_int_mat", 
971
+                    lhs@phred[[i]], new_dim, PACKAGE="seqTools")
972
+        }
973
+        res@phred <- c(rhs@phred, lhs_phred_list)
974
+        res@maxSeqLen <- rhs@maxSeqLen
975
+        
976
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
977
+        
978
+    } else { 
979
+        
980
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
981
+        ## rhs@maxSeqLen == lhs@maxSeqLen
982
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
983
+        
984
+        res@maxSeqLen = lhs@maxSeqLen
985
+        res@nac <- c(lhs@nac, rhs@nac)
986
+        res@phred <- c(lhs@phred, rhs@phred)
987
+        
988
+        ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
989
+        
990
+    }
991
+    
992
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
993
+    ## Eventually melt down k-mer count matrix
994
+    ## when lhs and rhs have different k
995
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
996
+    res@k <- pmin(lhs@k, rhs@k)
997
+    
998
+    kml <- kmerCount(lhs)
999
+    if(lhs@k > res@k)
1000
+    {
1001
+        kml <- .Call("melt_kmer_matrix",
1002
+                    kml, c(lhs@k, res@k), PACKAGE="seqTools")
1003
+    }
1004
+    
1005
+    kmr <- kmerCount(rhs)
1006
+    if(rhs@k > res@k)
1007
+    {
1008
+        kmr <- .Call("melt_kmer_matrix",
1009
+                    kmr, c(rhs@k, res@k), PACKAGE="seqTools")
1010
+    }
1011
+    res@kmer <- cbind(kml, kmr)
1012
+    
1013
+    fkml <- lhs@firstKmer
1014
+    if(lhs@k > res@k)
1015
+    {
1016
+        fkml <- .Call("melt_kmer_matrix",
1017
+                    fkml, c(lhs@k, res@k), PACKAGE="seqTools")
1018
+    }
1019
+    fkmr <- rhs@firstKmer
1020
+    
1021
+    if(rhs@k > res@k)
1022
+    {
1023
+        fkmr <- .Call("melt_kmer_matrix",
1024
+                    fkmr, c(rhs@k, res@k), PACKAGE="seqTools")
1025
+    }
1026
+    res@firstKmer <- cbind(fkml, fkmr)
1027
+    
1028
+    return(res)
1029
+})
1030
+
1031
+
1032
+setMethod("meltDownK", "Fastqq", function(object, newK)
1033
+{
1034
+  
1035
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1036
+    if(!is.numeric(newK))
1037
+        stop("'newK' must be numeric")
1038
+    
1039
+    newK <- as.integer(newK)
1040
+    
1041
+    if(length(newK) != 1)
1042
+        stop("'newK' must have length 1.")
1043
+    
1044
+    if(newK < 1 || newK >=  getK(object))
1045
+        stop("'newK' must be >= 1 and <= ", getK(object), ".")
1046
+    
1047
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1048
+    res <- new("Fastqq")
1049
+    res@filenames <- object@filenames
1050
+    res@nFiles <- object@nFiles
1051
+    res@k <- newK
1052
+    res@maxSeqLen <- object@maxSeqLen
1053
+    
1054
+    res@kmer <- .Call("melt_kmer_matrix",
1055
+        object@kmer, c(getK(object), newK), PACKAGE="seqTools")
1056
+    
1057
+    res@firstKmer <- .Call("melt_kmer_matrix",
1058
+        object@firstKmer, c(getK(object), newK), PACKAGE="seqTools")
1059
+    
1060
+    res@nReads <- object@nReads
1061
+    res@nN <- object@nN
1062
+    res@seqLenCount <- object@seqLenCount
1063
+    res@gcContent <- object@gcContent
1064
+    res@nac <- object@nac
1065
+    res@phred <- object@phred
1066
+    res@seqLen <- object@seqLen
1067
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
1068
+  
1069
+    return(res)
1070
+})
1071
+
1072
+
1073
+listMelt <- function(x, oldK, newK)
1074
+{
1075
+    f <- function(x) .Call("melt_kmer_matrix", x, 
1076
+            as.integer(c(oldK, newK)), PACKAGE="seqTools")
1077
+  
1078
+    return(lapply(x, f))
1079
+}
1080
+
1081
+
1082
+
1083
+setMethod("[", "Fastqq", function(x, i, j, drop="missing")
1084
+{
1085
+  
1086
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
1087
+    res <- new("Fastqq")
1088
+    res@filenames <- x@filenames[i]
1089
+    res@probeLabel <- x@probeLabel[i]
1090
+    res@nFiles <- length(i)
1091
+    res@k <- x@k
1092
+    res@seqLen <- x@seqLen[, i, drop=FALSE]
1093
+    res@maxSeqLen <- max(res@seqLen[2, ])
1094
+    res@kmer <- x@kmer[, i, drop=FALSE]
1095
+    res@firstKmer <- x@firstKmer[, i, drop=FALSE]
1096
+    res@nN <- x@nN[i]
1097
+    res@seqLenCount <- x@seqLenCount[, i, drop=FALSE]
1098
+    res@gcContent <- x@gcContent[, i, drop=FALSE]
1099
+    res@nac <- x@nac[i]
1100
+    res@phred <- x@phred[i]
1101
+    res@nReads <- x@nReads[i]
1102
+    res@collectTime <- x@collectTime
1103
+    # + + + + + + + + + + + + + + + + + + + + + + + + + + + + #
1104
+    
1105
+    return(res)
1106
+})
1107
+
1108
+
1109
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1110
+## Calculation of distance matrix based on Canberra distance
1111
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1112
+
1113
+setMethod("cbDistMatrix", "Fastqq", 
1114
+        function(object, nReadNorm=max(nReads(object)))
1115
+{
1116
+    if(!is.numeric(nReadNorm))
1117
+        stop("'nReadNorm' must be numeric.")
1118
+    nReadNorm <- as.integer(nReadNorm)
1119
+    
1120
+    if(nReadNorm < max(nReads(object)))
1121
+        stop("'nReadNorm' must be greater than all nRead.")
1122
+    
1123
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1124
+    ## Column-wise normalizing read counts (by upscaling) 
1125
+    ## so that column sums become nearly equal in order to
1126
+    ## compensate sequencing depth artifacts in 
1127
+    ## Canberra distance values
1128
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1129
+    
1130
+    scale <- nReadNorm/nReads(object)
1131
+    
1132
+    scaled <- .Call("scale_kmer_matrix",
1133
+            kmerCount(object), scale, PACKAGE="seqTools")
1134
+    
1135
+    colnames(scaled) <- object@probeLabel
1136
+    ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1137
+  
1138
+    return(.Call("cb_distance_matrix", scaled, PACKAGE="seqTools"))
1139
+})
1140
+
1141
+
1142
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##
1143
+## END OF FILE
1144
+## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ##