fixed BiocCheck Recommended issues

chakalakka authored on 15/03/2016 16:50:38
Showing 25 changed files

... ...
@@ -1,14 +1,14 @@
 Package: AneuFinder
 Type: Package
 Title: Analysis of Copy Number Variation in Single-Cell-Sequencing Data
-Version: 0.99.1
-Date: 2015-09-19
+Version: 0.99.3
+Date: 2016-03
 Author: Aaron Taudt, Bjorn Bakker, David Porubsky
 Maintainer: Aaron Taudt <aaron.taudt@gmail.com>
 Description: This package implements functions for CNV calling, plotting, export
     and analysis from whole-genome single cell sequencing data.
 Depends:
-    R (>= 3.2.3),
+    R (>= 3.3.0),
     GenomicRanges,
     cowplot,
     AneuFinderData
... ...
@@ -91,4 +91,4 @@ importFrom(stats,stepfun)
 importFrom(stats,weighted.mean)
 importFrom(utils,read.table)
 importFrom(utils,write.table)
-useDynLib(AneuFinder)
+useDynLib(AneuFinder, .registration = TRUE, .fixes = "")
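
Note: with routine registration enabled, R code can resolve the C entry points through the registration table instead of searching all loaded DLLs. A minimal sketch of the two call styles this directive enables (routine names taken from the registration table added later in this commit; illustration only, not code from the package):

    .C("C_univariate_cleanup", PACKAGE = "AneuFinder")  # lookup by name, restricted to AneuFinder's DLL
    .C(C_univariate_cleanup)                            # via the native-symbol object that .registration=TRUE with .fixes="" binds in the namespace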
... ...
@@ -25,6 +25,7 @@
 #' @param bw Bandwidth for SCE hotspot detection (see \code{\link{hotspotter}} for further details).
 #' @param pval P-value for SCE hotspot detection (see \code{\link{hotspotter}} for further details).
 #' @param cluster.plots A logical indicating whether plots should be clustered by similarity.
+#' @return \code{NULL}
 #' @author Aaron Taudt
 #' @import foreach
 #' @import doParallel
... ...
@@ -216,7 +216,7 @@ binReads <- function(file, format, assembly, ID=basename(file), bamindex=file, c
 		}
 
 		### Quality measures ###
-		qualityInfo <- list(complexity=complexity, coverage=coverage, spikyness=qc.spikyness(binned.data$counts), shannon.entropy=qc.entropy(binned.data$counts))
+		qualityInfo <- list(complexity=complexity, coverage=coverage, spikiness=qc.spikiness(binned.data$counts), shannon.entropy=qc.entropy(binned.data$counts))
 		attr(binned.data, 'qualityInfo') <- qualityInfo
 		attr(binned.data, 'min.mapq') <- min.mapq
 
... ...
@@ -63,7 +63,7 @@ correctMappability <- function(binned.data.list, same.binsize, reference, format
 
 		### Quality measures ###
 		## Spikyness
-		attr(binned.data, 'spikyness') <- qc.spikyness(binned.data$counts)
+		attr(binned.data, 'spikiness') <- qc.spikiness(binned.data$counts)
 		## Shannon entropy
 		attr(binned.data, 'shannon.entropy') <- qc.entropy(binned.data$counts)
 
... ...
@@ -200,7 +200,7 @@ correctGC <- function(binned.data.list, GC.BSgenome, same.binsize=FALSE) {
 
 		### Quality measures ###
 		## Spikyness
-		attr(binned.data, 'spikyness') <- qc.spikyness(binned.data$counts)
+		attr(binned.data, 'spikiness') <- qc.spikiness(binned.data$counts)
 		## Shannon entropy
 		attr(binned.data, 'shannon.entropy') <- qc.entropy(binned.data$counts)
 
... ...
@@ -8,6 +8,7 @@
 #' Use \code{exportReadCounts} to export the binned read counts from an \code{\link{aneuHMM}} object in WIGGLE format.
 #' Use \code{exportGRanges} to export a \code{\link{GRanges}} object in BED format.
 #'
+#' @return \code{NULL}
 #' @name export
 #' @author Aaron Taudt
 #'@examples
... ...
@@ -84,7 +84,7 @@ findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1
 univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, count.cutoff.quantile=0.999, strand='*', states=c("zero-inflation",paste0(0:10,"-somy")), most.frequent.state="2-somy", algorithm="EM", initial.params=NULL) {
 
 	### Define cleanup behaviour ###
-	on.exit(.C("R_univariate_cleanup"))
+	on.exit(.C("C_univariate_cleanup", PACKAGE = 'AneuFinder'))
 
 	## Intercept user input
 	if (class(binned.data) != 'GRanges') {
... ...
@@ -150,7 +150,7 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard",
 		result$ID <- ID
 		result$bins <- binned.data
 	## Quality info
-		qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikyness=qc.spikyness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=NA)
+		qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikiness=qc.spikiness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=NA)
 		result$qualityInfo <- qualityInfo
 	## Convergence info
 		convergenceInfo <- list(eps=eps, loglik=NA, loglik.delta=NA, num.iterations=NA, time.sec=NA, error=NA)
... ...
@@ -251,7 +251,7 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard",
 			prob.initial[index] <- 0.5
 		}
 	
-		hmm <- .C("R_univariate_hmm",
+		hmm <- .C("C_univariate_hmm",
 			counts = as.integer(counts), # int* O
 			num.bins = as.integer(numbins), # int* T
 			num.states = as.integer(numstates), # int* N
... ...
@@ -275,7 +275,8 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard",
 			num.threads = as.integer(num.threads), # int* num_threads
 			error = as.integer(0), # int* error (error handling)
 			count.cutoff = as.integer(count.cutoff), # int* count.cutoff
-			algorithm = as.integer(algorithm) # int* algorithm
+			algorithm = as.integer(algorithm), # int* algorithm
+			PACKAGE = 'AneuFinder'
 		)
 
 		hmm$eps <- eps.try
... ...
@@ -322,7 +323,7 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard",
 
 			# Rerun the HMM with different epsilon and initial parameters from trial run
 			message(paste0("Rerunning trial ",index2use," with eps = ",eps))
-			hmm <- .C("R_univariate_hmm",
+			hmm <- .C("C_univariate_hmm",
 				counts = as.integer(counts), # int* O
 				num.bins = as.integer(numbins), # int* T
 				num.states = as.integer(numstates), # int* N
... ...
@@ -346,7 +347,8 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard",
 				num.threads = as.integer(num.threads), # int* num_threads
 				error = as.integer(0), # int* error (error handling)
 				count.cutoff = as.integer(count.cutoff), # int* count.cutoff
-				algorithm = as.integer(algorithm)
+				algorithm = as.integer(algorithm), # int* algorithm
+				PACKAGE = 'AneuFinder'
 			)
 		}
 
... ...
@@ -411,7 +413,7 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard",
 			convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec, error=hmm$error)
 			result$convergenceInfo <- convergenceInfo
 		## Quality info
-			qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikyness=qc.spikyness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=qc.bhattacharyya(result))
+			qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikiness=qc.spikiness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=qc.bhattacharyya(result))
 			result$qualityInfo <- qualityInfo
 		} else if (hmm$error == 1) {
 			warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": A NaN occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library! The following factors are known to cause this error: 1) Your read counts contain very high numbers. Try again with a lower value for 'count.cutoff.quantile'. 2) Your library contains too few reads in each bin. 3) Your library contains reads for a different genome than it was aligned to."))
... ...
@@ -497,7 +499,7 @@ bivariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", m
 		result$ID <- ID
 		result$bins <- binned.data
 	## Quality info
-		qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikyness=qc.spikyness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=NA)
+		qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikiness=qc.spikiness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=NA)
 		result$qualityInfo <- qualityInfo
 
 	# Check if there are counts in the data, otherwise HMM will blow up
... ...
@@ -712,11 +714,11 @@ bivariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", m
 	stopTimedMessage(ptm)
 		
 	### Define cleanup behaviour ###
-	on.exit(.C("R_multivariate_cleanup", as.integer(num.comb.states)))
+	on.exit(.C("C_multivariate_cleanup", as.integer(num.comb.states), PACKAGE = 'AneuFinder'))
 
 	### Run the multivariate HMM
 	# Call the C function
-	hmm <- .C("R_multivariate_hmm",
+	hmm <- .C("C_multivariate_hmm",
 		densities = as.double(densities), # double* D
 		num.bins = as.integer(num.bins), # int* T
 		num.comb.states = as.integer(num.comb.states), # int* N
... ...
@@ -734,7 +736,8 @@ bivariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", m
 		use.initial.params = as.logical(use.initial), # bool* use_initial_params
 		num.threads = as.integer(num.threads), # int* num_threads
 		error = as.integer(0), # error handling
-		algorithm = as.integer(algorithm) # int* algorithm
+		algorithm = as.integer(algorithm), # int* algorithm
+		PACKAGE = 'AneuFinder'
 		)
 			
 	### Check convergence ###
... ...
@@ -811,7 +814,7 @@ bivariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", m
 			convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec)
 			result$convergenceInfo <- convergenceInfo
 		## Quality info
-			qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikyness=qc.spikyness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=qc.bhattacharyya(result))
+			qualityInfo <- list(shannon.entropy=qc.entropy(counts), spikiness=qc.spikiness(counts), complexity=attr(result$bins,'qualityInfo')$complexity$preseqR, bhattacharyya=qc.bhattacharyya(result))
 			result$qualityInfo <- qualityInfo
 		## Univariate infos
 			univariateParams <- list(transitionProbs=uni.transitionProbs, startProbs=uni.startProbs, distributions=distributions[[1]], weights=uni.weights)
... ...
@@ -79,8 +79,8 @@ findSCEs <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1
 #' @export
 #'@examples
 #'## Load an HMM
-#'file <- system.file("extdata", "primary-lung", "hmms", "AvdB150303_I_001.bam.RData",
-#'                     package="AneuFinderData")
+#'file <- list.files(system.file("extdata", "primary-lung", "hmms",
+#'                   package="AneuFinderData"), full.names=TRUE)
 #'hmm <- loadHmmsFromFiles(file)[[1]]
 #'## Check number of segments before and after filtering
 #'length(hmm$segments)
... ...
@@ -1,6 +1,6 @@
 
 
-#' @useDynLib AneuFinder
+#' @useDynLib AneuFinder, .registration = TRUE, .fixes = ""
 #' @import IRanges
 #' @import GenomeInfoDb
 #' @import GenomicRanges
... ...
@@ -258,7 +258,7 @@ plotBivariateHistograms <- function(bihmm) {
 	binned.data.plus$counts <- binned.data.plus$pcounts
 	binned.data.plus$counts.gc <- binned.data.plus$pcounts.gc
 	binned.data.stacked <- c(binned.data.minus, binned.data.plus)
-	mask.attributes <- c(grep('complexity', names(attributes(binned.data)), value=TRUE), 'spikyness', 'shannon.entropy')
+	mask.attributes <- c(grep('complexity', names(attributes(binned.data)), value=TRUE), 'spikiness', 'shannon.entropy')
 	attributes(binned.data.stacked)[mask.attributes] <- attributes(binned.data)[mask.attributes]
 
 	## Make fake uni.hmm and plot
... ...
@@ -369,10 +369,10 @@ plotUnivariateHistogram <- function(model, state=NULL, strand='*', chromosome=NU
 
 	# Quality info
 	if (is.null(model$qualityInfo$complexity)) { model$qualityInfo$complexity <- NA }
-	if (is.null(model$qualityInfo$spikyness)) { model$qualityInfo$spikyness <- NA }
+	if (is.null(model$qualityInfo$spikiness)) { model$qualityInfo$spikiness <- NA }
 	if (is.null(model$qualityInfo$shannon.entropy)) { model$qualityInfo$shannon.entropy <- NA }
 	if (is.null(model$qualityInfo$bhattacharyya)) { model$qualityInfo$bhattacharyya <- NA }
-	quality.string <- paste0('reads = ',round(sum(model$bins$counts)/1e6,2),'M, complexity = ',round(model$qualityInfo$complexity),',  spikyness = ',round(model$qualityInfo$spikyness,2),',  entropy = ',round(model$qualityInfo$shannon.entropy,2),',  bhattacharyya = ',round(model$qualityInfo$bhattacharyya,2), ', num.segments = ',length(model$segments))
+	quality.string <- paste0('reads = ',round(sum(model$bins$counts)/1e6,2),'M, complexity = ',round(model$qualityInfo$complexity),',  spikiness = ',round(model$qualityInfo$spikiness,2),',  entropy = ',round(model$qualityInfo$shannon.entropy,2),',  bhattacharyya = ',round(model$qualityInfo$bhattacharyya,2), ', num.segments = ',length(model$segments))
 
 	# Plot the histogram
 	ggplt <- ggplot(data.frame(counts)) + geom_histogram(aes_string(x='counts', y='..density..'), binwidth=1, color='black', fill='white') + coord_cartesian(xlim=c(0,rightxlim)) + theme_bw() + xlab("read count") + ggtitle(bquote(atop(.(model$ID), atop(.(quality.string),''))))
... ...
@@ -453,7 +453,7 @@ plotKaryogram <- function(model, both.strands=FALSE, plot.SCE=FALSE, file=NULL)
 		model <- list()
 		model$ID <- ''
 		model$bins <- binned.data
-		model$qualityInfo <- list(shannon.entropy=qc.entropy(binned.data$counts), spikyness=qc.spikyness(binned.data$counts), complexity=attr(binned.data, 'complexity.preseqR'), bhattacharyya=NA)
+		model$qualityInfo <- list(shannon.entropy=qc.entropy(binned.data$counts), spikiness=qc.spikiness(binned.data$counts), complexity=attr(binned.data, 'complexity.preseqR'), bhattacharyya=NA)
 		plot.karyogram(model, both.strands=both.strands, file=file)
 	} else if (class(model)==class.univariate.hmm) {
 		plot.karyogram(model, both.strands=both.strands, file=file)
... ...
@@ -496,10 +496,10 @@ plot.karyogram <- function(model, both.strands=FALSE, plot.SCE=FALSE, file=NULL)
 
 	# Quality info
 	if (is.null(model$qualityInfo$complexity)) { model$qualityInfo$complexity <- NA }
-	if (is.null(model$qualityInfo$spikyness)) { model$qualityInfo$spikyness <- NA }
+	if (is.null(model$qualityInfo$spikiness)) { model$qualityInfo$spikiness <- NA }
 	if (is.null(model$qualityInfo$shannon.entropy)) { model$qualityInfo$shannon.entropy <- NA }
 	if (is.null(model$qualityInfo$bhattacharyya)) { model$qualityInfo$bhattacharyya <- NA }
-	quality.string <- paste0('reads = ',round(sum(model$bins$counts)/1e6,2),'M, complexity = ',round(model$qualityInfo$complexity),',  spikyness = ',round(model$qualityInfo$spikyness,2),',  entropy = ',round(model$qualityInfo$shannon.entropy,2),',  bhattacharyya = ',round(model$qualityInfo$bhattacharyya,2), ', num.segments = ',length(model$segments))
+	quality.string <- paste0('reads = ',round(sum(model$bins$counts)/1e6,2),'M, complexity = ',round(model$qualityInfo$complexity),',  spikiness = ',round(model$qualityInfo$spikiness,2),',  entropy = ',round(model$qualityInfo$shannon.entropy,2),',  bhattacharyya = ',round(model$qualityInfo$bhattacharyya,2), ', num.segments = ',length(model$segments))
 
 	## Get SCE coordinates
 	if (plot.SCE) {
... ...
@@ -918,7 +918,7 @@ plotProfile <- function(model, both.strands=FALSE, plot.SCE=TRUE, file=NULL) {
 		model <- list()
 		model$ID <- ''
 		model$bins <- binned.data
-		model$qualityInfo <- list(shannon.entropy=qc.entropy(binned.data$counts), spikyness=qc.spikyness(binned.data$counts), complexity=attr(binned.data, 'complexity.preseqR'))
+		model$qualityInfo <- list(shannon.entropy=qc.entropy(binned.data$counts), spikiness=qc.spikiness(binned.data$counts), complexity=attr(binned.data, 'complexity.preseqR'))
 		plot.profile(model, both.strands=both.strands, plot.SCE=FALSE, file=file)
 	} else if (class(model)==class.univariate.hmm) {
 		plot.profile(model, both.strands=FALSE, plot.SCE=FALSE, file=file)
... ...
@@ -1041,10 +1041,10 @@ plot.profile <- function(model, both.strands=FALSE, plot.SCE=TRUE, file=NULL) {
 	ggplt <- ggplt + scale_x_continuous(breaks=seqlengths(model$bins)/2+cum.seqlengths.0[as.character(seqlevels(model$bins))], labels=seqlevels(model$bins))
 	# Quality info
 	if (is.null(model$qualityInfo$complexity)) { model$qualityInfo$complexity <- NA }
-	if (is.null(model$qualityInfo$spikyness)) { model$qualityInfo$spikyness <- NA }
+	if (is.null(model$qualityInfo$spikiness)) { model$qualityInfo$spikiness <- NA }
 	if (is.null(model$qualityInfo$shannon.entropy)) { model$qualityInfo$shannon.entropy <- NA }
 	if (is.null(model$qualityInfo$bhattacharyya)) { model$qualityInfo$bhattacharyya <- NA }
-	quality.string <- paste0('reads = ',round(sum(model$bins$counts)/1e6,2),'M, complexity = ',round(model$qualityInfo$complexity),',  spikyness = ',round(model$qualityInfo$spikyness,2),',  entropy = ',round(model$qualityInfo$shannon.entropy,2),',  bhattacharyya = ',round(model$qualityInfo$bhattacharyya,2), ', num.segments = ',length(model$segments))
+	quality.string <- paste0('reads = ',round(sum(model$bins$counts)/1e6,2),'M, complexity = ',round(model$qualityInfo$complexity),',  spikiness = ',round(model$qualityInfo$spikiness,2),',  entropy = ',round(model$qualityInfo$shannon.entropy,2),',  bhattacharyya = ',round(model$qualityInfo$bhattacharyya,2), ', num.segments = ',length(model$segments))
 	ggplt <- ggplt + ylab('read count') + ggtitle(bquote(atop(.(model$ID), atop(.(quality.string),''))))
 		
 	if (!is.null(file)) {
... ...
@@ -15,12 +15,12 @@
 #' @author Aaron Taudt
 NULL
 
-#' @describeIn qualityControl Calculate the spikyness of a library
-qc.spikyness <- function(counts) {
+#' @describeIn qualityControl Calculate the spikiness of a library
+qc.spikiness <- function(counts) {
 	counts <- as.vector(counts)
 	sum.counts <- sum(counts)
-	spikyness <- sum(abs(diff(counts))) / sum.counts
-	return(spikyness)
+	spikiness <- sum(abs(diff(counts))) / sum.counts
+	return(spikiness)
 }
 
 #' @describeIn qualityControl Calculate the Shannon entropy of a library
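
Note: a toy illustration of the renamed spikiness measure on a made-up count vector (not data from the package):

    counts <- c(10, 12, 50, 11, 9)
    sum(abs(diff(counts))) / sum(counts)  # (2 + 38 + 39 + 2) / 92 = 0.88
    qc.spikiness(counts)                  # same value via the package function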
... ...
@@ -59,7 +59,7 @@ getQC <- function(hmms) {
 			qframe[[i1]] <- data.frame(total.read.count=sum(hmm$bins$counts),
 														binsize=width(hmm$bins)[1],
 														avg.read.count=mean(hmm$bins$counts),
-														spikyness=hmm$qualityInfo$spikyness,
+														spikiness=hmm$qualityInfo$spikiness,
 														entropy=hmm$qualityInfo$shannon.entropy,
 														complexity=hmm$qualityInfo$complexity,
 														loglik=hmm$convergenceInfo$loglik,
... ...
@@ -79,7 +79,7 @@ getQC <- function(hmms) {
 #'
 #' The employed quality measures are:
 #' \itemize{
-#' \item Spikyness
+#' \item Spikiness
 #' \item Entropy
 #' \item Number of segments
 #' \item Bhattacharrya distance
... ...
@@ -89,8 +89,8 @@ getQC <- function(hmms) {
 #' @param hmms A list of \code{\link{aneuHMM}} objects or a list of files that contain such objects.
 #' @param G An integer vector specifying the number of clusters that are compared. See \code{\link[mclust:Mclust]{Mclust}} for details.
 #' @param itmax The maximum number of outer and inner iterations for the \code{\link[mclust:Mclust]{Mclust}} function. See \code{\link[mclust:emControl]{emControl}} for details.
-#' @param measures The quality measures that are used for the clustering. Supported is any combination of \code{c('spikyness','entropy','num.segments','bhattacharyya','loglik','complexity','avg.read.count','total.read.count','binsize')}. 
-#' @param orderBy The quality measure to order the clusters by. Default is \code{'spikyness'}.
+#' @param measures The quality measures that are used for the clustering. Supported is any combination of \code{c('spikiness','entropy','num.segments','bhattacharyya','loglik','complexity','avg.read.count','total.read.count','binsize')}. 
+#' @param orderBy The quality measure to order the clusters by. Default is \code{'spikiness'}.
 #' @param reverseOrder Logical indicating whether the ordering by \code{orderBy} is reversed.
 #' @return A \code{list} with the classification, parameters and the \code{\link[mclust]{Mclust}} fit.
 #' @author Aaron Taudt
... ...
@@ -108,7 +108,7 @@ getQC <- function(hmms) {
 #'## Select files from the best 2 clusters for further processing
 #'best.files <- unlist(cl$classification[1:2])
 #'
-clusterByQuality <- function(hmms, G=1:9, itmax=c(100,100), measures=c('spikyness','entropy','num.segments','bhattacharyya','loglik'), orderBy='spikyness', reverseOrder=FALSE) {
+clusterByQuality <- function(hmms, G=1:9, itmax=c(100,100), measures=c('spikiness','entropy','num.segments','bhattacharyya','loglik'), orderBy='spikiness', reverseOrder=FALSE) {
 	
 	hmms <- loadHmmsFromFiles(hmms)
 	df <- getQC(hmms)
... ...
@@ -41,6 +41,7 @@ readConfig <- function(configfile) {
 #'
 #' @param conf A list structure with parameter values. Each entry will be written in one line.
 #' @param configfile Filename of the outputfile.
+#' @return \code{NULL}
 #' @author Aaron Taudt
 writeConfig <- function(conf, configfile) {
 
... ...
@@ -2,6 +2,6 @@
 
 .onAttach <- function(...) {
 
-	packageStartupMessage('Loading AneuFinder 0.99.2')
+	packageStartupMessage('Loading AneuFinder 0.99.3')
 
 }
... ...
@@ -85,6 +85,9 @@ Aneufinder(inputfolder, outputfolder, format, configfile = NULL, numCPU = 1,
 
 \item{cluster.plots}{A logical indicating whether plots should be clustered by similarity.}
 }
+\value{
+\code{NULL}
+}
 \description{
 This function is an easy-to-use wrapper to \link[AneuFinder:binning]{bin the data}, \link[AneuFinder:findCNVs]{find copy-number-variations}, \link[AneuFinder:findSCEs]{find sister-chromatid-exchange} events, plot \link[AneuFinder:heatmapGenomewide]{genomewide heatmaps}, \link[AneuFinder:plot.aneuHMM]{distributions, profiles and karyograms}.
 }
... ...
@@ -5,8 +5,8 @@
 \title{Cluster based on quality variables}
 \usage{
 clusterByQuality(hmms, G = 1:9, itmax = c(100, 100),
-  measures = c("spikyness", "entropy", "num.segments", "bhattacharyya",
-  "loglik"), orderBy = "spikyness", reverseOrder = FALSE)
+  measures = c("spikiness", "entropy", "num.segments", "bhattacharyya",
+  "loglik"), orderBy = "spikiness", reverseOrder = FALSE)
 }
 \arguments{
 \item{hmms}{A list of \code{\link{aneuHMM}} objects or a list of files that contain such objects.}
... ...
@@ -15,9 +15,9 @@ clusterByQuality(hmms, G = 1:9, itmax = c(100, 100),
 
 \item{itmax}{The maximum number of outer and inner iterations for the \code{\link[mclust:Mclust]{Mclust}} function. See \code{\link[mclust:emControl]{emControl}} for details.}
 
-\item{measures}{The quality measures that are used for the clustering. Supported is any combination of \code{c('spikyness','entropy','num.segments','bhattacharyya','loglik','complexity','avg.read.count','total.read.count','binsize')}.}
+\item{measures}{The quality measures that are used for the clustering. Supported is any combination of \code{c('spikiness','entropy','num.segments','bhattacharyya','loglik','complexity','avg.read.count','total.read.count','binsize')}.}
 
-\item{orderBy}{The quality measure to order the clusters by. Default is \code{'spikyness'}.}
+\item{orderBy}{The quality measure to order the clusters by. Default is \code{'spikiness'}.}
 
 \item{reverseOrder}{Logical indicating whether the ordering by \code{orderBy} is reversed.}
 }
... ...
@@ -30,7 +30,7 @@ This function uses the \pkg{\link{mclust}} package to cluster the input samples
 \details{
 The employed quality measures are:
 \itemize{
-\item Spikyness
+\item Spikiness
 \item Entropy
 \item Number of segments
 \item Bhattacharrya distance
... ...
@@ -40,6 +40,9 @@ exportGRanges(gr, filename, header = TRUE, trackname = NULL, score = NULL,
 
 \item{chromosome.format}{A character specifying the format of the chromosomes if \code{assembly} is specified. Either 'NCBI' for (1,2,3 ...) or 'UCSC' for (chr1,chr2,chr3 ...).#' @importFrom utils write.table}
 }
+\value{
+\code{NULL}
+}
 \description{
 Export copy-number-variation state or read counts as genome browser viewable file
 }
... ...
@@ -19,8 +19,8 @@ The input \code{model} with adjusted segments.
 }
 \examples{
 ## Load an HMM
-file <- system.file("extdata", "primary-lung", "hmms", "AvdB150303_I_001.bam.RData",
-                    package="AneuFinderData")
+file <- list.files(system.file("extdata", "primary-lung", "hmms",
+                  package="AneuFinderData"), full.names=TRUE)
 hmm <- loadHmmsFromFiles(file)[[1]]
 ## Check number of segments before and after filtering
 length(hmm$segments)
... ...
@@ -3,11 +3,11 @@
 \name{qualityControl}
 \alias{qc.bhattacharyya}
 \alias{qc.entropy}
-\alias{qc.spikyness}
+\alias{qc.spikiness}
 \alias{qualityControl}
 \title{Quality control measures for binned read counts}
 \usage{
-qc.spikyness(counts)
+qc.spikiness(counts)
 
 qc.entropy(counts)
 
... ...
@@ -31,7 +31,7 @@ Spikyness is defined as \eqn{K = sum(abs(diff(counts))) / sum(counts)}.
 }
 \section{Functions}{
 \itemize{
-\item \code{qc.spikyness}: Calculate the spikyness of a library
+\item \code{qc.spikiness}: Calculate the spikiness of a library
 
 \item \code{qc.entropy}: Calculate the Shannon entropy of a library
 
... ...
@@ -11,6 +11,9 @@ writeConfig(conf, configfile)
 
 \item{configfile}{Filename of the outputfile.}
 }
+\value{
+\code{NULL}
+}
 \description{
 Write an AneuFinder configuration file from a list structure.
 }
... ...
@@ -9,8 +9,7 @@ static double** multiD;
 // ===================================================================================================================================================
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the EM and returns the result to R.
 // ===================================================================================================================================================
-extern "C" {
-void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
+void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
 {
 
 	// Define logging level
... ...
@@ -209,14 +208,12 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
 	delete hmm;
 	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
 }
-} //extern
 
 
 // =====================================================================================================================================================
 // This function takes parameters from R, creates a multivariate HMM object, runs the EM and returns the result to R.
 // =====================================================================================================================================================
-extern "C" {
-void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm)
+void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm)
 {
 
 	// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
... ...
@@ -351,25 +348,20 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
 	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
 // 	FreeDoubleMatrix(multiD, *N);
 }
-} //extern
 
 
 // =======================================================
 // This function make a cleanup if anything was left over
 // =======================================================
-extern "C" {
-void R_univariate_cleanup()
+void univariate_cleanup()
 {
 // 	//FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__; // This message will be shown if interrupt happens before start of C-code
 	delete hmm;
 }
-} //extern
 
-extern "C" {
-void R_multivariate_cleanup(int* N)
+void multivariate_cleanup(int* N)
 {
 	delete hmm;
 	FreeDoubleMatrix(multiD, *N);
 }
-} //extern
 
... ...
@@ -12,11 +12,15 @@
 // #include <omp.h> // parallelization options
 // #endif
 
-// void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm);
-// 
-// void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm);
-// 
-// void R_univariate_cleanup();
-// 
-// void R_multivariate_cleanup(int* N);
-// 
+extern "C"
+void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm);
+
+extern "C"
+void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm);
+
+extern "C"
+void univariate_cleanup();
+
+extern "C"
+void multivariate_cleanup(int* N);
+
... ...
@@ -1,21 +1,22 @@
-// #include <Rinternals.h>
-// #include <R_ext/Rdynload.h>
-// #include <R_ext/Visibility.h>
-// #include "R_interface.h"
-// 
-// 
-// static const R_CMethodDef CEntries[]  = {
-//     {"R_univariate_hmm", (DL_FUNC) &R_univariate_hmm, 24},
-//     {"R_multivariate_hmm", (DL_FUNC) &R_multivariate_hmm, 18},
-//     {"R_univariate_cleanup", (DL_FUNC) &R_univariate_cleanup, 0},
-//     {"R_multivariate_cleanup", (DL_FUNC) &R_multivariate_cleanup, 1},
-//     {NULL, NULL, 0}
-// };
-// 
-// 
-// void attribute_visible R_init_aneufinder(DllInfo *dll)
-// {
-//     R_registerRoutines(dll, CEntries, NULL, NULL, NULL);
-//     R_useDynamicSymbols(dll, FALSE);
-//     R_forceSymbols(dll, TRUE);
-// }
+#include <Rinternals.h>
+#include <R_ext/Rdynload.h>
+#include "R_interface.h"
+
+
+static const R_CMethodDef CEntries[]  = {
+    {"C_univariate_hmm", (DL_FUNC) &univariate_hmm, 24, (R_NativePrimitiveArgType[24]) {INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP}},
+    {"C_multivariate_hmm", (DL_FUNC) &multivariate_hmm, 18, (R_NativePrimitiveArgType[18]) {REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP}},
+    {"C_univariate_cleanup", (DL_FUNC) &univariate_cleanup, 0, NULL},
+    {"C_multivariate_cleanup", (DL_FUNC) &multivariate_cleanup, 1, (R_NativePrimitiveArgType[1]) {INTSXP}},
+    {NULL, NULL, 0, NULL}
+};
+
+
+extern "C" {
+void R_init_AneuFinder(DllInfo *dll)
+{
+	R_registerRoutines(dll, CEntries, NULL, NULL, NULL);
+	R_useDynamicSymbols(dll, FALSE);
+// 	R_forceSymbols(dll, TRUE);
+}
+}
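
Note: once the package is installed with this registration table, the registered entry points can be inspected from R; a small sketch (assumes an installed AneuFinder, output abbreviated):

    library(AneuFinder)
    getDLLRegisteredRoutines("AneuFinder")$.C
    ## should list C_univariate_hmm, C_multivariate_hmm,
    ## C_univariate_cleanup and C_multivariate_cleanup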
... ...
@@ -12,7 +12,7 @@
 
 /* custom error handling class */
 // extern statement to avoid 'multiple definition' errors
-extern class exception_nan: public std::exception
+const class exception_nan: public std::exception
 {
   virtual const char* what() const throw()
   {
... ...
@@ -23,7 +23,7 @@
 \tableofcontents
 \clearpage
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-<<options,echo=FALSE, results='hide', message=FALSE, eval=TRUE>>=
+<<options, echo=FALSE, results='hide', message=FALSE, eval=TRUE>>=
 options(fig.width=120)
 library(AneuFinder)
 @
... ...
@@ -51,7 +51,7 @@ Aneufinder(inputfolder='folder-with-BAM', outputfolder='output-directory',
 
 Although in most cases the above command will produce reasonably good results, it might be worthwile to adjust the default parameters to improve performance and the quality of the results (see section \ref{sec:workflow}). You can get a description of all available parameters by typing
 \begin{scriptsize}
-<<>>==
+<<eval=TRUE>>==
 ?Aneufinder
 @
 \end{scriptsize}
... ...
@@ -90,32 +90,35 @@ heatmapGenomewide(cl$classification[[1]])
 
 \section{\label{sec:workflow}A detailed workflow}
 
-\subsection{Mappability correction}
+\subsection{\label{sec:mapcor}Mappability correction}
 The first step of your workflow should be the production of a reference file for mappability correction. Mappability correction is done via a variable-width binning approach (as compared to fixed-width bins) and requires a euploid reference. You can either simulate this reference file or take a real euploid reference. For optimal results we suggest to use a real reference, e.g. by merging BAM files of single cells from a euploid reference tissue. This can be achieved with the 'samtools merge' command (not part of R). Be careful: All CNVs that are present in the reference will lead to artifacts in the analysis later. This includes sex-chromosomes that are present in one copy only, so we advice to use a female reference and to exclude the Y-chromosome from the analysis. If you have no reference available, you can simulate one with the \Rfunction{simulateReads} command:
 \begin{scriptsize}
-<<eval=FALSE>>==
+<<eval=TRUE, message=FALSE, warning=FALSE>>==
 ## Load human genome
 library(BSgenome.Hsapiens.UCSC.hg19)
 
-## Get a BAM file for the estimation of quality scores
+## Get a BAM file for the estimation of quality scores (adjust this to your experiment)
 bamfile <- system.file("extdata", "BB150803_IV_074.bam", package="AneuFinderData")
 
 ## Simulate reads of length 51bp for human genome
-outputfile <- tempfile() # replace this with your destination file
+# We simulate reads every 5000bp for demonstration purposes, for a real
+# application you should use a much denser spacing (e.g. 500bp or less)
+simulatedReads.file <- tempfile() # replace this with your destination file
 simulateReads(BSgenome.Hsapiens.UCSC.hg19, readLength=51, bamfile=bamfile,
-              file=outputfile, every.X.bp=500)
+              file=simulatedReads.file, every.X.bp=5000)
 @
 \end{scriptsize}
 
 This simulated FASTQ file must then be aligned with your aligner of choice (ideally the same that you used for your other samples) and given as reference in the \Rfunction{Aneufinder} function (option \Roption{variable.width.reference}).
 
-\subsection{Blacklisting}
+\subsection{\label{sec:blacklist}Blacklisting}
 To further improve the quality of the results and remove artifacts caused by high mappability repeat regions, e.g. near centromers, a blacklist can be used in option \Roption{blacklist} of the \Rfunction{Aneufinder} function. All reads falling into the regions specified by the blacklist will be discarded when importing the read files.
 You can either download a blacklist from the UCSC genome browser, e.g. the ``DAC Blacklisted Regions from ENCODE/DAC(Kundaje)'' mappability track, or make your own. For optimal results, we advice to make your own blacklist from a euploid reference. The following code chunck takes a euploid reference and makes fixed-width bins of 100kb. Bins with read count above and beloow the 0.999 and 0.05 quantile are taken as blacklist:
 \begin{scriptsize}
 <<eval=TRUE, warning=FALSE, message=FALSE, fig.height=3>>==
-## Get a euploid reference
+## Get a euploid reference (adjust this to your experiment)
 bedfile <- system.file("extdata", "hg19_diploid.bam.bed.gz", package="AneuFinderData")
+
 ## Make 100kb fixed-width bins
 bins <- binReads(bedfile, format='bed', assembly='hg19',
                  binsizes=100e3, chromosomes=c(1:22,'X'))[[1]]
... ...
@@ -128,21 +131,85 @@ p <- p + geom_hline(aes(yintercept=ucutoff), color='red')
 print(p)
 blacklist <- bins[bins$counts < ucutoff & bins$counts > lcutoff]
 ## Write blacklist to file
-exportGRanges(blacklist, filename=tempfile(), header=FALSE,
+blacklist.file <- tempfile()
+exportGRanges(blacklist, filename=blacklist.file, header=FALSE,
               chromosome.format='NCBI')
 @
 \end{scriptsize}
 
 \subsection{Running Aneufinder}
+The function \Rfunction{Aneufinder} takes an input folder with BAM or BED files and produces an output folder with results, plots and browserfiles. The following code is an example of how to run \Rfunction{Aneufinder} with variable-width bins (see section \ref{sec:mapcor}), blacklist (see section \ref{sec:blacklist}) and GC-correction. Results will be stored in 'outputfolder/hmms' as RData objects for further processing such as quality filtering and customized plotting.
+
+\begin{scriptsize}
+<<eval=TRUE>>==
+## First, get some data and reference files (adjust this to your experiment)
+var.width.ref <- system.file("extdata", "hg19_diploid.bam.bed.gz", package="AneuFinderData")
+blacklist <- system.file("extdata", "blacklist-hg19.bed.gz", package="AneuFinderData")
+datafolder <- system.file("extdata", "B-ALL-B", package = "AneuFinderData")
+list.files(datafolder) # only 3 cells for demonstration purposes
+
+## Library for GC correction
+library(BSgenome.Hsapiens.UCSC.hg19)
+
+## Produce output files
+cnv.states <- c('zero-inflation', paste0(1:10, '-somy'))
+outputfolder <- tempdir()
+Aneufinder(inputfolder = datafolder, outputfolder = outputfolder, format = 'bed',
+           numCPU = 3, binsizes = c(5e5, 1e6), variable.width.reference = var.width.ref,
+           chromosomes = c(1:22,'X','Y'), blacklist = blacklist, states = cnv.states,
+           correction.method = 'GC', GC.BSgenome = BSgenome.Hsapiens.UCSC.hg19,
+           num.trials = 1, assembly = 'hg19')
+@
+\end{scriptsize}
 
 \subsection{Quality control}
+Once the function \Rfunction{Aneufinder} has completed, results will be accessible as .RData files under 'outputfolder/hmms'. Single cell sequencing is prone to noise and therefore it is a good idea to filter the results by quality to retain only high-quality cells. We found that simple filtering procedures such as cutoffs on the total number of reads etc., are insufficient to distinguish good from bad-quality libraries. Therefore, we have implemented a multivariate clustering approach that works on multiple quality metrics (see ?clusterByQuality for details) to increase robustness of the filtering. Here is an example demonstrating the usage of \Rfunction{clusterByQuality}.
+
+\begin{scriptsize}
+<<eval=TRUE, message=FALSE>>==
+## Get some pre-produced results (adjust this to your experiment)
+results <- system.file("extdata", "primary-lung", "hmms", package="AneuFinderData")
+files <- list.files(results, full.names=TRUE)
+
+## Cluster by quality
+cl <- clusterByQuality(files)
+plot(cl$Mclust, what='classification')
+print(cl$parameters)
+## Apparently, the third cluster corresponds to failed libraries
+## while the first cluster contains high-quality libraries
+
+## Select the best cluster and plot it
+selected.files <- unlist(cl$classification[[1]])
+heatmapGenomewide(selected.files)
+@
+\end{scriptsize}
 
 \subsection{Karyotype measures}
+This package implements two measures to quantify karyotype heterogeneity, an \emph{aneuploidy} and a \emph{heterogeneity} score. Both measures are independent of the number of cells, the length of the genome, and take into account every position in the genome. The following example compares the heterogeneity and aneuploidy between a primary lung cancer and the corresponding liver metastasis.
 
+\begin{scriptsize}
+<<eval=TRUE, message=FALSE>>==
+## Get some pre-produced results (adjust this to your experiment)
+results <- system.file("extdata", "primary-lung", "hmms", package="AneuFinderData")
+files.lung <- list.files(results, full.names=TRUE)
+results <- system.file("extdata", "metastasis-liver", "hmms", package="AneuFinderData")
+files.liver <- list.files(results, full.names=TRUE)
+
+## Get karyotype measures
+k.lung <- karyotypeMeasures(files.lung)
+k.liver <- karyotypeMeasures(files.liver)
+
+## Print the scores in one data.frame
+df <- rbind(lung = k.lung$genomewide, liver = k.liver$genomewide)
+print(df)
+## While the aneuploidy is similar between both cancers, the heterogeneity is
+## nearly twice as high for the primary lung cancer.
+@
+\end{scriptsize}
 
 \section{Session Info}
 \begin{scriptsize}
-<<>>=
+<<eval=TRUE>>=
 sessionInfo()
 warnings()
 @

new file mode 100644
Binary files /dev/null and b/vignettes/AneuFinder.pdf differ