Browse code

adjusted licensing to GPLv3

chakalakka authored on 19/09/2015 14:26:38
Showing 52 changed files

... ...
@@ -9,6 +9,6 @@ Description: not yet
9 9
 Depends: R (>= 3.1.0), GenomicRanges, ggplot2, reshape2, mclust
10 10
 Imports: IRanges, Rsamtools, Biostrings, GenomicAlignments, grid, preseqR, ggdendro, foreach, doParallel
11 11
 Suggests:
12
-License: GPL
12
+License: GPLv3
13 13
 LazyLoad: yes
14
-Packaged: 2014-02-13 13:29:00 CET+1; Taudt
14
+Packaged: 2015-09-19 13:29:00 CET+1; Taudt
... ...
@@ -1,21 +1,15 @@
1
-The MIT License (MIT)
1
+aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+Copyright (C) 2015  Aaron Taudt
2 3
 
3
-Copyright (c) 2014, Aaron Taudt and David Porubsky
4
+This program is free software: you can redistribute it and/or modify
5
+it under the terms of the GNU General Public License as published by
6
+the Free Software Foundation, either version 3 of the License, or
7
+(at your option) any later version.
4 8
 
5
-Permission is hereby granted, free of charge, to any person obtaining a copy
6
-of this software and associated documentation files (the "Software"), to deal
7
-in the Software without restriction, including without limitation the rights
8
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
-copies of the Software, and to permit persons to whom the Software is
10
-furnished to do so, subject to the following conditions:
9
+This program is distributed in the hope that it will be useful,
10
+but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+GNU General Public License for more details.
11 13
 
12
-The above copyright notice and this permission notice shall be included in all
13
-copies or substantial portions of the Software.
14
-
15
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
-SOFTWARE.
14
+You should have received a copy of the GNU General Public License
15
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
... ...
@@ -4,6 +4,7 @@ S3method(plot,GRanges)
4 4
 S3method(plot,aneuBiHMM)
5 5
 S3method(plot,aneuHMM)
6 6
 S3method(plot,character)
7
+export(Aneufinder)
7 8
 export(bam2binned)
8 9
 export(bed2GRanges)
9 10
 export(bed2binned)
... ...
@@ -11,7 +12,6 @@ export(bedGraph2binned)
11 12
 export(clusterByQuality)
12 13
 export(correctGC)
13 14
 export(correctMappability)
14
-export(easyAneufinder)
15 15
 export(exportCNVs)
16 16
 export(exportReadCounts)
17 17
 export(filterSegments)
... ...
@@ -24,6 +24,7 @@ export(heatmapGenomewide)
24 24
 export(karyotypeMeasures)
25 25
 export(loadBinnedFromFiles)
26 26
 export(loadHmmsFromFiles)
27
+export(makeConfig)
27 28
 export(readConfig)
28 29
 export(stateColors)
29 30
 import(GenomicRanges)
30 31
old mode 100755
31 32
new mode 100644
32 33
similarity index 81%
33 34
rename from R/easyAneufinder.R
34 35
rename to R/Aneufinder.R
... ...
@@ -1,15 +1,32 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Wrapper function for the aneufinder package
2 19
 #'
3 20
 #' This function is an easy-to-use wrapper to \link[aneufinder:binning]{bin the data}, \link[aneufinder:findCNVs]{find copy-number-variations}, \link[aneufinder:findSCEs]{find sister-chromatid-exchange} events and plot the results.
4 21
 #'
5
-#' @param inputfolder Folder with BAM files.
6
-#' @param configfile A file with parameters that are used by the various functions.
7
-#' @param outputfolder Folder to put the results. If it does not exist it will be created.
22
+#' @param inputfolder Folder with either BAM or BED files.
23
+#' @param outputfolder Folder to output the results. If it does not exist it will be created.
24
+#' @param config A file with parameters that are used by the various subfunctions. Alternatively a list with parameters created by \code{\link{makeConfig}}.
8 25
 #' @author Aaron Taudt
9 26
 #' @import foreach
10 27
 #' @import doParallel
11 28
 #' @export
12
-easyAneufinder <- function(inputfolder, configfile, outputfolder) {
29
+Aneufinder <- function(inputfolder, outputfolder, config=makeConfig()) {
13 30
 
14 31
 #=======================
15 32
 ### Helper functions ###
... ...
@@ -21,16 +38,26 @@ as.object <- function(x) {
21 38
 #========================
22 39
 ### General variables ###
23 40
 #========================
24
-## Read config file ##
25
-tC <- tryCatch({
26
-	config <- readConfig(configfile)
27
-}, error = function(err) {
28
-	message("Error: Could not read configuration file ",configfile)
29
-})
41
+if (is.character(config)) {
42
+	## Read config file ##
43
+	errstring <- tryCatch({
44
+		conf <- readConfig(config)
45
+		errstring <- ''
46
+	}, error = function(err) {
47
+		errstring <- paste0("Could not read configuration file ",config)
48
+	})
49
+	if (errstring!='') {
50
+		stop(errstring)
51
+	}
52
+	## Make a copy of the conf file ##
53
+	temp <- file.copy(from=config, to=file.path(outputfolder, basename(config)), overwrite=TRUE)
54
+} else {
55
+	conf <- config
56
+}
30 57
 
31 58
 ## Helpers
32
-binsizes <- config$Binning$binsizes
33
-reads.per.bins <- config$Binning$reads.per.bin
59
+binsizes <- conf$Binning$binsizes
60
+reads.per.bins <- conf$Binning$reads.per.bin
34 61
 patterns <- c(paste0('reads.per.bin_',reads.per.bins,'_'), paste0('binsize_',format(binsizes, scientific=F, trim=T),'_'))
35 62
 patterns <- setdiff(patterns, c('reads.per.bin__','binsize__'))
36 63
 
... ...
@@ -43,7 +70,7 @@ SCEpath <- file.path(outputfolder,'SCE_results')
43 70
 SCEplotpath <- file.path(outputfolder,'SCE_plots')
44 71
 SCEbrowserpath <- file.path(outputfolder,'SCE_browser_files')
45 72
 ## Delete old directory if desired ##
46
-if (config$General$reuse.existing.files==FALSE) {
73
+if (conf$General$reuse.existing.files==FALSE) {
47 74
 	if (file.exists(outputfolder)) {
48 75
 		message("Deleting old directory ",outputfolder)
49 76
 		unlink(outputfolder, recursive=T)
... ...
@@ -52,12 +79,10 @@ if (config$General$reuse.existing.files==FALSE) {
52 79
 if (!file.exists(outputfolder)) {
53 80
 	dir.create(outputfolder)
54 81
 }
55
-## Make a copy of the config file ##
56
-temp <- file.copy(configfile, file.path(outputfolder, basename(configfile)))
57 82
 
58 83
 ## Parallelization ##
59
-message("Using ",config$General$numCPU," CPUs")
60
-cl <- makeCluster(config$General$numCPU)
84
+message("Using ",conf$General$numCPU," CPUs")
85
+cl <- makeCluster(conf$General$numCPU)
61 86
 doParallel::registerDoParallel(cl)
62 87
 
63 88
 #==============
... ...
@@ -66,14 +91,14 @@ doParallel::registerDoParallel(cl)
66 91
 message("Binning the data ...", appendLF=F); ptm <- proc.time()
67 92
 if (!file.exists(binpath.uncorrected)) { dir.create(binpath.uncorrected) }
68 93
 ## Prepare parameter list ##
69
-paramlist <- config$Binning
94
+paramlist <- conf$Binning
70 95
 paramlist$outputfolder <- binpath.uncorrected
71 96
 paramlist$save.as.RData <- TRUE
72 97
 paramlist$format <- NULL
73 98
 paramlist$GC.correction.bsgenome.string <- NULL
74 99
 
75
-files <- list.files(inputfolder, full=T, recursive=T, pattern='.bam$')
76
-temp <- foreach (file = files, .packages=c('aneufinder',config$Binning$GC.correction.bsgenome.string)) %dopar% {
100
+files <- list.files(inputfolder, full=T, recursive=T, pattern=paste0('.',conf$Binning$format,'$'))
101
+temp <- foreach (file = files, .packages=c('aneufinder',conf$Binning$GC.correction.bsgenome.string)) %dopar% {
77 102
 	message(file)
78 103
 	## Check for existing files ##
79 104
 	existing.binfiles <- grep(basename(file), list.files(binpath.uncorrected), value=T)
... ...
@@ -85,11 +110,11 @@ temp <- foreach (file = files, .packages=c('aneufinder',config$Binning$GC.correc
85 110
 		# Make paramter list
86 111
 		paramlist$binsizes <- binsizes.todo
87 112
 		paramlist$reads.per.bin <- rpbin.todo
88
-		if (config$Binning$format=='bam') {
113
+		if (conf$Binning$format=='bam') {
89 114
 			paramlist$bamfile <- file
90 115
 			do.call('bam2binned', paramlist)
91 116
 		} else {
92
-			warning("Unknown file format ",config$Binning$format)
117
+			warning("Unknown file format ",conf$Binning$format)
93 118
 		}
94 119
 	}
95 120
 }
... ...
@@ -98,16 +123,16 @@ time <- proc.time() - ptm; message(" ",round(time[3],2),"s")
98 123
 #=================
99 124
 ### Correction ###
100 125
 #=================
101
-if (!is.null(config$Correction$method)) {
126
+if (!is.null(conf$Correction$method)) {
102 127
 
103
-	message(paste0(config$Correction$method," correction ..."), appendLF=F); ptm <- proc.time()
104
-	if (config$Correction$method=='GC') {
128
+	message(paste0(conf$Correction$method," correction ..."), appendLF=F); ptm <- proc.time()
129
+	if (conf$Correction$method=='GC') {
105 130
 		binpath.corrected <- file.path(outputfolder,'binned_GC-corrected')
106 131
 		if (!file.exists(binpath.corrected)) { dir.create(binpath.corrected) }
107 132
 		## Load BSgenome
108
-		suppressPackageStartupMessages(library(config$Correction$GC.bsgenome, character.only=T))
109
-		config$Correction$GC.bsgenome.string <- config$Correction$GC.bsgenome
110
-		config$Correction$GC.bsgenome <- as.object(config$Correction$GC.bsgenome.string) # replacing string by object
133
+		suppressPackageStartupMessages(library(conf$Correction$GC.bsgenome, character.only=T))
134
+		conf$Correction$GC.bsgenome.string <- conf$Correction$GC.bsgenome
135
+		conf$Correction$GC.bsgenome <- as.object(conf$Correction$GC.bsgenome.string) # replacing string by object
111 136
 
112 137
 		## Go through patterns
113 138
 		temp <- foreach (pattern = patterns, .packages=c('aneufinder')) %dopar% {
... ...
@@ -119,9 +144,9 @@ if (!is.null(config$Correction$method)) {
119 144
 			if (length(binfiles.todo)>0) {
120 145
 				binfiles.todo <- paste0(binpath.uncorrected,.Platform$file.sep,binfiles.todo)
121 146
 				if (grepl('binsize',pattern)) {
122
-					binned.data.list <- suppressMessages(correctGC(binfiles.todo,config$Correction$GC.bsgenome, same.GC.content=TRUE))
147
+					binned.data.list <- suppressMessages(correctGC(binfiles.todo,conf$Correction$GC.bsgenome, same.GC.content=TRUE))
123 148
 				} else {
124
-					binned.data.list <- suppressMessages(correctGC(binfiles.todo,config$Correction$GC.bsgenome))
149
+					binned.data.list <- suppressMessages(correctGC(binfiles.todo,conf$Correction$GC.bsgenome))
125 150
 				}
126 151
 				for (i1 in 1:length(binned.data.list)) {
127 152
 					binned.data <- binned.data.list[[i1]]
... ...
@@ -130,9 +155,9 @@ if (!is.null(config$Correction$method)) {
130 155
 				}
131 156
 			}
132 157
 		}
158
+		binpath <- binpath.corrected
133 159
 	}
134 160
 	time <- proc.time() - ptm; message(" ",round(time[3],2),"s")
135
-	binpath <- binpath.corrected
136 161
 
137 162
 } else {
138 163
 	binpath <- binpath.uncorrected
... ...
@@ -141,12 +166,12 @@ if (!is.null(config$Correction$method)) {
141 166
 #===============
142 167
 ### findCNVs ###
143 168
 #===============
144
-if (config$CNV$findCNVs==TRUE) {
169
+if (conf$CNV$findCNVs==TRUE) {
145 170
 
146 171
 	message("Finding CNVs ...", appendLF=F); ptm <- proc.time()
147 172
 	if (!file.exists(CNVpath)) { dir.create(CNVpath) }
148 173
 	## Prepare parameter list ##
149
-	paramlist <- config$CNV
174
+	paramlist <- conf$CNV
150 175
 	paramlist$findCNVs <- NULL
151 176
 
152 177
 	files <- list.files(binpath, full=T, recursive=T, pattern='.RData$')
... ...
@@ -155,7 +180,6 @@ if (config$CNV$findCNVs==TRUE) {
155 180
 		if (!file.exists(savename)) {
156 181
 			# Make parameter list
157 182
 			paramlist$binned.data <- file
158
-			paramlist$ID <- basename(file)
159 183
 			model <- do.call('findCNVs', paramlist)
160 184
 			save(model, file=savename)
161 185
 		}
... ...
@@ -197,7 +221,7 @@ if (config$CNV$findCNVs==TRUE) {
197 221
 		ifiles <- grep(pattern, ifiles, value=T)
198 222
 		savename=file.path(CNVplotpath,paste0('genomeHeatmap_',sub('_$','',pattern),'.pdf'))
199 223
 		if (!file.exists(savename)) {
200
-			suppressMessages(heatmapGenomewide(ifiles, file=savename, plot.SCE=F, cluster=config$Plotting$cluster))
224
+			suppressMessages(heatmapGenomewide(ifiles, file=savename, plot.SCE=F, cluster=conf$Plotting$cluster))
201 225
 		}
202 226
 	}
203 227
 	temp <- foreach (pattern = patterns, .packages=c('aneufinder')) %dopar% {
... ...
@@ -206,7 +230,7 @@ if (config$CNV$findCNVs==TRUE) {
206 230
 		savename=file.path(CNVplotpath,paste0('aneuploidyHeatmap_',sub('_$','',pattern),'.pdf'))
207 231
 		if (!file.exists(savename)) {
208 232
 			pdf(savename, width=30, height=0.3*length(ifiles))
209
-			ggplt <- suppressMessages(heatmapAneuploidies(ifiles, cluster=config$Plotting$cluster))
233
+			ggplt <- suppressMessages(heatmapAneuploidies(ifiles, cluster=conf$Plotting$cluster))
210 234
 			print(ggplt)
211 235
 			d <- dev.off()
212 236
 		}
... ...
@@ -258,7 +282,7 @@ if (config$CNV$findCNVs==TRUE) {
258 282
 		if (!file.exists(paste0(savename,'.bed.gz'))) {
259 283
 			ifiles <- list.files(CNVpath, pattern='RData$', full=T)
260 284
 			ifiles <- grep(pattern, ifiles, value=T)
261
-			exportCNVs(ifiles, filename=savename, cluster=config$Plotting$cluster, export.CNV=TRUE, export.SCE=FALSE)
285
+			exportCNVs(ifiles, filename=savename, cluster=conf$Plotting$cluster, export.CNV=TRUE, export.SCE=FALSE)
262 286
 		}
263 287
 	}
264 288
 	time <- proc.time() - ptm; message(" ",round(time[3],2),"s")
... ...
@@ -267,12 +291,12 @@ if (config$CNV$findCNVs==TRUE) {
267 291
 #===============
268 292
 ### findSCEs ###
269 293
 #===============
270
-if (config$SCE$findSCEs==TRUE) {
294
+if (conf$SCE$findSCEs==TRUE) {
271 295
 
272 296
 	message("Finding SCEs ...", appendLF=F); ptm <- proc.time()
273 297
 	if (!file.exists(SCEpath)) { dir.create(SCEpath) }
274 298
 	## Prepare parameter list ##
275
-	paramlist <- config$SCE
299
+	paramlist <- conf$SCE
276 300
 	paramlist$findSCEs <- NULL
277 301
 
278 302
 	files <- list.files(binpath, full=T, recursive=T, pattern='.RData$')
... ...
@@ -281,7 +305,6 @@ if (config$SCE$findSCEs==TRUE) {
281 305
 		if (!file.exists(savename)) {
282 306
 			# Make parameter list
283 307
 			paramlist$binned.data <- file
284
-			paramlist$ID <- basename(file)
285 308
 			model <- do.call('findSCEs', paramlist)
286 309
 			save(model, file=savename)
287 310
 		}
... ...
@@ -323,7 +346,7 @@ if (config$SCE$findSCEs==TRUE) {
323 346
 		ifiles <- grep(pattern, ifiles, value=T)
324 347
 		savename=file.path(SCEplotpath,paste0('genomeHeatmap_',sub('_$','',pattern),'.pdf'))
325 348
 		if (!file.exists(savename)) {
326
-			suppressMessages(heatmapGenomewide(ifiles, file=savename, plot.SCE=T, cluster=config$Plotting$cluster))
349
+			suppressMessages(heatmapGenomewide(ifiles, file=savename, plot.SCE=T, cluster=conf$Plotting$cluster))
327 350
 		}
328 351
 	}
329 352
 	temp <- foreach (pattern = patterns, .packages=c('aneufinder')) %dopar% {
... ...
@@ -332,7 +355,7 @@ if (config$SCE$findSCEs==TRUE) {
332 355
 		savename=file.path(SCEplotpath,paste0('aneuploidyHeatmap_',sub('_$','',pattern),'.pdf'))
333 356
 		if (!file.exists(savename)) {
334 357
 			pdf(savename, width=30, height=0.3*length(ifiles))
335
-			ggplt <- suppressMessages(heatmapAneuploidies(ifiles, cluster=config$Plotting$cluster))
358
+			ggplt <- suppressMessages(heatmapAneuploidies(ifiles, cluster=conf$Plotting$cluster))
336 359
 			print(ggplt)
337 360
 			d <- dev.off()
338 361
 		}
... ...
@@ -384,7 +407,7 @@ if (config$SCE$findSCEs==TRUE) {
384 407
 		if (!file.exists(paste0(savename,'.bed.gz'))) {
385 408
 			ifiles <- list.files(SCEpath, pattern='RData$', full=T)
386 409
 			ifiles <- grep(pattern, ifiles, value=T)
387
-			exportCNVs(ifiles, filename=savename, cluster=config$Plotting$cluster, export.CNV=TRUE, export.SCE=TRUE)
410
+			exportCNVs(ifiles, filename=savename, cluster=conf$Plotting$cluster, export.CNV=TRUE, export.SCE=TRUE)
388 411
 		}
389 412
 	}
390 413
 	time <- proc.time() - ptm; message(" ",round(time[3],2),"s")
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 bait.loadSCE <- function(filename) {
2 19
   
3 20
   data <- read.table(filename, header=FALSE)
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Analysis of Aneuploidies and Copy Number Variation in Single-Cell-Sequencing Data
2 19
 #'
3 20
 #' TODO
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Convert aligned reads from various file formats into read counts in equidistant bins
2 19
 #'
3 20
 #' Convert aligned reads in .bam or .bed format into read counts in equidistant windows. Convert signal values in .bedGraph format to signal counts in equidistant windows.
... ...
@@ -19,13 +36,13 @@ NULL
19 36
 #' @param bamfile A file in BAM format.
20 37
 #' @param bamindex BAM index file. Can be specified without the .bai ending. If the index file does not exist it will be created and a warning is issued.
21 38
 #' @export
22
-bam2binned <- function(bamfile, bamindex=bamfile, pairedEndReads=FALSE, outputfolder="binned_data", binsizes=NULL, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, outputfolder.fragments=NULL, return.fragments=FALSE) {
39
+bam2binned <- function(bamfile, bamindex=bamfile, ID=basename(bamfile), pairedEndReads=FALSE, outputfolder="binned_data", binsizes=NULL, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, outputfolder.fragments=NULL, return.fragments=FALSE) {
23 40
 	call <- match.call()
24 41
 	underline <- paste0(rep('=',sum(nchar(call[[1]]))+3), collapse='')
25 42
 	message("\n",call[[1]],"():")
26 43
 	message(underline)
27 44
 	ptm <- proc.time()
28
-	binned.data <- align2binned(bamfile, format="bam", bamindex=bamindex, pairedEndReads=pairedEndReads, outputfolder=outputfolder, binsizes=binsizes, reads.per.bin=reads.per.bin, numbins=numbins, chromosomes=chromosomes, save.as.RData=save.as.RData, calc.complexity=calc.complexity, min.mapq=min.mapq, remove.duplicate.reads=remove.duplicate.reads, call=call, outputfolder.fragments=outputfolder.fragments, return.fragments=return.fragments)
45
+	binned.data <- align2binned(bamfile, format="bam", ID=ID, bamindex=bamindex, pairedEndReads=pairedEndReads, outputfolder=outputfolder, binsizes=binsizes, reads.per.bin=reads.per.bin, numbins=numbins, chromosomes=chromosomes, save.as.RData=save.as.RData, calc.complexity=calc.complexity, min.mapq=min.mapq, remove.duplicate.reads=remove.duplicate.reads, call=call, outputfolder.fragments=outputfolder.fragments, return.fragments=return.fragments)
29 46
 	time <- proc.time() - ptm
30 47
 	message("Time spent in ", call[[1]],"(): ",round(time[3],2),"s")
31 48
 	return(binned.data)
... ...
@@ -35,13 +52,13 @@ bam2binned <- function(bamfile, bamindex=bamfile, pairedEndReads=FALSE, outputfo
35 52
 #' @inheritParams align2binned
36 53
 #' @param bedfile A file in BED format.
37 54
 #' @export
38
-bed2binned <- function(bedfile, chrom.length.file, outputfolder="binned_data", binsizes=NULL, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, outputfolder.fragments=NULL, return.fragments=FALSE) {
55
+bed2binned <- function(bedfile, chrom.length.file, ID=basename(bedfile), outputfolder="binned_data", binsizes=NULL, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, outputfolder.fragments=NULL, return.fragments=FALSE) {
39 56
 	call <- match.call()
40 57
 	underline <- paste0(rep('=',sum(nchar(call[[1]]))+3), collapse='')
41 58
 	message("\n",call[[1]],"():")
42 59
 	message(underline)
43 60
 	ptm <- proc.time()
44
-	binned.data <- align2binned(bedfile, format="bed", chrom.length.file=chrom.length.file, outputfolder=outputfolder, binsizes=binsizes, reads.per.bin=reads.per.bin, numbins=numbins, chromosomes=chromosomes, save.as.RData=save.as.RData, calc.complexity=calc.complexity, min.mapq=min.mapq, remove.duplicate.reads=remove.duplicate.reads, call=call, outputfolder.fragments=outputfolder.fragments, return.fragments=return.fragments)
61
+	binned.data <- align2binned(bedfile, format="bed", ID=ID, chrom.length.file=chrom.length.file, outputfolder=outputfolder, binsizes=binsizes, reads.per.bin=reads.per.bin, numbins=numbins, chromosomes=chromosomes, save.as.RData=save.as.RData, calc.complexity=calc.complexity, min.mapq=min.mapq, remove.duplicate.reads=remove.duplicate.reads, call=call, outputfolder.fragments=outputfolder.fragments, return.fragments=return.fragments)
45 62
 	time <- proc.time() - ptm
46 63
 	message("Time spent in ", call[[1]],"(): ",round(time[3],2),"s")
47 64
 	return(binned.data)
... ...
@@ -51,13 +68,13 @@ bed2binned <- function(bedfile, chrom.length.file, outputfolder="binned_data", b
51 68
 #' @inheritParams align2binned
52 69
 #' @param bedGraphfile A file in bedGraph format.
53 70
 #' @export
54
-bedGraph2binned <- function(bedGraphfile, chrom.length.file, outputfolder="binned_data", binsizes=NULL, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, outputfolder.fragments=NULL, return.fragments=FALSE) {
71
+bedGraph2binned <- function(bedGraphfile, chrom.length.file, ID=basename(bedGraphfile), outputfolder="binned_data", binsizes=NULL, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, outputfolder.fragments=NULL, return.fragments=FALSE) {
55 72
 	call <- match.call()
56 73
 	underline <- paste0(rep('=',sum(nchar(call[[1]]))+3), collapse='')
57 74
 	message("\n",call[[1]],"():")
58 75
 	message(underline)
59 76
 	ptm <- proc.time()
60
-	binned.data <- align2binned(bedGraphfile, format="bedGraph", chrom.length.file=chrom.length.file, outputfolder=outputfolder, binsizes=binsizes, reads.per.bin=reads.per.bin, numbins=numbins, chromosomes=chromosomes, save.as.RData=save.as.RData, calc.complexity=calc.complexity, min.mapq=min.mapq, remove.duplicate.reads=remove.duplicate.reads, call=call, outputfolder.fragments=outputfolder.fragments, return.fragments=return.fragments)
77
+	binned.data <- align2binned(bedGraphfile, format="bedGraph", ID=ID, chrom.length.file=chrom.length.file, outputfolder=outputfolder, binsizes=binsizes, reads.per.bin=reads.per.bin, numbins=numbins, chromosomes=chromosomes, save.as.RData=save.as.RData, calc.complexity=calc.complexity, min.mapq=min.mapq, remove.duplicate.reads=remove.duplicate.reads, call=call, outputfolder.fragments=outputfolder.fragments, return.fragments=return.fragments)
61 78
 	time <- proc.time() - ptm
62 79
 	message("Time spent in ", call[[1]],"(): ",round(time[3],2),"s")
63 80
 	return(binned.data)
... ...
@@ -69,6 +86,7 @@ bedGraph2binned <- function(bedGraphfile, chrom.length.file, outputfolder="binne
69 86
 #'
70 87
 #' @param file A file with aligned reads.
71 88
 #' @param format One of \code{c('bam', 'bed', 'bedGraph')}.
89
+#' @param ID An identifier that will be used to identify the file throughout the workflow and in plotting.
72 90
 #' @param bamindex Index file if \code{format='bam'} with or without the .bai ending. If this file does not exist it will be created and a warning is issued.
73 91
 #' @param pairedEndReads Set to \code{TRUE} if you have paired-end reads in your file.
74 92
 #' @param chrom.length.file A file which contains the chromosome lengths in basepairs. The first column contains the chromosome name and the second column the length (see also \code{\link{chrom.length.file}}.
... ...
@@ -88,7 +106,7 @@ bedGraph2binned <- function(bedGraphfile, chrom.length.file, outputfolder="binne
88 106
 #' @importFrom Rsamtools indexBam scanBamHeader ScanBamParam scanBamFlag
89 107
 #' @importFrom GenomicAlignments readGAlignmentPairsFromBam readGAlignmentsFromBam first
90 108
 #' @import preseqR
91
-align2binned <- function(file, format, bamindex=file, pairedEndReads=FALSE, chrom.length.file, outputfolder="binned_data", binsizes=200000, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, call=match.call(), outputfolder.fragments=NULL, return.fragments=FALSE) {
109
+align2binned <- function(file, format, ID=basename(file), bamindex=file, pairedEndReads=FALSE, chrom.length.file, outputfolder="binned_data", binsizes=200000, reads.per.bin=NULL, numbins=NULL, chromosomes=NULL, save.as.RData=FALSE, calc.complexity=TRUE, min.mapq=10, remove.duplicate.reads=TRUE, call=match.call(), outputfolder.fragments=NULL, return.fragments=FALSE) {
92 110
 
93 111
 	## Check user input
94 112
 	if (is.null(binsizes) & is.null(reads.per.bin)) {
... ...
@@ -433,6 +451,9 @@ align2binned <- function(file, format, bamindex=file, pairedEndReads=FALSE, chro
433 451
 		## Shannon entropy
434 452
 		attr(binned.data, 'shannon.entropy') <- qc.entropy(binned.data$reads)
435 453
 
454
+		### ID ###
455
+		attr(binned.data, 'ID') <- ID
456
+
436 457
 		### Save or return the binned data ###
437 458
 		if (save.as.RData==TRUE) {
438 459
 			# Save to file
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 check.positive.integer = function(testvar) {
2 19
 	if (!is(testvar,"numeric") & !is(testvar,"integer")) return(1)
3 20
 	if (length(testvar)>1) return(2)
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Collapse consecutive bins
2 19
 #'
3 20
 #' The function will collapse consecutive bins which have, for example, the same CNV-state.
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Read bed-file into GRanges
2 19
 #'
3 20
 #' This is a simple convenience function to read a (compressed) bed-file into a \code{\link{GRanges}} object. The bed-file is expected to have the following fields: \code{chrom, chromStart, chromEnd, name, score, strand}.
... ...
@@ -1,16 +1,34 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' GC correction
2 19
 #'
3 20
 #' Correct a list of \code{\link{binned.data}} by GC content
4 21
 #'
5 22
 #' @param binned.data.list A \code{list()} with \code{\link{binned.data}} objects or a list of filenames containing such objects.
6
-#' @param bsgenome A \code{BSgenome} object which contains the DNA sequence that is used for the GC correction.
23
+#' @param GC.bsgenome A \code{BSgenome} object which contains the DNA sequence that is used for the GC correction.
7 24
 #' @param same.GC.content If \code{TRUE} the GC content will only be calculated once. Set this to \code{TRUE} if all \code{\link{binned.data}} objects describe the same genome at the same binsize.
8 25
 #' @author Aaron Taudt
9 26
 #' @importFrom Biostrings Views alphabetFrequency
10 27
 #' @export
11
-correctGC <- function(binned.data.list, bsgenome, same.GC.content=FALSE) {
28
+correctGC <- function(binned.data.list, GC.bsgenome, same.GC.content=FALSE) {
12 29
 
13 30
 	binned.data.list <- loadBinnedFromFiles(binned.data.list)
31
+	same.GC.calculated <- FALSE
14 32
 	for (i1 in 1:length(binned.data.list)) {
15 33
 		binned.data <- binned.data.list[[i1]]
16 34
 
... ...
@@ -22,13 +40,15 @@ correctGC <- function(binned.data.list, bsgenome, same.GC.content=FALSE) {
22 40
 		chroms[mask] <- paste0('chr',chroms[mask])
23 41
 		names(chromlengths) <- chroms
24 42
 		# Compare
25
-		compare <- chromlengths[chroms] == seqlengths(bsgenome)[chroms]
43
+		compare <- chromlengths[chroms] == seqlengths(GC.bsgenome)[chroms]
26 44
 		if (any(compare==FALSE, na.rm=T)) {
27
-			stop("Chromosome lengths differ between binned data and 'bsgenome'. Use the correct genome for option 'bsgenome'")
45
+			warning(paste0(attr(binned.data,'ID'),": Chromosome lengths differ between binned data and 'GC.bsgenome'. GC correction skipped. Please use the correct genome for option 'GC.bsgenome'"))
46
+			binned.data.list[[i1]] <- binned.data
47
+			next
28 48
 		}
29 49
 
30 50
 		## Calculate GC content per bin
31
-		if (same.GC.content & i1==1 | !same.GC.content) {
51
+		if (same.GC.content & !same.GC.calculated | !same.GC.content) {
32 52
 			message("Calculating GC content per bin ...", appendLF=F); ptm <- proc.time()
33 53
 			GC.content <- list()
34 54
 			for (chrom in seqlevels(binned.data)) {
... ...
@@ -37,7 +57,7 @@ correctGC <- function(binned.data.list, bsgenome, same.GC.content=FALSE) {
37 57
 				} else {
38 58
 					chr <- chrom
39 59
 				}
40
-				view <- Biostrings::Views(bsgenome[[chr]], ranges(binned.data)[seqnames(binned.data)==chrom])
60
+				view <- Biostrings::Views(GC.bsgenome[[chr]], ranges(binned.data)[seqnames(binned.data)==chrom])
41 61
 				freq <- Biostrings::alphabetFrequency(view, as.prob = T, baseOnly=T)
42 62
 				if (nrow(freq) > 1) {
43 63
 					GC.content[[as.character(chrom)]] <- rowSums(freq[, c("G","C")])
... ...
@@ -46,6 +66,7 @@ correctGC <- function(binned.data.list, bsgenome, same.GC.content=FALSE) {
46 66
 				}
47 67
 			}
48 68
 			GC.content <- unlist(GC.content)
69
+			same.GC.calculated <- TRUE
49 70
 			time <- proc.time() - ptm; message(" ",round(time[3],2),"s")
50 71
 		}
51 72
 		binned.data$GC <- GC.content
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Binned read counts
2 19
 #'
3 20
 #' A \link{GRanges} object which contains binned read counts as meta data column \code{reads}. It is output of the various \link{binning} functions.#' @name binned.data
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Export genome browser viewable files
2 19
 #'
3 20
 #' Export copy-number-variation state or read counts as genome browser viewable file
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Find copy number variations
2 19
 #'
3 20
 #' \code{findCNVs} classifies the binned read counts into several states which represent copy-number-variation.
... ...
@@ -12,11 +29,11 @@
12 29
 #'## Bin the BAM file into bin size 200000bp
13 30
 #'binned.data <- bam2binned(bamfile, binsize=200000, chromosomes=c(1:22,'X','Y'), save.as.RData=FALSE)
14 31
 #'## Fit the Hidden Markov Model
15
-#'model <- findCNVs(binned.data, ID=basename(bamfile), eps=0.1, max.time=60)
32
+#'model <- findCNVs(binned.data, eps=0.1, max.time=60)
16 33
 #'## Check the fit
17 34
 #'plot(model, type='histogram')
18 35
 #' @export
19
-findCNVs <- function(binned.data, ID, method='univariate', eps=0.001, init="standard", max.time=-1, max.iter=-1, num.trials=15, eps.try=10*eps, num.threads=1, read.cutoff.quantile=0.999, strand='*', states=c("zero-inflation","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="disomy") {
36
+findCNVs <- function(binned.data, ID=NULL, method='univariate', eps=0.1, init="standard", max.time=-1, max.iter=1000, num.trials=15, eps.try=10*eps, num.threads=1, read.cutoff.quantile=0.999, strand='*', states=c("zero-inflation","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="disomy") {
20 37
 
21 38
 	call <- match.call()
22 39
 	underline <- paste0(rep('=',sum(nchar(call[[1]]))+3), collapse='')
... ...
@@ -59,17 +76,19 @@ findCNVs <- function(binned.data, ID, method='univariate', eps=0.001, init="stan
59 76
 #' @param states A subset or all of \code{c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy")}. This vector defines the states that are used in the Hidden Markov Model. The order of the entries should not be changed.
60 77
 #' @param most.frequent.state One of the states that were given in \code{states} or 'none'. The specified state is assumed to be the most frequent one. This can help the fitting procedure to converge into the correct fit. If set to 'none', no state is assumed to be most frequent.
61 78
 #' @return An \code{\link{aneuHMM}} object.
62
-univariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, read.cutoff.quantile=0.999, strand='*', states=c("zero-inflation","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="disomy") {
79
+univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, read.cutoff.quantile=0.999, strand='*', states=c("zero-inflation","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="disomy") {
63 80
 
64 81
 	### Define cleanup behaviour ###
65 82
 	on.exit(.C("R_univariate_cleanup"))
66 83
 
67 84
 	## Intercept user input
68
-	IDcheck <- ID  #trigger error if not defined
69 85
 	if (class(binned.data) != 'GRanges') {
70 86
 		binned.data <- get(load(binned.data))
71 87
 		if (class(binned.data) != 'GRanges') stop("argument 'binned.data' expects a GRanges with meta-column 'reads' or a file that contains such an object")
72 88
 	}
89
+	if (is.null(ID)) {
90
+		ID <- attr(binned.data, 'ID')
91
+	}
73 92
 	if (check.positive(eps)!=0) stop("argument 'eps' expects a positive numeric")
74 93
 	if (check.integer(max.time)!=0) stop("argument 'max.time' expects an integer")
75 94
 	if (check.integer(max.iter)!=0) stop("argument 'max.iter' expects an integer")
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Find sister chromatid exchanges
2 19
 #'
3 20
 #' \code{findSCEs} classifies the binned read counts into several states which represent the number of chromatids on each strand.
... ...
@@ -17,7 +34,7 @@
17 34
 #'## Check the fit
18 35
 #'plot(model, type='histogram')
19 36
 #' @export
20
-findSCEs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, max.iter=-1, num.trials=15, eps.try=10*eps, num.threads=1, read.cutoff.quantile=0.999, strand='*', allow.odd.states=TRUE, states=c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="monosomy") {
37
+findSCEs <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1, max.iter=1000, num.trials=15, eps.try=10*eps, num.threads=1, read.cutoff.quantile=0.999, strand='*', allow.odd.states=TRUE, states=c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="monosomy") {
21 38
 
22 39
 	call <- match.call()
23 40
 	underline <- paste0(rep('=',sum(nchar(call[[1]]))+3), collapse='')
... ...
@@ -60,17 +77,19 @@ findSCEs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
60 77
 #' @param states A subset or all of \code{c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy")}. This vector defines the states that are used in the Hidden Markov Model. The order of the entries should not be changed.
61 78
 #' @param most.frequent.state One of the states that were given in \code{states} or 'none'. The specified state is assumed to be the most frequent one. This can help the fitting procedure to converge into the correct fit. If set to 'none', no state is assumed to be most frequent.
62 79
 #' @return An \code{\link{aneuHMM}} object.
63
-univariate.findSCEs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, read.cutoff.quantile=0.999, strand='*', states=c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="monosomy") {
80
+univariate.findSCEs <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, read.cutoff.quantile=0.999, strand='*', states=c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="monosomy") {
64 81
 
65 82
 	### Define cleanup behaviour ###
66 83
 	on.exit(.C("R_univariate_cleanup"))
67 84
 
68 85
 	## Intercept user input
69
-	IDcheck <- ID  #trigger error if not defined
70 86
 	if (class(binned.data) != 'GRanges') {
71 87
 		binned.data <- get(load(binned.data))
72 88
 		if (class(binned.data) != 'GRanges') stop("argument 'binned.data' expects a GRanges with meta-column 'reads' or a file that contains such an object")
73 89
 	}
90
+	if (is.null(ID)) {
91
+		ID <- attr(binned.data, 'ID')
92
+	}
74 93
 	if (check.positive(eps)!=0) stop("argument 'eps' expects a positive numeric")
75 94
 	if (check.integer(max.time)!=0) stop("argument 'max.time' expects an integer")
76 95
 	if (check.integer(max.iter)!=0) stop("argument 'max.iter' expects an integer")
... ...
@@ -388,7 +407,7 @@ univariate.findSCEs <- function(binned.data, ID, eps=0.001, init="standard", max
388 407
 #'
389 408
 #' @inheritParams univariate.findSCEs
390 409
 #' @param allow.odd.states If set to \code{TRUE}, all states are allowed. If \code{FALSE}, only states which have an even multiplicity (plus nullsomy-monosomy states) are allowed, e.g. disomy-disomy, monosomy-trisomy.
391
-bivariate.findSCEs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, read.cutoff.quantile=0.999, allow.odd.states=TRUE, states=c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="monosomy") {
410
+bivariate.findSCEs <- function(binned.data, ID, eps=0.1, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, read.cutoff.quantile=0.999, allow.odd.states=TRUE, states=c("zero-inflation","nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy"), most.frequent.state="monosomy") {
392 411
 
393 412
 	## Intercept user input
394 413
 	IDcheck <- ID  #trigger error if not defined
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Chromosome lengths for different assemblies
2 19
 #'
3 20
 #' Get the chromosome lengths of different common assemblies. Only the standard chromosomes (no chrN_random) will be returned.
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 # =================================================================
2 19
 # Extraction of segments and clustering
3 20
 # =================================================================
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' @useDynLib aneufinder
2 19
 #' @import GenomicRanges
3 20
 #' @import IRanges
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 mixedorder = function (x) 
2 19
 {
3 20
     if (length(x) < 1) 
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Initialize state factor levels and distributions
2 19
 #'
3 20
 #' Initialize the state factor levels and distributions for the specified states.
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Measures for Karyotype Heterogeneity
2 19
 #' 
3 20
 #' Computes measures for karyotype heterogeneity. See the Details section for how these measures are defined.
... ...
@@ -5,7 +22,7 @@
5 22
 #' We define \eqn{x} as the vector of copy number states for each position and HMM. The number of HMMs is \eqn{S}.
6 23
 #' \describe{
7 24
 #' \item{Divergence from disomic:}{\eqn{D = sqrt(mean((x-2)^2))}}
8
-#' \item{Simple karyotype heterogeneity:}{\eqn{H = table(x) * 0:(length(table(x))-1)}
25
+#' \item{Simple karyotype heterogeneity:}{\eqn{H = table(x) * 0:(length(table(x))-1)}}
9 26
 #' \item{Entropic karyotype heterogeneity:}{\eqn{H = S*log(S) - S + sum(table(x)-table(x)*log(table(x)))}}
10 27
 #' }
11 28
 #'
... ...
@@ -74,7 +91,7 @@ karyotypeMeasures <- function(hmms) {
74 91
 	consensus$simpleHeterogeneity <- unlist(lapply(tabs, function(x) { sum(x * 0:(length(x)-1)) })) / S
75 92
 	consensus$entropicHeterogeneity <- S*log(S) - S + unlist(lapply(tabs, function(x) { sum(x-x*log(x)) }))
76 93
 	weights <- as.numeric(width(consensus))
77
-	result[['genomwide']] <- data.frame(divergenceFromDisomic = weighted.mean(consensus$divergenceFromDisomic, weights),
94
+	result[['genomewide']] <- data.frame(divergenceFromDisomic = weighted.mean(consensus$divergenceFromDisomic, weights),
78 95
 																			simpleHeterogeneity = weighted.mean(consensus$simpleHeterogeneity, weights),
79 96
 																			entropicHeterogeneity = weighted.mean(consensus$entropicHeterogeneity, weights))
80 97
 	## Chromosomes
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Load HMMs from files
2 19
 #'
3 20
 #' Load \code{\link{aneuHMM}} objects from file into a list.
4 21
new file mode 100644
... ...
@@ -0,0 +1,32 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
18
+#' Aneufinder configuration
19
+#'
20
+#' Make a list of configuration parameters for the \code{\link{Aneufinder}} function.
21
+#'
22
+#' @param General General options.
23
+#' @param Binning Parameters for binning. See \code{\link{binning}} for which options are available.
24
+#' @param Correction Parameters for correction methods.
25
+#' @param CNV Parameters for CNV detection. See \code{\link{findCNVs}} for which options are available.
26
+#' @param SCE Parameters for SCE detection. See \code{\link{findSCEs}} for which options are available.
27
+#' @param Plotting Plotting parameters.
28
+#' @author Aaron Taudt
29
+#' @export
30
+makeConfig <- function(General=list(reuse.existing.files=TRUE, numCPU=1), Binning=list(format='bam', binsizes=500000), Correction=list(), CNV=list(findCNVs=TRUE), SCE=list(findSCEs=FALSE), Plotting=list()) {
31
+	return(list(General=General, Binning=Binning, Correction=Correction, CNV=CNV, SCE=SCE, Plotting=Plotting))
32
+}
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' @import ggplot2
2 19
 #' @import reshape2
3 20
 #' @import grid
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Quality control measures for binned read counts
2 19
 #'
3 20
 #' Calculate various quality control measures on binned read counts.
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' Read aneufinder configuration file
2 19
 #'
3 20
 #' Read an aneufinder configuration file into a list structure. The configuration file has to be specified in INI format. R expressions can be used and will be evaluated.
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 .onAttach <- function(...) {
2 19
 
3 20
 	packageStartupMessage('Loading aneufinder 0.9.2')
... ...
@@ -1,3 +1,20 @@
1
+# aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+# Copyright (C) 2015  Aaron Taudt
3
+# 
4
+# This program is free software: you can redistribute it and/or modify
5
+# it under the terms of the GNU General Public License as published by
6
+# the Free Software Foundation, either version 3 of the License, or
7
+# (at your option) any later version.
8
+# 
9
+# This program is distributed in the hope that it will be useful,
10
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+# GNU General Public License for more details.
13
+# 
14
+# You should have received a copy of the GNU General Public License
15
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
1 18
 #' The Zero-inflated Negative Binomial Distribution
2 19
 #'
3 20
 #' Density, distribution function, quantile function and random
... ...
@@ -1,3 +1,13 @@
1 1
 !!!THIS PROJECT IS CURRENTLY BEING DEVELOPED!!!
2 2
 
3
-It has not been tested so far and is likely to produce no or incorrect results.
4 3
\ No newline at end of file
4
+The easiest way to install it is currently the following:
5
+1) Install a recent version of R from https://www.r-project.org/
6
+2) Optional: For ease of use, install Rstudio from https://www.rstudio.com/
7
+3) Open R and install all dependencies
8
+	source("http://bioconductor.org/biocLite.R")
9
+	biocLite("GenomicRanges")
10
+	biocLite("GenomicAlignments")
11
+	biocLite("Rsamtools")
12
+	install.packages("devtools")
13
+	install_github("chakalakka/aneufinder")
14
+
... ...
@@ -14,8 +14,8 @@ min.mapq = 10
14 14
 
15 15
 [Correction]
16 16
 method = 'GC'
17
-# GC.bsgenome = 'BSgenome.Hsapiens.UCSC.hg19'
18
-GC.bsgenome = 'BSgenome.Mmusculus.UCSC.mm10'
17
+GC.bsgenome = 'BSgenome.Hsapiens.UCSC.hg19'
18
+# GC.bsgenome = 'BSgenome.Mmusculus.UCSC.mm10'
19 19
 
20 20
 [CNV]
21 21
 findCNVs = TRUE
22 22
new file mode 100644
... ...
@@ -0,0 +1,22 @@
1
+% Generated by roxygen2 (4.1.1): do not edit by hand
2
+% Please edit documentation in R/Aneufinder.R
3
+\name{Aneufinder}
4
+\alias{Aneufinder}
5
+\title{Wrapper function for the aneufinder package}
6
+\usage{
7
+Aneufinder(inputfolder, outputfolder, config = makeConfig())
8
+}
9
+\arguments{
10
+\item{inputfolder}{Folder with either BAM or BED files.}
11
+
12
+\item{outputfolder}{Folder to output the results. If it does not exist it will be created.}
13
+
14
+\item{config}{A file with parameters that are used by the various subfunctions. Alternatively a list with parameters created by \code{\link{makeConfig}}.}
15
+}
16
+\description{
17
+This function is an easy-to-use wrapper to \link[aneufinder:binning]{bin the data}, \link[aneufinder:findCNVs]{find copy-number-variations}, \link[aneufinder:findSCEs]{find sister-chromatid-exchange} events and plot the results.
18
+}
19
+\author{
20
+Aaron Taudt
21
+}
22
+
... ...
@@ -4,11 +4,11 @@
4 4
 \alias{align2binned}
5 5
 \title{Convert aligned reads from various file formats into read counts in equidistant bins}
6 6
 \usage{
7
-align2binned(file, format, bamindex = file, pairedEndReads = FALSE,
8
-  chrom.length.file, outputfolder = "binned_data", binsizes = 2e+05,
9
-  reads.per.bin = NULL, numbins = NULL, chromosomes = NULL,
10
-  save.as.RData = FALSE, calc.complexity = TRUE, min.mapq = 10,
11
-  remove.duplicate.reads = TRUE, call = match.call(),
7
+align2binned(file, format, ID = basename(file), bamindex = file,
8
+  pairedEndReads = FALSE, chrom.length.file, outputfolder = "binned_data",
9
+  binsizes = 2e+05, reads.per.bin = NULL, numbins = NULL,
10
+  chromosomes = NULL, save.as.RData = FALSE, calc.complexity = TRUE,
11
+  min.mapq = 10, remove.duplicate.reads = TRUE, call = match.call(),
12 12
   outputfolder.fragments = NULL, return.fragments = FALSE)
13 13
 }
14 14
 \arguments{
... ...
@@ -16,6 +16,8 @@ align2binned(file, format, bamindex = file, pairedEndReads = FALSE,
16 16
 
17 17
 \item{format}{One of \code{c('bam', 'bed', 'bedGraph')}.}
18 18
 
19
+\item{ID}{An identifier that will be used to identify the file throughout the workflow and in plotting.}
20
+
19 21
 \item{bamindex}{Index file if \code{format='bam'} with or without the .bai ending. If this file does not exist it will be created and a warning is issued.}
20 22
 
21 23
 \item{pairedEndReads}{Set to \code{TRUE} if you have paired-end reads in your file.}
... ...
@@ -7,22 +7,23 @@
7 7
 \alias{binning}
8 8
 \title{Convert aligned reads from various file formats into read counts in equidistant bins}
9 9
 \usage{
10
-bam2binned(bamfile, bamindex = bamfile, pairedEndReads = FALSE,
10
+bam2binned(bamfile, bamindex = bamfile, ID = basename(bamfile),
11
+  pairedEndReads = FALSE, outputfolder = "binned_data", binsizes = NULL,
12
+  reads.per.bin = NULL, numbins = NULL, chromosomes = NULL,
13
+  save.as.RData = FALSE, calc.complexity = TRUE, min.mapq = 10,
14
+  remove.duplicate.reads = TRUE, outputfolder.fragments = NULL,
15
+  return.fragments = FALSE)
16
+
17
+bed2binned(bedfile, chrom.length.file, ID = basename(bedfile),
11 18
   outputfolder = "binned_data", binsizes = NULL, reads.per.bin = NULL,
12 19
   numbins = NULL, chromosomes = NULL, save.as.RData = FALSE,
13 20
   calc.complexity = TRUE, min.mapq = 10, remove.duplicate.reads = TRUE,
14 21
   outputfolder.fragments = NULL, return.fragments = FALSE)
15 22
 
16
-bed2binned(bedfile, chrom.length.file, outputfolder = "binned_data",
17
-  binsizes = NULL, reads.per.bin = NULL, numbins = NULL,
18
-  chromosomes = NULL, save.as.RData = FALSE, calc.complexity = TRUE,
19
-  min.mapq = 10, remove.duplicate.reads = TRUE,
20
-  outputfolder.fragments = NULL, return.fragments = FALSE)
21
-
22
-bedGraph2binned(bedGraphfile, chrom.length.file, outputfolder = "binned_data",
23
-  binsizes = NULL, reads.per.bin = NULL, numbins = NULL,
24
-  chromosomes = NULL, save.as.RData = FALSE, calc.complexity = TRUE,
25
-  min.mapq = 10, remove.duplicate.reads = TRUE,
23
+bedGraph2binned(bedGraphfile, chrom.length.file, ID = basename(bedGraphfile),
24
+  outputfolder = "binned_data", binsizes = NULL, reads.per.bin = NULL,
25
+  numbins = NULL, chromosomes = NULL, save.as.RData = FALSE,
26
+  calc.complexity = TRUE, min.mapq = 10, remove.duplicate.reads = TRUE,
26 27
   outputfolder.fragments = NULL, return.fragments = FALSE)
27 28
 }
28 29
 \arguments{
... ...
@@ -30,6 +31,8 @@ bedGraph2binned(bedGraphfile, chrom.length.file, outputfolder = "binned_data",
30 31
 
31 32
 \item{bamindex}{BAM index file. Can be specified without the .bai ending. If the index file does not exist it will be created and a warning is issued.}
32 33
 
34
+\item{ID}{An identifier that will be used to identify the file throughout the workflow and in plotting.}
35
+
33 36
 \item{pairedEndReads}{Set to \code{TRUE} if you have paired-end reads in your file.}
34 37
 
35 38
 \item{outputfolder}{Folder to which the binned data will be saved. If the specified folder does not exist, it will be created.}
... ...
@@ -4,12 +4,12 @@
4 4
 \alias{correctGC}
5 5
 \title{GC correction}
6 6
 \usage{
7
-correctGC(binned.data.list, bsgenome, same.GC.content = FALSE)
7
+correctGC(binned.data.list, GC.bsgenome, same.GC.content = FALSE)
8 8
 }
9 9
 \arguments{
10 10
 \item{binned.data.list}{A \code{list()} with \code{\link{binned.data}} objects or a list of filenames containing such objects.}
11 11
 
12
-\item{bsgenome}{A \code{BSgenome} object which contains the DNA sequence that is used for the GC correction.}
12
+\item{GC.bsgenome}{A \code{BSgenome} object which contains the DNA sequence that is used for the GC correction.}
13 13
 
14 14
 \item{same.GC.content}{If \code{TRUE} the GC content will only be calculated once. Set this to \code{TRUE} if all \code{\link{binned.data}} objects describe the same genome at the same binsize.}
15 15
 }
16 16
deleted file mode 100644
... ...
@@ -1,22 +0,0 @@
1
-% Generated by roxygen2 (4.1.1): do not edit by hand
2
-% Please edit documentation in R/easyAneufinder.R
3
-\name{easyAneufinder}
4
-\alias{easyAneufinder}
5
-\title{Wrapper function for the aneufinder package}
6
-\usage{
7
-easyAneufinder(inputfolder, configfile, outputfolder)
8
-}
9
-\arguments{
10
-\item{inputfolder}{Folder with BAM files.}
11
-
12
-\item{configfile}{A file with parameters that are used by the various functions.}
13
-
14
-\item{outputfolder}{Folder to put the results. If it does not exist it will be created.}
15
-}
16
-\description{
17
-This function is an easy-to-use wrapper to \link[aneufinder:binning]{bin the data}, \link[aneufinder:findCNVs]{find copy-number-variations}, \link[aneufinder:findSCEs]{find sister-chromatid-exchange} events and plot the results.
18
-}
19
-\author{
20
-Aaron Taudt
21
-}
22
-
... ...
@@ -4,8 +4,8 @@
4 4
 \alias{findCNVs}
5 5
 \title{Find copy number variations}
6 6
 \usage{
7
-findCNVs(binned.data, ID, method = "univariate", eps = 0.001,
8
-  init = "standard", max.time = -1, max.iter = -1, num.trials = 10,
7
+findCNVs(binned.data, ID = NULL, method = "univariate", eps = 0.001,
8
+  init = "standard", max.time = -1, max.iter = -1, num.trials = 15,
9 9
   eps.try = 10 * eps, num.threads = 1, read.cutoff.quantile = 0.999,
10 10
   strand = "*", states = c("zero-inflation", "monosomy", "disomy",
11 11
   "trisomy", "tetrasomy", "multisomy"), most.frequent.state = "disomy")
... ...
@@ -55,7 +55,7 @@ bamfile <- system.file("extdata/BB140820_I_002.bam", package="aneufinder")
55 55
 ## Bin the BAM file into bin size 200000bp
56 56
 binned.data <- bam2binned(bamfile, binsize=200000, chromosomes=c(1:22,'X','Y'), save.as.RData=FALSE)
57 57
 ## Fit the Hidden Markov Model
58
-model <- findCNVs(binned.data, ID=basename(bamfile), eps=0.1, max.time=60)
58
+model <- findCNVs(binned.data, eps=0.1, max.time=60)
59 59
 ## Check the fit
60 60
 plot(model, type='histogram')
61 61
 }
... ...
@@ -4,11 +4,12 @@
4 4
 \alias{findSCEs}
5 5
 \title{Find sister chromatid exchanges}
6 6
 \usage{
7
-findSCEs(binned.data, ID, eps = 0.001, init = "standard", max.time = -1,
8
-  max.iter = -1, num.trials = 10, eps.try = 10 * eps, num.threads = 1,
9
-  read.cutoff.quantile = 0.999, strand = "*", allow.odd.states = TRUE,
10
-  states = c("zero-inflation", "nullsomy", "monosomy", "disomy", "trisomy",
11
-  "tetrasomy", "multisomy"), most.frequent.state = "monosomy")
7
+findSCEs(binned.data, ID = NULL, eps = 0.001, init = "standard",
8
+  max.time = -1, max.iter = -1, num.trials = 15, eps.try = 10 * eps,
9
+  num.threads = 1, read.cutoff.quantile = 0.999, strand = "*",
10
+  allow.odd.states = TRUE, states = c("zero-inflation", "nullsomy",
11
+  "monosomy", "disomy", "trisomy", "tetrasomy", "multisomy"),
12
+  most.frequent.state = "monosomy")
12 13
 }
13 14
 \arguments{
14 15
 \item{binned.data}{A \link{GRanges} object with binned read counts.}
... ...
@@ -13,9 +13,10 @@ karyotypeMeasures(hmms)
13 13
 Computes measures for karyotype heterogeneity. See the Details section for how these measures are defined.
14 14
 }
15 15
 \details{
16
-We define \eqn{x} as the vector of copy number states for each chromosome and HMM. The number of HMMs is \eqn{S}.
16
+We define \eqn{x} as the vector of copy number states for each position and HMM. The number of HMMs is \eqn{S}.
17 17
 \describe{
18 18
 \item{Divergence from disomic:}{\eqn{D = sqrt(mean((x-2)^2))}}
19
+\item{Simple karyotype heterogeneity:}{\eqn{H = table(x) * 0:(length(table(x))-1)}}
19 20
 \item{Entropic karyotype heterogeneity:}{\eqn{H = S*log(S) - S + sum(table(x)-table(x)*log(table(x)))}}
20 21
 }
21 22
 }
22 23
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+% Generated by roxygen2 (4.1.1): do not edit by hand
2
+% Please edit documentation in R/makeConfig.R
3
+\name{makeConfig}
4
+\alias{makeConfig}
5
+\title{Aneufinder configuration}
6
+\usage{
7
+makeConfig(General = list(reuse.existing.files = TRUE, numCPU = 1),
8
+  Binning = list(binsizes = 5e+05), Correction = list(),
9
+  CNV = list(findCNVs = TRUE), SCE = list(findSCEs = FALSE),
10
+  Plotting = list())
11
+}
12
+\arguments{
13
+\item{General}{General options.}
14
+
15
+\item{Binning}{Parameters for binning. See \code{\link{binning}} for which options are available.}
16
+
17
+\item{Correction}{Parameters for correction methods.}
18
+
19
+\item{CNV}{Parameters for CNV detection. See \code{\link{findCNVs}} for which options are available.}
20
+
21
+\item{SCE}{Parameters for SCE detection. See \code{\link{findSCEs}} for which options are available.}
22
+
23
+\item{Plotting}{Plotting parameters.}
24
+}
25
+\description{
26
+Make a list of configuration parameters for the \code{\link{Aneufinder}} function.
27
+}
28
+\author{
29
+Aaron Taudt
30
+}
31
+
... ...
@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2 (4.1.1): do not edit by hand
2
-% Please edit documentation in R/global.R
2
+% Please edit documentation in R/plotting.R
3 3
 \name{stateColors}
4 4
 \alias{stateColors}
5 5
 \title{aneufinder color scheme}
... ...
@@ -4,11 +4,11 @@
4 4
 \alias{univariate.findCNVs}
5 5
 \title{Find copy number variations (univariate)}
6 6
 \usage{
7
-univariate.findCNVs(binned.data, ID, eps = 0.001, init = "standard",
8
-  max.time = -1, max.iter = -1, num.trials = 1, eps.try = NULL,
9
-  num.threads = 1, read.cutoff.quantile = 0.999, strand = "*",
10
-  states = c("zero-inflation", "monosomy", "disomy", "trisomy", "tetrasomy",
11
-  "multisomy"), most.frequent.state = "disomy")
7
+univariate.findCNVs(binned.data, ID = NULL, eps = 0.001,
8
+  init = "standard", max.time = -1, max.iter = -1, num.trials = 1,
9
+  eps.try = NULL, num.threads = 1, read.cutoff.quantile = 0.999,
10
+  strand = "*", states = c("zero-inflation", "monosomy", "disomy",
11
+  "trisomy", "tetrasomy", "multisomy"), most.frequent.state = "disomy")
12 12
 }
13 13
 \arguments{
14 14
 \item{binned.data}{A \link{GRanges} object with binned read counts.}
... ...
@@ -4,11 +4,12 @@
4 4
 \alias{univariate.findSCEs}
5 5
 \title{Find copy number variations (univariate)}
6 6
 \usage{
7
-univariate.findSCEs(binned.data, ID, eps = 0.001, init = "standard",
8
-  max.time = -1, max.iter = -1, num.trials = 1, eps.try = NULL,
9
-  num.threads = 1, read.cutoff.quantile = 0.999, strand = "*",
10
-  states = c("zero-inflation", "nullsomy", "monosomy", "disomy", "trisomy",
11
-  "tetrasomy", "multisomy"), most.frequent.state = "monosomy")
7
+univariate.findSCEs(binned.data, ID = NULL, eps = 0.001,
8
+  init = "standard", max.time = -1, max.iter = -1, num.trials = 1,
9
+  eps.try = NULL, num.threads = 1, read.cutoff.quantile = 0.999,
10
+  strand = "*", states = c("zero-inflation", "nullsomy", "monosomy",