Browse code

first working version

chakalakka authored on 06/05/2014 18:46:07
Showing 25 changed files

... ...
@@ -5,7 +5,9 @@ useDynLib(aneufinder)
5 5
 export(
6 6
 bed2bin,
7 7
 bam2bin,
8
-bedGraph2bin
8
+bedGraph2bin,
9
+
10
+find.aneuploidies
9 11
 )
10 12
 
11 13
 # Import all packages listed as Imports or Depends
12 14
new file mode 100644
... ...
@@ -0,0 +1,151 @@
1
+find.aneuploidies <- function(binned.data, eps=0.001, max.time=-1, max.it=-1, num.trials=1, eps.try=NULL, num.threads=1, output.if.not.converged=FALSE, filter.reads=TRUE) {
2
+
3
+	## Intercept user input
4
+	if (check.positive(eps)!=0) stop("argument 'eps' expects a positive numeric")
5
+	if (check.integer(max.time)!=0) stop("argument 'max.time' expects an integer")
6
+	if (check.integer(max.it)!=0) stop("argument 'max.it' expects an integer")
7
+	if (check.positive.integer(num.trials)!=0) stop("argument 'num.trials' expects a positive integer")
8
+	if (!is.null(eps.try)) {
9
+		if (check.positive(eps.try)!=0) stop("argument 'eps.try' expects a positive numeric")
10
+	}
11
+	if (check.positive.integer(num.threads)!=0) stop("argument 'num.threads' expects a positive integer")
12
+	if (check.logical(output.if.not.converged)!=0) stop("argument 'output.if.not.converged' expects a logical (TRUE or FALSE)")
13
+
14
+
15
+	war <- NULL
16
+	if (is.null(eps.try)) eps.try <- eps
17
+
18
+	coordinatenames <- c("chrom","start","end","reads")
19
+	names(binned.data) <- coordinatenames
20
+
21
+	## Assign variables
22
+	statelabels <- c("unmappable","monosomy","disomy","trisomy","tetrasomy")
23
+	numstates <- length(statelabels)
24
+	numbins <- length(binned.data$reads)
25
+
26
+	# Check if there are reads in the data, otherwise HMM will blow up
27
+	if (!any(binned.data$reads!=0)) {
28
+		stop("All reads in data are zero. No univariate HMM done.")
29
+	}
30
+
31
+	# Filter high reads out, makes HMM faster
32
+	if (filter.reads) {
33
+		limit <- 10*ceiling(var(binned.data$reads))
34
+		mask <- binned.data$reads > limit
35
+		binned.data$reads[mask] <- limit
36
+		numfiltered <- length(which(mask))
37
+		if (numfiltered > 0) {
38
+			warning(paste("There are very high read counts (probably artificial) in your data. Replaced read counts > ",limit," (10*variance) by ",limit," in ",numfiltered," bins. Set option 'filter.reads=FALSE' to disable this filtering.", sep=""))
39
+		}
40
+	}
41
+	
42
+	
43
+	## Call univariate in a for loop to enable multiple trials
44
+	modellist <- list()
45
+	for (i_try in 1:num.trials) {
46
+		cat("\n\nTry ",i_try," of ",num.trials," ------------------------------\n")
47
+		hmm <- .C("R_univariate_hmm",
48
+			reads = as.integer(binned.data$reads), # double* O
49
+			num.bins = as.integer(numbins), # int* T
50
+			num.states = as.integer(numstates), # int* N
51
+			means = double(length=numstates), # double* means
52
+			variances = double(length=numstates), # double* variances
53
+			num.iterations = as.integer(max.it), #  int* maxiter
54
+			time.sec = as.integer(max.time), # double* maxtime
55
+			loglik.delta = as.double(eps.try), # double* eps
56
+			posteriors = double(length=numbins * numstates), # double* posteriors
57
+			A = double(length=numstates*numstates), # double* A
58
+			proba = double(length=numstates), # double* proba
59
+			loglik = double(length=1), # double* loglik
60
+			weights = double(length=numstates), # double* weights
61
+			means.initial = double(length=numstates), # double* initial_means
62
+			variances.initial = double(length=numstates), # double* initial_variances
63
+			A.initial = double(length=numstates*numstates), # double* initial_A
64
+			proba.initial = double(length=numstates), # double* initial_proba
65
+			use.initial.params = as.logical(0), # bool* use_initial_params
66
+			num.threads = as.integer(num.threads) # int* num_threads
67
+		)
68
+
69
+		hmm$eps <- eps.try
70
+		hmm$A <- matrix(hmm$A, ncol=hmm$num.states, byrow=TRUE)
71
+		rownames(hmm$A) <- statelabels
72
+		colnames(hmm$A) <- statelabels
73
+		hmm$distributions <- cbind(mean=hmm$means, variance=hmm$variances, size=fsize(hmm$means,hmm$variances), prob=fprob(hmm$means,hmm$variances))
74
+		rownames(hmm$distributions) <- statelabels
75
+		hmm$A.initial <- matrix(hmm$A.initial, ncol=hmm$num.states, byrow=TRUE)
76
+		rownames(hmm$A.initial) <- statelabels
77
+		colnames(hmm$A.initial) <- statelabels
78
+		hmm$distributions.initial <- cbind(mean=hmm$means.initial, variance=hmm$variances.initial, size=fsize(hmm$means.initial,hmm$variances.initial), prob=fprob(hmm$means.initial,hmm$variances.initial))
79
+		rownames(hmm$distributions.initial) <- statelabels
80
+		if (num.trials > 1) {
81
+			if (hmm$loglik.delta > hmm$eps) {
82
+				warning("HMM did not converge in trial run ",i_try,"!\n")
83
+			}
84
+			# Store model in list
85
+			hmm$posteriors <- NULL
86
+			hmm$reads <- NULL
87
+			modellist[[i_try]] <- hmm
88
+		}
89
+	}
90
+
91
+	if (num.trials > 1) {
92
+
93
+		# Select fit with best loglikelihood
94
+		indexmax <- which.max(unlist(lapply(modellist,"[[","loglik")))
95
+		hmm <- modellist[[indexmax]]
96
+
97
+		# Rerun the HMM with different epsilon and initial parameters from trial run
98
+		cat("\n\nRerunning try ",indexmax," with eps =",eps,"--------------------\n")
99
+		hmm <- .C("R_univariate_hmm",
100
+			reads <- as.integer(binned.data$reads), # double* O
101
+			num.bins <- as.integer(numbins), # int* T
102
+			num.states <- as.integer(numstates), # int* N
103
+			means <- double(length=numstates), # double* means
104
+			variances <- double(length=numstates), # double* variances
105
+			num.iterations <- as.integer(max.it), #  int* maxiter
106
+			time.sec <- as.integer(max.time), # double* maxtime
107
+			loglik.delta <- as.double(eps), # double* eps
108
+			posteriors <- double(length=numbins * numstates), # double* posteriors
109
+			A <- double(length=numstates*numstates), # double* A
110
+			proba <- double(length=numstates), # double* proba
111
+			loglik <- double(length=1), # double* loglik
112
+			weights <- double(length=numstates), # double* weights
113
+			means.initial <- as.vector(hmm$distributions[,'mean']), # double* initial_means
114
+			variances.initial <- as.vector(hmm$distributions[,'variance']), # double* initial_variances
115
+			A.initial <- as.vector(hmm$A), # double* initial_A
116
+			proba.initial <- as.vector(hmm$proba), # double* initial_proba
117
+			use.initial.params <- as.logical(1), # bool* use_initial_params
118
+			num.threads <- as.integer(num.threads) # int* num_threads
119
+		)
120
+	}
121
+
122
+	hmm$coordinates <- binned.data[,coordinatenames[1:3]]
123
+	hmm$posteriors <- matrix(hmm$posteriors, ncol=hmm$num.states)
124
+	colnames(hmm$posteriors) <- paste("P(",statelabels,")", sep="")
125
+	class(hmm) <- "aneufinder.model"
126
+	hmm$states <- statelabels[apply(hmm$posteriors, 1, which.max)]
127
+	hmm$eps <- eps
128
+	hmm$A <- matrix(hmm$A, ncol=hmm$num.states, byrow=TRUE)
129
+	rownames(hmm$A) <- statelabels
130
+	colnames(hmm$A) <- statelabels
131
+	hmm$distributions <- cbind(mean=hmm$means, variance=hmm$variances, size=fsize(hmm$means,hmm$variances), prob=fprob(hmm$means,hmm$variances))
132
+	rownames(hmm$distributions) <- statelabels
133
+	hmm$A.initial <- matrix(hmm$A.initial, ncol=hmm$num.states, byrow=TRUE)
134
+	rownames(hmm$A.initial) <- statelabels
135
+	colnames(hmm$A.initial) <- statelabels
136
+	hmm$distributions.initial <- cbind(mean=hmm$means.initial, variance=hmm$variances.initial, size=fsize(hmm$means.initial,hmm$variances.initial), prob=fprob(hmm$means.initial,hmm$variances.initial))
137
+	rownames(hmm$distributions.initial) <- statelabels
138
+	if (num.trials == 1) {
139
+		if (hmm$loglik.delta > hmm$eps) {
140
+			war <- warning("HMM did not converge!\n")
141
+		}
142
+	}
143
+
144
+	if (!is.null(war)) {
145
+		if (output.if.not.converged == TRUE) {
146
+			return(hmm)
147
+		}
148
+	} else {
149
+		return(hmm)
150
+	}
151
+}
0 152
deleted file mode 100644
... ...
@@ -1,23 +0,0 @@
1
-get.states <- function(model, threshold=0.5, separate.zeroinflation=FALSE) {
2
-	numbins <- nrow(model$posteriors)
3
-	states <- rep(NA,numbins)
4
-	if (is(model,"chromStar.univariate.model")) {
5
-		if (separate.zeroinflation) {
6
-			states[ model$posteriors[,3]<=threshold & model$posteriors[,2]<=model$posteriors[,1] ] <- 1
7
-			states[ model$posteriors[,3]<=threshold & model$posteriors[,2]>=model$posteriors[,1] ] <- 2
8
-			states[ model$posteriors[,3]>threshold ] <- 3
9
-			states <- as.factor(c("zeroinflation","unmodified","modified"))[states]
10
-		} else {
11
-			states <- ifelse(model$posteriors[,3]>threshold, 2, 1)
12
-			states <- as.factor(c("unmodified","modified"))[states]
13
-		}
14
-		return(states)
15
-	} else if (is(model,"chromStar.multivariate.model")) {
16
-		states <- model$stateorder[apply(model$posteriors, 1, which.max)]
17
-		return(states)
18
-	} else {
19
-		stop("Supply either a univariate or multivariate chromStar model")
20
-	}
21
-	
22
-}
23
-
24 0
new file mode 100644
... ...
@@ -0,0 +1,7 @@
1
+fsize <- function(mean, variance) {
2
+	return(mean^2 / (variance - mean))
3
+}
4
+
5
+fprob <- function(mean, variance) {
6
+	return(mean/variance)
7
+}
0 8
similarity index 100%
1 9
rename from R/gtools.R
2 10
rename to R/mixedsort.R
3 11
deleted file mode 100644
... ...
@@ -1,165 +0,0 @@
1
-univariate.from.binned.data = function(binned.data, eps=0.001, max.time=-1, max.it=-1, num.trials=1, eps.try=NULL, num.threads=1, output.if.not.converged=FALSE, filter.reads=TRUE) {
2
-
3
-	## Intercept user input
4
-	if (check.positive(eps)!=0) stop("argument 'eps' expects a positive numeric")
5
-	if (check.integer(max.time)!=0) stop("argument 'max.time' expects an integer")
6
-	if (check.integer(max.it)!=0) stop("argument 'max.it' expects an integer")
7
-	if (check.positive.integer(num.trials)!=0) stop("argument 'num.trials' expects a positive integer")
8
-	if (!is.null(eps.try)) {
9
-		if (check.positive(eps.try)!=0) stop("argument 'eps.try' expects a positive numeric")
10
-	}
11
-	if (check.positive.integer(num.threads)!=0) stop("argument 'num.threads' expects a positive integer")
12
-	if (check.logical(output.if.not.converged)!=0) stop("argument 'output.if.not.converged' expects a logical (TRUE or FALSE)")
13
-
14
-
15
-	war = NULL
16
-	if (is.null(eps.try)) eps.try = eps
17
-
18
-	coordinatenames = c("chrom","start","end","reads")
19
-	names(binned.data) = coordinatenames
20
-
21
-	## Assign variables
22
-	# number of states
23
-	numstates = 3
24
-	# prepare densities to use
25
-	densityNames = c("ZI", "NB", "NB")
26
-	enum = c("ZI","NB","ZINB","Other")
27
-	densityNames = factor(densityNames, levels=enum)
28
-	densityNames = as.numeric(densityNames) - 1
29
-	# length of observation sequence
30
-	numbins = length(binned.data$reads) 
31
-
32
-	# Check if there are reads in the data, otherwise HMM will blow up
33
-	if (!any(binned.data$reads!=0)) {
34
-		stop("All reads in data are zero. No univariate HMM done.")
35
-	}
36
-
37
-	# Filter high reads out, makes HMM faster
38
-	if (filter.reads) {
39
-		limit <- 10*ceiling(var(binned.data$reads))
40
-		mask <- binned.data$reads > limit
41
-		binned.data$reads[mask] <- limit
42
-		numfiltered <- length(which(mask))
43
-		if (numfiltered > 0) {
44
-			warning(paste("There are very high read counts (probably artificial) in your data. Replaced read counts > ",limit," (10*variance) by ",limit," in ",numfiltered," bins. Set option 'filter.reads=FALSE' to disable this filtering.", sep=""))
45
-		}
46
-	}
47
-	
48
-	
49
-	## Call univariate in a for loop to enable multiple trials
50
-	modellist = list()
51
-	for (i_try in 1:num.trials) {
52
-		cat("\n\nTry ",i_try," of ",num.trials," ------------------------------\n")
53
-		z = .C("R_univariate_hmm",
54
-			as.integer(binned.data$reads), # double* O
55
-			as.integer(numbins), # int* T
56
-			as.integer(numstates), # int* N
57
-			as.integer(densityNames), # I pass them but they are fixed
58
-			double(length=numstates), # double* r
59
-			double(length=numstates), # double* p
60
-			as.integer(max.it), #  int* maxiter
61
-			as.integer(max.time), # double* maxtime
62
-			as.double(eps.try), # double* eps
63
-			double(length=numbins * numstates), # double* post
64
-			double(length=numstates*numstates), # double* A
65
-			double(length=numstates), # double* proba
66
-			double(length=1), # double* loglik
67
-			double(length=numstates), # double* softweights
68
-			double(length=numstates), # double* initial r
69
-			double(length=numstates), # double* initial p
70
-			double(length=numstates*numstates), # double* initial A
71
-			double(length=numstates), # double* initial proba
72
-			as.logical(0), # bool* use_initial_params
73
-			as.integer(num.threads) # int* num_threads
74
-		)
75
-
76
-		model = NULL
77
-		model$loglik = z[[13]]
78
-		model$iteration = z[[7]]
79
-		model$time.in.sec = z[[8]]
80
-		model$delta.loglik = z[[9]]
81
-		model$epsilon = eps.try
82
-		model$proba = z[[12]]
83
-		model$A = matrix(z[[11]], ncol=numstates, byrow=TRUE)
84
-		model$distributions = cbind(r=z[[5]], p=z[[6]])
85
-		model$softweights = z[[14]]
86
-		model$proba.initial = z[[18]]
87
-		model$A.initial = matrix(z[[17]], ncol=numstates, byrow=TRUE)
88
-		model$distributions.initial = cbind(r=z[[15]], p=z[[16]])
89
-		model$num.threads = z[[20]]
90
-		if (num.trials > 1) {
91
-			if (model$delta.loglik > model$epsilon) {
92
-				warning("HMM did not converge in trial run ",i_try,"!\n")
93
-			}
94
-			# Store model in list
95
-			modellist[[i_try]] = model
96
-		}
97
-	}
98
-
99
-	if (num.trials > 1) {
100
-
101
-		# Select fit with best loglikelihood
102
-		indexmax = which.max(unlist(lapply(modellist,"[[","loglik")))
103
-		model = modellist[[indexmax]]
104
-
105
-		# Rerun the HMM with different epsilon and initial parameters from trial run
106
-		cat("\n\nRerunning try ",indexmax," with eps =",eps,"--------------------\n")
107
-		z = .C("R_univariate_hmm",
108
-			as.integer(binned.data$reads), # double* O
109
-			as.integer(numbins), # int* T
110
-			as.integer(numstates), # int* N
111
-			as.integer(densityNames), # I pass them but they are fixed
112
-			double(length=numstates), # double* r
113
-			double(length=numstates), # double* p
114
-			as.integer(max.it), #  int* maxiter
115
-			as.integer(max.time), # double* maxtime
116
-			as.double(eps), # double* eps
117
-			double(length=numbins * numstates), # double* post
118
-			double(length=numstates*numstates), # double* A
119
-			double(length=numstates), # double* proba
120
-			double(length=1), # double* loglik
121
-			double(length=numstates), # double* softweights
122
-			as.vector(model$distributions[,'r']), # double* initial r
123
-			as.vector(model$distributions[,'p']), # double* initial p
124
-			as.vector(model$A), # double* initial A
125
-			as.vector(model$proba), # double* initial proba
126
-			as.logical(1), # bool* use_initial_params
127
-			as.integer(num.threads) # int* num_threads
128
-		)
129
-	}
130
-
131
-	model = NULL
132
-	model$coordinates = binned.data[,coordinatenames[1:3]]
133
-	model$reads = binned.data$reads
134
-	model$posteriors = matrix(z[[10]], ncol=numstates)
135
-	colnames(model$posteriors) = c("P(state = zero inflation)","P(state = unmodified)","P(state = modified)")
136
-	class(model) = "chromStar.univariate.model"
137
-	model$states = get.states(model)
138
-	model$states.with.zeroinflation = get.states(model, separate.zeroinflation=TRUE)
139
-	model$loglik = z[[13]]
140
-	model$iteration = z[[7]]
141
-	model$time.in.sec = z[[8]]
142
-	model$delta.loglik = z[[9]]
143
-	model$epsilon = eps
144
-	model$proba = z[[12]]
145
-	model$A = matrix(z[[11]], ncol=numstates, byrow=TRUE)
146
-	model$distributions = cbind(r=z[[5]], p=z[[6]])
147
-	model$softweights = z[[14]]
148
-	model$proba.initial = z[[18]]
149
-	model$A.initial = matrix(z[[17]], ncol=numstates, byrow=TRUE)
150
-	model$distributions.initial = cbind(r=z[[15]], p=z[[16]])
151
-	model$num.threads = z[[20]]
152
-	if (num.trials == 1) {
153
-		if (model$delta.loglik > model$epsilon) {
154
-			war = warning("HMM did not converge!\n")
155
-		}
156
-	}
157
-
158
-	if (!is.null(war)) {
159
-		if (output.if.not.converged == TRUE) {
160
-			return(model)
161
-		}
162
-	} else {
163
-		return(model)
164
-	}
165
-}
... ...
@@ -0,0 +1,3 @@
1
+!!!THIS PROJECT IS CURRENTLY BEING DEVELOPED!!!
2
+
3
+It has not been tested so far and is likely to produce no or incorrect results.
0 4
\ No newline at end of file
1 5
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+- update() only disomy and fix others to multiples
0 2
deleted file mode 100644
... ...
@@ -1,436 +0,0 @@
1
-10
2
-
3
-dir
4
-821
5
-svn+ssh://maria@141.80.187.138/home/histoneHMM/svn/histoneHMM/branches/groningen/Rpackage/histoneHMM/man
6
-svn+ssh://maria@141.80.187.138/home/histoneHMM/svn
7
-
8
-
9
-
10
-2013-10-07T16:06:14.303223Z
11
-801
12
-heinig
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
-
22
-
23
-
24
-
25
-
26
-
27
-72ef0f4a-728c-0410-918a-5b46bb2d7104
28
-
29
-zinbacopula.hmm.posterior.Rd
30
-file
31
-
32
-
33
-
34
-
35
-2013-10-21T16:01:10.000000Z
36
-bee76a313dfbe41bed927bf74a4227bf
37
-2013-10-07T16:06:14.303223Z
38
-801
39
-heinig
40
-
41
-
42
-
43
-
44
-
45
-
46
-
47
-
48
-
49
-
50
-
51
-
52
-
53
-
54
-
55
-
56
-
57
-
58
-
59
-
60
-
61
-2424
62
-
63
-em.Rd
64
-file
65
-
66
-
67
-
68
-
69
-2013-10-21T16:01:10.000000Z
70
-53932cc64b0e6b28da2d3f2f02ca9594
71
-2013-10-07T16:06:14.303223Z
72
-801
73
-heinig
74
-
75
-
76
-
77
-
78
-
79
-
80
-
81
-
82
-
83
-
84
-
85
-
86
-
87
-
88
-
89
-
90
-
91
-
92
-
93
-
94
-
95
-1631
96
-
97
-callRegions.Rd
98
-file
99
-
100
-
101
-
102
-
103
-2013-10-21T14:01:40.000000Z
104
-a67e330a1954b8ebff32e3eed41f6b58
105
-2013-10-07T16:06:14.303223Z
106
-801
107
-heinig
108
-
109
-
110
-
111
-
112
-
113
-
114
-
115
-
116
-
117
-
118
-
119
-
120
-
121
-
122
-
123
-
124
-
125
-
126
-
127
-
128
-
129
-2911
130
-
131
-histoneHMM-package.Rd
132
-file
133
-
134
-
135
-
136
-
137
-2013-10-21T14:01:40.000000Z
138
-2edef5d833ae20c3799debc3f5a1d2b7
139
-2013-10-07T16:06:14.303223Z
140
-801
141
-heinig
142
-
143
-
144
-
145
-
146
-
147
-
148
-
149
-
150
-
151
-
152
-
153
-
154
-
155
-
156
-
157
-
158
-
159
-
160
-
161
-
162
-
163
-1477
164
-
165
-hmm.viterbi.Rd
166
-file
167
-
168
-
169
-
170
-
171
-2013-10-21T16:01:10.000000Z
172
-9e99a7367d80d53cc3decf70069f0dd6
173
-2013-10-07T16:06:14.303223Z
174
-801
175
-heinig
176
-
177
-
178
-
179
-
180
-
181
-
182
-
183
-
184
-
185
-
186
-
187
-
188
-
189
-
190
-
191
-
192
-
193
-
194
-
195
-
196
-
197
-1506
198
-
199
-Zinba-class.Rd
200
-file
201
-
202
-
203
-
204
-
205
-2013-10-21T14:01:40.000000Z
206
-b782b0675b62e95286b4ce71d70ce150
207
-2013-10-07T16:06:14.303223Z
208
-801
209
-heinig
210
-
211
-
212
-
213
-
214
-
215
-
216
-
217
-
218
-
219
-
220
-
221
-
222
-
223
-
224
-
225
-
226
-
227
-
228
-
229
-
230
-
231
-1599
232
-
233
-fit.zinba.copula.Rd
234
-file
235
-
236
-
237
-
238
-
239
-2013-10-21T16:01:10.000000Z
240
-fc419ce71e8d60af4c2e51732f09942a
241
-2013-10-07T16:06:14.303223Z
242
-801
243
-heinig
244
-
245
-
246
-
247
-
248
-
249
-
250
-
251
-
252
-
253
-
254
-
255
-
256
-
257
-
258
-
259
-
260
-
261
-
262
-
263
-
264
-
265
-1654
266
-
267
-fitzinba.Rd
268
-file
269
-
270
-
271
-
272
-
273
-2013-10-21T16:01:10.000000Z
274
-ca5551165d8a705e6e105d55cfad1bd4
275
-2013-10-07T16:06:14.303223Z
276
-801
277
-heinig
278
-
279
-
280
-
281
-
282
-
283
-
284
-
285
-
286
-
287
-
288
-
289
-
290
-
291
-
292
-
293
-
294
-
295
-
296
-
297
-
298
-
299
-1086
300
-
301
-hmm.posterior.Rd
302
-file
303
-
304
-
305
-
306
-
307
-2013-10-21T14:01:40.000000Z
308
-ff51a842199550e649ab96beecf562ee
309
-2013-10-07T16:06:14.303223Z
310
-801
311
-heinig
312
-
313
-
314
-
315
-
316
-
317
-
318
-
319
-
320
-
321
-
322
-
323
-
324
-
325
-
326
-
327
-
328
-
329
-
330
-
331
-
332
-
333
-1588
334
-
335
-dzinba.Rd
336
-file
337
-
338
-
339
-
340
-
341
-2013-10-21T16:01:10.000000Z
342
-32b6b711ffe537475404e7e94f975ce4
343
-2013-10-07T16:06:14.303223Z
344
-801
345
-heinig
346
-
347
-
348
-
349
-
350
-
351
-
352
-
353
-
354
-
355
-
356
-
357
-
358
-
359
-
360
-
361
-
362
-
363
-
364
-
365
-
366
-
367
-1409
368
-
369
-zinba.hmm.posterior.Rd
370
-file
371
-
372
-
373
-
374
-
375
-2013-10-21T16:01:10.000000Z
376
-26aab945ee71dc2b115238de46f7809a
377
-2013-10-07T16:06:14.303223Z
378
-801
379
-heinig
380
-
381
-
382
-
383
-
384
-
385
-
386
-
387
-
388
-
389
-
390
-
391
-
392
-
393
-
394
-
395
-
396
-
397
-
398
-
399
-
400
-
401
-2162
402
-
403
-dzinba.copula.Rd
404
-file
405
-
406
-
407
-
408
-
409
-2013-10-21T16:01:10.000000Z
410
-22237187562c408cf6899de93779af73
411
-2013-10-07T16:06:14.303223Z
412
-801
413
-heinig
414
-
415
-
416
-
417
-
418
-
419
-
420
-
421
-
422
-
423
-
424
-
425
-
426
-
427
-
428
-
429
-
430
-
431
-
432
-
433
-
434
-
435
-1779
436
-
437 0
deleted file mode 100644
... ...
@@ -1,60 +0,0 @@
1
-\name{Zinba-class}
2
-\Rdversion{1.1}
3
-\docType{class}
4
-\alias{Distribution-class}
5
-\alias{LogNormal-class}
6
-\alias{Zinba-class}
7
-\alias{fit,Zinba,numeric,numeric-method}
8
-\alias{logLikelihood,Zinba,numeric-method}
9
-
10
-\title{Class \code{Distribution}}
11
-\description{
12
-Classes to represent distributions in mixture models in particular to be
13
-used with the \code{\link{em}} function. Currently Zinba is used but
14
-LogNormal is also available.
15
-}
16
-\section{Objects from the Class Zinba}{
17
-Objects can be created by calls of the form \code{new("Zinba",
18
-  ...)}. For a definition of the density see \code{\link{dzinba}}.
19
-}
20
-\section{Slots}{
21
-  \describe{
22
-    \item{\code{size}:}{size parameter of the negative binomial distribution}
23
-    \item{\code{mu}:}{mean parameter of the negative binomial distribution}
24
-    \item{\code{beta}:}{zero inflation parameter}
25
-  }
26
-}
27
-\section{Extends}{
28
-Class \code{"\linkS4class{Distribution}"}, directly.
29
-}
30
-\section{Methods}{
31
-  \describe{
32
-    \item{fit}{\code{signature(model = "Zinba", x = "numeric",
33
-	logExpectation = "numeric")}: used in the maximization step }
34
-    \item{logLikelihood}{\code{signature(model = "Zinba", x =
35
-	"numeric")}: used in the expectation step }
36
-  }
37
-}
38
-
39
-\section{Objects from the Class LogNormal}{
40
-Objects can be created by calls of the form \code{new("LogNormal", ...)}.
41
-}
42
-\section{Slots}{
43
-  \describe{
44
-    \item{\code{meanlog}:}{mean parameter (log space)}
45
-    \item{\code{sdlog}:}{standard deviation parameter (log space)}
46
-  }
47
-}
48
-
49
-
50
-\author{
51
-Matthias Heinig
52
-}
53
-
54
-\seealso{
55
-\code{\link{dzinba}}, \code{\link{dlnorm}}
56
-}
57
-\examples{
58
-showClass("Zinba")
59
-}
60
-\keyword{classes}
61 0
deleted file mode 100644
... ...
@@ -1,95 +0,0 @@
1
-\name{callRegions}
2
-\alias{callRegions}
3
-\title{
4
-call regions from a posterior matrix
5
-}
6
-\description{
7
-Call regions using a cutoff on the posterior probabilities.
8
-}
9
-\usage{
10
-callRegions(avg.posterior, cutoff, posterior.col = "avg.posterior", expr.col = "avg.expression")
11
-}
12
-
13
-\arguments{
14
-  \item{avg.posterior}{
15
-matrix or data.frame with state posterior probabilities
16
-}
17
-  \item{cutoff}{
18
-cutoff on the posterior probability
19
-}
20
-  \item{posterior.col}{
21
-column index or name of the posterior probability
22
-}
23
-  \item{expr.col}{
24
-column index or name of the gene expression column
25
-}
26
-}
27
-
28
-\details{
29
-  The function expects a data.frame or matrix containing the position
30
-  of bins in the genome, the results of the HMM analysis and if
31
-  available gene expression data:
32
-  chrom (chromosome),
33
-  start (start coordinate of the bin),
34
-  end (end coordinate of the bin),
35
-  posterior (posterior probability a HMM state)
36
-  expr (expression values assigned to each bin)
37
-
38
-  The first 3 columns have to be named exactly as given above, the
39
-  colnames of the posterior prob and expression data are taken from the
40
-  arguments to the function.
41
-
42
-  The regions will be called by applying the specified cutoff and
43
-  joining all consecutive bins. Posterior probabilities as well as
44
-  expression values will be avaraged over the called region.
45
-
46
-}
47
-\value{
48
-A GRanges object with metadata columns  
49
-\item{avg.posterior}{Average state posterior in the region}
50
-\item{avg.expr}{Average gene expression in the region}
51
-
52
-}
53
-\references{
54
-}
55
-\author{
56
-Matthias Heinig
57
-}
58
-\note{
59
-}
60
-
61
-\seealso{
62
-
63
-}
64
-\examples{
65
-##---- Should be DIRECTLY executable !! ----
66
-##-- ==>  Define data, use random,
67
-##--	or do  help(data=index)  for the standard data sets.
68
-
69
-## The function is currently defined as
70
-function (avg.posterior, cutoff, posterior.col = "avg.posterior", 
71
-    expr.col = "avg.expression") 
72
-{
73
-    require(GenomicRanges)
74
-    called.regions = avg.posterior[which(avg.posterior[, posterior.col] > 
75
-        cutoff), ]
76
-    called.granges = GRanges(seqnames = called.regions$chrom, 
77
-        ranges = IRanges(start = called.regions$start, end = called.regions$end))
78
-    elementMetadata(called.granges) = DataFrame(called.regions[, 
79
-        c(posterior.col, expr.col)])
80
-    reduced.granges = reduce(called.granges)
81
-    called2reduced = matchMatrix(findOverlaps(called.granges, 
82
-        reduced.granges))
83
-    reduced.posterior = tapply(elementMetadata(called.granges)[, 
84
-        posterior.col], called2reduced[, 2], mean, na.rm = T)
85
-    reduced.expression = tapply(elementMetadata(called.granges)[, 
86
-        expr.col], called2reduced[, 2], mean, na.rm = T)
87
-    elementMetadata(reduced.granges) = DataFrame(avg.posterior = reduced.posterior, 
88
-        avg.expression = reduced.expression)
89
-    return(reduced.granges)
90
-  }
91
-}
92
-% Add one or more standard keywords, see file 'KEYWORDS' in the
93
-% R documentation directory.
94
-\keyword{ ~kwd1 }
95
-\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
96 0
deleted file mode 100644
... ...
@@ -1,67 +0,0 @@
1
-\name{dzinba}
2
-\alias{dzinba}
3
-\alias{pzinba}
4
-\alias{rzinba}
5
-\title{
6
-Zero inflated negative binomial distribution
7
-}
8
-\description{
9
-  Density, CDF and random number generation for the zero inflated
10
-  negative binomial distribution.
11
-}
12
-\usage{
13
-dzinba(x, size, mu, beta, log = FALSE)
14
-pzinba(x, size, mu, beta, lower.tail = TRUE)
15
-rzinba(n, size, mu, beta)
16
-}
17
-\arguments{
18
-  \item{x}{
19
-count data
20
-}
21
-  \item{size}{
22
-size parameter of the negative binomial distribution
23
-}
24
-  \item{mu}{
25
-mean parameter of the negative binomial distribution
26
-}
27
-  \item{beta}{
28
-zero inflation parameter
29
-}
30
-  \item{log}{
31
-logical indicating whether to return log density values
32
-}
33
-\item{lower.tail}{
34
-logical indicating whether to P(X<=k) or P(X>k)
35
-}
36
-\item{n}{
37
-number of random samples from the distribution
38
-}
39
-}
40
-\details{
41
-  The density of the zero inflated negative binomial distribution is defined as
42
-  P(x) = I(x == 0) * beta + I(x != 0) * (1 - beta) * dnbinom(x, size,
43
-  mu). 
44
-}
45
-\value{
46
-dzinba is the density function, pzinba is the CDF, rzinba returns
47
-\code{n} random samples from the distribution.
48
-}
49
-\references{
50
-}
51
-\author{
52
-Matthias Heinig
53
-}
54
-
55
-\seealso{
56
-  \code{\link{fitzinba}}
57
-}
58
-\examples{
59
-
60
-x = rzinba(1000, mu=10, size=15, beta=0.1)
61
-hist(x, breaks=50)
62
-lines(0:max(x), dzinba(0:max(x), mu=10, size=15, beta=0.1) * length(x))
63
-
64
-}
65
-% Add one or more standard keywords, see file 'KEYWORDS' in the
66
-% R documentation directory.
67
-\keyword{ distribution }
68 0
deleted file mode 100644
... ...
@@ -1,77 +0,0 @@
1
-\name{dzinba.copula}
2
-\alias{dzinba.copula}
3
-\alias{pzinba.copula}
4
-\alias{rzinba.copula}
5
-\title{
6
-Density, CDF and random number generation from a bivariate zinba copula
7
-}
8
-\description{
9
-Density, CDF and random number generation from a bivariate zinba copula
10
-distribution
11
-}
12
-\usage{
13
-dzinba.copula(x, y, fit, log = FALSE)
14
-pzinba.copula(x, y, fit)
15
-rzinba.copula(n, fit)
16
-}
17
-%- maybe also 'usage' for other objects documented here.
18
-\arguments{
19
-  \item{x}{
20
-count data vector
21
-}
22
-  \item{y}{
23
-count data vector
24
-}
25
-  \item{fit}{
26
-parameters of the zinba copula distribution as returned by fit.zinba.copula:
27
-  marginal.x (named parameter vector as returned by \code{\link{fitzinba}}),
28
-  marginal.y (named parameter vector as returned by \code{\link{fitzinba}}), 
29
-  sigma (covariance matrix of the transformed variables) and 
30
-  mu (mean vector of the transformed variables)
31
-}
32
-  \item{log}{
33
-logical indicating whether log density should be returned
34
-}
35
-  \item{n}{
36
-number of random samples to be generated
37
-}
38
-}
39
-\details{
40
-dzinba.copula is the density function, zinba.copula is the CDF and
41
-rzinba.copula returns n random samples from the distribution.
42
-}
43
-\value{
44
-}
45
-\references{
46
-}
47
-\author{
48
-Matthias Heinig
49
-}
50
-\note{
51
-}
52
-
53
-\seealso{
54
-\code{\link{fit.zinba.copula}}, \code{\link{fitzinba}}
55
-}
56
-\examples{
57
-
58
-# define a zinba copula distribution
59
-fit =  list(mu=c(0, 0),
60
-  marginal.x=c(size=10, mu=10, beta=0.05),
61
-  marginal.y=c(size=10, mu=20, beta=0.02),
62
-  sigma=matrix(c(1, 0.5, 0.5, 1), ncol=2))
63
-
64
-# enumerate a 100x100 square
65
-x = 0:60
66
-y = 0:60
67
-
68
-# compute the density
69
-d = t(sapply(x, function(xx) sapply(y, function(yy) dzinba.copula(xx, yy, fit))))
70
-
71
-# plot the density
72
-image(x=x, y=y, z=log(d))
73
-
74
-}
75
-% Add one or more standard keywords, see file 'KEYWORDS' in the
76
-% R documentation directory.
77
-\keyword{ distribution }
78 0
deleted file mode 100644
... ...
@@ -1,68 +0,0 @@
1
-\name{em}
2
-\alias{em}
3
-\alias{EMResult-class}
4
-\title{
5
-Expectation maximization algorithm
6
-}
7
-\description{
8
-Expectation maximization algorithm for different classes of
9
-distributions. Currently only the Zinba distribution is used, but
10
-Lognormal is also available.
11
-}
12
-\usage{
13
-em(x, ncomp, prop = NULL, maxit = 100, eps = 1e-04, model.constructor = "Zinba")
14
-}
15
-\arguments{
16
-  \item{x}{
17
-data vector
18
-}
19
-  \item{ncomp}{
20
-number of mixture components
21
-}
22
-  \item{prop}{
23
-initial guess of the mixture proportions
24
-}
25
-  \item{maxit}{
26
-maximum number of iterations
27
-}
28
-  \item{eps}{
29
-stop if the gain in loglikelihood is less than \code{eps}
30
-}
31
-\item{model.constructor}{
32
-name of a constructor for the mixture components, can be "Zinba" or "Lognormal"
33
-}
34
-}
35
-\details{
36
-  If \code{prop} is NULL a uniform distribution is assumed.
37
-}
38
-\value{
39
-  returns an object of class EMResult which has the following slots:
40
-  \item{models}{a list containing the models for each component}
41
-  \item{logLikelihood}{a matrix with the logLikelihood for each data
42
-    point (rows) and each mixture component (cols) }
43
-  \item{dataLogLikelihood}{the total log likelihood of the data}
44
-  \item{proportions}{the mixture proportions}
45
-}
46
-\references{
47
-}
48
-\author{
49
-Matthias Heinig
50
-}
51
-\note{
52
-}
53
-
54
-\seealso{
55
-The models are objects of the class \code{\link{Distribution}} for
56
-example \code{\link{Zinba}} or \code{\link{Lognormal}}.
57
-}
58
-\examples{
59
-x = rzinba(1000, size=10, mu=5, beta=0.1)
60
-y = rzinba(1000, size=10, mu=10, beta=0.02)
61
-
62
-emfit = em(c(x, y), ncomp=2, model.constructor="Zinba")
63
-
64
-}
65
-% Add one or more standard keywords, see file 'KEYWORDS' in the
66
-% R documentation directory.
67
-\keyword{ models }
68
-
69 0
deleted file mode 100644
... ...
@@ -1,62 +0,0 @@
1
-\name{fit.zinba.copula}
2
-\alias{fit.zinba.copula}
3
-\title{
4
-Maximum likelihood fit of the zinba copula distribution
5
-}
6
-\description{
7
-Maximum likelihood fit of the 2 dimensional zinba copula distribution
8
-}
9
-\usage{
10
-fit.zinba.copula(x, y, weight = 1, marginal.x = NULL, marginal.y = NULL)
11
-}
12
-%- maybe also 'usage' for other objects documented here.
13
-\arguments{
14
-  \item{x}{
15
-count data vector
16
-}
17
-  \item{y}{
18
-count data vector
19
-}
20
-  \item{weight}{
21
-weight vector (can be used in the EM algorithm)
22
-}
23
-  \item{marginal.x}{
24
-parameters of the marginal zinba distribution for \code{x}
25
-}
26
-  \item{marginal.y}{
27
-parameters of the marginal zinba distribution for \code{y}
28
-}
29
-}
30
-\details{
31
-if \code{marginal.x} or \code{marginal.y} are NULL they will be
32
-re-estimated, if they are given only the missing parameters will be
33
-re-estimated. The format of the marginal parameters is explained below.
34
-}
35
-\value{
36
-  a list with the parameters of the two marginal distributions:
37
-  \item{marginal.x}{named parameter vector as returned by
38
-    \code{\link{fitzinba}}}
39
-  \item{marginal.y}{named parameter vector as returned by
40
-    \code{\link{fitzinba}}}
41
-  \item{sigma}{covariance matrix of the transformed variables}
42
-  \item{mu}{mean vector of the transformed variables}
43
-}
44
-\references{
45
-}
46
-\author{
47
-Matthias Heinig
48
-}
49
-\note{
50
-}
51
-
52
-\seealso{
53
-\code{\link{fitzinba}}, \code{\link{dzinba.copula}}
54
-}
55
-\examples{
56
-x = rzinba.copula(1000, list(mu=c(0, 0), marginal.x=c(size=10, mu=10, beta=0.1), marginal.y=c(size=10, mu=20, beta=0.02), sigma=matrix(c(1, 0.5, 0.5, 1), ncol=2)))
57
-
58
-fit = fit.zinba.copula(x[,1], x[,2])
59
-
60
-}
61
-% Add one or more standard keywords, see file 'KEYWORDS' in the
62
-% R documentation directory.
63 0
deleted file mode 100644
... ...
@@ -1,45 +0,0 @@
1
-\name{fitzinba}
2
-\alias{fitzinba}
3
-
4
-\title{
5
-Fit parameters of a zero inflated negative binomial distribution
6
-}
7
-\description{
8
-Fit parameters of a zero inflated negative binomial distribution
9
-}
10
-\usage{
11
-fitzinba(x, weight = 1)
12
-}
13
-\arguments{
14
-  \item{x}{vector of count data}
15
-  \item{weight}{weight of each data point (used in the EM algorithm)}
16
-}
17
-\details{
18
-  The density of the zero inflated negative binomial distribution is defined as
19
-  P(x) = I(x == 0) * beta + I(x != 0) * (1 - beta) * dnbinom(x, size,
20
-  mu). The maximum likelihood estimate of the parameters is found using
21
-  numerical optimization (see \code{\link{optim}}).
22
-}
23
-\value{
24
-  Returns a vector with three named values:
25
-  \item{size}{size parameter}
26
-  \item{mu}{mean parameter}
27
-  \item{beta}{zero inflation parameter}
28
-}
29
-\references{
30
-}
31
-\author{
32
-Matthias Heinig
33
-}
34
-
35
-\seealso{
36
-\code{\link{dzinba}}, \code{\link{pzinba}}, \code{\link{rzinba}}
37
-}
38
-\examples{
39
-x = rzinba(n=1000, beta=0.1, mu=10, size=15)
40
-fitzinba(x)
41
-}
42
-% Add one or more standard keywords, see file 'KEYWORDS' in the
43
-% R documentation directory.
44
-\keyword{distribution}
45
-
46 0
deleted file mode 100644
... ...
@@ -1,53 +0,0 @@
1
-\name{histoneHMM-package}
2
-\alias{histoneHMM-package}
3
-\alias{histoneHMM}
4
-\docType{package}
5
-\title{Analysis of histone modification data using a HMM}
6
-\description{
7
-  histoneHMM is a software to analyse ChIP-seq data of histone modifications with broad genomic footprints like H3K27me3. It allows for calling modified regions in single samples as well as for calling differentially modified regions in a comparison of two samples.
8
-}
9
-\details{
10
-\tabular{ll}{
11
-Package: \tab histoneHMM\cr
12
-Type: \tab Package\cr
13
-Version: \tab 1.0\cr
14
-Date: \tab 2011-11-18\cr
15
-License: \tab What license is it under?\cr
16
-LazyLoad: \tab yes\cr
17
-}
18
-
19
-The most important functions of the package are
20
-\code{\link{zinba.hmm.posterior}},
21
-\code{\link{zinbacopula.hmm.posterior}},
22
-\code{\link{em}},
23
-\code{\link{fitzinba}} and
24
-\code{\link{fit.zinba.copula}}.
25
-
26
-}
27
-\author{
28
-Author: Matthias Heinig and Maria Colome Tatche
29
-Maintainer: Matthias Heinig <heinig@molgen.mpg.de>
30
-}
31
-\references{http://histonehmm.molgen.mpg.de}
32
-\keyword{ package }
33
-\seealso{
34
-}
35
-\examples{
36
-
37
-# define model parameters for two states
38
-mu = c(5, 10)
39
-size = c(10, 10)
40
-beta = c(0.1, 0.02)
41
-
42
-# generate observation sequences
43
-x = c(rzinba(1000, mu=mu[1], size=size[1], beta=beta[1]),
44
-  rzinba(1000, mu=mu[2], size=size[2], beta=beta[2]),
45
-  rzinba(1000, mu=mu[1], size=size[1], beta=beta[1]))
46
-
47
-# get the posteriors
48
-posterior = zinba.hmm.posterior(x, mu, size, beta)
49
-
50
-# plot the sequence with state assignments
51
-plot(x, col=apply(posterior, 1, which.max))
52
-
53
-}
54 0
deleted file mode 100644
... ...
@@ -1,65 +0,0 @@
1
-\name{hmm.posterior}
2
-\alias{hmm.posterior}
3
-%- Also NEED an '\alias' for EACH other topic documented here.
4
-\title{
5
-  HMM posterior
6
-}
7
-\description{
8
-  compute the posterior probability of a lognormal emission hmm
9
-}
10
-\usage{
11
-hmm.posterior(x, meanlog, sdlog, max.it=500, eps=1e-4, rho=0)
12
-}
13
-%- maybe also 'usage' for other objects documented here.
14
-\arguments{
15
-  \item{x}{
16
-    observation vector, can be also a two row matrix with bivariate
17
-    observations
18
-  }
19
-  \item{meanlog}{
20
-    vector holding the means of the lognormal emisson distributions, can
21
-    be also a two row matrix with mean vectors in the columns
22
-  }
23
-  \item{sdlog}{
24
-    vector holding the sd of the lognormal emisson distributions, can be
25
-    also a two row matrix with the sqrt of the diagonal elements of sigma
26
-  }
27
-  \item{max.it} {
28
-    maximum number of iterations
29
-  }
30
-  \item{eps} {
31
-    stop iterations if the gain in log likelihood is less than \code{eps}
32
-  }
33
-  \item{rho}{
34
-    if the bivariate model is used this is a vector of correlation
35
-    coefficients for each state
36
-  }
37
-}
38
-\details{
39
-  posterior decoding
40
-}
41
-\value{
42
- matrix (T x N) of posterior probabilities
43
-}
44
-\references{
45
-  Rabiner et al
46
-}
47
-\author{
48
-  Matthias Heinig
49
-}
50
-\note{
51
-}
52
-
53
-\seealso{
54
-\code{\link{zinba.hmm.posterior}}
55
-}
56
-\examples{