... | ... |
@@ -16,7 +16,8 @@ importFrom(graphics, "abline", "axis", "layout", "legend", "lines", |
16 | 16 |
"mtext", "par", "plot", "points", "polygon", "rect", "rug", "text") |
17 | 17 |
import(parallel) |
18 | 18 |
importFrom(locfit, "locfit", "lp") |
19 |
-importFrom(matrixStats, "rowSds", "rowVars", "rowMaxs", "rowMins", "colMeans2") |
|
19 |
+importFrom(matrixStats, "rowSds", "rowVars", "rowMaxs", "rowMins", "colMeans2", |
|
20 |
+ "colSums2") |
|
20 | 21 |
import(IRanges) |
21 | 22 |
import(GenomicRanges) |
22 | 23 |
import(SummarizedExperiment) |
... | ... |
@@ -144,69 +144,56 @@ getMeth <- function(BSseq, regions = NULL, type = c("smooth", "raw"), |
144 | 144 |
} |
145 | 145 |
} |
146 | 146 |
|
147 |
+# TODO: getCoverage() realises the result in memory iff regions is not NULL; |
|
148 |
+# discuss with Kasper |
|
149 |
+# TODO: Whether or not colnames are added to returned value depends on whether |
|
150 |
+# regions is non-NULL; discuss with Kasper |
|
147 | 151 |
getCoverage <- function(BSseq, regions = NULL, type = c("Cov", "M"), |
148 |
- what = c("perBase", "perRegionAverage", "perRegionTotal")) { |
|
152 |
+ what = c("perBase", "perRegionAverage", |
|
153 |
+ "perRegionTotal")) { |
|
149 | 154 |
stopifnot(is(BSseq, "BSseq")) |
150 | 155 |
type <- match.arg(type) |
151 | 156 |
what <- match.arg(what) |
152 |
- if(is.null(regions)) { |
|
153 |
- if(what == "perBase") |
|
157 |
+ if (is.null(regions)) { |
|
158 |
+ if (what == "perBase") { |
|
154 | 159 |
return(getBSseq(BSseq, type = type)) |
155 |
- if(what == "perRegionTotal") |
|
160 |
+ } |
|
161 |
+ if (what == "perRegionTotal") { |
|
156 | 162 |
return(colSums(getBSseq(BSseq, type = type))) |
157 |
- if(what == "perRegionAverage") |
|
163 |
+ } |
|
164 |
+ if (what == "perRegionAverage") { |
|
158 | 165 |
return(colMeans(getBSseq(BSseq, type = type))) |
166 |
+ } |
|
159 | 167 |
} |
160 |
- if(class(regions) == "data.frame") |
|
168 |
+ if (class(regions) == "data.frame") { |
|
161 | 169 |
regions <- data.frame2GRanges(regions) |
170 |
+ } |
|
162 | 171 |
stopifnot(is(regions, "GenomicRanges")) |
163 | 172 |
grBSseq <- granges(BSseq) |
164 |
- mm <- as.matrix(findOverlaps(regions, grBSseq)) |
|
165 |
- mmsplit <- split(mm[,2], mm[,1]) |
|
166 |
- if(what == "perBase") { |
|
167 |
- if(type == "Cov") { |
|
168 |
- out <- lapply(mmsplit, function(xx) { |
|
169 |
- getBSseq(BSseq, "Cov")[xx,,drop = FALSE] |
|
170 |
- }) |
|
171 |
- } |
|
172 |
- if(type == "M") { |
|
173 |
- out <- lapply(mmsplit, function(xx) { |
|
174 |
- getBSseq(BSseq, "M")[xx,,drop = FALSE] |
|
175 |
- }) |
|
176 |
- } |
|
173 |
+ ov <- findOverlaps(grBSseq, regions) |
|
174 |
+ if (type == "Cov") { |
|
175 |
+ coverage <- getBSseq(BSseq, "Cov")[queryHits(ov), , drop = FALSE] |
|
176 |
+ } else if (type == "M") { |
|
177 |
+ coverage <- getBSseq(BSseq, "M")[queryHits(ov), , drop = FALSE] |
|
178 |
+ } |
|
179 |
+ out <- lapply(split(coverage, subjectHits(ov)), matrix, |
|
180 |
+ ncol = ncol(coverage)) |
|
181 |
+ |
|
182 |
+ if (what == "perBase") { |
|
183 |
+ # TODO: Don't really understand the logic of the remaining code; how |
|
184 |
+ # could the results end up in the wrong order wrt to regions? |
|
177 | 185 |
outList <- vector("list", length(regions)) |
178 |
- outList[as.integer(names(mmsplit))] <- out |
|
186 |
+ outList[as.integer(names(out))] <- out |
|
179 | 187 |
return(outList) |
188 |
+ } else if (what == "perRegionAverage") { |
|
189 |
+ out <- do.call(rbind, lapply(out, colMeans2, na.rm = TRUE)) |
|
190 |
+ } else if (what == "perRegionTotal") { |
|
191 |
+ out <- do.call(rbind, lapply(out, colSums2, na.rm = TRUE)) |
|
180 | 192 |
} |
181 |
- if(what == "perRegionAverage") { |
|
182 |
- if(type == "Cov") { |
|
183 |
- out <- lapply(mmsplit, function(xx) { |
|
184 |
- colMeans(getBSseq(BSseq, "Cov")[xx,,drop = FALSE], na.rm = TRUE) |
|
185 |
- }) |
|
186 |
- } |
|
187 |
- if(type == "M") { |
|
188 |
- out <- lapply(mmsplit, function(xx) { |
|
189 |
- colMeans(getBSseq(BSseq, "M")[xx,,drop = FALSE], na.rm = TRUE) |
|
190 |
- }) |
|
191 |
- } |
|
192 |
- } |
|
193 |
- if(what == "perRegionTotal") { |
|
194 |
- if(type == "Cov") { |
|
195 |
- out <- lapply(mmsplit, function(xx) { |
|
196 |
- colSums(getBSseq(BSseq, "Cov")[xx,,drop = FALSE], na.rm = TRUE) |
|
197 |
- }) |
|
198 |
- } |
|
199 |
- if(type == "M") { |
|
200 |
- out <- lapply(mmsplit, function(xx) { |
|
201 |
- colSums(getBSseq(BSseq, "M")[xx,,drop = FALSE], na.rm = TRUE) |
|
202 |
- }) |
|
203 |
- } |
|
204 |
- } |
|
205 |
- out <- do.call(rbind, out) |
|
193 |
+ # TODO: Don't really understand the logic of the remaining code; how |
|
194 |
+ # could the rows end up in the wrong order? |
|
206 | 195 |
outMatrix <- matrix(NA, ncol = ncol(BSseq), nrow = length(regions)) |
207 | 196 |
colnames(outMatrix) <- sampleNames(BSseq) |
208 | 197 |
outMatrix[as.integer(rownames(out)),] <- out |
209 | 198 |
.DelayedMatrix(outMatrix) |
210 | 199 |
} |
211 |
- |
|
212 |
- |