git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/Streamer@68704 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: Streamer |
2 | 2 |
Type: Package |
3 | 3 |
Title: Enabling stream processing of large files |
4 |
-Version: 1.3.4 |
|
4 |
+Version: 1.3.5 |
|
5 | 5 |
Date: 2010-10-13 |
6 | 6 |
Author: Martin Morgan, Nishant Gopalakrishnan |
7 | 7 |
Maintainer: Martin Morgan <mtmorgan@fhcrc.org> |
... | ... |
@@ -15,8 +15,8 @@ Description: Large data files can be difficult to work with in R, |
15 | 15 |
package?Streamer for details. |
16 | 16 |
License: Artistic-2.0 |
17 | 17 |
LazyLoad: yes |
18 |
-Imports: methods, graph, parallel |
|
19 |
-Suggests: BiocGenerics, RUnit, Rsamtools (>= 1.5.53) |
|
18 |
+Imports: methods, graph, parallel, BiocGenerics |
|
19 |
+Suggests: RUnit, Rsamtools (>= 1.5.53) |
|
20 | 20 |
biocViews: Infrastructure, DataImport |
21 | 21 |
Collate: |
22 | 22 |
generics.R OldClass.R |
... | ... |
@@ -2,6 +2,8 @@ useDynLib(Streamer, .registration=TRUE) |
2 | 2 |
|
3 | 3 |
import(methods) |
4 | 4 |
|
5 |
+importFrom(BiocGenerics, lapply, sapply) |
|
6 |
+ |
|
5 | 7 |
importFrom(parallel, mcparallel, mccollect) |
6 | 8 |
|
7 | 9 |
importFrom(graph, graphBAM) |
... | ... |
@@ -10,6 +12,8 @@ importMethodsFrom(graph, degree) |
10 | 12 |
|
11 | 13 |
exportPattern("^[^\\.]") |
12 | 14 |
|
15 |
+exportMethods(lapply, sapply) |
|
16 |
+ |
|
13 | 17 |
exportClasses(Streamer, |
14 | 18 |
Producer, RawInput, Seq, |
15 | 19 |
Consumer, RawToChar, Rev, Team, Downsample, |
... | ... |
@@ -8,3 +8,34 @@ setMethod(stream, "Producer", |
8 | 8 |
else |
9 | 9 |
do.call(stream, c(rev(list(..., verbose=verbose)), list(x))) |
10 | 10 |
}) |
11 |
+ |
|
12 |
+setMethod(lapply, "Producer", |
|
13 |
+ function(X, FUN, ..., .env=parent.frame()) |
|
14 |
+{ |
|
15 |
+ FUN <- match.fun(FUN) |
|
16 |
+ fun <- function(yield) { |
|
17 |
+ y <- tryCatch(yield(), error=function(err) { |
|
18 |
+ stop("'lapply,Producer-method' yield() failed: ", |
|
19 |
+ conditionMessage(err)) |
|
20 |
+ }) |
|
21 |
+ if (!length(y)) |
|
22 |
+ return(y) |
|
23 |
+ tryCatch(eval(FUN(y, ...), .env), error=function(err) { |
|
24 |
+ stop("'lapply,Producer-method' FUN() failed: ", |
|
25 |
+ conditionMessage(err)) |
|
26 |
+ }) |
|
27 |
+ } |
|
28 |
+ ## avoid S4 dispatch on yield(X) |
|
29 |
+ .Call(.lapply_Producer, fun, X$yield, environment()) |
|
30 |
+}) |
|
31 |
+ |
|
32 |
+setMethod(sapply, "Producer", |
|
33 |
+ function(X, FUN, ..., simplify=TRUE, USE.NAMES=TRUE) |
|
34 |
+{ |
|
35 |
+ FUN <- match.fun(FUN) |
|
36 |
+ .env <- parent.frame() |
|
37 |
+ answer <- lapply(X = X, FUN = FUN, ..., .env=.env) |
|
38 |
+ if (!identical(simplify, FALSE) && length(answer)) |
|
39 |
+ simplify2array(answer, higher = (simplify == "array")) |
|
40 |
+ else answer |
|
41 |
+}) |
11 | 42 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,30 @@ |
1 |
+test_lapply_Producer <- function() |
|
2 |
+{ |
|
3 |
+ vals <- split(1:47, rep(1:7, each=7, length.out=47)) |
|
4 |
+ exp <- unname(lapply(vals, mean)) |
|
5 |
+ |
|
6 |
+ ## function |
|
7 |
+ obs <- lapply(Seq(to=47, length.out=7), mean) |
|
8 |
+ checkIdentical(exp, obs) |
|
9 |
+ |
|
10 |
+ ## anonymous function |
|
11 |
+ obs <- lapply(Seq(to=47, length.out=7), function(x) mean(x)) |
|
12 |
+ checkIdentical(exp, obs) |
|
13 |
+ |
|
14 |
+ ## ... args |
|
15 |
+ obs <- lapply(Seq(to=47, length.out=7), function(x, z) mean(z), z=1:10) |
|
16 |
+ checkIdentical(mean(1:10), unique(unlist(obs))) |
|
17 |
+ |
|
18 |
+ ## env |
|
19 |
+ ZZZ <- 1:10 |
|
20 |
+ res <- lapply(Seq(to=47, length.out=7), function(x) mean(ZZZ)) |
|
21 |
+ checkIdentical(mean(ZZZ), unique(unlist(res))) |
|
22 |
+ |
|
23 |
+ ## error |
|
24 |
+ fun <- function(x) if (x == 3) stop("x: ", x) else x |
|
25 |
+ checkException(lapply(Seq(to=5), fun), silent=TRUE) |
|
26 |
+ |
|
27 |
+ ## trigger re-allocation |
|
28 |
+ ## res <- lapply(Seq(to=4096*4), |
|
29 |
+ ## function(x) { if (x %% 1000 == 0) message(x); x }) |
|
30 |
+} |
... | ... |
@@ -2,7 +2,9 @@ |
2 | 2 |
\Rdversion{1.1} |
3 | 3 |
\docType{class} |
4 | 4 |
\alias{Producer-class} |
5 |
-\alias{show,Producer-class} |
|
5 |
+\alias{lapply,Producer-method} |
|
6 |
+\alias{sapply,Producer-method} |
|
7 |
+\alias{show,Producer-method} |
|
6 | 8 |
|
7 | 9 |
\title{Class "Producer"} |
8 | 10 |
|
... | ... |
@@ -18,8 +20,34 @@ |
18 | 20 |
|
19 | 21 |
} |
20 | 22 |
|
23 |
+\usage{ |
|
24 |
+\S4method{lapply}{Producer}(X, FUN, ..., .env=parent.frame()) |
|
25 |
+\S4method{sapply}{Producer}(X, FUN, ..., simplify=TRUE, USE.NAMES=TRUE) |
|
26 |
+} |
|
27 |
+ |
|
28 |
+\arguments{ |
|
29 |
+ \item{X}{An instance of class \code{Producer}} |
|
30 |
+ |
|
31 |
+ \item{FUN}{A function to be applied to each successful \code{yield()} |
|
32 |
+ of \code{X}.} |
|
33 |
+ |
|
34 |
+ \item{...}{Additional arguments to \code{FUN}.} |
|
35 |
+ |
|
36 |
+ \item{simplify}{See \code{?base::sapply}.} |
|
37 |
+ |
|
38 |
+ \item{USE.NAMES}{See \code{?base::sapply} but note that names do not |
|
39 |
+ usually make sense for instances of class \code{Producer}.} |
|
40 |
+ |
|
41 |
+ \item{.env}{For internal use} |
|
42 |
+} |
|
43 |
+ |
|
21 | 44 |
\section{Methods}{ |
22 |
- Methods defined on this class include: |
|
45 |
+ |
|
46 |
+ \code{lapply} and \code{sapply} apply \code{FUN} to each result |
|
47 |
+ applied to \code{yield()}. Infite producers will of course exhaust |
|
48 |
+ memory. |
|
49 |
+ |
|
50 |
+ Inherited methods defined on this class include: |
|
23 | 51 |
\describe{ |
24 | 52 |
\item{stream}{\code{signature(x = "Producer", ...)}: see |
25 | 53 |
\code{?stream}.} |
... | ... |
@@ -40,15 +68,18 @@ |
40 | 68 |
|
41 | 69 |
\section{Class-Based Methods}{ |
42 | 70 |
\describe{ |
43 |
- The \code{Producer} class inherits the methods \code{initialize}, \code{msg}, |
|
44 |
- \code{reset}, \code{status} and \code{yield} from the \code{Streamer} |
|
45 |
- virtual class. Please refer to the \code{\link{Streamer}} class for more |
|
46 |
- details. |
|
71 |
+ |
|
72 |
+ The \code{Producer} class inherits the methods \code{initialize}, |
|
73 |
+ \code{msg}, \code{reset}, \code{status} and \code{yield} from the |
|
74 |
+ \code{Streamer} virtual class. Please refer to the |
|
75 |
+ \code{\link{Streamer}} class for more details. |
|
47 | 76 |
|
48 |
- Derived classes should implement an appropriate \code{initialize} method to |
|
49 |
- initialize the fields of the derived class. Additionally, a \code{yield} method |
|
50 |
- should be implemented to return the contents of the current stream. The |
|
51 |
- default method for the base virtual \code{Streamer} class returns a \code{list()} |
|
77 |
+ Derived classes should implement an appropriate \code{initialize} |
|
78 |
+ method to initialize the fields of the derived class. Additionally, |
|
79 |
+ a \code{yield} method should be implemented to return the contents |
|
80 |
+ of the current stream. The default method for the base virtual |
|
81 |
+ \code{Streamer} class returns a \code{list()} |
|
82 |
+ |
|
52 | 83 |
} |
53 | 84 |
} |
54 | 85 |
|
... | ... |
@@ -61,6 +92,13 @@ |
61 | 92 |
|
62 | 93 |
} |
63 | 94 |
|
64 |
-\examples{showClass("Producer")} |
|
95 |
+\examples{ |
|
96 |
+showClass("Producer") |
|
97 |
+showMethods(class="Producer", where="package:Streamer") |
|
98 |
+ |
|
99 |
+sapply(Seq(to=47, length.out=7), function(elt) { |
|
100 |
+ c(n = length(elt), xbar = mean(elt)) |
|
101 |
+}) |
|
102 |
+} |
|
65 | 103 |
|
66 | 104 |
\keyword{classes} |
... | ... |
@@ -1,10 +1,13 @@ |
1 | 1 |
#include <R_ext/Rdynload.h> |
2 | 2 |
#include "raw_input.h" |
3 |
+#include "lapply.h" |
|
3 | 4 |
|
4 | 5 |
static const R_CallMethodDef callMethods[] = { |
5 | 6 |
/* raw_parse */ |
6 | 7 |
{".raw_parse_count_records", (DL_FUNC) &raw_parse_count_records, 2}, |
7 | 8 |
{".raw_parse", (DL_FUNC) &raw_parse, 3}, |
9 |
+ /* lapply */ |
|
10 |
+ {".lapply_Producer", (DL_FUNC) &lapply_Producer, 3}, |
|
8 | 11 |
{NULL, NULL, 0} |
9 | 12 |
}; |
10 | 13 |
|
11 | 14 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,35 @@ |
1 |
+#include "lapply.h" |
|
2 |
+ |
|
3 |
+SEXP |
|
4 |
+lapply_Producer(SEXP fun, SEXP X, SEXP rho) |
|
5 |
+{ |
|
6 |
+ SEXP call1, ans1, ans; |
|
7 |
+ int iter = 0; |
|
8 |
+ R_len_t curr_size = 4096; |
|
9 |
+ PROTECT_INDEX px; |
|
10 |
+ |
|
11 |
+ /* FIXME: type checks */ |
|
12 |
+ |
|
13 |
+ PROTECT(call1 = lang2(fun, X)); |
|
14 |
+ PROTECT_WITH_INDEX(ans = Rf_allocVector(VECSXP, curr_size), &px); |
|
15 |
+ while (1) { |
|
16 |
+ PROTECT(ans1 = Rf_eval(call1, rho)); |
|
17 |
+ if (0 == Rf_length(ans1)) { |
|
18 |
+ UNPROTECT(1); |
|
19 |
+ break; |
|
20 |
+ } |
|
21 |
+ if (iter == curr_size) { |
|
22 |
+ if (curr_size == R_LEN_T_MAX) |
|
23 |
+ Rf_error("%s cannot create %d-element vector", |
|
24 |
+ "'lapply,Producer-method'", curr_size); |
|
25 |
+ curr_size *= 1.6; |
|
26 |
+ if (curr_size > R_LEN_T_MAX) |
|
27 |
+ curr_size = R_LEN_T_MAX; |
|
28 |
+ REPROTECT(ans = Rf_lengthgets(ans, curr_size), px); |
|
29 |
+ } |
|
30 |
+ SET_VECTOR_ELT(ans, iter++, ans1); |
|
31 |
+ UNPROTECT(1); |
|
32 |
+ } |
|
33 |
+ UNPROTECT(2); |
|
34 |
+ return Rf_lengthgets(ans, iter); |
|
35 |
+} |