Browse code

lapply and sapply,Producer-method

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/Streamer@68704 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 22/08/2012 03:44:51
Showing 8 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: Streamer
2 2
 Type: Package
3 3
 Title: Enabling stream processing of large files
4
-Version: 1.3.4
4
+Version: 1.3.5
5 5
 Date: 2010-10-13
6 6
 Author: Martin Morgan, Nishant Gopalakrishnan
7 7
 Maintainer: Martin Morgan <mtmorgan@fhcrc.org>
... ...
@@ -15,8 +15,8 @@ Description: Large data files can be difficult to work with in R,
15 15
   package?Streamer for details.
16 16
 License: Artistic-2.0
17 17
 LazyLoad: yes
18
-Imports: methods, graph, parallel
19
-Suggests: BiocGenerics, RUnit, Rsamtools (>= 1.5.53)
18
+Imports: methods, graph, parallel, BiocGenerics
19
+Suggests: RUnit, Rsamtools (>= 1.5.53)
20 20
 biocViews: Infrastructure, DataImport
21 21
 Collate: 
22 22
   generics.R OldClass.R
... ...
@@ -2,6 +2,8 @@ useDynLib(Streamer, .registration=TRUE)
2 2
 
3 3
 import(methods)
4 4
 
5
+importFrom(BiocGenerics, lapply, sapply)
6
+
5 7
 importFrom(parallel, mcparallel, mccollect)
6 8
 
7 9
 importFrom(graph, graphBAM)
... ...
@@ -10,6 +12,8 @@ importMethodsFrom(graph, degree)
10 12
 
11 13
 exportPattern("^[^\\.]")
12 14
 
15
+exportMethods(lapply, sapply)
16
+
13 17
 exportClasses(Streamer,
14 18
               Producer, RawInput, Seq, 
15 19
               Consumer, RawToChar, Rev, Team, Downsample,
... ...
@@ -8,3 +8,34 @@ setMethod(stream, "Producer",
8 8
     else
9 9
         do.call(stream, c(rev(list(..., verbose=verbose)), list(x)))
10 10
 })
11
+
12
+setMethod(lapply, "Producer",
13
+    function(X, FUN, ..., .env=parent.frame())
14
+{
15
+    FUN <- match.fun(FUN)
16
+    fun <- function(yield) {
17
+        y <- tryCatch(yield(), error=function(err) {
18
+            stop("'lapply,Producer-method' yield() failed: ",
19
+                 conditionMessage(err))
20
+        })
21
+        if (!length(y))
22
+            return(y)
23
+        tryCatch(eval(FUN(y, ...), .env), error=function(err) {
24
+            stop("'lapply,Producer-method' FUN() failed: ",
25
+                 conditionMessage(err))
26
+        })
27
+    }
28
+    ## avoid S4 dispatch on yield(X)
29
+    .Call(.lapply_Producer, fun, X$yield, environment())
30
+})
31
+
32
+setMethod(sapply, "Producer",
33
+    function(X, FUN, ..., simplify=TRUE, USE.NAMES=TRUE)
34
+{
35
+    FUN <- match.fun(FUN)
36
+    .env <- parent.frame()
37
+    answer <- lapply(X = X, FUN = FUN, ..., .env=.env)
38
+    if (!identical(simplify, FALSE) && length(answer))
39
+        simplify2array(answer, higher = (simplify == "array"))
40
+    else answer
41
+})
11 42
new file mode 100644
... ...
@@ -0,0 +1,30 @@
1
+test_lapply_Producer <- function()
2
+{
3
+    vals <- split(1:47, rep(1:7, each=7, length.out=47))
4
+    exp <- unname(lapply(vals, mean))
5
+
6
+    ## function
7
+    obs <- lapply(Seq(to=47, length.out=7), mean)
8
+    checkIdentical(exp, obs)
9
+
10
+    ## anonymous function
11
+    obs <- lapply(Seq(to=47, length.out=7), function(x) mean(x))
12
+    checkIdentical(exp, obs)
13
+
14
+    ## ... args
15
+    obs <- lapply(Seq(to=47, length.out=7), function(x, z) mean(z), z=1:10)
16
+    checkIdentical(mean(1:10), unique(unlist(obs)))
17
+
18
+    ## env
19
+    ZZZ <- 1:10
20
+    res <- lapply(Seq(to=47, length.out=7), function(x) mean(ZZZ))
21
+    checkIdentical(mean(ZZZ), unique(unlist(res)))
22
+
23
+    ## error
24
+    fun <- function(x) if (x == 3) stop("x: ", x) else x
25
+    checkException(lapply(Seq(to=5), fun), silent=TRUE)
26
+
27
+    ## trigger re-allocation
28
+    ## res <- lapply(Seq(to=4096*4),
29
+    ##               function(x) { if (x %% 1000 == 0) message(x); x })
30
+}
... ...
@@ -2,7 +2,9 @@
2 2
 \Rdversion{1.1}
3 3
 \docType{class}
4 4
 \alias{Producer-class}
5
-\alias{show,Producer-class}
5
+\alias{lapply,Producer-method}
6
+\alias{sapply,Producer-method}
7
+\alias{show,Producer-method}
6 8
 
7 9
 \title{Class "Producer"}
8 10
 
... ...
@@ -18,8 +20,34 @@
18 20
 
19 21
 }
20 22
 
23
+\usage{
24
+\S4method{lapply}{Producer}(X, FUN, ..., .env=parent.frame())
25
+\S4method{sapply}{Producer}(X, FUN, ..., simplify=TRUE, USE.NAMES=TRUE)
26
+}
27
+
28
+\arguments{
29
+  \item{X}{An instance of class \code{Producer}}
30
+
31
+  \item{FUN}{A function to be applied to each successful \code{yield()}
32
+    of \code{X}.}
33
+
34
+  \item{...}{Additional arguments to \code{FUN}.}
35
+
36
+  \item{simplify}{See \code{?base::sapply}.}
37
+
38
+  \item{USE.NAMES}{See \code{?base::sapply} but note that names do not
39
+    usually make sense for instances of class \code{Producer}.}
40
+
41
+  \item{.env}{For internal use}
42
+}
43
+
21 44
 \section{Methods}{
22
-  Methods defined on this class include:
45
+
46
+  \code{lapply} and \code{sapply} apply \code{FUN} to each result
47
+  applied to \code{yield()}. Infite producers will of course exhaust
48
+  memory.
49
+
50
+  Inherited methods defined on this class include:
23 51
   \describe{
24 52
     \item{stream}{\code{signature(x = "Producer", ...)}: see
25 53
       \code{?stream}.}
... ...
@@ -40,15 +68,18 @@
40 68
 
41 69
 \section{Class-Based Methods}{
42 70
   \describe{
43
-    The \code{Producer} class inherits the methods \code{initialize}, \code{msg},
44
-    \code{reset}, \code{status} and \code{yield} from the \code{Streamer}
45
-    virtual class. Please refer to the \code{\link{Streamer}} class for more
46
-    details.
71
+
72
+    The \code{Producer} class inherits the methods \code{initialize},
73
+    \code{msg}, \code{reset}, \code{status} and \code{yield} from the
74
+    \code{Streamer} virtual class. Please refer to the
75
+    \code{\link{Streamer}} class for more details.
47 76
     
48
-    Derived classes should implement an appropriate \code{initialize} method to
49
-    initialize the fields of the derived class. Additionally, a \code{yield} method
50
-    should be implemented to return the contents of the current stream. The
51
-    default method for the base virtual \code{Streamer} class returns a \code{list()} 
77
+    Derived classes should implement an appropriate \code{initialize}
78
+    method to initialize the fields of the derived class. Additionally,
79
+    a \code{yield} method should be implemented to return the contents
80
+    of the current stream. The default method for the base virtual
81
+    \code{Streamer} class returns a \code{list()}
82
+
52 83
   }
53 84
 }
54 85
 
... ...
@@ -61,6 +92,13 @@
61 92
 
62 93
 }
63 94
 
64
-\examples{showClass("Producer")}
95
+\examples{
96
+showClass("Producer")
97
+showMethods(class="Producer", where="package:Streamer")
98
+
99
+sapply(Seq(to=47, length.out=7), function(elt) {
100
+    c(n = length(elt), xbar = mean(elt))
101
+})
102
+}
65 103
 
66 104
 \keyword{classes}
... ...
@@ -1,10 +1,13 @@
1 1
 #include <R_ext/Rdynload.h>
2 2
 #include "raw_input.h"
3
+#include "lapply.h"
3 4
 
4 5
 static const R_CallMethodDef callMethods[] = {
5 6
   /* raw_parse */
6 7
   {".raw_parse_count_records", (DL_FUNC) &raw_parse_count_records, 2},
7 8
   {".raw_parse", (DL_FUNC) &raw_parse, 3},
9
+  /* lapply */
10
+  {".lapply_Producer", (DL_FUNC) &lapply_Producer, 3},
8 11
   {NULL, NULL, 0}
9 12
 };
10 13
 
11 14
new file mode 100644
... ...
@@ -0,0 +1,35 @@
1
+#include "lapply.h"
2
+
3
+SEXP
4
+lapply_Producer(SEXP fun, SEXP X, SEXP rho)
5
+{
6
+    SEXP call1, ans1, ans;
7
+    int iter = 0;
8
+    R_len_t curr_size = 4096;
9
+    PROTECT_INDEX px;
10
+
11
+    /* FIXME: type checks */
12
+
13
+    PROTECT(call1 = lang2(fun, X));
14
+    PROTECT_WITH_INDEX(ans = Rf_allocVector(VECSXP, curr_size), &px);
15
+    while (1) {
16
+	PROTECT(ans1 = Rf_eval(call1, rho));
17
+	if (0 == Rf_length(ans1)) {
18
+	    UNPROTECT(1);
19
+	    break;
20
+	}
21
+	if (iter == curr_size) {
22
+	    if (curr_size == R_LEN_T_MAX)
23
+		Rf_error("%s cannot create %d-element vector",
24
+			 "'lapply,Producer-method'", curr_size);
25
+	    curr_size *= 1.6;
26
+	    if (curr_size > R_LEN_T_MAX)
27
+		curr_size = R_LEN_T_MAX;
28
+	    REPROTECT(ans = Rf_lengthgets(ans, curr_size), px);
29
+	}
30
+	SET_VECTOR_ELT(ans, iter++, ans1);
31
+	UNPROTECT(1);
32
+    }
33
+    UNPROTECT(2);
34
+    return Rf_lengthgets(ans, iter);
35
+}
0 36
new file mode 100644
... ...
@@ -0,0 +1,10 @@
1
+#ifndef LAPPLY_H_
2
+#define LAPPLY_H
3
+
4
+#include <Rdefines.h>
5
+
6
+SEXP
7
+lapply_Producer(SEXP fun, SEXP X, SEXP rho);
8
+
9
+#endif
10
+