... | ... |
@@ -1,60 +1,73 @@ |
1 | 1 |
Package: RGMQL |
2 | 2 |
Type: Package |
3 | 3 |
Title: GenoMetric Query Language for R/Bioconductor |
4 |
-Version: 0.99.27 |
|
4 |
+Version: 0.99.28 |
|
5 | 5 |
Author: Simone Pallotta, Marco Masseroli |
6 | 6 |
Maintainer: Simone Pallotta <simonepallotta@hotmail.com> |
7 |
-Description: This RGMQL package brings the GenoMetric Query Language (GMQL) functionalities into the R environment. |
|
8 |
- GMQL is a high-level, declarative language to query and compare multiple and heterogeneous genomic |
|
9 |
- datasets for biomedical knowledge discovery. It allows expressing easily queries |
|
10 |
- and processing over genomic regions and their metadata, in a way similar to |
|
11 |
- what can be done with the Structured Query Language (SQL) over a relational database, |
|
12 |
- to extract genomic regions of interest and compute their properties. |
|
13 |
- GMQL adopts algorithms designed for big data and their efficient implementation |
|
14 |
- using cloud-computing technologies, including Apache Hadoop framework and Spark engine; |
|
15 |
- these make GMQL able to run on modern high performance computing infrastructures, |
|
16 |
- CPU clusters and network infrastructures, in order to achieve scalability |
|
17 |
- and performance on big data. |
|
18 |
- With GMQL very complex genomic operations can be written as simple queries, |
|
19 |
- with implicit iteration over thousands of heterogeneous samples, and computed |
|
20 |
- efficiently in few minutes over servers or clouds. |
|
21 |
- This RGMQL package is built over a scalable data management engine written in |
|
22 |
- Scala programming language, released as Scala API; it provides a set of functions |
|
23 |
- to create, manipulate and extract genomic data from different data sources |
|
24 |
- both from local and remote datasets. These RGMQL functions allow performing |
|
25 |
- complex queries and processing without knowing the GMQL syntax, |
|
26 |
- but leveraging on R idiomatic paradigm and logic. |
|
27 |
- RGMQL provides two different approaches in writing GMQL queries and processing scripts: |
|
28 |
- a) REST calls |
|
29 |
- b) standard R APIs |
|
30 |
- The REST approach let users to log into a remote infrastructure where a GMQL system |
|
31 |
- is installed, and manage remote big genomic datasets hosted in cluster-based repository. |
|
32 |
- User can download an entire remote dataset into local folder, upload local datasets |
|
33 |
- into the remote repository or compiling and running a textual query or processing script |
|
34 |
- just invoking the right RGMQL functions. |
|
35 |
- Multiple REST invocations can be invoked and run concurrently on remote infrastructure |
|
36 |
- allowing user to monitor the progress status of every call. |
|
37 |
- Many other REST functionalities are available in order to allow a complete interaction with remote infrastructure. |
|
38 |
- The R APIs approach lets user work with local or remote datasets using batch-like style |
|
39 |
- where single invocations must be invoked sequentially; with this approach all |
|
40 |
- GMQL queries and processing can be written as a sequence of RGMQL functions. |
|
41 |
- Unlike other similar packages, every RGMQL function simply builds a query, |
|
42 |
- with no intermediate result shown (except for a few functions that execute queries |
|
43 |
- and for some utility functions for interoperability with other packages) |
|
44 |
- The RGMQL package also provides a rich set of ancillary classes that allow |
|
45 |
- sophisticated input/output management and sorting, such as ASC, DESC, BAG, MIN, |
|
46 |
- MAX, SUM, AVG, MEDIAN, STD, Q1, Q2, Q3, and several others; |
|
47 |
- these classes are used only to build predicates and complex conditions taken as |
|
48 |
- input by RGMQL functions; Note that many RGMQL functions are not directly executed |
|
49 |
- in R environment, but are deferred until real execution is issued. |
|
7 |
+Description: This RGMQL package brings the GenoMetric Query Language (GMQL) |
|
8 |
+ functionalities into the R environment. GMQL is a high-level, declarative |
|
9 |
+ language to query and compare multiple and heterogeneous genomic datasets |
|
10 |
+ for biomedical knowledge discovery. It allows expressing easily queries and |
|
11 |
+ processing over genomic regions and their metadata, in a way similar to what |
|
12 |
+ can be done with the Structured Query Language (SQL) over a relational database, |
|
13 |
+ to extract genomic regions of interest and compute their properties. GMQL |
|
14 |
+ adopts algorithms designed for big data and their efficient implementation |
|
15 |
+ using cloud-computing technologies, including Apache Hadoop framework and |
|
16 |
+ Spark engine; these make GMQL able to run on modern high performance computing |
|
17 |
+ infrastructures, CPU clusters and network infrastructures, in order to achieve |
|
18 |
+ scalability and performance on big data. With GMQL very complex genomic |
|
19 |
+ operations can be written as simple queries, with implicit iteration over |
|
20 |
+ thousands of heterogeneous samples, and computed efficiently in few minutes over |
|
21 |
+ servers or clouds. This RGMQL package is built over a scalable data management |
|
22 |
+ engine written in Scala programming language, released as Scala API; it provides |
|
23 |
+ a set of functions to create, manipulate and extract genomic data from different |
|
24 |
+ data sources both from local and remote datasets. These RGMQL functions allow |
|
25 |
+ performing complex queries and processing without knowing the GMQL syntax, |
|
26 |
+ but leveraging on R idiomatic paradigm and logic. RGMQL provides two different |
|
27 |
+ approaches in writing GMQL queries and processing scripts: a) REST calls b) |
|
28 |
+ standard R APIs The REST approach let users to log into a remote infrastructure |
|
29 |
+ where a GMQL system is installed, and manage remote big genomic datasets hosted |
|
30 |
+ in cluster-based repository. User can download an entire remote dataset into |
|
31 |
+ local folder, upload local datasets into the remote repository or compiling |
|
32 |
+ and running a textual query or processing script just invoking the right RGMQL |
|
33 |
+ functions. Multiple REST invocations can be invoked and run concurrently on |
|
34 |
+ remote infrastructure allowing user to monitor the progress status of every |
|
35 |
+ call. Many other REST functionalities are available in order to allow a complete |
|
36 |
+ interaction with remote infrastructure. The R APIs approach lets user work with |
|
37 |
+ local or remote datasets using batch-like style where single invocations must |
|
38 |
+ be invoked sequentially; with this approach all GMQL queries and processing |
|
39 |
+ can be written as a sequence of RGMQL functions. Unlike other similar packages, |
|
40 |
+ every RGMQL function simply builds a query, with no intermediate result shown |
|
41 |
+ (except for a few functions that execute queries and for some utility functions |
|
42 |
+ for interoperability with other packages) The RGMQL package also provides a rich |
|
43 |
+ set of ancillary classes that allow sophisticated input/output management and |
|
44 |
+ sorting, such as ASC, DESC, BAG, MIN, MAX, SUM, AVG, MEDIAN, STD, Q1, Q2, Q3, |
|
45 |
+ and several others; these classes are used only to build predicates and complex |
|
46 |
+ conditions taken as input by RGMQL functions; Note that many RGMQL functions are |
|
47 |
+ not directly executed in R environment, but are deferred until real execution is |
|
48 |
+ issued. |
|
50 | 49 |
License: Artistic-2.0 |
51 | 50 |
URL: http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/ |
52 | 51 |
Encoding: UTF-8 |
53 | 52 |
LazyData: true |
54 | 53 |
RoxygenNote: 6.0.1 |
55 |
-Imports: httr, rJava,GenomicRanges, rtracklayer, data.table, utils, plyr, xml2, |
|
56 |
- methods, S4Vectors, dplyr, stats |
|
57 |
-Depends: R(<= 3.4.2) |
|
54 |
+Imports: |
|
55 |
+ httr, |
|
56 |
+ rJava,GenomicRanges, |
|
57 |
+ rtracklayer, |
|
58 |
+ data.table, |
|
59 |
+ utils, |
|
60 |
+ plyr, |
|
61 |
+ xml2, |
|
62 |
+ methods, |
|
63 |
+ S4Vectors, |
|
64 |
+ dplyr, |
|
65 |
+ stats |
|
66 |
+Depends: |
|
67 |
+ R(>= 3.4.2) |
|
58 | 68 |
VignetteBuilder: knitr |
59 |
-Suggests: BiocStyle, knitr, rmarkdown |
|
69 |
+Suggests: |
|
70 |
+ BiocStyle, |
|
71 |
+ knitr, |
|
72 |
+ rmarkdown |
|
60 | 73 |
biocViews: Software,Infrastructure,DataImport,Network |
... | ... |
@@ -32,15 +32,11 @@ export(compile_query) |
32 | 32 |
export(compile_query_fromfile) |
33 | 33 |
export(cover) |
34 | 34 |
export(delete_dataset) |
35 |
-export(difference) |
|
36 | 35 |
export(download_as_GRangesList) |
37 | 36 |
export(download_dataset) |
38 | 37 |
export(execute) |
39 | 38 |
export(export_gmql) |
40 |
-export(extend) |
|
41 | 39 |
export(filter_and_extract) |
42 |
-export(flat) |
|
43 |
-export(histogram) |
|
44 | 40 |
export(import_gmql) |
45 | 41 |
export(init_gmql) |
46 | 42 |
export(join) |
... | ... |
@@ -48,9 +44,6 @@ export(login_gmql) |
48 | 44 |
export(logout_gmql) |
49 | 45 |
export(map) |
50 | 46 |
export(materialize) |
51 |
-export(merge) |
|
52 |
-export(order) |
|
53 |
-export(project) |
|
54 | 47 |
export(read) |
55 | 48 |
export(read_dataset) |
56 | 49 |
export(remote_processing) |
... | ... |
@@ -60,7 +53,6 @@ export(sample_metadata) |
60 | 53 |
export(sample_region) |
61 | 54 |
export(save_query) |
62 | 55 |
export(save_query_fromfile) |
63 |
-export(select) |
|
64 | 56 |
export(show_datasets_list) |
65 | 57 |
export(show_job_log) |
66 | 58 |
export(show_jobs_list) |
... | ... |
@@ -68,15 +60,24 @@ export(show_queries_list) |
68 | 60 |
export(show_samples_list) |
69 | 61 |
export(show_schema) |
70 | 62 |
export(stop_job) |
71 |
-export(summit) |
|
72 | 63 |
export(take) |
73 | 64 |
export(trace_job) |
74 |
-export(union) |
|
75 | 65 |
export(upload_dataset) |
66 |
+exportMethods(aggregate) |
|
67 |
+exportMethods(cover) |
|
68 |
+exportMethods(filter) |
|
69 |
+exportMethods(join) |
|
70 |
+exportMethods(materialize) |
|
71 |
+exportMethods(mutate) |
|
72 |
+exportMethods(setdiff) |
|
73 |
+exportMethods(sort) |
|
74 |
+exportMethods(subset) |
|
75 |
+exportMethods(union) |
|
76 | 76 |
import(GenomicRanges) |
77 | 77 |
import(httr) |
78 | 78 |
import(xml2) |
79 | 79 |
importClassesFrom(GenomicRanges,GRangesList) |
80 |
+importClassesFrom(S4Vectors,DataTable) |
|
80 | 81 |
importFrom(GenomicRanges,makeGRangesFromDataFrame) |
81 | 82 |
importFrom(S4Vectors,metadata) |
82 | 83 |
importFrom(data.table,fread) |
... | ... |
@@ -1,3 +1,10 @@ |
1 |
+#' @name cover |
|
2 |
+#' @rdname cover-methods |
|
3 |
+#' @aliases cover |
|
4 |
+#' @export |
|
5 |
+setGeneric("cover", function(data, minAcc, maxAcc, ...) |
|
6 |
+ standardGeneric("cover")) |
|
7 |
+ |
|
1 | 8 |
#' GMQL Operation: COVER |
2 | 9 |
#' |
3 | 10 |
#' It takes as input a dataset containing one or more samples and returns |
... | ... |
@@ -27,7 +34,7 @@ |
27 | 34 |
#' @importFrom rJava .jnull |
28 | 35 |
#' @importFrom rJava .jarray |
29 | 36 |
#' |
30 |
-#' @param input_data returned object from any GMQL function |
|
37 |
+#' @param data GMQLDataset class object |
|
31 | 38 |
#' @param minAcc minimum number of overlapping regions to be considered |
32 | 39 |
#' during execution |
33 | 40 |
#' Is a integer number, declared also as string. |
... | ... |
@@ -84,11 +91,25 @@ |
84 | 91 |
#' } |
85 | 92 |
#' "mixed style" is not allowed |
86 | 93 |
#' |
87 |
-#' @return DataSet class object. It contains the value to use as input |
|
94 |
+#' @param variation string identifying the cover GMQL function variation. |
|
95 |
+#' The admissible string are: |
|
96 |
+#' \itemize{ |
|
97 |
+#' \item{flat: returns the contiguous region that starts from the first end |
|
98 |
+#' and stops at the last end of the regions which would contribute |
|
99 |
+#' to each region of the \emph{cover}.} |
|
100 |
+#' \item{summit: returns regions that start from a position |
|
101 |
+#' where the number of intersecting regions is not increasing afterwards and |
|
102 |
+#' stops at a position where either the number of intersecting regions |
|
103 |
+#' decreases, or it violates the max accumulation index.} |
|
104 |
+#' \item{histogram: returns the non-overlapping regions contributing to |
|
105 |
+#' the cover, each with its accumulation index value, which is assigned to |
|
106 |
+#' the AccIndex region attribute.} |
|
107 |
+#' \item{cover: default value.} |
|
108 |
+#' } |
|
109 |
+#' |
|
110 |
+#' @return GMQLDataset class object. It contains the value to use as input |
|
88 | 111 |
#' for the subsequent GMQL function |
89 | 112 |
#' |
90 |
-#' @seealso \code{\link{summit}} \code{\link{flat}} \code{\link{histogram}} |
|
91 |
-#' |
|
92 | 113 |
#' @examples |
93 | 114 |
#' |
94 | 115 |
#' ## This statement produces an output dataset with a single output sample. |
... | ... |
@@ -99,7 +120,7 @@ |
99 | 120 |
#' init_gmql() |
100 | 121 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
101 | 122 |
#' exp = read_dataset(test_path) |
102 |
-#' res = cover(input_data = exp, 2, ANY()) |
|
123 |
+#' res = cover(exp, 2, ANY()) |
|
103 | 124 |
#' |
104 | 125 |
#' \dontrun{ |
105 | 126 |
#' ## This GMQL statement computes the result grouping the input exp samples |
... | ... |
@@ -114,213 +135,21 @@ |
114 | 135 |
#' exp = read_dataset(test_path) |
115 | 136 |
#' res = cover(exp, 2, 3, c("cell"), list(min_pValue = MIN("pvalue"))) |
116 | 137 |
#' } |
138 |
+#' @name cover |
|
139 |
+#' @rdname cover-methods |
|
140 |
+#' @aliases cover, GMQLDataset-methods |
|
117 | 141 |
#' @export |
118 |
-#' |
|
119 |
-cover <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
|
120 |
- aggregates = NULL) |
|
121 |
-{ |
|
122 |
- minAcc <- .trasform_cover(deparse(substitute(minAcc))) |
|
123 |
- maxAcc <- .trasform_cover(deparse(substitute(maxAcc))) |
|
124 |
- |
|
125 |
- min <- .check_cover_param(minAcc,TRUE) |
|
126 |
- max <- .check_cover_param(maxAcc,FALSE) |
|
127 |
- |
|
128 |
- .doVariant("COVER",minAcc,maxAcc,groupBy,aggregates,input_data) |
|
129 |
-} |
|
142 |
+setMethod("cover", "GMQLDataset", |
|
143 |
+ function(data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL, |
|
144 |
+ variation = "cover") |
|
145 |
+ { |
|
146 |
+ gmql_cover(data@value, minAcc, maxAcc, groupBy, aggregates, |
|
147 |
+ variation) |
|
148 |
+ }) |
|
130 | 149 |
|
131 |
-#' GMQL Operation: HISTOGRAM |
|
132 |
-#' |
|
133 |
-#' returns the non-overlapping regions contributing to the cover, |
|
134 |
-#' each with its accumulation index value, which is assigned to |
|
135 |
-#' the AccIndex region attribute. |
|
136 |
-#' |
|
137 |
-#' @importFrom methods is |
|
138 |
-#' @importFrom rJava J |
|
139 |
-#' @importFrom rJava .jnull |
|
140 |
-#' @importFrom rJava .jarray |
|
141 |
-#' |
|
142 |
-#' @param input_data returned object from any GMQL function |
|
143 |
-#' @param minAcc minimum number of overlapping regions to be considered |
|
144 |
-#' during execution |
|
145 |
-#' Is a integer number, declared also as string. |
|
146 |
-#' minAcc accept also: |
|
147 |
-#' \itemize{ |
|
148 |
-#' \item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
149 |
-#' of samples in the input dataset} |
|
150 |
-#' \item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
151 |
-#' ALL() / K } |
|
152 |
-#' } |
|
153 |
-#' @param maxAcc maximum number of overlapping regions to be considered |
|
154 |
-#' during execution |
|
155 |
-#' Is a integer number, declared also as string. |
|
156 |
-#' maxAcc accept also: |
|
157 |
-#' \itemize{ |
|
158 |
-#' \item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
159 |
-#' of samples in the input dataset} |
|
160 |
-#' \item{PARAMETER calss object: \code{\link{ANY}}} that acts as a wildcard, |
|
161 |
-#' considering any amount of overlapping. |
|
162 |
-#' \item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
163 |
-#' ALL() / K } |
|
164 |
-#' } |
|
165 |
-#' @param groupBy list of CONDITION objects where every object contains |
|
166 |
-#' the name of metadata to be used in semijoin, or simple string concatenation |
|
167 |
-#' of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
|
168 |
-#' without declaring condition. |
|
169 |
-#' The CONDITION's available are: |
|
170 |
-#' \itemize{ |
|
171 |
-#' \item{\code{\link{FULL}}: Fullname evaluation, two attributes match |
|
172 |
-#' if they both end with value and, if they have a further prefixes, |
|
173 |
-#' the two prefix sequence are identical} |
|
174 |
-#' \item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly |
|
175 |
-#' as value will match; no further prefixes are allowed. } |
|
176 |
-#' } |
|
177 |
-#' Every condition accepts only one string value. (e.g. FULL("cell_type") ) |
|
178 |
-#' In case of single concatenation with no CONDITION or list with some value |
|
179 |
-#' without conditon, the metadata are considered having default |
|
180 |
-#' evaluation: the two attributes match if both end with value. |
|
181 |
-#' |
|
182 |
-#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
|
183 |
-#' The \emph{aggregate} is an object of class AGGREGATES |
|
184 |
-#' The aggregate functions available are: \code{\link{SUM}}, |
|
185 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
186 |
-#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
187 |
-#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
188 |
-#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
189 |
-#' Every aggregate accepts a string value, execet for COUNT, which does not |
|
190 |
-#' have any value. |
|
191 |
-#' Argument of 'aggregate function' must exist in schema, i.e. among region |
|
192 |
-#' attributes. Two style are allowed: |
|
193 |
-#' \itemize{ |
|
194 |
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue") |
|
195 |
-#' \item list of values: e.g. SUM("pvalue") |
|
196 |
-#' } |
|
197 |
-#' "mixed style" is not allowed |
|
198 |
-#' |
|
199 |
-#' @return DataSet class object. It contains the value to use as input |
|
200 |
-#' for the subsequent GMQL function |
|
201 |
-#' |
|
202 |
-#' @seealso \code{\link{flat}} \code{\link{cover}} \code{\link{summit}} |
|
203 |
-#' |
|
204 |
-#' @examples |
|
205 |
-#' |
|
206 |
-#' ## This GMQL statement computes the result grouping the input \emph{exp} |
|
207 |
-#' ## samples by the values of their \emph{cell} metadata attribute, |
|
208 |
-#' ## thus one output \emph{res} sample is generated for each cell type. |
|
209 |
-#' ## Output regions are produced by dividing results from COVER in contiguous |
|
210 |
-#' ## subregions according to the varying accumulation values |
|
211 |
-#' ## (from 2 to 4 in this case): one region for each accumulation value; |
|
212 |
-#' |
|
213 |
-#' init_gmql() |
|
214 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
215 |
-#' exp = read_dataset(test_path) |
|
216 |
-#' res = histogram(exp, 2, 4, groupBy = c("cell")) |
|
217 |
-#' |
|
218 |
-#' @export |
|
219 |
-#' |
|
220 |
-histogram <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
|
221 |
- aggregates = NULL) |
|
222 |
-{ |
|
223 |
- minAcc <- .trasform_cover(deparse(substitute(minAcc))) |
|
224 |
- maxAcc <- .trasform_cover(deparse(substitute(maxAcc))) |
|
225 |
- |
|
226 |
- min <- .check_cover_param(minAcc,TRUE) |
|
227 |
- max <- .check_cover_param(maxAcc,FALSE) |
|
228 |
- |
|
229 |
- .doVariant("HISTOGRAM",minAcc,maxAcc,groupBy,aggregates,input_data) |
|
230 |
-} |
|
231 | 150 |
|
232 |
-#' GMQL Operation: SUMMIT |
|
233 |
-#' |
|
234 |
-#' returns regions that start from a position |
|
235 |
-#' where the number of intersecting regions is not increasing afterwards and |
|
236 |
-#' stops at a position where either the number of intersecting regions |
|
237 |
-#' decreases, or it violates the max accumulation index. |
|
238 |
-#' |
|
239 |
-#' @importFrom methods is |
|
240 |
-#' @importFrom rJava J |
|
241 |
-#' @importFrom rJava .jnull |
|
242 |
-#' @importFrom rJava .jarray |
|
243 |
-#' |
|
244 |
-#' @param input_data returned object from any GMQL function |
|
245 |
-#' @param minAcc minimum number of overlapping regions to be considered |
|
246 |
-#' during execution |
|
247 |
-#' Is a integer number, declared also as string. |
|
248 |
-#' minAcc accept also: |
|
249 |
-#' \itemize{ |
|
250 |
-#' \item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
251 |
-#' of samples in the input dataset} |
|
252 |
-#' \item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
253 |
-#' ALL() / K } |
|
254 |
-#' } |
|
255 |
-#' @param maxAcc maximum number of overlapping regions to be considered |
|
256 |
-#' during execution |
|
257 |
-#' Is a integer number, declared also as string. |
|
258 |
-#' maxAcc accept also: |
|
259 |
-#' \itemize{ |
|
260 |
-#' \item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
261 |
-#' of samples in the input dataset} |
|
262 |
-#' \item{PARAMETER calss object: \code{\link{ANY}}} that acts as a wildcard, |
|
263 |
-#' considering any amount of overlapping. |
|
264 |
-#' \item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
265 |
-#' ALL() / K } |
|
266 |
-#' } |
|
267 |
-#' @param groupBy list of CONDITION objects where every object contains |
|
268 |
-#' the name of metadata to be used in semijoin, or simple string concatenation |
|
269 |
-#' of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
|
270 |
-#' without declaring condition. |
|
271 |
-#' The CONDITION's available are: |
|
272 |
-#' \itemize{ |
|
273 |
-#' \item{\code{\link{FULL}}: Fullname evaluation, two attributes match |
|
274 |
-#' if they both end with value and, if they have a further prefixes, |
|
275 |
-#' the two prefix sequence are identical} |
|
276 |
-#' \item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly |
|
277 |
-#' as value will match; no further prefixes are allowed. } |
|
278 |
-#' } |
|
279 |
-#' Every condition accepts only one string value. (e.g. FULL("cell_type") ) |
|
280 |
-#' In case of single concatenation with no CONDITION or list with some value |
|
281 |
-#' without conditon, the metadata are considered having default |
|
282 |
-#' evaluation: the two attributes match if both end with value. |
|
283 |
-#' |
|
284 |
-#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
|
285 |
-#' The \emph{aggregate} is an object of class AGGREGATES |
|
286 |
-#' The aggregate functions available are: \code{\link{SUM}}, |
|
287 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
288 |
-#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
289 |
-#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
290 |
-#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
291 |
-#' Every aggregate accepts a string value, execet for COUNT, which does not |
|
292 |
-#' have any value. |
|
293 |
-#' Argument of 'aggregate function' must exist in schema, i.e. among region |
|
294 |
-#' attributes. Two style are allowed: |
|
295 |
-#' \itemize{ |
|
296 |
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue") |
|
297 |
-#' \item list of values: e.g. SUM("pvalue") |
|
298 |
-#' } |
|
299 |
-#' "mixed style" is not allowed |
|
300 |
-#' |
|
301 |
-#' @return DataSet class object. It contains the value to use as input |
|
302 |
-#' for the subsequent GMQL function |
|
303 |
-#' |
|
304 |
-#' @seealso \code{\link{flat}} \code{\link{cover}} \code{\link{histogram}} |
|
305 |
-#' |
|
306 |
-#' @examples |
|
307 |
-#' |
|
308 |
-#' ## This GMQL statement computes the result grouping the input \emph{exp} |
|
309 |
-#' ## samples by the values of their \emph{cell} metadata attribute, |
|
310 |
-#' ## thus one output \emph{res} sample is generated for each cell type. |
|
311 |
-#' ## Output regions are produced by extracting the highest accumulation |
|
312 |
-#' ## overlapping (sub)regions according to the methodologies described above; |
|
313 |
-#' |
|
314 |
-#' |
|
315 |
-#' init_gmql() |
|
316 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
317 |
-#' exp = read_dataset(test_path) |
|
318 |
-#' res = summit(input_data = exp, 2, 4, c("cell")) |
|
319 |
-#' |
|
320 |
-#' @export |
|
321 |
-#' |
|
322 |
-summit <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
|
323 |
- aggregates = NULL) |
|
151 |
+gmql_cover <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
|
152 |
+ aggregates = NULL, variation) |
|
324 | 153 |
{ |
325 | 154 |
minAcc <- .trasform_cover(deparse(substitute(minAcc))) |
326 | 155 |
maxAcc <- .trasform_cover(deparse(substitute(maxAcc))) |
... | ... |
@@ -328,104 +157,9 @@ summit <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
328 | 157 |
min <- .check_cover_param(minAcc,TRUE) |
329 | 158 |
max <- .check_cover_param(maxAcc,FALSE) |
330 | 159 |
|
331 |
- .doVariant("SUMMIT",minAcc,maxAcc,groupBy,aggregates,input_data) |
|
332 |
-} |
|
333 |
- |
|
334 |
-#' GMQL Operation: FLAT |
|
335 |
-#' |
|
336 |
-#' returns the contiguous region that starts from the first end and stops at |
|
337 |
-#' the last end of the regions which would contribute to each region |
|
338 |
-#' of the COVER |
|
339 |
-#' |
|
340 |
-#' @importFrom methods is |
|
341 |
-#' @importFrom rJava J |
|
342 |
-#' @importFrom rJava .jnull |
|
343 |
-#' @importFrom rJava .jarray |
|
344 |
-#' |
|
345 |
-#' @param input_data returned object from any GMQL function |
|
346 |
-#' @param minAcc integer number representing minimum number of overlapping |
|
347 |
-#' regions to be considered during execution |
|
348 |
-#' minAcc accept also: |
|
349 |
-#' \itemize{ |
|
350 |
-#' \item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
351 |
-#' of samples in the input dataset} |
|
352 |
-#' \item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
353 |
-#' ALL() / K } |
|
354 |
-#' } |
|
355 |
-#' @param maxAcc integer number representing maximum number of overlapping |
|
356 |
-#' regions to be considered during execution |
|
357 |
-#' maxAcc accept also: |
|
358 |
-#' \itemize{ |
|
359 |
-#' \item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
360 |
-#' of samples in the input dataset} |
|
361 |
-#' \item{PARAMETER calss object: \code{\link{ANY}}} that acts as a wildcard, |
|
362 |
-#' considering any amount of overlapping. |
|
363 |
-#' \item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
364 |
-#' ALL() / K } |
|
365 |
-#' } |
|
366 |
-#' @param groupBy list of CONDITION objects where every object contains |
|
367 |
-#' the name of metadata to be used in semijoin, or simple string concatenation |
|
368 |
-#' of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
|
369 |
-#' without declaring condition. |
|
370 |
-#' The CONDITION's available are: |
|
371 |
-#' \itemize{ |
|
372 |
-#' \item{\code{\link{FULL}}: Fullname evaluation, two attributes match |
|
373 |
-#' if they both end with value and, if they have a further prefixes, |
|
374 |
-#' the two prefix sequence are identical} |
|
375 |
-#' \item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly |
|
376 |
-#' as value will match; no further prefixes are allowed. } |
|
377 |
-#' } |
|
378 |
-#' Every condition accepts only one string value. (e.g. FULL("cell_type") ) |
|
379 |
-#' In case of single concatenation with no CONDITION or list with some value |
|
380 |
-#' without conditon, the metadata are considered having default |
|
381 |
-#' evaluation: the two attributes match if both end with value. |
|
382 |
-#' |
|
383 |
-#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
|
384 |
-#' The \emph{aggregate} is an object of class AGGREGATES |
|
385 |
-#' The aggregate functions available are: \code{\link{SUM}}, |
|
386 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
387 |
-#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
388 |
-#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
389 |
-#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
390 |
-#' Every aggregate accepts a string value, execet for COUNT, which does not |
|
391 |
-#' have any value. |
|
392 |
-#' Argument of 'aggregate function' must exist in schema, i.e. among region |
|
393 |
-#' attributes. Two style are allowed: |
|
394 |
-#' \itemize{ |
|
395 |
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue") |
|
396 |
-#' \item list of values: e.g. SUM("pvalue") |
|
397 |
-#' } |
|
398 |
-#' "mixed style" is not allowed |
|
399 |
-#' |
|
400 |
-#' @return DataSet class object. It contains the value to use as input |
|
401 |
-#' for the subsequent GMQL function |
|
402 |
-#' |
|
403 |
-#' @seealso \code{\link{summit}} \code{\link{cover}} \code{\link{histogram}} |
|
404 |
-#' |
|
405 |
-#' @examples |
|
406 |
-#' |
|
407 |
-#' ## This GMQL statement computes the result grouping the input \emph{exp} |
|
408 |
-#' ## samples by the values of their \emph{cell} metadata attribute, |
|
409 |
-#' ## thus one output \emph{res} sample is generated for each cell type. |
|
410 |
-#' ## Output regions are produced by concatenating all regions which would |
|
411 |
-#' ## have been used to construct a COVER(2,4) statement on the same dataset; |
|
412 |
-#' |
|
413 |
-#' init_gmql() |
|
414 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
415 |
-#' exp = read_dataset(test_path) |
|
416 |
-#' res = flat(input_data = exp, 2, 4, c("cell")) |
|
417 |
-#' |
|
418 |
-#' @export |
|
419 |
-#' |
|
420 |
-flat <- function(input_data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL) |
|
421 |
-{ |
|
422 |
- minAcc <- .trasform_cover(deparse(substitute(minAcc))) |
|
423 |
- maxAcc <- .trasform_cover(deparse(substitute(maxAcc))) |
|
424 |
- |
|
425 |
- min <- .check_cover_param(minAcc,TRUE) |
|
426 |
- max <- .check_cover_param(maxAcc,FALSE) |
|
160 |
+ vary = toupper(variation) |
|
427 | 161 |
|
428 |
- .doVariant("FLAT",minAcc,maxAcc,groupBy,aggregates,input_data) |
|
162 |
+ .doVariant(vary, minAcc, maxAcc, groupBy, aggregates, input_data) |
|
429 | 163 |
} |
430 | 164 |
|
431 | 165 |
.doVariant <- function(flag,minAcc,maxAcc,groupBy,aggregates,input_data) |
... | ... |
@@ -445,21 +179,23 @@ flat <- function(input_data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL) |
445 | 179 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
446 | 180 |
response <- switch(flag, |
447 | 181 |
"COVER" = WrappeR$cover(minAcc, maxAcc, join_condition_matrix, |
448 |
- metadata_matrix, input_data$value), |
|
182 |
+ metadata_matrix, input_data), |
|
449 | 183 |
"FLAT" = WrappeR$flat(minAcc, maxAcc, join_condition_matrix, |
450 |
- metadata_matrix,input_data$value), |
|
184 |
+ metadata_matrix,input_data), |
|
451 | 185 |
"SUMMIT" = WrappeR$summit(minAcc,maxAcc, join_condition_matrix, |
452 |
- metadata_matrix, input_data$value), |
|
186 |
+ metadata_matrix, input_data), |
|
453 | 187 |
"HISTOGRAM" = WrappeR$histogram(minAcc, maxAcc, |
454 | 188 |
join_condition_matrix, metadata_matrix, |
455 |
- input_data$value)) |
|
456 |
- |
|
189 |
+ input_data)) |
|
190 |
+ if(is.null(response)) |
|
191 |
+ stop("no admissible variation: cover, flat, summit, histogram") |
|
192 |
+ |
|
457 | 193 |
error <- strtoi(response[1]) |
458 | 194 |
data <- response[2] |
459 | 195 |
if(error!=0) |
460 | 196 |
stop(data) |
461 | 197 |
else |
462 |
- DataSet(data) |
|
198 |
+ GMQLDataset(data) |
|
463 | 199 |
} |
464 | 200 |
|
465 | 201 |
.check_cover_param <- function(param,is_min) |
... | ... |
@@ -1,7 +1,47 @@ |
1 |
-DataSet <- function(value) |
|
2 |
-{ |
|
3 |
- op_list <- list(value = value) |
|
4 |
- ## Set the name for the class |
|
5 |
- class(op_list) <- "DataSet" |
|
6 |
- return(op_list) |
|
7 |
-} |
|
8 | 1 |
\ No newline at end of file |
2 |
+#' |
|
3 |
+#' Abstract class representing GMQL dataset |
|
4 |
+#' |
|
5 |
+#' @importClassesFrom S4Vectors DataTable |
|
6 |
+#' |
|
7 |
+#' @name GMQLDataset-class |
|
8 |
+#' @rdname GMQLDataset-class |
|
9 |
+#' |
|
10 |
+setClass("GMQLDataset", |
|
11 |
+ contains = c("DataTable"), |
|
12 |
+ representation(value = "character")) |
|
13 |
+ |
|
14 |
+# Constructor GMQLDataset |
|
15 |
+GMQLDataset <- function(value) { |
|
16 |
+ dataset <- new("GMQLDataset",value = value) |
|
17 |
+ return(dataset) |
|
18 |
+} |
|
19 |
+ |
|
20 |
+ |
|
21 |
+setMethod("show", "GMQLDataset", |
|
22 |
+ function(object) |
|
23 |
+ { |
|
24 |
+ cat("GMQL Dataset \n") |
|
25 |
+ cat(" value :",paste(object@value)) |
|
26 |
+ }) |
|
27 |
+ |
|
28 |
+## insted of GMQL select |
|
29 |
+setGeneric("filter", function(data, m_predicate = NULL, r_predicate = NULL, |
|
30 |
+ semi_join = NULL, semi_join_negation = FALSE, |
|
31 |
+ semi_join_dataset = NULL) |
|
32 |
+ standardGeneric("filter")) |
|
33 |
+ |
|
34 |
+## insted of GMQL extend |
|
35 |
+setGeneric("mutate", function(.data, metadata = NULL) |
|
36 |
+ standardGeneric("mutate")) |
|
37 |
+ |
|
38 |
+# insted of GMQL order |
|
39 |
+# setGeneric("sort", function(data, metadata_ordering = NULL, |
|
40 |
+# regions_ordering = NULL, fetch_opt = NULL, |
|
41 |
+# num_fetch = 0, reg_fetch_opt = NULL, |
|
42 |
+# reg_num_fetch = 0) standardGeneric("sort")) |
|
43 |
+ |
|
44 |
+## insted of GMQL merge |
|
45 |
+setGeneric("aggregate", function(data, groupBy = NULL) |
|
46 |
+ standardGeneric("aggregate")) |
|
47 |
+ |
|
48 |
+ |
... | ... |
@@ -15,8 +15,8 @@ |
15 | 15 |
#' @importFrom rJava .jnull |
16 | 16 |
#' @importFrom rJava .jarray |
17 | 17 |
#' |
18 |
-#' @param right_input_data returned object from any GMQL function |
|
19 |
-#' @param left_input_data returned object from any GMQL function |
|
18 |
+#' @param x returned object from any GMQL function |
|
19 |
+#' @param y returned object from any GMQL function |
|
20 | 20 |
#' @param joinBy list of CONDITION objects where every object contains |
21 | 21 |
#' the name of metadata to be used in semijoin, or simple string concatenation |
22 | 22 |
#' of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
... | ... |
@@ -55,7 +55,7 @@ |
55 | 55 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
56 | 56 |
#' r_left = read_dataset(test_path) |
57 | 57 |
#' r_right = read_dataset(test_path2) |
58 |
-#' out = difference(r_left, r_right) |
|
58 |
+#' out = setdiff(r_left, r_right) |
|
59 | 59 |
#' |
60 | 60 |
#' \dontrun{ |
61 | 61 |
#' ## This GMQL statement extracts for every pair of samples s1 in EXP1 |
... | ... |
@@ -69,14 +69,23 @@ |
69 | 69 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
70 | 70 |
#' exp1 = read_dataset(test_path) |
71 | 71 |
#' exp2 = read_dataset(test_path2) |
72 |
-#' out = difference(exp1, exp2, c("antibody_target")) |
|
72 |
+#' out = setdiff(exp1, exp2, c("antibody_target")) |
|
73 | 73 |
#' |
74 | 74 |
#' } |
75 | 75 |
#' |
76 |
+#' @rdname setdiff-methods |
|
77 |
+#' @aliases setdiff, GMQLDataset-methods |
|
76 | 78 |
#' @export |
77 |
-#' |
|
78 |
-difference <- function(left_input_data, right_input_data, joinBy = NULL, |
|
79 |
- is_exact = FALSE) |
|
79 |
+setMethod("setdiff", c("GMQLDataset","GMQLDataset"), |
|
80 |
+ function(x, y, joinBy = NULL, is_exact = FALSE) |
|
81 |
+ { |
|
82 |
+ val_x = x@value |
|
83 |
+ val_y = y@value |
|
84 |
+ gmql_difference(val_x, val_y, joinBy, is_exact) |
|
85 |
+ }) |
|
86 |
+ |
|
87 |
+gmql_difference <- function(left_data, right_data, joinBy = NULL, |
|
88 |
+ is_exact = FALSE) |
|
80 | 89 |
{ |
81 | 90 |
if(!is.null(joinBy)) |
82 | 91 |
join_condition_matrix <- .jarray(.join_condition(joinBy), |
... | ... |
@@ -85,14 +94,13 @@ difference <- function(left_input_data, right_input_data, joinBy = NULL, |
85 | 94 |
join_condition_matrix <- .jnull("java/lang/String") |
86 | 95 |
|
87 | 96 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
88 |
- response <- WrappeR$difference(join_condition_matrix, |
|
89 |
- right_input_data$value, |
|
90 |
- left_input_data$value,is_exact) |
|
97 |
+ response <- WrappeR$difference(join_condition_matrix, right_data, |
|
98 |
+ left_data, is_exact) |
|
91 | 99 |
error <- strtoi(response[1]) |
92 | 100 |
data <- response[2] |
93 | 101 |
if(error!=0) |
94 | 102 |
stop(data) |
95 | 103 |
else |
96 |
- DataSet(data) |
|
104 |
+ GMQLDataset(data) |
|
97 | 105 |
} |
98 | 106 |
|
... | ... |
@@ -9,7 +9,7 @@ |
9 | 9 |
#' @importFrom rJava J |
10 | 10 |
#' @importFrom rJava .jarray |
11 | 11 |
#' |
12 |
-#' @param input_data returned object from any GMQL function |
|
12 |
+#' @param .data GMQLDataset class object |
|
13 | 13 |
#' @param metadata list of element in the form \emph{key} = \emph{aggregate}. |
14 | 14 |
#' The \emph{aggregate} is an object of class AGGREGATES |
15 | 15 |
#' The aggregate functions available are: \code{\link{SUM}}, |
... | ... |
@@ -37,7 +37,7 @@ |
37 | 37 |
#' init_gmql() |
38 | 38 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
39 | 39 |
#' r <- read_dataset(test_path) |
40 |
-#' e <- extend(input_data = r, list(RegionCount = COUNT())) |
|
40 |
+#' e <- mutate(input_data = r, list(RegionCount = COUNT())) |
|
41 | 41 |
#' |
42 | 42 |
#' \dontrun{ |
43 | 43 |
#' |
... | ... |
@@ -50,14 +50,24 @@ |
50 | 50 |
#' init_gmql() |
51 | 51 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
52 | 52 |
#' exp = read_dataset(test_path) |
53 |
-#' res = extend(input_data = exp, list(RegionCount = COUNT(), |
|
53 |
+#' res = mutate(input_data = exp, list(RegionCount = COUNT(), |
|
54 | 54 |
#' MinP = MIN("pvalue"))) |
55 | 55 |
#' |
56 | 56 |
#' } |
57 | 57 |
#' |
58 |
+#' @name mutate |
|
59 |
+#' @rdname mutate-methods |
|
60 |
+#' @aliases mutate, GMQLDataset-methods |
|
58 | 61 |
#' @export |
59 |
-#' |
|
60 |
-extend <-function(input_data, metadata = NULL) |
|
62 |
+setMethod("mutate", "GMQLDataset", |
|
63 |
+ function(.data, metadata = NULL) |
|
64 |
+ { |
|
65 |
+ val_x = .data@value |
|
66 |
+ gmql_extend(val_x, metadata) |
|
67 |
+ }) |
|
68 |
+ |
|
69 |
+ |
|
70 |
+gmql_extend <-function(input_data, metadata = NULL) |
|
61 | 71 |
{ |
62 | 72 |
if(!is.null(metadata)) |
63 | 73 |
metadata_matrix <- .jarray(.aggregates(metadata,"META_AGGREGATES"), |
... | ... |
@@ -66,12 +76,11 @@ extend <-function(input_data, metadata = NULL) |
66 | 76 |
metadata_matrix <- .jnull("java/lang/String") |
67 | 77 |
|
68 | 78 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
69 |
- response <- WrappeR$extend(metadata_matrix,input_data$value) |
|
79 |
+ response <- WrappeR$extend(metadata_matrix,input_data) |
|
70 | 80 |
error <- strtoi(response[1]) |
71 | 81 |
data <- response[2] |
72 | 82 |
if(error!=0) |
73 | 83 |
stop(data) |
74 | 84 |
else |
75 |
- DataSet(data) |
|
85 |
+ GMQLDataset(data) |
|
76 | 86 |
} |
77 |
- |
... | ... |
@@ -1,12 +1,19 @@ |
1 |
+#' @name join |
|
2 |
+#' @rdname join-methods |
|
3 |
+#' @aliases join |
|
4 |
+#' @export |
|
5 |
+setGeneric("join", function(x, y, by = NULL, ...) standardGeneric("join")) |
|
6 |
+ |
|
7 |
+ |
|
1 | 8 |
#' GMQL Operation: JOIN |
2 | 9 |
#' |
3 | 10 |
#' It takes in input two datasets, respectively known as nchor (left) |
4 | 11 |
#' and experiment (right) and returns a dataset of samples consisting of |
5 | 12 |
#' regions extracted from the operands according to the specified condition |
6 |
-#' (a.k.a genometric_predicate). |
|
13 |
+#' (a.k.a \emph{genometric_predicate}). |
|
7 | 14 |
#' The number of generated output samples is the Cartesian product |
8 | 15 |
#' of the number of samples in the anchor and in the experiment dataset |
9 |
-#' (if joinBy is not specified). |
|
16 |
+#' (if \emph{by} is not specified). |
|
10 | 17 |
#' The output metadata are the union of the input metadata, |
11 | 18 |
#' with their attribute names prefixed with left or right respectively. |
12 | 19 |
#' |
... | ... |
@@ -14,14 +21,14 @@ |
14 | 21 |
#' @importFrom rJava J |
15 | 22 |
#' @importFrom rJava .jarray |
16 | 23 |
#' |
17 |
-#' @param left_input_data returned object from any GMQL function |
|
18 |
-#' @param right_input_data returned object from any GMQL function |
|
24 |
+#' @param x GMQLDataset class object |
|
25 |
+#' @param y GMQLDataset class object |
|
19 | 26 |
#' @param genometric_predicate is a list of lists of DISTAL object |
20 | 27 |
#' For details of DISTAL objects see: |
21 | 28 |
#' \code{\link{DLE}}, \code{\link{DGE}}, \code{\link{DL}}, \code{\link{DG}}, |
22 | 29 |
#' \code{\link{MD}}, \code{\link{UP}}, \code{\link{DOWN}} |
23 | 30 |
#' |
24 |
-#' @param joinBy list of CONDITION objects where every object contains |
|
31 |
+#' @param by list of CONDITION objects where every object contains |
|
25 | 32 |
#' the name of metadata to be used in semijoin, or simple string concatenation |
26 | 33 |
#' of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
27 | 34 |
#' without declaring condition. |
... | ... |
@@ -59,7 +66,7 @@ |
59 | 66 |
#' the genometric predicate)} |
60 | 67 |
#' } |
61 | 68 |
#' |
62 |
-#' @return DataSet class object. It contains the value to use as input |
|
69 |
+#' @return GMQLDataset class object. It contains the value to use as input |
|
63 | 70 |
#' for the subsequent GMQL function |
64 | 71 |
#' |
65 | 72 |
#' |
... | ... |
@@ -78,14 +85,26 @@ |
78 | 85 |
#' TSS = read_dataset(test_path) |
79 | 86 |
#' HM = read_dataset(test_path2) |
80 | 87 |
#' join_data = join(TSS, HM, |
81 |
-#' genometric_predicate = list(list(MD(1), DLE(120000))), c("provider"), |
|
88 |
+#' genometric_predicate = list(list(MD(1), DLE(120000))), by = c("provider"), |
|
82 | 89 |
#' region_output="RIGHT") |
83 | 90 |
#' |
91 |
+#' @name join |
|
92 |
+#' @rdname join-methods |
|
93 |
+#' @aliases join, GMQLDataset-methods |
|
84 | 94 |
#' @export |
85 |
-#' |
|
86 |
-join <- function(right_input_data, left_input_data, |
|
87 |
- genometric_predicate = NULL, joinBy = NULL, |
|
95 |
+setMethod("join", "GMQLDataset", |
|
96 |
+ function(x, y, by = NULL, genometric_predicate = NULL, |
|
88 | 97 |
region_output="contig") |
98 |
+ { |
|
99 |
+ r_data <- x@value |
|
100 |
+ l_data <- y@value |
|
101 |
+ gmql_join(r_data, l_data, genometric_predicate, joinBy, |
|
102 |
+ region_output="contig") |
|
103 |
+ }) |
|
104 |
+ |
|
105 |
+ |
|
106 |
+gmql_join <- function(right_data, left_data, genometric_predicate, joinBy, |
|
107 |
+ region_output="contig") |
|
89 | 108 |
{ |
90 | 109 |
if(!is.null(genometric_predicate)) |
91 | 110 |
{ |
... | ... |
@@ -141,5 +160,5 @@ join <- function(right_input_data, left_input_data, |
141 | 160 |
if(error!=0) |
142 | 161 |
stop(data) |
143 | 162 |
else |
144 |
- DataSet(data) |
|
163 |
+ GMQLDataset(data) |
|
145 | 164 |
} |
... | ... |
@@ -97,11 +97,11 @@ map <- function(left_input_data, right_input_data, aggregates = NULL, |
97 | 97 |
|
98 | 98 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
99 | 99 |
response<-WrappeR$map(join_condition_matrix, metadata_matrix, |
100 |
- left_input_data$value, right_input_data$value) |
|
100 |
+ left_input_data@value, right_input_data@value) |
|
101 | 101 |
error <- strtoi(response[1]) |
102 | 102 |
data <- response[2] |
103 | 103 |
if(error!=0) |
104 | 104 |
stop(data) |
105 | 105 |
else |
106 |
- DataSet(data) |
|
106 |
+ GMQLDataset(data) |
|
107 | 107 |
} |
... | ... |
@@ -3,7 +3,6 @@ |
3 | 3 |
#' Execute GMQL query. |
4 | 4 |
#' The function works only after invoking at least one materialize |
5 | 5 |
#' |
6 |
-#' |
|
7 | 6 |
#' @importFrom rJava J |
8 | 7 |
#' |
9 | 8 |
#' @return None |
... | ... |
@@ -13,8 +12,8 @@ |
13 | 12 |
#' init_gmql() |
14 | 13 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
15 | 14 |
#' r = read_dataset(test_path) |
16 |
-#' s = select(input_data = r) |
|
17 |
-#' m = merge(groupBy = c("antibody_targer","cell_karyotype"),input_data = s) |
|
15 |
+#' s = filter(input_data = r) |
|
16 |
+#' m = aggregate(groupBy = c("antibody_targer","cell_karyotype"),input_data = s) |
|
18 | 17 |
#' materialize(input_data = m, dir_out = test_path) |
19 | 18 |
#' |
20 | 19 |
#' \dontrun{ |
... | ... |
@@ -66,6 +65,13 @@ execute <- function() |
66 | 65 |
} |
67 | 66 |
} |
68 | 67 |
|
68 |
+#' @name materialize |
|
69 |
+#' @rdname materialize-methods |
|
70 |
+#' @aliases materialize |
|
71 |
+#' @export |
|
72 |
+setGeneric("materialize", function(data, ...) standardGeneric("materialize")) |
|
73 |
+ |
|
74 |
+ |
|
69 | 75 |
#' GMQL Operation: MATERIALIZE |
70 | 76 |
#' |
71 | 77 |
#' It saves the contents of a dataset that contains samples metadata and |
... | ... |
@@ -77,7 +83,7 @@ execute <- function() |
77 | 83 |
#' |
78 | 84 |
#' @importFrom rJava J |
79 | 85 |
#' |
80 |
-#' @param input_data returned object from any GMQL function |
|
86 |
+#' @param data GMQLDataset class object |
|
81 | 87 |
#' @param dir_out destination folder path. |
82 | 88 |
#' by default is current working directory of the R process |
83 | 89 |
#' |
... | ... |
@@ -88,16 +94,25 @@ execute <- function() |
88 | 94 |
#' init_gmql() |
89 | 95 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
90 | 96 |
#' r = read_dataset(test_path) |
91 |
-#' s = select(input_data = r) |
|
92 |
-#' m = merge(groupBy = c("antibody_targer","cell_karyotype"),input_data = s) |
|
97 |
+#' s = filter(input_data = r) |
|
98 |
+#' m = aggregate(s, groupBy = c("antibody_targer","cell_karyotype")) |
|
93 | 99 |
#' materialize(input_data = m, dir_out = test_path) |
94 | 100 |
#' |
101 |
+#' @name materialize |
|
102 |
+#' @rdname materialize-methods |
|
103 |
+#' @aliases materialize, GMQLDataset-methods |
|
95 | 104 |
#' @export |
96 |
-#' |
|
97 |
-materialize <- function(input_data, dir_out = getwd()) |
|
105 |
+setMethod("materialize", "GMQLDataset", |
|
106 |
+ function(data, dir_out = getwd()) |
|
107 |
+ { |
|
108 |
+ gmql_materialize(data, dir_out) |
|
109 |
+ }) |
|
110 |
+ |
|
111 |
+ |
|
112 |
+gmql_materialize <- function(data, dir_out) |
|
98 | 113 |
{ |
99 | 114 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
100 |
- response <- WrappeR$materialize(input_data$value,dir_out) |
|
115 |
+ response <- WrappeR$materialize(data@value,dir_out) |
|
101 | 116 |
error <- strtoi(response[1]) |
102 | 117 |
data <- response[2] |
103 | 118 |
if(error!=0) |
... | ... |
@@ -133,7 +148,7 @@ materialize <- function(input_data, dir_out = getwd()) |
133 | 148 |
#' init_gmql() |
134 | 149 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
135 | 150 |
#' r = read_dataset(test_path) |
136 |
-#' m = merge(groupBy = c("antibody_target", "cell_karyotype"), input_data = r) |
|
151 |
+#' m = aggregate(r, groupBy = c("antibody_target", "cell_karyotype")) |
|
137 | 152 |
#' g <- take(input_data = m, rows = 45) |
138 | 153 |
#' |
139 | 154 |
#' @export |
... | ... |
@@ -145,7 +160,7 @@ take <- function(input_data, rows=0L) |
145 | 160 |
stop("rows cannot be negative") |
146 | 161 |
|
147 | 162 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
148 |
- response <- WrappeR$take(input_data$value,rows) |
|
163 |
+ response <- WrappeR$take(input_data@value,rows) |
|
149 | 164 |
error <- strtoi(response[1]) |
150 | 165 |
data <- response[2] |
151 | 166 |
if(error!=0) |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
#' GMQL Operation: MERGE |
2 | 2 |
#' |
3 | 3 |
#' It builds a dataset consisting of a single sample having as many regions |
4 |
-#' as the numebr of regions of the input data and as many metadata |
|
4 |
+#' as the number of regions of the input data and as many metadata |
|
5 | 5 |
#' as the union of the 'attribute-value' tuples of the input samples. |
6 | 6 |
#' A groupby clause can be specified on metadata: the samples are then |
7 | 7 |
#' partitioned in groups, each with a distinct value of the grouping |
... | ... |
@@ -15,7 +15,7 @@ |
15 | 15 |
#' @importFrom rJava .jnull |
16 | 16 |
#' @importFrom rJava .jarray |
17 | 17 |
#' |
18 |
-#' @param input_data returned object from any GMQL function |
|
18 |
+#' @param data GMQLDataset class object |
|
19 | 19 |
#' @param groupBy list of CONDITION objects where every object contains |
20 | 20 |
#' the name of metadata to be used in semijoin, or simple string concatenation |
21 | 21 |
#' of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
... | ... |
@@ -46,11 +46,21 @@ |
46 | 46 |
#' init_gmql() |
47 | 47 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
48 | 48 |
#' exp = read_dataset(test_path) |
49 |
-#' merged = merge(input_data = exp, groupBy = c("antibody_target")) |
|
49 |
+#' merged = aggregate(input_data = exp, groupBy = c("antibody_target")) |
|
50 | 50 |
#' |
51 |
+#' @name aggregate |
|
52 |
+#' @rdname aggregate-methods |
|
53 |
+#' @aliases aggregate, GMQLDataset-methods |
|
51 | 54 |
#' @export |
52 |
-#' |
|
53 |
-merge <- function(input_data, groupBy = NULL) |
|
55 |
+#' |
|
56 |
+setMethod("aggregate", "GMQLDataset", |
|
57 |
+ function(data, groupBy = NULL) |
|
58 |
+ { |
|
59 |
+ val = .data@value |
|
60 |
+ gmql_merge(val, metadata) |
|
61 |
+ }) |
|
62 |
+ |
|
63 |
+gmql_merge <- function(data, groupBy = NULL) |
|
54 | 64 |
{ |
55 | 65 |
if(!is.null(groupBy)) |
56 | 66 |
join_condition_matrix <- .jarray(.join_condition(groupBy), |
... | ... |
@@ -59,12 +69,12 @@ merge <- function(input_data, groupBy = NULL) |
59 | 69 |
join_condition_matrix <- .jnull("java/lang/String") |
60 | 70 |
|
61 | 71 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
62 |
- response <- WrappeR$merge(join_condition_matrix,input_data$value) |
|
72 |
+ response <- WrappeR$merge(join_condition_matrix,data) |
|
63 | 73 |
error <- strtoi(response[1]) |
64 | 74 |
data <- response[2] |
65 | 75 |
if(error!=0) |
66 | 76 |
stop(data) |
67 | 77 |
else |
68 |
- DataSet(data) |
|
78 |
+ GMQLDataset(data) |
|
69 | 79 |
} |
70 | 80 |
|
... | ... |
@@ -18,38 +18,49 @@ |
18 | 18 |
#' @importFrom rJava .jnull |
19 | 19 |
#' @importFrom rJava .jarray |
20 | 20 |
#' |
21 |
-#' @param input_data returned object from any GMQL function |
|
22 |
-#' @param metadata_ordering list of ORDER objects where every object |
|
21 |
+#' @param x GMQLDataset class object |
|
22 |
+#' @param decreasing logical value indicating the sorting for both metadata |
|
23 |
+#' and regions. |
|
24 |
+#' if you define \emph{metadata_ordering} or \emph{regions_ordering} |
|
25 |
+#' defyinig order objects, this value is discarded |
|
26 |
+#' @param metadata_ordering list of order objects where every object |
|
23 | 27 |
#' contains the name of metadata. |
24 | 28 |
#' The ORDER's available are: \code{\link{ASC}}, \code{\link{DESC}} |
25 | 29 |
#' Every condition accepts only one string value. (e.g. ASC("cell_type") ) |
26 |
-#' @param mtop integer value specifying the first k samples to retrieve. |
|
27 |
-#' default is 0 that means every sample must be considered |
|
28 |
-#' @param mtopg integer value specifying the first k samples to retrieve |
|
29 |
-#' in each group. |
|
30 |
-#' default is 0 that means every sample must be considered |
|
31 |
-#' @param mtopp integer value specifying the percentage of samples to retrieve. |
|
32 |
-#' default is 0 that means every sample must be considered |
|
30 |
+#' |
|
31 |
+#' @param fetch_opt string indicating the option used to fetch the |
|
32 |
+#' first k sample: |
|
33 |
+#' \itemize{ |
|
34 |
+#' \item{mtop: it fetch the first k sample} |
|
35 |
+#' \item{mtopp: it fetch the first k sample in each group.} |
|
36 |
+#' \item{mtopg: it fetch the percentage of sample.} |
|
37 |
+#' } |
|
38 |
+#' if NULL, \emph{num_fetch} is not considered |
|
39 |
+#' |
|
40 |
+#' @param num_fetch integer value identifying the number of region to fetch |
|
41 |
+#' by default is 0, that's means all sample are fetched |
|
42 |
+#' s |
|
33 | 43 |
#' @param regions_ordering list of ORDER objects where every object contains |
34 | 44 |
#' the name of region schema value. |
35 |
-#' The ORDER's available are: ASC, DESC. |
|
45 |
+#' The ORDER's available are: \code{\link{ASC}}, \code{\link{DESC}}. |
|
36 | 46 |
#' Every condition accepts only one string value. (e.g. DESC("pvalue") ) |
37 |
-#' @param rtop integer value specifying the first k regions in each group. |
|
38 |
-#' default is 0 that means every sample must be considered |
|
39 |
-#' @param rtopg integer value specifying the first k regions to retrieve |
|
40 |
-#' in each group. |
|
41 |
-#' default is 0 that means every sample must be considered |
|
42 |
-#' @param rtopp integer value specifying the percentage of regions to retrieve. |
|
43 |
-#' default is 0 that means every sample must be considered |
|
44 |
-#' |
|
47 |
+#' |
|
48 |
+#' @param reg_fetch_opt string indicating the option used to fetch the |
|
49 |
+#' first k regions: |
|
50 |
+#' \itemize{ |
|
51 |
+#' \item{rtop: it fetch the first k regions.} |
|
52 |
+#' \item{rtopp: it fetch the first k regions in each group.} |
|
53 |
+#' \item{rtopg: it fetch the percentage of regions.} |
|
54 |
+#' } |
|
55 |
+#' if NULL, \emph{reg_num_fetch} is not considered |
|
56 |
+#' |
|
57 |
+#' @param reg_num_fetch integer value identifying the number of region to fetch |
|
58 |
+#' by default is 0, that's means all regions are fetched |
|
59 |
+#' |
|
60 |
+#' |
|
45 | 61 |
#' @return DataSet class object. It contains the value to use as input |
46 | 62 |
#' for the subsequent GMQL function |
47 | 63 |
#' |
48 |
-#' @details |
|
49 |
-#' mtop, mtopg,mtopp, rtop, rtopg and rtopp are normally numbers: |
|
50 |
-#' if you specify a vector, only the first element will be used |
|
51 |
-#' mtop and mtopg and mtopp are mutalbe exclusive, so rtop and rtopg and rtopp |
|
52 |
-#' |
|
53 | 64 |
#' |
54 | 65 |
#' @examples |
55 | 66 |
#' |
... | ... |
@@ -59,117 +70,128 @@ |
59 | 70 |
#' init_gmql() |
60 | 71 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
61 | 72 |
#' r = read_dataset(test_path) |
62 |
-#' o = order(r,list(DESC("Region_Count")), mtop = 2) |
|
63 |
-#' |
|
73 |
+#' o = sort(r,TRUE, c("Region_Count"),fetch_opt= "mtop",num_fetch = 2) |
|
74 |
+#' |
|
75 |
+#' |
|
76 |
+#' \dontrun{ |
|
77 |
+#' |
|
78 |
+#' ## the same result is obtained with using GMQL-syntax like: |
|
79 |
+#' ## in this case decreasing parameter TRUE is not considerd |
|
80 |
+#' o = sort(r,TRUE, c(DESC("Region_Count")), fetch_opt = "mtop", |
|
81 |
+#' num_fetch = 2) |
|
82 |
+#' |
|
83 |
+#' } |
|
84 |
+#' @name sort |
|
85 |
+#' @rdname sort-methods |
|
86 |
+#' @aliases sort, GMQLDataset-methods |
|
64 | 87 |
#' @export |
65 |
-#' |
|
66 |
-order <- function(input_data, metadata_ordering = NULL, mtop = 0, mtopg = 0, |
|
67 |
- mtopp = 0, regions_ordering = NULL, rtop = 0, rtopg = 0, |
|
68 |
- rtopp = 0) |
|
69 |
-{ |
|
70 |
- if(!is.numeric(mtop) || !is.numeric(mtopg) || !is.numeric(rtop) || |
|
71 |
- !is.numeric(rtopg) || !is.numeric(mtopp)|| !is.numeric(rtopp)) |
|
72 |
- stop("mtop, rtop, rtopg and mtopg must be integer") |
|
73 |
- |
|
74 |
- if(length(mtop)>1 || length(mtopg)>1 || length(rtop)>1 || length(rtopg)>1 |
|
75 |
- || length(mtopp)>1 || length(rtopp)>1) |
|
76 |
- warning("only first element: rtop, mtop, mtopg, rtopg, rtopp, mtopp") |
|
77 |
- |
|
78 |
- # we consider only the first element even if input is a vector of Int |
|
79 |
- # we cut the other arguments |
|
80 |
- |
|
81 |
- mtop = as.integer(mtop[1]) |
|
82 |
- mtopg = as.integer(mtopg[1]) |
|
83 |
- mtopp = as.integer(mtopp[1]) |
|
84 |
- |
|
85 |
- rtop = as.integer(rtop[1]) |
|
86 |
- rtopg = as.integer(rtopg[1]) |
|
87 |
- rtopp = as.integer(rtopp[1]) |
|
88 |
- |
|
89 |
- if(mtop > 0 && mtopg >0) |
|
90 |
- { |
|
91 |
- warning("cannot be used together.\nWe set mtopg = 0") |
|
92 |
- mtopg = 0L |
|
93 |
- } |
|
88 |
+setMethod("sort", "GMQLDataset", |
|
89 |
+ function(x, decreasing = FALSE, metadata_ordering = NULL, |
|
90 |
+ regions_ordering = NULL, fetch_opt = NULL, |
|
91 |
+ num_fetch = 0, reg_fetch_opt = NULL, reg_num_fetch = 0) |
|
92 |
+ { |
|
93 |
+ gmql_order(x@value, decreasing, metadata_ordering, |
|
94 |
+ regions_ordering, fetch_opt, num_fetch, |
|
95 |
+ reg_fetch_opt, reg_num_fetch) |
|
96 |
+ }) |
|
94 | 97 |
|
95 |
- if(mtop >0 && mtopp>0) |
|
96 |
- { |
|
97 |
- warning("cannot be used together.\nWe set mtopp = 0") |
|
98 |
- mtopp = 0L |
|
99 |
- } |
|
98 |
+gmql_order <- function(data, decreasing, metadata_ordering, regions_ordering, |
|
99 |
+ fetch_opt, num_fetch, reg_fetch_opt, reg_num_fetch) |
|
100 |
+{ |
|
101 |
+ if(!is.null(fetch_opt)) |
|
102 |
+ fetch_opt <- .check_option(fetch_opt) |
|
103 |
+ else |
|
104 |
+ fetch_opt <- .jnull("java/lang/String") |
|
100 | 105 |
|
101 |
- if(mtopg >0 && mtopp>0) |
|
102 |
- { |
|
103 |
- warning("cannot be used together.\nWe set mtopp = 0") |
|
104 |
- mtopp = 0L |
|
105 |
- } |
|
106 |
+ if(!is.null(num_fetch)) |
|
107 |
+ .check_opt_value(num_fetch) |
|
108 |
+ else |
|
109 |
+ num_fetch <- 0 |
|
106 | 110 |
|
107 |
- if(rtop > 0 && rtopg >0) |
|
108 |
- { |
|
109 |
- warning("cannot be used together.\nWe set rtopg = 0") |
|
110 |
- rtopg = 0L |
|
111 |
- } |
|
111 |
+ if(!is.null(reg_num_fetch)) |
|
112 |
+ .check_opt_value(reg_num_fetch) |
|
113 |
+ else |
|
114 |
+ reg_num_fetch <- 0 |
|
112 | 115 |
|
113 |
- if(rtop >0 && rtopp>0) |
|
114 |
- { |
|
115 |
- warning("cannot be used together.\nWe set rtopp = 0") |
|
116 |
- rtopp = 0L |
|
117 |
- } |
|
116 |
+ if(!is.null(reg_fetch_opt)) |
|
117 |
+ reg_fetch_opt <- .check_option(reg_fetch_opt) |
|
118 |
+ else |
|
119 |
+ reg_fetch_opt <- .jnull("java/lang/String") |
|
118 | 120 |
|
119 |
- if(rtopg >0 && rtopp>0) |
|
121 |
+ if(!is.null(metadata_ordering)) |
|
120 | 122 |
{ |
121 |
- warning("cannot be used together.\nWe set rtopp = 0") |
|
122 |
- rtopp = 0L |
|
123 |
+ meta_matrix <- .ordering_meta(metadata_ordering, decreasing) |
|
124 |
+ meta_matrix <- .jarray(meta_matrix, dispatch = TRUE) |
|
123 | 125 |
} |
124 |
- |
|
125 |
- if(!is.null(metadata_ordering)) |
|
126 |
- meta_matrix <- .jarray(.ordering_meta(metadata_ordering), |
|
127 |
- dispatch = TRUE) |
|
128 | 126 |
else |
129 | 127 |
meta_matrix <- .jnull("java/lang/String") |
130 | 128 |
|
131 | 129 |
if(!is.null(regions_ordering)) |
132 |
- region_matrix <- .jarray(.ordering_meta(regions_ordering), |
|
133 |
- dispatch = TRUE) |
|
130 |
+ { |
|
131 |
+ region_matrix <- .ordering_meta(regions_ordering, decreasing) |
|
132 |
+ region_matrix <- .jarray(region_matrix, dispatch = TRUE) |
|
133 |
+ } |
|
134 | 134 |
else |
135 | 135 |
region_matrix <- .jnull("java/lang/String") |
136 | 136 |
|
137 | 137 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
138 |
- response <- WrappeR$order(meta_matrix, mtopg, mtop, mtopp, region_matrix, |
|
139 |
- rtopg, rtop, rtopp, input_data$value) |
|
138 |
+ response <- WrappeR$order(meta_matrix, fetch_opt, as.integer(num_fetch), |
|
139 |
+ reg_fetch_opt, as.integer(reg_num_fetch), |
|
140 |
+ region_matrix, data) |
|
140 | 141 |
error <- strtoi(response[1]) |
141 | 142 |
data <- response[2] |
142 | 143 |
if(error!=0) |
143 | 144 |
stop(data) |
144 | 145 |
else |
145 |
- DataSet(data) |
|
146 |
+ GMQLDataset(data) |
|
146 | 147 |
} |
147 | 148 |
|
148 | 149 |
|
149 |
-.ordering_meta <- function(ordering) |
|
150 |
+.ordering_meta <- function(ordering, decreasing) |
|
150 | 151 |
{ |
151 | 152 |
if(is.list(ordering)) |
152 | 153 |
{ |
153 |
- order_matrix <- t(sapply(ordering,function(x){ |
|
154 |
+ order_matrix <- t(mapply(function(x,dec){ |
|
154 | 155 |
new_value <- as.character(x) |
155 |
- if(length(new_value)==1) |
|
156 |
+ if(length(new_value)==1 && dec == FALSE) |
|
156 | 157 |
new_value = c("ASC",new_value) |
158 |
+ else if(length(new_value)==1 && dec == TRUE) |
|
159 |
+ new_value = c("DESC",new_value) |
|
157 | 160 |
else if(!identical("ASC",new_value[1]) && |
158 | 161 |
!identical("DESC",new_value[1])) |
159 | 162 |
stop("no more than one value") |
160 | 163 |
matrix <- matrix(new_value) |
161 |
- })) |
|
164 |
+ },ordering, decreasing)) |
|
162 | 165 |
} |
163 | 166 |
else if(is.character(ordering)) |
164 | 167 |
{ |
165 |
- order_matrix <- t(sapply(ordering, function(x) { |
|
166 |
- new_value = c("ASC",x) |
|
168 |
+ order_matrix <- t(mapply(function(x,dec) { |
|
169 |
+ if( dec == FALSE) |
|
170 |
+ new_value = c("ASC",x) |
|
171 |
+ else |
|
172 |
+ new_value = c("DESC",x) |
|
167 | 173 |
matrix <- matrix(new_value) |
168 |
- })) |
|
174 |
+ }, ordering, decreasing)) |
|
169 | 175 |
} |
170 | 176 |
else |
171 | 177 |
stop("only list or character") |
172 |
- |
|
173 | 178 |
} |
174 | 179 |
|
180 |
+.check_option <- function(opt) |
|
181 |
+{ |
|
182 |
+ opt <- tolower(opt) |
|
183 |
+ if(!identical("mtop",opt) && !identical("mtopp",opt) && |
|
184 |
+ !identical("mtopg",opt) && !identical("rtop",opt) && |
|
185 |
+ !identical("rtopp",opt) && !identical("rtopg",opt)) |
|
186 |
+ stop("option not admissable") |
|
187 |
+ opt |
|
188 |
+} |
|
175 | 189 |
|
190 |
+.check_opt_value <- function(opt_value) |
|
191 |
+{ |
|
192 |
+ if(!is.numeric(opt_value)) |
|
193 |
+ stop("no valid data") |
|
194 |
+ |
|
195 |
+ if(length(opt_value)>1) |
|
196 |
+ stop("no multiple value") |
|
197 |
+} |
... | ... |
@@ -15,7 +15,7 @@ |
15 | 15 |
#' @importFrom rJava .jnull |
16 | 16 |
#' @importFrom rJava .jarray |
17 | 17 |
#' |
18 |
-#' @param input_data returned object from any GMQL function |
|
18 |
+#' @param x GMQLDataset class object |
|
19 | 19 |
#' @param metadata vector of string made up by metadata attribute |
20 | 20 |
#' @param regions vector of string made up by schema field attribute |
21 | 21 |
#' @param all_but_reg logical value indicating which schema field attribute |
... | ... |
@@ -44,7 +44,7 @@ |
44 | 44 |
#' \item{All basic mathematical operations (+, -, *, /), including parenthesis} |
45 | 45 |
#' \item{SQRT, META, NULLABLE constructor object defined by OPERATOR object} |
46 | 46 |
#' } |
47 |
-#' @return DataSet class object. It contains the value to use as input |
|
47 |
+#' @return GMQLDataset class object. It contains the value to use as input |
|
48 | 48 |
#' for the subsequent GMQL function |
49 | 49 |
#' |
50 | 50 |
#' @examples |
... | ... |
@@ -60,7 +60,7 @@ |
60 | 60 |
#' init_gmql() |
61 | 61 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
62 | 62 |
#' input = read_dataset(test_path) |
63 |
-#' CTCF_NORM_SCORE = project(input, metadata_update = list(normalized = 1), |
|
63 |
+#' CTCF_NORM_SCORE = subset(input, metadata_update = list(normalized = 1), |
|
64 | 64 |
#' regions_update = list(new_score = (score / 1000.0) + 100), |
65 | 65 |
#' regions = c("score"), all_but_reg = TRUE) |
66 | 66 |
#' |
... | ... |
@@ -78,7 +78,7 @@ |
78 | 78 |
#' init_gmql() |
79 | 79 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
80 | 80 |
#' DS_in = read_dataset(test_path) |
81 |
-#' DS_out = project(DS_in, regions = c("variant_classification", |
|
81 |
+#' DS_out = subset(DS_in, regions = c("variant_classification", |
|
82 | 82 |
#' "variant_type"), metadata = c("manually_curated_tissue_status", |
83 | 83 |
#' "manually_curated_tumor_tag")) |
84 | 84 |
#' |
... | ... |
@@ -86,8 +86,22 @@ |
86 | 86 |
#' |
87 | 87 |
#' @export |
88 | 88 |
#' |
89 |
-#' |
|
90 |
-project <-function(input_data, metadata = NULL, metadata_update=NULL, |
|
89 |
+#' @name subset |
|
90 |
+#' @rdname subset-methods |
|
91 |
+#' @aliases subset, GMQLDataset-methods |
|
92 |
+#' @export |
|
93 |
+setMethod("subset", "GMQLDataset", |
|
94 |
+ function(x, metadata = NULL, metadata_update=NULL, |
|
95 |
+ all_but_meta = FALSE, regions = NULL, |
|
96 |
+ regions_update = NULL, all_but_reg=FALSE) |
|
97 |
+ { |
|
98 |
+ data = x@value |
|
99 |
+ gmql_project(data, metadata, metadata_update, |
|
100 |
+ all_but_meta, regions, |
|
101 |
+ regions_update, all_but_reg) |
|
102 |
+ }) |
|
103 |
+ |
|
104 |
+gmql_project <-function(input_data, metadata = NULL, metadata_update=NULL, |
|
91 | 105 |
all_but_meta = FALSE, regions = NULL, |
92 | 106 |
regions_update = NULL, all_but_reg=FALSE) |
93 | 107 |
{ |
... | ... |
@@ -159,7 +173,7 @@ project <-function(input_data, metadata = NULL, metadata_update=NULL, |
159 | 173 |
if(error!=0) |
160 | 174 |
stop(data) |
161 | 175 |
else |
162 |
- DataSet(data) |
|
176 |
+ GMQLDataset(data) |
|
163 | 177 |
} |
164 | 178 |
|
165 | 179 |
.trasform_update <- function(predicate=NULL) |
... | ... |
@@ -169,7 +169,7 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, |
169 | 169 |
if(error!=0) |
170 | 170 |
stop(data) |
171 | 171 |
else |
172 |
- DataSet(data) |
|
172 |
+ GMQLDataset(data) |
|
173 | 173 |
} |
174 | 174 |
|
175 | 175 |
#' GMQL Function: READ |
... | ... |
@@ -259,7 +259,7 @@ We provide two metadata for you") |
259 | 259 |
|