... | ... |
@@ -57,6 +57,7 @@ export(show_jobs_list) |
57 | 57 |
export(show_queries_list) |
58 | 58 |
export(show_samples_list) |
59 | 59 |
export(show_schema) |
60 |
+export(stop_gmql) |
|
60 | 61 |
export(stop_job) |
61 | 62 |
export(trace_job) |
62 | 63 |
export(upload_dataset) |
... | ... |
@@ -80,6 +81,7 @@ importClassesFrom(GenomicRanges,GRangesList) |
80 | 81 |
importClassesFrom(S4Vectors,DataTable) |
81 | 82 |
importFrom(BiocGenerics,setdiff) |
82 | 83 |
importFrom(BiocGenerics,union) |
84 |
+importFrom(GenomicRanges,GRangesList) |
|
83 | 85 |
importFrom(GenomicRanges,makeGRangesFromDataFrame) |
84 | 86 |
importFrom(S4Vectors,aggregate) |
85 | 87 |
importFrom(S4Vectors,merge) |
... | ... |
@@ -6,7 +6,7 @@ |
6 | 6 |
#' @slot value value associated to GMQL dataset |
7 | 7 |
#' @name GMQLDataset-class |
8 | 8 |
#' @rdname GMQLDataset-class |
9 |
-#' |
|
9 |
+#' @noRd |
|
10 | 10 |
#' @return instance of GMQL dataset |
11 | 11 |
#' |
12 | 12 |
setClass("GMQLDataset", |
... | ... |
@@ -22,7 +22,7 @@ setClass("GMQLDataset", |
22 | 22 |
#' |
23 | 23 |
#' @param value value associated to GMQL dataset |
24 | 24 |
#' @rdname GMQLDataset-class |
25 |
-#' |
|
25 |
+#' @noRd |
|
26 | 26 |
GMQLDataset <- function(value) { |
27 | 27 |
dataset <- new("GMQLDataset",value = value) |
28 | 28 |
return(dataset) |
... | ... |
@@ -10,7 +10,7 @@ setGeneric("cover", function(data, ...) standardGeneric("cover")) |
10 | 10 |
|
11 | 11 |
#' Method map |
12 | 12 |
#' |
13 |
-#' Wrapper to GMQL map function |
|
13 |
+#' Wrapper to GMQL MAP operator |
|
14 | 14 |
#' |
15 | 15 |
#' @name map |
16 | 16 |
#' @rdname map |
... | ... |
@@ -21,7 +21,7 @@ setGeneric("map", function(x, y, ...) standardGeneric("map")) |
21 | 21 |
|
22 | 22 |
#' Method take |
23 | 23 |
#' |
24 |
-#' Wrapper to take function |
|
24 |
+#' Wrapper to TAKE operation |
|
25 | 25 |
#' |
26 | 26 |
#' @name take |
27 | 27 |
#' @rdname take |
... | ... |
@@ -32,7 +32,7 @@ setGeneric("take", function(data, ...) standardGeneric("take")) |
32 | 32 |
|
33 | 33 |
#' Method extend |
34 | 34 |
#' |
35 |
-#' Wrapper to GMQL extend function |
|
35 |
+#' Wrapper to GMQL EXTEND operator |
|
36 | 36 |
#' |
37 | 37 |
#' @name extend |
38 | 38 |
#' @rdname extend |
... | ... |
@@ -1,7 +1,7 @@ |
1 |
-#' Create GRangesList from GMQL Dataset |
|
1 |
+#' Create GRangesList from GMQL dataset |
|
2 | 2 |
#' |
3 |
-#' It create a GrangesList from GMQL samples in dataset |
|
4 |
-#' It reads sample files in GTF or GDM/tabulated format |
|
3 |
+#' It creates a GRangesList from GMQL samples in dataset. |
|
4 |
+#' It reads sample files in GTF or GDM/tab-delimited format. |
|
5 | 5 |
#' |
6 | 6 |
#' @importFrom rtracklayer import |
7 | 7 |
#' @importClassesFrom GenomicRanges GRangesList |
... | ... |
@@ -9,16 +9,19 @@ |
9 | 9 |
#' @importFrom utils read.delim |
10 | 10 |
#' @import xml2 |
11 | 11 |
#' |
12 |
-#' @param dataset_path string GMQL dataset folder path |
|
13 |
-#' @param is_gtf logical value indicating if samples inside are in GTF format |
|
14 |
-#' if TRUE and dataset does not contain gtf sample an error occures |
|
12 |
+#' @param dataset_path string with GMQL dataset folder path |
|
13 |
+#' @param is_gtf logical value indicating if dataset samples are in GTF format; |
|
14 |
+#' if TRUE and dataset does not contain GTF samples an error occurs |
|
15 | 15 |
#' |
16 |
-#' @return GrangesList containing all GMQL samples in dataset |
|
16 |
+#' @return GRangesList containing all GMQL samples in dataset |
|
17 | 17 |
#' |
18 | 18 |
#' @seealso \code{\link{export_gmql}} |
19 | 19 |
#' |
20 | 20 |
#' @examples |
21 |
-#' |
|
21 |
+#' |
|
22 |
+#' ## This statement defines the path to the subdirectory "example" of the |
|
23 |
+#' ## package "RGMQL" and import as GRangesList the GMQL dataset |
|
24 |
+#' |
|
22 | 25 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
23 | 26 |
#' grl = import_gmql(test_path, TRUE) |
24 | 27 |
#' |
... | ... |
@@ -1,6 +1,6 @@ |
1 |
-#' Create GMQL dataset from GrangesList |
|
1 |
+#' Create GMQL dataset from GRangesList |
|
2 | 2 |
#' |
3 |
-#' It create GMQL dataset from GRangesList. |
|
3 |
+#' It creates GMQL dataset from GRangesList. |
|
4 | 4 |
#' All samples are in GDM (tab-separated values) or GTF file format |
5 | 5 |
#' |
6 | 6 |
#' @import xml2 |
... | ... |
@@ -11,8 +11,9 @@ |
11 | 11 |
#' @importFrom S4Vectors metadata |
12 | 12 |
#' @import GenomicRanges |
13 | 13 |
#' |
14 |
-#' @param samples GrangesList |
|
15 |
-#' @param dir_out folder path where create a folder and write the sample files |
|
14 |
+#' @param samples GRangesList |
|
15 |
+#' @param dir_out folder path where to create a folder and write the sample |
|
16 |
+#' files |
|
16 | 17 |
#' @param is_gtf logical value indicating if samples have to be exported |
17 | 18 |
#' with GTF or GDM format |
18 | 19 |
#' |
... | ... |
@@ -21,28 +22,43 @@ |
21 | 22 |
#' @seealso \code{\link{import_gmql}} |
22 | 23 |
#' |
23 | 24 |
#' @details |
24 |
-#' The GMQL dataset is made up by two differet file type |
|
25 |
+#' The GMQL dataset is made up by two different file types: |
|
25 | 26 |
#' |
26 | 27 |
#' \itemize{ |
27 |
-#' \item{metadata files: contain metadata associated to corrisponding sample} |
|
28 |
-#' \item{region files: contain many genomic regions } |
|
29 |
-#' \item{region schema file: XML file contains region attributes |
|
28 |
+#' \item{metadata files: they contain metadata associated to corrisponding |
|
29 |
+#' sample.} |
|
30 |
+#' \item{region files: they contain many genomic regions data.} |
|
31 |
+#' \item{region schema file: XML file that contains region attribute name |
|
30 | 32 |
#' (e.g. chr, start, end, pvalue)} |
31 | 33 |
#' } |
32 |
-#' region sample files and metadata files are associated through file name: |
|
34 |
+#' Region sample files and metadata files are associated through file name: |
|
33 | 35 |
#' for example S_0001.gdm for region file and S_0001.gdm.meta for |
34 | 36 |
#' its metadata file |
35 | 37 |
#' |
36 | 38 |
#' |
37 | 39 |
#' @examples |
38 | 40 |
#' |
41 |
+#' ## load and attach add-on GenomicRanges package |
|
39 | 42 |
#' library(GenomicRanges) |
43 |
+#' |
|
44 |
+#' ## These statemens create two GRanges with the region attribute: seqnames, |
|
45 |
+#' ## ranges (region coordinates) and strand, plus two column element: |
|
46 |
+#' ## score and GC |
|
47 |
+#' |
|
40 | 48 |
#' gr1 <- GRanges(seqnames = "chr2", ranges = IRanges(3, 6), strand = "+", |
41 | 49 |
#' score = 5L, GC = 0.45) |
42 | 50 |
#' gr2 <- GRanges(seqnames = c("chr1", "chr1"), |
43 | 51 |
#' ranges = IRanges(c(7,13), width = 3), strand = c("+", "-"), |
44 | 52 |
#' score = 3:4, GC = c(0.3, 0.5)) |
53 |
+#' |
|
54 |
+#' ## This statement creates a GRangesList using the previous GRanges |
|
55 |
+#' |
|
45 | 56 |
#' grl = GRangesList(gr1, gr2) |
57 |
+#' |
|
58 |
+#' ## This statement defines the path to the subdirectory "example" of the |
|
59 |
+#' ## package "RGMQL" and export the GRangesList as GMQL dataset using the |
|
60 |
+#' ## last name of 'dir_out' path as dataset name |
|
61 |
+#' |
|
46 | 62 |
#' test_out_path <- system.file("example", package = "RGMQL") |
47 | 63 |
#' export_gmql(grl, test_out_path,TRUE) |
48 | 64 |
#' |
... | ... |
@@ -72,22 +72,22 @@ take_value.META_AGGREGATES <- function(obj){ |
72 | 72 |
#' function sum, performing all the type conversions needed } |
73 | 73 |
#' \item{COUNT: It prepares input parameter to be passed to the library |
74 | 74 |
#' function count, performing all the type conversions needed } |
75 |
-#' \item{MIN:It prepares input parameter to be passed to the library |
|
75 |
+#' \item{MIN: It prepares input parameter to be passed to the library |
|
76 | 76 |
#' function minimum, performing all the type conversions needed } |
77 | 77 |
#' \item{MAX: It prepares input parameter to be passed to the library |
78 | 78 |
#' function maximum, performing all the type conversions needed } |
79 |
-#' \item{BAG: It prepares input parameter to be passed to the library |
|
80 |
-#' function bag, this function creates comma-separated strings of |
|
81 |
-#' attribute values, performing all the types conversions needed} |
|
82 |
-#' \item{BAGD: It prepares input parameter to be passed to the library |
|
83 |
-#' function bag, this function creates comma-separated strings of distinct |
|
84 |
-#' attribute values, performing all the types conversions needed} |
|
85 | 79 |
#' \item{AVG: It prepares input parameter to be passed to the library |
86 | 80 |
#' function mean, performing all the type conversions needed } |
87 | 81 |
#' \item{MEDIAN: It prepares input parameter to be passed to the library |
88 | 82 |
#' function median, performing all the type conversions needed } |
89 | 83 |
#' \item{STD: It prepares input parameter to be passed to the library |
90 | 84 |
#' function standard deviation, performing all the type conversions needed} |
85 |
+#' \item{BAG: It prepares input parameter to be passed to the library |
|
86 |
+#' function bag; this function creates comma-separated strings of |
|
87 |
+#' attribute values, performing all the type conversions needed} |
|
88 |
+#' \item{BAGD: It prepares input parameter to be passed to the library |
|
89 |
+#' function bag; this function creates comma-separated strings of distinct |
|
90 |
+#' attribute values, performing all the type conversions needed} |
|
91 | 91 |
#' \item{Q1: It prepares input parameter to be passed to the library |
92 | 92 |
#' function fist quartile, performing all the type conversions needed} |
93 | 93 |
#' \item{Q2: It prepares input parameter to be passed to the library |
... | ... |
@@ -98,61 +98,66 @@ take_value.META_AGGREGATES <- function(obj){ |
98 | 98 |
#' |
99 | 99 |
#' @param value string identifying name of metadata or region attribute |
100 | 100 |
#' |
101 |
-#' @return aggregate object |
|
101 |
+#' @return Aggregate object |
|
102 | 102 |
#' |
103 | 103 |
#' @examples |
104 | 104 |
#' |
105 |
-#' ## local with CustomParser |
|
105 |
+#' ## This statement initializes and runs the GMQL server for local execution |
|
106 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
107 |
+#' ## the path to the folder "DATASET" in the subdirectory "example" |
|
108 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
109 |
+#' ## named "exp" using customParser |
|
110 |
+#' |
|
106 | 111 |
#' init_gmql() |
107 | 112 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
108 | 113 |
#' exp = read_dataset(test_path) |
109 | 114 |
#' |
110 |
-#' ## This statement copies all samples of exp into res dataset, and |
|
111 |
-#' ## then calculates new metadata attributes for each of them: |
|
115 |
+#' ## This statement copies all samples of exp dataset into res dataset, and |
|
116 |
+#' ## then calculates new metadata attribute for each of them: |
|
112 | 117 |
#' ## sum_score is the sum of score of the sample regions. |
113 | 118 |
#' |
114 | 119 |
#' res = extend(exp, sum_score = SUM("score")) |
115 | 120 |
#' |
116 |
-#' ## This statement copies all samples of exp into res dataset, |
|
117 |
-#' ## and then calculates new metadata attributes for each of them: |
|
118 |
-#' ## MinP is the minimum pvalue of the sample regions. |
|
121 |
+#' ## This statement copies all samples of exp dataset into res dataset, |
|
122 |
+#' ## and then calculates new metadata attribute for each of them: |
|
123 |
+#' ## min_pvalue is the minimum pvalue of the sample regions. |
|
119 | 124 |
#' |
120 |
-#' res = extend(exp, minP = MIN("pvalue")) |
|
125 |
+#' res = extend(exp, min_pvalue = MIN("pvalue")) |
|
121 | 126 |
#' |
122 |
-#' ## This statement copies all samples of exp into res dataset, |
|
123 |
-#' ## and then calculates new metadata attributes for each of them: |
|
127 |
+#' ## This statement copies all samples of exp dataset into res dataset, |
|
128 |
+#' ## and then calculates new metadata attribute for each of them: |
|
124 | 129 |
#' ## max_score is the maximum score of the sample regions. |
125 | 130 |
#' |
126 | 131 |
#' res = extend(exp, max_score = MAX("score")) |
127 | 132 |
#' |
128 | 133 |
#' ## The following cover operation produces output regions where at least 2 |
129 |
-#' ## and at most 3 regions ofexp overlap, having as resulting region |
|
130 |
-#' ## attributes the average signal of the overlapping regions; |
|
134 |
+#' ## and at most 3 regions of exp dataset overlap, having as resulting region |
|
135 |
+#' ## attribute the average signal of the overlapping regions; |
|
131 | 136 |
#' ## the result has one sample for each input cell. |
132 | 137 |
#' |
133 | 138 |
#' res = cover(exp, 2, 3, groupBy = list(DF("cell")), |
134 | 139 |
#' avg_signal = AVG("signal") ) |
135 | 140 |
#' |
136 |
-#' ## It copies all samples of DATA into OUT dataset, and then for each of |
|
137 |
-#' ## them it adds another metadata attribute, allScores, |
|
141 |
+#' ## This statement copies all samples of DATA dataset into OUT dataset, |
|
142 |
+#' ## and then for each of them it adds another metadata attribute, allScores, |
|
138 | 143 |
#' ## which is the aggregation comma-separated list of all the values |
139 | 144 |
#' ## that the region attribute score takes in the sample. |
140 | 145 |
#' |
141 | 146 |
#' out = extend(exp, allScore = BAG("score")) |
142 | 147 |
#' |
143 |
-#' ## counts the regions in each sample and stores their number as value |
|
144 |
-#' ## of the new metadata RegionCount attribute of the sample. |
|
148 |
+#' ## This statement counts the regions in each sample and stores their number |
|
149 |
+#' ## as value of the new metadata RegionCount attribute of the sample. |
|
145 | 150 |
#' |
146 | 151 |
#' out = extend(exp, RegionCount = COUNT()) |
147 | 152 |
#' |
148 |
-#' ## This statement copies all samples of exp into res dataset, |
|
149 |
-#' ## and then calculates new metadata attributes for each of them: |
|
153 |
+#' ## This statement copies all samples of exp dataset into res dataset, |
|
154 |
+#' ## and then calculates new metadata attribute for each of them: |
|
150 | 155 |
#' ## std_score is the standard deviation score of the sample regions. |
151 | 156 |
#' |
152 | 157 |
#' res = extend(exp, std_score = STD("score")) |
153 | 158 |
#' |
154 |
-#' ## This statement copies all samples of exp into res dataset, |
|
155 |
-#' ## and then calculates new metadata attributes for each of them: |
|
159 |
+#' ## This statement copies all samples of exp dataset into res dataset, |
|
160 |
+#' ## and then calculates new metadata attribute for each of them: |
|
156 | 161 |
#' ## m_score is the median score of the sample regions. |
157 | 162 |
#' |
158 | 163 |
#' res = extend(exp, m_score = MEDIAN("score")) |
... | ... |
@@ -173,6 +178,25 @@ SUM <- function(value) |
173 | 178 |
return(list) |
174 | 179 |
} |
175 | 180 |
|
181 |
+#' @name AGGREGATES-Object |
|
182 |
+#' @aliases COUNT |
|
183 |
+#' @rdname aggr-class |
|
184 |
+#' @export |
|
185 |
+#' |
|
186 |
+COUNT <- function() |
|
187 |
+{ |
|
188 |
+ list <- list() |
|
189 |
+ ## Set the name for the class |
|
190 |
+ class(list) <- c("COUNT","AGGREGATES","META_AGGREGATES") |
|
191 |
+ return(list) |
|
192 |
+} |
|
193 |
+as.character.COUNT <- function(obj) { |
|
194 |
+ class <- class(obj)[1] |
|
195 |
+ c(class,"") |
|
196 |
+} |
|
197 |
+check.COUNT <- function(obj){} |
|
198 |
+ |
|
199 |
+ |
|
176 | 200 |
#' @name AGGREGATES-Object |
177 | 201 |
#' @aliases MIN |
178 | 202 |
#' @rdname aggr-class |
... | ... |
@@ -220,66 +244,63 @@ AVG <- function(value) |
220 | 244 |
} |
221 | 245 |
|
222 | 246 |
#' @name AGGREGATES-Object |
223 |
-#' @aliases BAG |
|
247 |
+#' @aliases MEDIAN |
|
224 | 248 |
#' @rdname aggr-class |
225 | 249 |
#' @export |
226 | 250 |
#' |
227 |
-BAG <- function(value) |
|
251 |
+MEDIAN <- function(value) |
|
228 | 252 |
{ |
229 | 253 |
check.META_AGGREGATES(value) |
230 | 254 |
|
231 | 255 |
list <- list(value = value) |
232 | 256 |
## Set the name for the class |
233 |
- class(list) <- c("BAG","AGGREGATES","META_AGGREGATES") |
|
257 |
+ class(list) <- c("MEDIAN","AGGREGATES","META_AGGREGATES") |
|
234 | 258 |
return(list) |
235 | 259 |
} |
236 | 260 |
|
261 |
+ |
|
237 | 262 |
#' @name AGGREGATES-Object |
238 |
-#' @aliases COUNT |
|
263 |
+#' @aliases STD |
|
239 | 264 |
#' @rdname aggr-class |
240 | 265 |
#' @export |
241 | 266 |
#' |
242 |
-COUNT <- function() |
|
267 |
+STD <- function(value) |
|
243 | 268 |
{ |
244 |
- list <- list() |
|
269 |
+ check.META_AGGREGATES(value) |
|
270 |
+ |
|
271 |
+ list <- list(value = value) |
|
245 | 272 |
## Set the name for the class |
246 |
- class(list) <- c("COUNT","AGGREGATES","META_AGGREGATES") |
|
273 |
+ class(list) <- c("STD","META_AGGREGATES") |
|
247 | 274 |
return(list) |
248 | 275 |
} |
249 |
-as.character.COUNT <- function(obj) { |
|
250 |
- class <- class(obj)[1] |
|
251 |
- c(class,"") |
|
252 |
-} |
|
253 |
-check.COUNT <- function(obj){} |
|
254 | 276 |
|
255 | 277 |
#' @name AGGREGATES-Object |
256 |
-#' @aliases STD |
|
278 |
+#' @aliases BAG |
|
257 | 279 |
#' @rdname aggr-class |
258 | 280 |
#' @export |
259 | 281 |
#' |
260 |
-STD <- function(value) |
|
282 |
+BAG <- function(value) |
|
261 | 283 |
{ |
262 | 284 |
check.META_AGGREGATES(value) |
263 | 285 |
|
264 | 286 |
list <- list(value = value) |
265 | 287 |
## Set the name for the class |
266 |
- class(list) <- c("STD","META_AGGREGATES") |
|
288 |
+ class(list) <- c("BAG","AGGREGATES","META_AGGREGATES") |
|
267 | 289 |
return(list) |
268 | 290 |
} |
269 | 291 |
|
270 |
- |
|
271 | 292 |
#' @name AGGREGATES-Object |
272 |
-#' @aliases MEDIAN |
|
293 |
+#' @aliases BAGD |
|
273 | 294 |
#' @rdname aggr-class |
274 | 295 |
#' @export |
275 | 296 |
#' |
276 |
-MEDIAN <- function(value) |
|
297 |
+BAGD <- function(value) |
|
277 | 298 |
{ |
278 | 299 |
check.META_AGGREGATES(value) |
279 | 300 |
|
280 | 301 |
list <- list(value = value) |
281 | 302 |
## Set the name for the class |
282 |
- class(list) <- c("MEDIAN","AGGREGATES","META_AGGREGATES") |
|
303 |
+ class(list) <- c("BAGD","AGGREGATES","META_AGGREGATES") |
|
283 | 304 |
return(list) |
284 | 305 |
} |
285 | 306 |
|
... | ... |
@@ -327,18 +348,4 @@ Q3 <- function(value) |
327 | 348 |
return(list) |
328 | 349 |
} |
329 | 350 |
|
330 |
-#' @name AGGREGATES-Object |
|
331 |
-#' @aliases BAGD |
|
332 |
-#' @rdname aggr-class |
|
333 |
-#' @export |
|
334 |
-#' |
|
335 |
-BAGD <- function(value) |
|
336 |
-{ |
|
337 |
- check.META_AGGREGATES(value) |
|
338 |
- |
|
339 |
- list <- list(value = value) |
|
340 |
- ## Set the name for the class |
|
341 |
- class(list) <- c("BAGD","AGGREGATES","META_AGGREGATES") |
|
342 |
- return(list) |
|
343 |
-} |
|
344 | 351 |
|
... | ... |
@@ -31,71 +31,80 @@ check.DISTAL <- function(value) |
31 | 31 |
#' DISTAL object class constructor |
32 | 32 |
#' |
33 | 33 |
#' This class constructor is used to create instances of DISTAL object |
34 |
-#' to be used in GMQL functions that use genometric predicate parameter |
|
35 |
-#' requiring distal condition on value |
|
34 |
+#' to be used in GMQL JOIN operations (RGMQL merge functions) that use |
|
35 |
+#' genometric predicate parameter requiring distal condition on value |
|
36 | 36 |
#' |
37 | 37 |
#' \itemize{ |
38 | 38 |
#' \item{DL: It denotes the less distance clause, |
39 |
-#' which selects all the regions of the experiment such that their distance |
|
40 |
-#' from the anchor region is less than 'value' bases.} |
|
39 |
+#' which selects all the regions of a joined experiment dataset sample such |
|
40 |
+#' that their distance from the anchor region of a joined reference dataset |
|
41 |
+#' sample is less than 'value' bases.} |
|
41 | 42 |
#' \item{DLE: It denotes the less distance clause, |
42 |
-#' which selects all the regions of the experiment such that their distance |
|
43 |
-#' from the anchor region is less than, or equal to, 'value' bases.} |
|
44 |
-#' \item{DG: it denotes the less distance clause, |
|
45 |
-#' which selects all the regions of the experiment such that their distance |
|
46 |
-#' from the anchor region is greater than 'value' bases. } |
|
47 |
-#' \item{DGE: It denotes the less distance clause, which selects all the |
|
48 |
-#' regions of the experiment such that their distance from the anchor region |
|
49 |
-#' is greater than, or equal to, 'value' bases.} |
|
43 |
+#' which selects all the regions of a joined experiment dataset sample such |
|
44 |
+#' that their distance from the anchor region of a joined reference dataset |
|
45 |
+#' sample is less than, or equal to, 'value' bases.} |
|
46 |
+#' \item{DG: It denotes the less distance clause, |
|
47 |
+#' which selects all the regions of a joined experiment dataset sample such |
|
48 |
+#' that their distance from the anchor region of a joined reference dataset |
|
49 |
+#' sample is greater than 'value' bases. } |
|
50 |
+#' \item{DGE: It denotes the less distance clause, |
|
51 |
+#' which selects all the regions of a joined experiment dataset sample such |
|
52 |
+#' that their distance from the anchor region of a joined reference dataset |
|
53 |
+#' sample is greater than, or equal to, 'value' bases.} |
|
50 | 54 |
#' \item{MD: It denotes the minimum distance clause, which selects |
51 |
-#' the 'value' regions of the experiment at minimial distance from the |
|
52 |
-#' anchor region.} |
|
55 |
+#' the first 'value' regions of a joined experiment at minimial distance |
|
56 |
+#' from the anchor region of a joined reference dataset sample.} |
|
53 | 57 |
#' \item{UP: It denotes the upstream direction of the genome. |
54 |
-#' They are interpreted as predicates that must hold on the regions |
|
55 |
-#' of the experiment. |
|
56 |
-#' UP is true when region of experiment is in the upstream genome |
|
57 |
-#' of the anchor region. |
|
58 |
+#' It makes predicates to be hold on the upstream of the regions of a joined |
|
59 |
+#' experiment dataset sample. |
|
60 |
+#' UP is true when region of a joined experiment dataset sample is in the |
|
61 |
+#' upstream genome of the anchor region of a joined reference dataset sample. |
|
58 | 62 |
#' When this clause is not present, distal conditions apply to both |
59 |
-#' the directions of the genome.} |
|
60 |
-#' \item{DOWN: It denotes the downstream direction of the genome. |
|
61 |
-#' They are interpreted as predicates that must hold on the regions of |
|
62 |
-#' the experiment. |
|
63 |
-#' DOWN is true when region of experiment is in the downstream genome of |
|
64 |
-#' the anchor region. |
|
65 |
-#' When this clause is not present, distal conditions apply to both the |
|
66 |
-#' directions of the genome. } |
|
63 |
+#' directions of the genome.} |
|
64 |
+#' \item{DOWN: It denotes the downstream direction of the genome. |
|
65 |
+#' It makes predicates to be hold on the downstream of the regions of a joined |
|
66 |
+#' experiment dataset sample. |
|
67 |
+#' UP is true when region of a joined experiment dataset sample is in the |
|
68 |
+#' downstream genome of the anchor region of a joined reference dataset sample. |
|
69 |
+#' When this clause is not present, distal conditions apply to both |
|
70 |
+#' directions of the genome.} |
|
67 | 71 |
#' } |
68 | 72 |
#' |
69 | 73 |
#' @param value string identifying distance between genomic regions |
70 |
-#' in base pairs, |
|
74 |
+#' in base pair, |
|
71 | 75 |
#' |
72 |
-#' @return distal object |
|
76 |
+#' @return Distal object |
|
73 | 77 |
#' |
74 | 78 |
#' @examples |
79 |
+#' ## Thi statement initializes and runs the GMQL server for local execution |
|
80 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
81 |
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory |
|
82 |
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL |
|
83 |
+#' ## dataset named "TSS" and "HM" respectively using customParser |
|
75 | 84 |
#' |
76 | 85 |
#' init_gmql() |
77 |
-#' test_path <- system.file("example","DATASET",package = "RGMQL") |
|
78 |
-#' test_path2 <- system.file("example","DATASET_GDM",package = "RGMQL") |
|
86 |
+#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
87 |
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
79 | 88 |
#' TSS = read_dataset(test_path) |
80 | 89 |
#' HM = read_dataset(test_path2) |
81 | 90 |
#' |
82 | 91 |
#' ## Given a dataset HM and one called TSS with a sample including |
83 |
-#' # Transcription Start Site annotations, it searches for those regions of hm |
|
84 |
-#' # that are at a minimal distance from a transcription start site (TSS) |
|
85 |
-#' # and takes the first/closest one for each TSS, |
|
86 |
-#' # provided that such distance is lesser than 1200 bases and joined TSS |
|
87 |
-#' # and HM samples are obtained from the same provider (joinby clause). |
|
92 |
+#' ## Transcription Start Site annotations, it searches for those regions of HM |
|
93 |
+#' ## that are at a minimal distance from a transcription start site (TSS) |
|
94 |
+#' ## and takes the first/closest one for each TSS, provided that such |
|
95 |
+#' ## distance is lesser than 1200 bases and joined TSS and HM samples are |
|
96 |
+#' ## obtained from the same provider (joinby clause). |
|
88 | 97 |
#' |
89 | 98 |
#' join_data = merge(TSS, HM, |
90 | 99 |
#' genometric_predicate = list(MD(1), DL(1200)), DF("provider"), |
91 | 100 |
#' region_output = "RIGHT") |
92 | 101 |
#' |
93 |
-#' #' # Given a dataset 'hm' and one called 'tss' with a sample including |
|
94 |
-#' # Transcription Start Site annotations, it searches for those regions of hm |
|
95 |
-#' # that are at a minimal distance from a transcription start site (TSS) |
|
96 |
-#' # and takes the first/closest one for each TSS, provided that such distance |
|
97 |
-#' # is greater than 12K bases and joined 'tss' and 'hm' samples are obtained |
|
98 |
-#' # from the same provider (joinby clause). |
|
102 |
+#' ## Given a dataset 'HM' and one called 'TSS' with a sample including |
|
103 |
+#' ## Transcription Start Site annotations, it searches for those regions of HM |
|
104 |
+#' ## that are at a minimal distance from a transcription start site (TSS) |
|
105 |
+#' ## and takes the first/closest one for each TSS, provided that such distance |
|
106 |
+#' ## is greater than 12K bases and joined 'tss' and 'hm' samples are obtained |
|
107 |
+#' ## from the same provider (joinby clause). |
|
99 | 108 |
#' |
100 | 109 |
#' join_data = merge(TSS, HM, |
101 | 110 |
#' genometric_predicate = list(MD(1), DGE(12000), DOWN()), |
... | ... |
@@ -39,37 +39,41 @@ as.character.OPERATOR <- function(obj) { |
39 | 39 |
#' to be used in GMQL functions that require operator on value. |
40 | 40 |
#' |
41 | 41 |
#' \itemize{ |
42 |
-#' \item{META: It prepared input parameter to be passed to library function |
|
43 |
-#' meta, performing all the type conversion needed} |
|
44 |
-#' \item{SQRT: It prepared input parameter to be passed to library function |
|
45 |
-#' sqrt, performing all the type conversion needed} |
|
46 |
-#' \item{NIL: It prepared input parameter to be passed to library function |
|
47 |
-#' null, performing all the type conversion needed} |
|
42 |
+#' \item{META: It prepares input parameter to be passed to library function |
|
43 |
+#' meta, performing all the type conversions needed} |
|
44 |
+#' \item{SQRT: It prepares input parameter to be passed to library function |
|
45 |
+#' sqrt, performing all the type conversions needed} |
|
46 |
+#' \item{NIL: It prepares input parameter to be passed to library function |
|
47 |
+#' null, performing all the type conversions needed} |
|
48 | 48 |
#' } |
49 | 49 |
#' |
50 |
-#' @param value string identifying name of metadata attribute or region |
|
51 |
-#' attribute |
|
52 |
-#' @param type string identifying the type of the attribute value |
|
53 |
-#' must be: integer, double or string |
|
50 |
+#' @param value string identifying name of metadata attribute |
|
51 |
+#' @param type string identifying the type of the attribute value; |
|
52 |
+#' it must be: INTEGER, DOUBLE or STRING |
|
54 | 53 |
#' |
55 |
-#' @return operator object |
|
54 |
+#' @return Operator object |
|
56 | 55 |
#' |
57 | 56 |
#' |
58 | 57 |
#' @examples |
58 |
+#' ## Thi statement initializes and runs the GMQL server for local execution |
|
59 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
60 |
+#' ## the path to the folders "DATASET" in the subdirectory "example" |
|
61 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
62 |
+#' ## named "exp" |
|
59 | 63 |
#' |
60 | 64 |
#' init_gmql() |
61 | 65 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
62 | 66 |
#' exp = read_dataset(test_path) |
63 | 67 |
#' |
64 |
-#' ## allows to select, in all input sample, all those regions for which the |
|
65 |
-#' ## region attribute score has a value which is greater than the metadata |
|
66 |
-#' ## attribute value "avg_score" in their sample. |
|
68 |
+#' ## This statement allows to select, in all input sample, all those regions |
|
69 |
+#' ## for which the region attribute score has a value which is greater |
|
70 |
+#' ## than the metadata attribute value "avg_score" in their sample. |
|
67 | 71 |
#' |
68 | 72 |
#' data = filter(exp, r_predicate = score > META("avg_score")) |
69 | 73 |
#' |
70 |
-#' ## It define a new numeric region attribute with "null" value. |
|
74 |
+#' ## This statement defines a new numeric region attribute with "null" value. |
|
71 | 75 |
#' ## The syntax for creating a new attribute with null value is |
72 |
-#' ## attribute_name = NULL(TYPE), where type may be INTEGER or DOUBLE. |
|
76 |
+#' ## attribute_name = NULL(TYPE), where type may be INTEGER, DOUBLE or STRING. |
|
73 | 77 |
#' |
74 | 78 |
#' out = select(exp, regions_update = list(signal = NIL("INTEGER"), |
75 | 79 |
#' pvalue = NIL("DOUBLE"))) |
... | ... |
@@ -82,9 +86,6 @@ as.character.OPERATOR <- function(obj) { |
82 | 86 |
#' ## with value correspondent to the mathematical squared root |
83 | 87 |
#' ## of the pre-existing metadata attribute concentration. |
84 | 88 |
#' |
85 |
-#' init_gmql() |
|
86 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
87 |
-#' exp = read_dataset(test_path) |
|
88 | 89 |
#' out = select(exp, metadata_update = list(concSq = SQRT("concentration"))) |
89 | 90 |
#' |
90 | 91 |
#' @name OPERATOR-Object |
... | ... |
@@ -92,7 +93,7 @@ as.character.OPERATOR <- function(obj) { |
92 | 93 |
#' @rdname operator-class |
93 | 94 |
#' @export |
94 | 95 |
#' |
95 |
-META <- function(value,type=NULL) |
|
96 |
+META <- function(value, type = NULL) |
|
96 | 97 |
{ |
97 | 98 |
check.OPERATOR(value) |
98 | 99 |
if(!is.null(type)) |
... | ... |
@@ -128,11 +129,11 @@ check.META <- function(type) |
128 | 129 |
#' @rdname operator-class |
129 | 130 |
#' @export |
130 | 131 |
#' |
131 |
-NIL <- function(value) |
|
132 |
+NIL <- function(type) |
|
132 | 133 |
{ |
133 |
- check.NIL(value) |
|
134 |
+ check.NIL(type) |
|
134 | 135 |
|
135 |
- list <- list(value = value) |
|
136 |
+ list <- list(value = type) |
|
136 | 137 |
## Set the name for the class |
137 | 138 |
class(list) <- c("NIL","OPERATOR") |
138 | 139 |
return(list) |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
#' Condition evaluation functions |
6 | 6 |
#' |
7 | 7 |
#' These functions are used to support joinBy and/or groupBy function parameter. |
8 |
-#' It create a list of one element: matrix containing the two coloumn: |
|
8 |
+#' They create a 2-D array made up by two coloumn: |
|
9 | 9 |
#' type of condition evaluation and the metadata attribute name |
10 | 10 |
#' |
11 | 11 |
#' \itemize{ |
... | ... |
@@ -13,16 +13,16 @@ |
13 | 13 |
#' FULL evaluation: two attributes match if they both end with value and, |
14 | 14 |
#' if they have further prefixes, the two prefix sequences are identical} |
15 | 15 |
#' \item{EX: It defines a EXACT evaluation of the input values. |
16 |
-#' EXACT evaluation: only attributes exactly as value will match; |
|
16 |
+#' EXACT evaluation: only attributes exactly as value match; |
|
17 | 17 |
#' no further prefixes are allowed. } |
18 | 18 |
#' \item{DF: It defines a DEFAULT evaluation of the input values. |
19 | 19 |
#' DEFAULT evaluation: the two attributes match if both end with value.} |
20 | 20 |
#' } |
21 | 21 |
#' |
22 |
-#' @param ... series of string identifying name of metadata attribute |
|
22 |
+#' @param ... series of string identifying a name of metadata attribute |
|
23 | 23 |
#' to be evaluated |
24 | 24 |
#' |
25 |
-#' @return list of 2-D array containing method of evaluation and metadata |
|
25 |
+#' @return 2-D array containing method of evaluation and metadata |
|
26 | 26 |
#' |
27 | 27 |
#' @examples |
28 | 28 |
#' |
... | ... |
@@ -20,9 +20,9 @@ |
20 | 20 |
#' this condition is logically "ANDed" with prefix filtering (see below) |
21 | 21 |
#' if NULL no filtering action occures |
22 | 22 |
#' (i.e every sample is taken for region filtering) |
23 |
-#' @param metadata_prefix vector of strings that will filter metadata |
|
24 |
-#' containing rispectively every element of this vector. |
|
25 |
-#' number of element in both vector must match |
|
23 |
+#' @param metadata_prefix vector of strings that will support the metadata |
|
24 |
+#' filtering. If defined every defined 'metadata' are concatenated with the |
|
25 |
+#' corresponding prefix. |
|
26 | 26 |
#' @param regions vector of strings that extracts only region attribute |
27 | 27 |
#' specified; if NULL no regions attribute is taken and the output is only |
28 | 28 |
#' GRanges made up by the region coordinate attributes |
... | ... |
@@ -33,18 +33,26 @@ |
33 | 33 |
#' selected regions |
34 | 34 |
#' |
35 | 35 |
#' @details |
36 |
-#' This function works only with datatset or GRangesList that has the same |
|
37 |
-#' information about regions attribute (but of course different value) |
|
36 |
+#' This function works only with datatset or GRangesList which samples or |
|
37 |
+#' Granges have the same regions coordinates (chr, ranges, strand) |
|
38 |
+#' |
|
38 | 39 |
#' In case of Grangeslist data input the function will search for metadata |
39 | 40 |
#' into metadata() function associated to Grangeslist. |
40 | 41 |
#' |
41 |
-#' @return Granges with selected regions (if any) in elementMetadata |
|
42 |
+#' @return GRanges with selected regions |
|
42 | 43 |
#' |
43 | 44 |
#' @examples |
44 |
-#' |
|
45 |
+#' |
|
46 |
+#' ## This statement defines the path to the folders "DATASET" in the |
|
47 |
+#' ## subdirectory "example" of the package "RGMQL" and filter such folder |
|
48 |
+#' ## dataset including at output only "pvalue" and "peak" regions |
|
49 |
+#' |
|
45 | 50 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
46 |
-#' filter_and_extract(test_path,regions = c("pvalue", "peak")) |
|
47 |
-#' |
|
51 |
+#' filter_and_extract(test_path, regions = c("pvalue", "peak")) |
|
52 |
+#' |
|
53 |
+#' ## This statement import a GMQL dataset as GRangesList and filter it |
|
54 |
+#' ## including at output only "pvalue" and "peak" regions |
|
55 |
+#' |
|
48 | 56 |
#' grl = import_gmql(test_path, TRUE) |
49 | 57 |
#' filter_and_extract(grl, regions = c("pvalue", "peak")) |
50 | 58 |
#' |
... | ... |
@@ -1,13 +1,13 @@ |
1 | 1 |
#' Method setdiff |
2 | 2 |
#' |
3 |
-#' @description Wrapper to GMQL difference function |
|
3 |
+#' @description Wrapper to GMQL DIFFERENCE operator |
|
4 | 4 |
#' |
5 | 5 |
#' @description It produces one sample in the result for each sample of the |
6 | 6 |
#' left operand, by keeping the same metadata of the left input sample |
7 | 7 |
#' and only those regions (with their schema and values) of the left input |
8 | 8 |
#' sample which do not intersect with any region in the right operand sample. |
9 | 9 |
#' The optional \emph{joinby} clause is used to extract a subset of couples |
10 |
-#' from the cartesian product of two dataset \emph{x} and \emph{y} |
|
10 |
+#' from the Cartesian product of two dataset \emph{x} and \emph{y} |
|
11 | 11 |
#' on which to apply the DIFFERENCE operator: |
12 | 12 |
#' only those samples that have the same value for each attribute |
13 | 13 |
#' are considered when performing the difference. |
... | ... |
@@ -17,19 +17,16 @@ |
17 | 17 |
#' |
18 | 18 |
#' @param x GMQLDataset class object |
19 | 19 |
#' @param y GMQLDataset class object |
20 |
-#' @param ... Additional arguments for use in specific methods. |
|
21 |
-#' |
|
22 |
-#' This method accept a function to define condition evaluation on metadata. |
|
20 |
+#' @param joinBy list of evalation functions to define evaluation on metadata: |
|
23 | 21 |
#' \itemize{ |
24 |
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match |
|
25 |
-#' if they both end with value and, if they have a further prefixes, |
|
22 |
+#' \item{\code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
23 |
+#' if they both end with \emph{value} and, if they have further prefixes, |
|
26 | 24 |
#' the two prefix sequence are identical} |
27 |
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly |
|
28 |
-#' as value will match; no further prefixes are allowed. } |
|
29 |
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match |
|
30 |
-#' if both end with value.} |
|
25 |
+#' \item{\code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
26 |
+#' as \emph{value} match; no further prefixes are allowed. } |
|
27 |
+#' \item{\code{\link{DF}}(value): Default evaluation, the two attributes match |
|
28 |
+#' if both end with \emph{value}.} |
|
31 | 29 |
#' } |
32 |
-#' |
|
33 | 30 |
#' @param is_exact single logical value: TRUE means that the region difference |
34 | 31 |
#' is executed only on regions in left_input_data with exactly the same |
35 | 32 |
#' coordinates of at least one region present in right_input_data; |
... | ... |
@@ -42,47 +39,48 @@ |
42 | 39 |
#' |
43 | 40 |
#' |
44 | 41 |
#' @examples |
45 |
-#' |
|
46 |
-#' ## This GMQL statement returns all the regions in the first dataset |
|
47 |
-#' ## that do not overlap any region in the second dataset. |
|
42 |
+#' ## This statement initializes and runs the GMQL server for local execution |
|
43 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
44 |
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory |
|
45 |
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL |
|
46 |
+#' ## dataset named "data1" and "data2" respectively using customParser |
|
48 | 47 |
#' |
49 | 48 |
#' init_gmql() |
50 | 49 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
51 | 50 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
52 |
-#' r_left = read_dataset(test_path) |
|
53 |
-#' r_right = read_dataset(test_path2) |
|
54 |
-#' out = setdiff(r_left, r_right) |
|
51 |
+#' data1 = read_dataset(test_path) |
|
52 |
+#' data2 = read_dataset(test_path2) |
|
53 |
+#' |
|
54 |
+#' ## This GMQL statement returns all the regions in the first dataset |
|
55 |
+#' ## that do not overlap any region in the second dataset. |
|
56 |
+#' |
|
57 |
+#' out = setdiff(data1, data2) |
|
55 | 58 |
#' |
56 |
-#' \dontrun{ |
|
57 | 59 |
#' ## This GMQL statement extracts for every pair of samples s1 in EXP1 |
58 | 60 |
#' ## and s2 in EXP2 having the same value of the metadata |
59 | 61 |
#' ## attribute 'antibody_target' the regions that appear in s1 but |
60 | 62 |
#' ## do not overlap any region in s2; |
61 | 63 |
#' ## metadata of the result are the same as the metadata of s1. |
62 | 64 |
#' |
63 |
-#' init_gmql() |
|
64 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
65 |
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
66 |
-#' exp1 = read_dataset(test_path) |
|
67 |
-#' exp2 = read_dataset(test_path2) |
|
68 |
-#' out = setdiff(exp1, exp2, DF("antibody_target")) |
|
65 |
+#' out_t = setdiff(data1, data2, DF("antibody_target")) |
|
69 | 66 |
#' |
70 |
-#' } |
|
71 | 67 |
#' @name setdiff |
72 | 68 |
#' @aliases setdiff,GMQLDataset,GMQLDataset-method |
73 | 69 |
#' @aliases setdiff-method |
74 | 70 |
#' @export |
75 | 71 |
setMethod("setdiff", c("GMQLDataset","GMQLDataset"), |
76 |
- function(x, y, ..., is_exact = FALSE) |
|
72 |
+ function(x, y, joinBy = NULL, is_exact = FALSE) |
|
77 | 73 |
{ |
78 | 74 |
ptr_data_x = x@value |
79 | 75 |
ptr_data_y = y@value |
80 |
- joinBy = list(...) |
|
81 | 76 |
gmql_difference(ptr_data_x, ptr_data_y, is_exact, joinBy) |
82 | 77 |
}) |
83 | 78 |
|
84 | 79 |
gmql_difference <- function(left_data, right_data, is_exact, joinBy) |
85 | 80 |
{ |
81 |
+ if(!is.list(joinBy)) |
|
82 |
+ stop("joinBy: must be a list") |
|
83 |
+ |
|
86 | 84 |
if(!is.null(joinBy) && !length(joinBy) == 0) |
87 | 85 |
{ |
88 | 86 |
cond <- .join_condition(joinBy) |
... | ... |
@@ -5,24 +5,20 @@ |
5 | 5 |
#' and adds them to the existing metadata attributes of the sample. |
6 | 6 |
#' Aggregate functions are applied sample by sample. |
7 | 7 |
#' |
8 |
-#' @importFrom rJava .jnull |
|
9 |
-#' @importFrom rJava J |
|
10 |
-#' @importFrom rJava .jarray |
|
8 |
+#' @importFrom rJava J .jnull .jarray |
|
11 | 9 |
#' |
12 | 10 |
#' @param .data GMQLDataset class object |
13 |
-#' @param ... Additional arguments for use in specific methods. |
|
14 |
-#' It accept a series of aggregate function on region attribute. |
|
15 |
-#' All the element in the form \emph{key} = \emph{aggregate}. |
|
16 |
-#' The \emph{aggregate} is an object of class AGGREGATES |
|
17 |
-#' The aggregate functions available are: \code{\link{SUM}}, |
|
11 |
+#' @param ... a series of expressions separated by comma in the form |
|
12 |
+#' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of |
|
13 |
+#' class AGGREGATES. The aggregate functions available are: \code{\link{SUM}}, |
|
18 | 14 |
#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
19 | 15 |
#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
20 | 16 |
#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
21 | 17 |
#' \code{\link{Q2}}, \code{\link{Q3}}. |
22 |
-#' Every aggregate accepts a string value, execet for COUNT, which does not |
|
18 |
+#' Every aggregate accepts a string value, except for COUNT, which does not |
|
23 | 19 |
#' have any value. |
24 | 20 |
#' Argument of 'aggregate function' must exist in schema, i.e. among region |
25 |
-#' attributes. Two style are allowed: |
|
21 |
+#' attributes. Two styles are allowed: |
|
26 | 22 |
#' \itemize{ |
27 | 23 |
#' \item list of key-value pairs: e.g. sum = SUM("pvalue") |
28 | 24 |
#' \item list of values: e.g. SUM("pvalue") |
... | ... |
@@ -33,28 +29,29 @@ |
33 | 29 |
#' for the subsequent GMQLDataset method |
34 | 30 |
#' |
35 | 31 |
#' @examples |
36 |
-#' |
|
37 |
-#' ## it counts the regions in each sample and stores their number as value |
|
38 |
-#' ## of the new metadata RegionCount attribute of the sample. |
|
32 |
+#' |
|
33 |
+#' ## This statement initializes and runs the GMQL server for local execution |
|
34 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
35 |
+#' ## the path to the folders "DATASET" in the subdirectory "example" |
|
36 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
37 |
+#' ## named "data" |
|
38 |
+#' |
|
39 | 39 |
#' init_gmql() |
40 | 40 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
41 |
-#' r <- read_dataset(test_path) |
|
42 |
-#' e <- extend(r, RegionCount = COUNT()) |
|
41 |
+#' data <- read_dataset(test_path) |
|
42 |
+#' |
|
43 |
+#' ## This statement counts the regions in each sample and stores their number |
|
44 |
+#' ## as value of the new metadata attribute RegionCount of the sample. |
|
43 | 45 |
#' |
44 |
-#' \dontrun{ |
|
46 |
+#' e <- extend(data, RegionCount = COUNT()) |
|
45 | 47 |
#' |
46 |
-#' ## it copies all samples of exp dataset into res dataset, |
|
48 |
+#' ## This statement copies all samples of data dataset into 'res' dataset, |
|
47 | 49 |
#' ## and then calculates for each of them two new metadata attributes: |
48 | 50 |
#' ## 1. RegionCount is the number of sample regions; |
49 | 51 |
#' ## 2. MinP is the minimum pvalue of the sample regions. |
50 |
-#' ## res sample regions are the same as the ones in exp. |
|
52 |
+#' ## res sample regions are the same as the ones in data. |
|
51 | 53 |
#' |
52 |
-#' init_gmql() |
|
53 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
54 |
-#' exp = read_dataset(test_path) |
|
55 |
-#' res = extend(exp, RegionCount = COUNT(), MinP = MIN("pvalue")) |
|
56 |
-#' |
|
57 |
-#' } |
|
54 |
+#' res = extend(data, RegionCount = COUNT(), MinP = MIN("pvalue")) |
|
58 | 55 |
#' |
59 | 56 |
#' @name extend |
60 | 57 |
#' @rdname extend |
61 | 58 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,121 @@ |
1 |
+#' Init GMQL server |
|
2 |
+#' |
|
3 |
+#' It initializes and runs GMQL server for executing GMQL query |
|
4 |
+#' It also performs a login to GMQL REST services suite, if needed |
|
5 |
+#' |
|
6 |
+#' @importFrom rJava J |
|
7 |
+#' |
|
8 |
+#' @param output_format string identifies the output format of sample files. |
|
9 |
+#' It can be TAB, GTF or COLLECT: |
|
10 |
+#' \itemize{ |
|
11 |
+#' \item{TAB: tab delimited file format} |
|
12 |
+#' \item{GTF: tab-delimited text fstandard ormat based on the general |
|
13 |
+#' feature format} |
|
14 |
+#' \item{COLLECT: used for storing output in memory} |
|
15 |
+#' } |
|
16 |
+#' @param remote_processing logical value specifying the processing mode. |
|
17 |
+#' True for processing on cluster (remote), false for local processing. |
|
18 |
+#' |
|
19 |
+#' @param url string url of server: It must contain the server address |
|
20 |
+#' and base url; service name is added automatically. |
|
21 |
+#' If NULL, no login is performed. |
|
22 |
+#' You can always perform it by calling the function \code{\link{login_gmql}} |
|
23 |
+#' explicitly |
|
24 |
+#' |
|
25 |
+#' @param username string name used during signup |
|
26 |
+#' @param password string password used during signup |
|
27 |
+#' |
|
28 |
+#' @return None |
|
29 |
+#' |
|
30 |
+#' @examples |
|
31 |
+#' |
|
32 |
+#' ## This statement initializes GMQL with local processing with sample files |
|
33 |
+#' ## output format as tab delimited |
|
34 |
+#' |
|
35 |
+#' init_gmql("tab", FALSE) |
|
36 |
+#' |
|
37 |
+#' ## initializes GMQL with remote processing |
|
38 |
+#' |
|
39 |
+#' remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/" |
|
40 |
+#' init_gmql(remote_processing = TRUE, url = remote_url) |
|
41 |
+#' |
|
42 |
+#' @export |
|
43 |
+#' |
|
44 |
+init_gmql <- function(output_format = "GTF", remote_processing = FALSE, |
|
45 |
+ url = NULL, username = NULL, password = NULL) |
|
46 |
+{ |
|
47 |
+ out_format <- toupper(output_format) |
|
48 |
+ if(!identical(out_format,"TAB") && !identical(out_format,"GTF") && |
|
49 |
+ !identical(out_format,"COLLECT")) |
|
50 |
+ stop("output_format must be TAB, GTF or COLLECT") |
|
51 |
+ .check_logical(remote_processing) |
|
52 |
+ |
|
53 |
+ # mettere attesa da input keyboard, controllare se token già esiste |
|
54 |
+ # da sessione precedente |
|
55 |
+ if(!is.null(url) && !exists("authToken",envir = .GlobalEnv)) |
|
56 |
+ login_gmql(url,username,password) |
|
57 |
+ |
|
58 |
+ WrappeR <- J("it/polimi/genomics/r/Wrapper") |
|
59 |
+ WrappeR$initGMQL(out_format,remote_processing) |
|
60 |
+} |
|
61 |
+ |
|
62 |
+#' Stop GMQL server |
|
63 |
+#' |
|
64 |
+#' Stop GMQL server |
|
65 |
+#' |
|
66 |
+#' @importFrom rJava J |
|
67 |
+#' |
|
68 |
+#' @return None |
|
69 |
+#' |
|
70 |
+#' @examples |
|
71 |
+#' |
|
72 |
+#' ## These statements initializes GMQL with local processing with sample files |
|
73 |
+#' ## output format as tab delimited and then stop it |
|
74 |
+#' |
|
75 |
+#' init_gmql("tab", FALSE) |
|
76 |
+#' |
|
77 |
+#' stop_gmql() |
|
78 |
+#' |
|
79 |
+#' @export |
|
80 |
+#' |
|
81 |
+stop_gmql <- function() |
|
82 |
+{ |
|
83 |
+ WrappeR <- J("it/polimi/genomics/r/Wrapper") |
|
84 |
+ WrappeR$stopGMQL() |
|
85 |
+} |
|
86 |
+ |
|
87 |
+ |
|
88 |
+#' Disable or Enable remote processing |
|
89 |
+#' |
|
90 |
+#' It allows to enable or disable remote processing |
|
91 |
+#' |
|
92 |
+#' @details |
|
93 |
+#' The invocation of this function allows to change mode of processing. |
|
94 |
+#' After invoking collect() it is not possbile to switch the processing mode. |
|
95 |
+#' |
|
96 |
+#' @importFrom rJava J |
|
97 |
+#' |
|
98 |
+#' @param is_remote logical value used in order to set the processing mode. |
|
99 |
+#' TRUE you set a remote query processing mode, otherwise it will be local, |
|
100 |
+#' |
|
101 |
+#' @return None |
|
102 |
+#' |
|
103 |
+#' @examples |
|
104 |
+#' |
|
105 |
+#' ## These statements initializes GMQL with local processing with sample files |
|
106 |
+#' ## output format as tab delimited and then change processing mode to remote |
|
107 |
+#' |
|
108 |
+#' init_gmql("tab", remote_processing = FALSE) |
|
109 |
+#' |
|
110 |
+#' remote_processing(TRUE) |
|
111 |
+#' |
|
112 |
+#' @export |
|
113 |
+#' |
|
114 |
+remote_processing<-function(is_remote) |
|
115 |
+{ |
|
116 |
+ WrappeR <- J("it/polimi/genomics/r/Wrapper") |
|
117 |
+ .check_logical(is_remote) |
|
118 |
+ response <- WrappeR$remote_processing(is_remote) |
|
119 |
+ print(response) |
|
120 |
+} |
|
121 |
+ |
... | ... |
@@ -1,14 +1,17 @@ |
1 | 1 |
#' Method merge |
2 | 2 |
#' |
3 |
-#' It takes in input two datasets, respectively known as nchor (left) |
|
4 |
-#' and experiment (right) and returns a dataset of samples consisting of |
|
5 |
-#' regions extracted from the operands according to the specified condition |
|
3 |
+#' @description Wrapper to GMQL JOIN operator |
|
4 |
+#' |
|
5 |
+#' @description It takes in input two datasets, respectively known as anchor |
|
6 |
+#' (left) and experiment (right) and returns a dataset of samples consisting |
|
7 |
+#' of regions extracted from the operands according to the specified condition |
|
6 | 8 |
#' (a.k.a \emph{genometric_predicate}). |
7 | 9 |
#' The number of generated output samples is the Cartesian product |
8 | 10 |
#' of the number of samples in the anchor and in the experiment dataset |
9 |
-#' (if \emph{by} is not specified). |
|
11 |
+#' (if \emph{joinBy} is not specified). |
|
10 | 12 |
#' The output metadata are the union of the input metadata, |
11 |
-#' with their attribute names prefixed with left or right respectively. |
|
13 |
+#' with their attribute names prefixed with left or right dataset name, |
|
14 |
+#' respectively. |
|
12 | 15 |
#' |
13 | 16 |
#' @importFrom rJava J .jnull .jarray |
14 | 17 |
#' @importFrom S4Vectors merge |
... | ... |
@@ -16,41 +19,37 @@ |
16 | 19 |
#' @param x GMQLDataset class object |
17 | 20 |
#' @param y GMQLDataset class object |
18 | 21 |
#' |
19 |
-#' @param genometric_predicate is a list of DISTAL object |
|
22 |
+#' @param genometric_predicate it is a list of DISTAL objects |
|
20 | 23 |
#' For details of DISTAL objects see: |
21 | 24 |
#' \code{\link{DLE}}, \code{\link{DGE}}, \code{\link{DL}}, \code{\link{DG}}, |
22 | 25 |
#' \code{\link{MD}}, \code{\link{UP}}, \code{\link{DOWN}} |
23 | 26 |
#' |
24 |
-#' @param ... Additional arguments for use in specific methods. |
|
25 |
-#' |
|
26 |
-#' This method accept a function to define condition evaluation on metadata. |
|
27 |
+#' @param joinBy list of evalation functions to define evaluation on metadata: |
|
27 | 28 |
#' \itemize{ |
28 |
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match |
|
29 |
-#' if they both end with value and, if they have a further prefixes, |
|
30 |
-#' the two prefix sequence are identical} |
|
31 |
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly |
|
32 |
-#' as value will match; no further prefixes are allowed. } |
|
33 |
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match |
|
34 |
-#' if both end with value.} |
|
29 |
+#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
30 |
+#' if they both end with \emph{value} and, if they have further prefixes, |
|
31 |
+#' the two prefix sequence are identical.} |
|
32 |
+#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
33 |
+#' as \emph{value} match; no further prefixes are allowed.} |
|
34 |
+#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
35 |
+#' if both end with \emph{value}.} |
|
35 | 36 |
#' } |
36 | 37 |
#' |
37 |
-#' @param region_output single string that declare which region is given in |
|
38 |
-#' output for each input pair of left dataset right dataset regions |
|
38 |
+#' @param region_output single string that declares which region is given in |
|
39 |
+#' output for each input pair of left dataset and right dataset regions |
|
39 | 40 |
#' satisfying the genometric predicate: |
40 | 41 |
#' \itemize{ |
41 |
-#' \item{left: outputs the anchor regions from left_input_data that satisfy |
|
42 |
-#' the genometric predicate} |
|
43 |
-#' \item{right: outputs the experiment regions from right_input_data that |
|
44 |
-#' satisfy the genometric predicate} |
|
45 |
-#' \item{int (intersection): outputs the overlapping part (intersection) |
|
46 |
-#' of the left_input_data and right_input_data regions that satisfy |
|
47 |
-#' the genometric predicate; if the intersection is empty, |
|
48 |
-#' no output is produced} |
|
49 |
-#' \item{contig: outputs the concatenation between the left_input_data and |
|
50 |
-#' right_input_data regions that satisfy the genometric predicate, |
|
51 |
-#' (i.e. the output regionis defined as having left (right) coordinates |
|
52 |
-#' equal to the minimum (maximum) of the corresponding coordinate values |
|
53 |
-#' in the left_input_data and right_input_data regions satisfying |
|
42 |
+#' \item{LEFT: It outputs the anchor regions from 'x' that satisfy the |
|
43 |
+#' genometric predicate} |
|
44 |
+#' \item{RIGHT: It outputs the experiment regions from 'y' that satisfy the |
|
45 |
+#' genometric predicate} |
|
46 |
+#' \item{INT (intersection): It outputs the overlapping part (intersection) |
|
47 |
+#' of the 'x' and 'y' regions that satisfy the genometric predicate; if the |
|
48 |
+#' intersection is empty, no output is produced} |
|
49 |
+#' \item{CAT: It outputs the concatenation between the 'x' and 'y' regions |
|
50 |
+#' that satisfy the genometric predicate, (i.e. the output regionis defined as |
|
51 |
+#' having left (right) coordinates equal to the minimum (maximum) of the |
|
52 |
+#' corresponding coordinate values in the 'x' and 'y' regions satisfying |
|
54 | 53 |
#' the genometric predicate)} |
55 | 54 |
#' } |
56 | 55 |
#' |
... | ... |
@@ -59,18 +58,26 @@ |
59 | 58 |
#' |
60 | 59 |
#' @examples |
61 | 60 |
#' |
62 |
-#' # Given a dataset 'hm' and one called 'tss' with a sample including |
|
63 |
-#' # Transcription Start Site annotations, it searches for those regions of hm |
|
64 |
-#' # that are at a minimal distance from a transcription start site (TSS) |
|
65 |
-#' # and takes the first/closest one for each TSS, provided that such distance |
|
66 |
-#' # is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
67 |
-#' # from the same provider (joinby clause). |
|
61 |
+#' ## Thi statement initializes and runs the GMQL server for local execution |
|
62 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
63 |
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory |
|
64 |
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL |
|
65 |
+#' ## dataset named "exp" and "ref" respectively using customParser |
|
68 | 66 |
#' |
69 | 67 |
#' init_gmql() |
70 | 68 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
71 | 69 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
72 | 70 |
#' TSS = read_dataset(test_path) |
73 | 71 |
#' HM = read_dataset(test_path2) |
72 |
+#' |
|
73 |
+#' ## Given a dataset 'HM' and one called 'TSS' with a sample including |
|
74 |
+#' ## Transcription Start Site annotations, it searches for those regions of HM |
|
75 |
+#' ## that are at a minimal distance from a transcription start site (TSS) |
|
76 |
+#' ## and takes the first/closest one for each TSS, provided that such distance |
|
77 |
+#' ## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
78 |
+#' ## from the same provider (joinby clause). |
|
79 |
+#' |
|
80 |
+#' |
|
74 | 81 |
#' join_data = merge(TSS, HM, |
75 | 82 |
#' genometric_predicate = list(MD(1), DLE(120000)), DF("provider"), |
76 | 83 |
#' region_output = "RIGHT") |
... | ... |
@@ -82,7 +89,7 @@ |
82 | 89 |
#' @export |
83 | 90 |
setMethod("merge", c("GMQLDataset","GMQLDataset"), |
84 | 91 |
function(x, y, genometric_predicate = NULL, |
85 |
- region_output = "contig", ...) |
|
92 |
+ region_output = "contig", joinBy = NULL) |
|
86 | 93 |
{ |
87 | 94 |
ptr_data_x <- x@value |
88 | 95 |
ptr_data_y <- y@value |
... | ... |
@@ -123,9 +130,12 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy, |
123 | 130 |
join_condition_matrix <- .jnull("java/lang/String") |
124 | 131 |
|
125 | 132 |
ouput <- toupper(region_output) |
126 |
- if(!identical(ouput,"CONTIG") && !identical(ouput,"LEFT") && |
|
127 |
- !identical(ouput,"RIGHT") && !identical(ouput,"INT")) |
|
128 |
- stop("region_output must be contig,left,right or int (intersection)") |
|
133 |
+ if(!identical(ouput,"CAT") && !identical(ouput,"LEFT") && |
|
134 |
+ !identical(ouput,"RIGHT") && !identical(ouput,"INT") && |
|
135 |
+ !identical(ouput,"RIGHT_DIST") && !identical(ouput,"BOTH") && |
|
136 |
+ !identical(ouput,"LEFT_DIST")) |
|
137 |
+ stop("region_output must be cat, left, right, right_dist, left_dist |
|
138 |
+ or int (intersection)") |
|
129 | 139 |
|
130 | 140 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
131 | 141 |
response <- WrappeR$join(genomatrix,join_condition_matrix, |
... | ... |
@@ -1,12 +1,12 @@ |
1 | 1 |
#' Method map |
2 | 2 |
#' |
3 | 3 |
#' It computes, for each sample in the right dataset, aggregates over the |
4 |
-#' values of the right regions that intersect with a region in a left sample, |
|
5 |
-#' for each region of each sample in the left dataset; |
|
4 |
+#' values of the right dataset regions that intersect with a region in a left |
|
5 |
+#' dataset sample, for each region of each sample in the left dataset; |
|
6 | 6 |
#' The number of generated output samples is the Cartesian product |
7 | 7 |
#' of the samples in the two input datasets; |
8 |
-#' each output sample has the same regions as the related input left sample, |
|
9 |
-#' with their attributes and values, plus the attributes computed as |
|
8 |
+#' each output sample has the same regions as the related input left dataset |
|
9 |
+#' sample, with their attributes and values, plus the attributes computed as |
|
10 | 10 |
#' aggregates over right region values. |
11 | 11 |
#' Output sample metadata are the union of the related input sample metadata, |
12 | 12 |
#' whose attribute names are prefixed with "left" or "right" respectively. |
... | ... |
@@ -22,41 +22,51 @@ |
22 | 22 |
#' @param x GMQLDataset class object |
23 | 23 |
#' @param y GMQLDataset class object |
24 | 24 |
#' |
25 |
-#' @param ... Additional arguments for use in specific methods. |
|
26 |
-#' |
|
27 |
-#' In this case a series of element in the form \emph{key} = \emph{aggregate}. |
|
28 |
-#' The \emph{aggregate} is an object of class AGGREGATES |
|
29 |
-#' The aggregate functions available are: \code{\link{SUM}}, |
|
25 |
+#' @param ... a series of expressions separated by comma in the form |
|
26 |
+#' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of |
|
27 |
+#' class AGGREGATES. The aggregate functions available are: \code{\link{SUM}}, |
|
30 | 28 |
#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
31 | 29 |
#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
32 | 30 |
#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
33 | 31 |
#' \code{\link{Q2}}, \code{\link{Q3}}. |
34 |
-#' Every aggregate accepts a string value, execet for COUNT |
|
35 |
-#' Argument of 'aggregate' must exist in schema |
|
36 |
-#' Two style are allowed: |
|
32 |
+#' Every aggregate accepts a string value, except for COUNT, which does not |
|
33 |
+#' have any value. |
|
34 |
+#' Argument of 'aggregate function' must exist in schema, i.e. among region |
|
35 |
+#' attributes. Two styles are allowed: |
|
37 | 36 |
#' \itemize{ |
38 | 37 |
#' \item list of key-value pairs: e.g. sum = SUM("pvalue") |
39 | 38 |
#' \item list of values: e.g. SUM("pvalue") |
40 | 39 |
#' } |
41 | 40 |
#' "mixed style" is not allowed |
42 | 41 |
#' |
43 |
-#' @param joinBy list of evalation function to define condition |
|
44 |
-#' evaluation on metadata: |
|
42 |
+#' @param joinBy list of evalation functions to define evaluation on metadata: |
|
45 | 43 |
#' \itemize{ |
46 |
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match |
|
47 |
-#' if they both end with value and, if they have a further prefixes, |
|
48 |
-#' the two prefix sequence are identical} |
|
49 |
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly |
|
50 |
-#' as value will match; no further prefixes are allowed. } |
|
51 |
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match |
|
52 |
-#' if both end with value.} |
|
44 |
+#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
45 |
+#' if they both end with \emph{value} and, if they have further prefixes, |
|
46 |
+#' the two prefix sequence are identical.} |
|
47 |
+#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
48 |
+#' as \emph{value} match; no further prefixes are allowed.} |
|
49 |
+#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
50 |
+#' if both end with \emph{value}.} |
|
53 | 51 |
#' } |
54 | 52 |
#' |
55 | 53 |
#' @return GMQLDataset object. It contains the value to use as input |
56 | 54 |
#' for the subsequent GMQLDataset method |
57 | 55 |
#' |
58 | 56 |
#' @examples |
59 |
-#' |
|
57 |
+#' |
|
58 |
+#' ## Thi statement initializes and runs the GMQL server for local execution |
|
59 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
60 |
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory |
|
61 |
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL |
|
62 |
+#' ## dataset named "exp" and "ref" respectively using customParser |
|
63 |
+#' |
|
64 |
+#' init_gmql() |
|
65 |
+#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
66 |
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
67 |
+#' exp = read_dataset(test_path) |
|
68 |
+#' ref = read_dataset(test_path2) |
|
69 |
+#' |
|
60 | 70 |
#' # It counts the number of regions in each sample from exp that overlap with |
61 | 71 |
#' # a ref region, and for each ref region it computes the minimum score |
62 | 72 |
#' # of all the regions in each exp sample that overlap with it. |
... | ... |
@@ -66,12 +76,7 @@ |
66 | 76 |
#' # but with a different value from the one(s) of ref sample(s), |
67 | 77 |
#' # are disregarded. |
68 | 78 |
#' |
69 |
-#' init_gmql() |
|
70 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
71 |
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
72 |
-#' exp = read_dataset(test_path) |
|
73 |
-#' ref = read_dataset(test_path2) |
|
74 |
-#' out = map(ref,exp, minScore = MIN("score"), |
|
79 |
+#' out = map(ref, exp, minScore = MIN("score"), |
|
75 | 80 |
#' joinBy = list(DF("cell_tissue"))) |
76 | 81 |
#' |
77 | 82 |
#' @name map |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
#' GMQL Function: EXECUTE |
2 | 2 |
#' |
3 |
-#' Execute GMQL query. |
|
3 |
+#' It executes GMQL query. |
|
4 | 4 |
#' The function works only after invoking at least one collect |
5 | 5 |
#' |
6 | 6 |
#' @importFrom rJava J |
... | ... |
@@ -8,15 +8,24 @@ |
8 | 8 |
#' @return None |
9 | 9 |
#' |
10 | 10 |
#' @examples |
11 |
-#' |
|
11 |
+#' ## Thi statement initializes and runs the GMQL server for local execution |
|
12 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
13 |
+#' ## the path to the folders "DATASET" in the subdirectory "example" |
|
14 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
15 |
+#' ## named "data" |
|
16 |
+#' |
|
12 | 17 |
#' init_gmql() |
13 | 18 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
14 |
-#' rd = read_dataset(test_path) |
|
15 |
-#' filtered = filter(rd) |
|
16 |
-#' aggr = aggregate(filtered, list(DF("antibody_targer","cell_karyotype"))) |
|
17 |
-#' collect(aggr, dir_out = test_path) |
|
19 |
+#' data = read_dataset(test_path) |
|
20 |
+#' |
|
21 |
+#' ## The following statement materialize the dataset, previoulsy read, at |
|
22 |
+#' ## th specific destination path into local folder "ds1" opportunely created |
|
23 |
+#' |
|
24 |
+#' collect(data, dir_out = test_path) |
|
18 | 25 |
#' |
26 |
+#' ## This statement executes GMQL query. |
|
19 | 27 |
#' \dontrun{ |
28 |
+#' |
|
20 | 29 |
#' execute() |
21 | 30 |
#' } |
22 | 31 |
#' @export |
... | ... |
@@ -147,32 +156,40 @@ gmql_materialize <- function(input_data, dir_out, name) |
147 | 156 |
|
148 | 157 |
#' Method take |
149 | 158 |
#' |
150 |
-#' It saves the contents of a dataset that contains samples metadata |
|
151 |
-#' and samples regions as GrangesList. |
|
152 |
-#' It is normally used to store in memory the contents of any dataset |
|
159 |
+#' It saves the content of a dataset that contains samples metadata |
|
160 |
+#' and samples regions as GRangesList. |
|
161 |
+#' It is normally used to store in memory the content of any dataset |
|
153 | 162 |
#' generated during a GMQL query. The operation can be very time-consuming. |
154 | 163 |
#' If you have invoked any materialization before take function, |
155 |
-#' all those dataset will be materialized as folder. |
|
164 |
+#' all those datasets are materialized as folders. |
|
156 | 165 |
#' |
157 |
-#' @import GenomicRanges |
|
166 |
+#' @importFrom GenomicRanges makeGRangesFromDataFrame |
|
167 |
+#' @importFrom S4Vectors metadata |
|
158 | 168 |
#' @importFrom stats setNames |
159 |
-#' @importFrom rJava J |
|
160 |
-#' @importFrom rJava .jevalArray |
|
169 |
+#' @importFrom rJava J .jevalArray |
|
170 |
+#' @importFrom GenomicRanges GRangesList |
|
161 | 171 |
#' |
162 | 172 |
#' @param data returned object from any GMQL function |
163 | 173 |
#' @param rows number of rows for each sample regions that you want to |
164 |
-#' retrieve and stored in memory. |
|
165 |
-#' by default is 0 that means take all rows for each sample |
|
174 |
+#' retrieve and store in memory. |
|
175 |
+#' By default it is 0 that means take all rows for each sample |
|
166 | 176 |
#' |
167 |
-#' @param ... Additional arguments for use in specific methods |
|
177 |
+#' @param ... Additional arguments for use in other specific methods of the |
|
178 |
+#' generic take function |
|
168 | 179 |
#' |
169 |
-#' @return GrangesList with associated metadata |
|
180 |
+#' @return GRangesList with associated metadata |
|
170 | 181 |
#' |
171 | 182 |
#' @examples |
172 |
-#' |
|
183 |
+#' ## This statement initializes and runs the GMQL server for local execution |
|
184 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
185 |
+#' ## the path to the folder "DATASET" in the subdirectory "example" |
|
186 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
187 |
+#' ## named "rd" using customParser |
|
188 |
+#' |
|
173 | 189 |
#' init_gmql() |
174 | 190 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
175 | 191 |
#' rd = read_dataset(test_path) |
192 |
+#' |
|
176 | 193 |
#' aggr = aggregate(rd, list(DF("antibody_target", "cell_karyotype"))) |
177 | 194 |
#' taken <- take(aggr, rows = 45) |
178 | 195 |
#' |
... | ... |
@@ -36,7 +36,7 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
36 | 36 |
#' those in the input dataset. It allows to: |
37 | 37 |
#' \itemize{ |
38 | 38 |
#' \item{Remove existing metadata and/or region attributes from a dataset} |
39 |
-#' \item{Update new metadata and/or region attributes in the result} |
|
39 |
+#' \item{Update or set new metadata and/or region attributes in the result} |
|
40 | 40 |
#' } |
41 | 41 |
#' |
42 | 42 |
#' @importFrom rJava J .jnull .jarray |
... | ... |
@@ -44,25 +44,25 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
44 | 44 |
#' |
45 | 45 |
#' @param .data GMQLDataset class object |
46 | 46 |
#' |
47 |
-#' @param metadata vector of string made up by metadata attribute |
|
48 |
-#' @param regions vector of string made up by schema field attribute |
|
49 |
-#' @param all_but_reg logical value indicating which schema field attribute |
|
50 |
-#' you want to exclude; if FALSE only the regions you choose is kept |
|
51 |
-#' in the output of the project operation, if TRUE the schema region |
|
52 |
-#' are all except ones include in region parameter. |
|
53 |
-#' if regions is not defined \emph{all_but_reg} is not considerd. |
|
47 |
+#' @param metadata vector of string made up by metadata attributes |
|
48 |
+#' @param regions vector of string made up by region attributes |
|
49 |
+#' @param all_but_reg logical value indicating which region attributes |
|
50 |
+#' you want to exclude; if FALSE, only the regions you choose are kept |
|
51 |
+#' in the output of the operation; if TRUE the regions |
|
52 |
+#' are all kept except those in region parameter. |
|
53 |
+#' If regions is not defined, \emph{all_but_reg} is not considerd. |
|
54 | 54 |
#' @param all_but_meta logical value indicating which metadata |
55 |
-#' you want to exclude; If FALSE only the metadata you choose is kept |
|
56 |
-#' in the output of the project operation, if TRUE the metadata |
|
57 |
-#' are all except ones include in region parameter. |
|
58 |
-#' if metadata is not defined \emph{all_but_meta} is not considerd. |
|
55 |
+#' you want to exclude; If FALSE only the metadata you choose are kept |
|
56 |
+#' in the output of the operation; if TRUE the metadata |
|
57 |
+#' are all kept except those in metadata. |
|
58 |
+#' If metadata is not defined \emph{all_but_meta} is not considerd. |
|
59 | 59 |
#' @param regions_update list of updating rules in the form of |
60 |
-#' key = value generating new genomic region attributes. |
|
60 |
+#' key = value generating new genomic region attributes and values. |
|
61 | 61 |
#' To specify the new values, the following options are available: |
62 | 62 |
#' \itemize{ |
63 | 63 |
#' \item{All aggregation functions already defined by AGGREGATES object} |
64 | 64 |
#' \item{All basic mathematical operations (+, -, *, /), including parenthesis} |
65 |
-#' \item{SQRT, META, NIL constructor object defined by OPERATOR object} |
|
65 |
+#' \item{SQRT, META, NIL constructor objects defined by OPERATOR object} |
|
66 | 66 |
#' } |
67 | 67 |
#' @param metadata_update list of updating rules in the form of |
68 | 68 |
#' key = value generating new metadata. |
... | ... |
@@ -70,13 +70,22 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
70 | 70 |
#' \itemize{ |
71 | 71 |
#' \item{All aggregation functions already defined by AGGREGATES object} |
72 | 72 |
#' \item{All basic mathematical operations (+, -, *, /), including parenthesis} |
73 |
-#' \item{SQRT, META, NIL constructor object defined by OPERATOR object} |
|
73 |
+#' \item{SQRT, META, NIL constructor objects defined by OPERATOR object} |
|
74 | 74 |
#' } |
75 | 75 |
#' |
76 | 76 |
#' @return GMQLDataset object. It contains the value to use as input |
77 | 77 |
#' for the subsequent GMQLDataset method |
78 | 78 |
#' |
79 | 79 |
#' @examples |
80 |
+#' ## This statement initializes and runs the GMQL server for local execution |
|
81 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
82 |
+#' ## the path to the folders "DATASET" in the subdirectory "example" |
|
83 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
84 |
+#' ## named "data" |
|
85 |
+#' |
|
86 |
+#' init_gmql() |
|
87 |
+#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
88 |
+#' data = read_dataset(test_path) |
|
80 | 89 |
#' |
81 | 90 |
#' ## It creates a new dataset called CTCF_NORM_SCORE by preserving all |
82 | 91 |
#' ## region attributes apart from score, and creating a new region attribute |
... | ... |
@@ -86,16 +95,12 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
86 | 95 |
#' ## a new metadata attribute called normalized with value 1, |
87 | 96 |
#' ## which can be used in future selections. |
88 | 97 |
#' |
89 |
-#' init_gmql() |
|
90 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
91 |
-#' input = read_dataset(test_path) |
|
92 |
-#' CTCF_NORM_SCORE = select(input, metadata_update = list(normalized = 1), |
|
98 |
+#' |
|
99 |
+#' CTCF_NORM_SCORE = select(data, metadata_update = list(normalized = 1), |
|
93 | 100 |
#' regions_update = list(new_score = (score / 1000.0) + 100), |
94 | 101 |
#' regions = c("score"), all_but_reg = TRUE) |
95 | 102 |
#' |
96 | 103 |
#' |
97 |
-#' \dontrun{ |
|
98 |
-#' |
|
99 | 104 |
#' ## It produces an output dataset that contains the same samples |
100 | 105 |
#' ## as the input dataset. |
101 | 106 |
#' ## Each output sample only contains, as region attributes, |
... | ... |
@@ -104,14 +109,10 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
104 | 109 |
#' ## and as metadata attributes only the specified ones, |
105 | 110 |
#' ## i.e. manually_curated_tissue_status and manually_curated_tumor_tag. |
106 | 111 |
#' |
107 |
-#' init_gmql() |
|
108 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
109 |
-#' DS_in = read_dataset(test_path) |
|
110 |
-#' DS_out = select(DS_in, regions = c("variant_classification", |
|
112 |
+#' DS_out = select(data, regions = c("variant_classification", |
|
111 | 113 |
#' "variant_type"), metadata = c("manually_curated_tissue_status", |
112 | 114 |
#' "manually_curated_tumor_tag")) |
113 |
-#' |
|
114 |
-#' } |
|
115 |
+#' |
|
115 | 116 |
#' |
116 | 117 |
#' @name select |
117 | 118 |
#' @rdname select |
... | ... |
@@ -1,131 +1,67 @@ |
1 |
-#' Init GMQL Server |
|
2 |
-#' |
|
3 |
-#' Initialize and run GMQL server for executing GMQL query |
|
4 |
-#' It is also perform a login to GMQL REST services suite if needed |
|
5 |
-#' |
|
6 |
-#' @importFrom rJava J |
|
7 |
-#' |
|
8 |
-#' @param output_format string identifies the output format of sample files. |
|
9 |
-#' Can be TAB, GTF or COLLECT |
|
10 |
-#' \itemize{ |
|
11 |
-#' \item{TAB: tab delimited file format} |
|
12 |
-#' \item{GTF: file format used to hold information about gene structure. |
|
13 |
-#' It is a tab-delimited text format based on the general feature format} |
|
14 |
-#' \item{COLLECT: used for storing output in memory} |
|
15 |
-#' } |
|
16 |
-#' @param remote_processing logical value specifying the processing mode. |
|
17 |
-#' True for processing on cluster (remote), false for local processing. |
|
18 |
-#' |
|
19 |
-#' @param url string url of server: It must contain the server address |
|
20 |
-#' and base url; service name is added automatically. |
|
21 |
-#' If null, no login is performed. |
|
22 |
-#' You can always perform it, calling the function \code{\link{login_gmql}} |
|
23 |
-#' explicitly |
|
24 |
-#' |
|
25 |
-#' @param username string name used during signup |
|
26 |
-#' @param password string password used during signup |
|
27 |
-#' |
|
28 |
-#' @return None |
|
29 |
-#' |
|
30 |
-#' @examples |
|
31 |
-#' |
|
32 |
-#' ## initialize GMQL with local processing with sample files output format |
|
33 |
-#' ## as Tab delimited |
|
34 |
-#' |
|
35 |
-#' init_gmql("tab", FALSE) |
|
36 |
-#' |
|
37 |
-#' \dontrun{ |
|
38 |
-#' |
|
39 |
-#' ## initialize GMQL with remote processing |
|
40 |
-#' remote_url = "http://130.186.13.219/gmql-rest" |
|
41 |
-#' init_gmql(remote_processing = TRUE, url = remote_url) |
|
42 |
-#' |
|
43 |
-#' } |
|
44 |
-#' |
|
45 |
-#' @export |
|
46 |
-#' |
|
47 |
-init_gmql <- function(output_format = "gtf", remote_processing = FALSE, |
|
48 |
- url = NULL, username = NULL, password = NULL) |
|
49 |
-{ |
|
50 |
- out_format <- toupper(output_format) |
|
51 |
- if(!identical(out_format,"TAB") && !identical(out_format,"GTF") && |
|
52 |
- !identical(out_format,"COLLECT")) |
|
53 |
- stop("output_format must be TAB, GTF or COLLECT") |
|
54 |
- .check_logical(remote_processing) |
|
55 |
- |
|
56 |
- # mettere attesa da input keyboard, controllare se token già esiste |
|
57 |
- # da sessione precedente |
|
58 |
- if(!is.null(url) && !exists("authToken",envir = .GlobalEnv)) |
|
59 |
- login_gmql(url,username,password) |
|
60 |
- |
|
61 |
- WrappeR <- J("it/polimi/genomics/r/Wrapper") |
|
62 |
- WrappeR$initGMQL(out_format,remote_processing) |
|
63 |
-} |
|
64 |
- |
|
65 | 1 |
#' Function read |
66 | 2 |
#' |
67 |
-#' Read a GMQL dataset, folder containig some homogenus sample from disk |
|
68 |
-#' or GrangesList saving in Scala memory that can be referenced in R. |
|
69 |
-#' Also used to read a repository dataset in case of remote processing. |
|
3 |
+#' It reads a GMQL dataset, as a folder containig some homogenus samples on |
|
4 |
+#' disk or as a GrangesList; it saving in Scala memory in a way that can be |
|
5 |
+#' referenced in R. It is also used to read a repository dataset in case of |
|
6 |
+#' remote processing. |
|
70 | 7 |
#' |
71 |
-#' @importFrom rJava .jnull |
|
72 |
-#' @importFrom rJava .jarray |
|
8 |
+#' @importFrom rJava J .jnull .jarray |
|
73 | 9 |
#' @importFrom methods is |
74 |
-#' @importFrom rJava J |
|
75 | 10 |
#' |
76 |
-#' @param dataset folder path for GMQL dataset or datasetname on repository |
|
11 |
+#' @param dataset folder path for GMQL dataset or dataset name on repository |
|
77 | 12 |
#' @param parser string used to parsing dataset files |
78 | 13 |
#' The Parser's available are: |
79 | 14 |
#' \itemize{ |
80 |
-#' \item{BedParser} |
|
81 | 15 |
#' \item{ANNParser} |
82 | 16 |
#' \item{BroadProjParser} |
83 |
-#' \item{BedParser} |
|
84 | 17 |
#' \item{NarrowPeakParser} |
85 | 18 |
#' \item{RnaSeqParser} |
86 | 19 |
#' \item{CustomParser.} |
87 | 20 |
#' } |
88 | 21 |
#' Default is CustomParser. |
89 | 22 |
#' @param is_local logical value indicating local or remote dataset |
90 |
-#' @param is_GMQL logical value indicating if is a GMQL dataset or not |
|
23 |
+#' @param is_GMQL logical value indicating GMQL dataset or not |
|
91 | 24 |
#' |
92 | 25 |
#' @return GMQLDataset object. It contains the value to use as input |
93 | 26 |
#' for the subsequent GMQLDataset method |
94 | 27 |
#' |
95 | 28 |
#' @details |
96 | 29 |
#' Normally a GMQL dataset contains an XML schema file that contains |
97 |
-#' name of column header. (e.g chr, start, stop, strand) |
|
98 |
-#' The CustomParser read this XML schema; |
|
99 |
-#' if you already know what kind of schema your files are, use one of the |
|
100 |
-#' parser defined without reading any XML schema |
|
30 |
+#' name of region attributes. (e.g chr, start, stop, strand) |
|
31 |
+#' The CustomParser reads this XML schema; |
|
32 |
+#' if you already know what kind of schema your files have, use one of the |
|
33 |
+#' parsers defined, without reading any XML schema. |
|
101 | 34 |
#' |
102 |
-#' If GrangesList has no metadata: i.e. metadata() is empty, two metadata are |
|
35 |
+#' If GRangesList has no metadata: i.e. metadata() is empty, two metadata are |
|
103 | 36 |
#' generated. |
104 | 37 |
#' \itemize{ |
105 |
-#' \item{"Provider" = "Polimi"} |
|
106 |
-#' \item{"Application" = "RGMQL"} |
|
38 |
+#' \item{"provider" = "PoliMi"} |
|
39 |
+#' \item{"application" = "RGMQL"} |
|
107 | 40 |
#' } |
108 | 41 |
#' |
109 | 42 |
#' @examples |
110 | 43 |
#' |
111 |
-#' ## read local dataset with CustomParser |
|
44 |
+#' ## Thi statement initializes and runs the GMQL server for local execution |
|
45 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
46 |
+#' ## the path to the folders "DATASET" in the subdirectory "example" |
|
47 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
48 |
+#' ## named "data" |
|
49 |
+#' |
|
112 | 50 |
#' init_gmql() |
113 | 51 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
114 |
-#' r = read_dataset(test_path) |
|
52 |
+#' data = read_dataset(test_path) |
|
115 | 53 |
#' |
116 |
-#' \dontrun{ |
|
54 |
+#' ## This statement opens such folder as a GMQL dataset named "data" using |
|
55 |
+#' ## "NarrowPeakParser" |
|
56 |
+#' dataPeak = read_dataset(test_path,"NarrowPeakParser") |
|
117 | 57 |
#' |
118 |
-#' ## read local dataset with other Parser |
|
119 |
-#' init_gmql() |
|
120 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
121 |
-#' r = read_dataset(test_path,"ANNParser") |
|
58 |
+#' ## This statement reads a remote public dataset stored into GMQL system |
|
59 |
+#' ## repository. For a public dataset in a (remote) GMQL repository the |
|
60 |
+#' ## prefix "public." is needed before dataset name |
|
122 | 61 |
#' |
123 |
-#' ## read remote public dataset stored into GMQL system repository |
|
124 |
-#' ## If public dataset a prefix "public." is needed before dataset name |
|
125 |
-#' r2 = read_dataset("public.HG19_TCGA_dnaseq",is_local = FALSE) |
|
62 |
+#' data1 = read_dataset("public.Example_Dataset1",is_local = FALSE) |
|
126 | 63 |
#' |
127 |
-#' } |
|
128 |
-#' @name read |
|
64 |
+#' @name read_dataset |
|
129 | 65 |
#' @rdname read-function |
130 | 66 |
#' @export |
131 | 67 |
#' |
... | ... |
@@ -188,12 +124,11 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, |
188 | 124 |
|
189 | 125 |
|
190 | 126 |
#' @importFrom S4Vectors metadata |
191 |
-#' @importFrom rJava J |
|
192 |
-#' @importFrom rJava .jarray |
|
127 |
+#' @importFrom rJava J .jarray |
|
193 | 128 |
#' |
194 |
-#' @param samples GrangesList |
|
129 |
+#' @param samples GRangesList |
|
195 | 130 |
#' |
196 |
-#' @name read |
|
131 |
+#' @name read_dataset |
|
197 | 132 |
#' @rdname read-function |
198 | 133 |
#' @export |
199 | 134 |
#' |
... | ... |
@@ -210,7 +145,7 @@ read <- function(samples) |
210 | 145 |
warning("GrangesList has no metadata. |
211 | 146 |
We provide two metadata for you") |
212 | 147 |
index_meta <- rep(1:len,each = len) |
213 |
- rep_meta <- rep(c("Provider","Polimi", "Application", "R-GMQL"), |
|
148 |
+ rep_meta <- rep(c("provider","PoliMi", "application", "RGMQL"), |
|
214 | 149 |
times=len) |
215 | 150 |
meta_matrix <- matrix(rep_meta,ncol = 2,byrow = TRUE) |
216 | 151 |
meta_matrix <- cbind(index_meta,meta_matrix) |
... | ... |
@@ -275,37 +210,3 @@ We provide two metadata for you") |
275 | 210 |
parser |
276 | 211 |
} |
277 | 212 |
|
278 |
-#' Disable or Enable remote processing |
|
279 |
-#' |
|
280 |
-#' It allows to enable or disable remote processing |
|
281 |
-#' |
|
282 |
-#' @details |
|
283 |
-#' The invocation of this function allow to change mode of processing. |
|
284 |
-#' after invoking collect() is not possbile to switch the processing mode, |
|
285 |
-#' |
|
286 |
-#' @importFrom rJava J |
|
287 |
-#' |
|
288 |
-#' @param is_remote logical value used in order to set the processing mode. |
|
289 |
-#' TRUE you will set a remote query processing mode otherwise will be local, |
|
290 |
-#' |
|
291 |
-#' @return None |
|
292 |
-#' |
|
293 |
-#' @examples |
|
294 |
-#' |
|
295 |
-#' # initialize with remote processing off |
|
296 |
-#' init_gmql("tab",remote_processing = FALSE) |
|
297 |
-#' |
|
298 |
-#' # change processing mode to remote |
|
299 |
-#' remote_processing(TRUE) |
|
300 |
-#' |
|
301 |
-#' @export |
|
302 |
-#' |
|
303 |
-remote_processing<-function(is_remote) |
|
304 |
-{ |
|
305 |
- WrappeR <- J("it/polimi/genomics/r/Wrapper") |
|
306 |
- .check_logical(is_remote) |
|
307 |
- response <- WrappeR$remote_processing(is_remote) |
|
308 |
- print(response) |
|
309 |
-} |
|
310 |
- |
|
311 |
- |
... | ... |
@@ -29,12 +29,13 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL, |
29 | 29 |
|
30 | 30 |
#' Method filter |
31 | 31 |
#' |
32 |
-#' It creates a new dataset from an existing one by extracting a subset of |
|
33 |
-#' samples and/or regions from the input dataset according to their predicate. |
|
34 |
-#' each sample in the output dataset has the same region attributes, |
|
32 |
+#' @description Wrapper to GMQL SELECT operator |
|
33 |
+#' @description It creates a new dataset from an existing one by extracting a |
|
34 |
+#' subset of samples and/or regions from the input dataset according to their |
|
35 |
+#' predicate. Each sample in the output dataset has the same region attributes, |
|
35 | 36 |
#' values, and metadata as in the input dataset. |
36 | 37 |
#' When semijoin function is defined, it extracts those samples containing |
37 |
-#' all metadata attribute defined in semijoin clause with at least |
|
38 |
+#' all metadata attributes defined in semijoin clause with at least |
|
38 | 39 |
#' one metadata value in common with semijoin dataset. |
39 | 40 |
#' If no metadata in common between input dataset and semijoin dataset, |
40 | 41 |
#' no sample is extracted. |
... | ... |
@@ -45,11 +46,11 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL, |
45 | 46 |
#' @importFrom dplyr filter |
46 | 47 |
#' |
47 | 48 |
#' @param .data GMQLDataset class object |
48 |
-#' @param m_predicate logical predicate made up by R logical operation |
|
49 |
-#' on metadata attribute. |
|
49 |
+#' @param m_predicate logical predicate made up by R logical operations |
|
50 |
+#' on metadata attributes. |
|
50 | 51 |
#' Only !, |, ||, &, && are admitted. |
51 |
-#' @param r_predicate logical predicate made up by R logical operation |
|
52 |
-#' on schema region values. |
|
52 |
+#' @param r_predicate logical predicate made up by R logical operations |
|
53 |
+#' on region attributes. |
|
53 | 54 |
#' Only !, |, ||, &, && are admitted. |
54 | 55 |
#' |
55 | 56 |
#' @param semijoin \code{\link{semijoin}} function to define filter method |
... | ... |
@@ -61,38 +62,45 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL, |
61 | 62 |
#' |
62 | 63 |
#' @examples |
63 | 64 |
#' |
64 |
-#' ## It selects from input data samples of patients younger than 70 years old, |
|
65 |
-#' ## based on filtering on sample metadata attribute Patient_age |
|
65 |
+#' ## This statement initializes and runs the GMQL server for local execution |
|
66 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
67 |
+#' ## the path to the folders "DATASET" in the subdirectory "example" |
|
68 |
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset |
|
69 |
+#' ## named "data" |
|
66 | 70 |
#' |
67 | 71 |
#' init_gmql() |
68 | 72 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
69 |
-#' input <- read_dataset(test_path) |
|
70 |
-#' s <- filter(input, Patient_age < 70) |
|
73 |
+#' data <- read_dataset(test_path) |
|
71 | 74 |
#' |
72 |
-#' \dontrun{ |
|
75 |
+#' ## This statement selects from input data samples of patients younger |
|
76 |
+#' ## than 70 years old, based on filtering on sample metadata attribute |
|
77 |
+#' ## 'patient_age' |
|
73 | 78 |
#' |
74 |
-#' ## It creates a new dataset called 'jun_tf' by selecting those samples and |
|
75 |
-#' ## their regions from the existing 'data' dataset such that: |
|
79 |
+#' filter_data <- filter(data, patient_age < 70) |
|
80 |
+#' |
|
81 |
+#' ## This statement defines the path to the folders "DATASET_GDM" in the |
|
82 |
+#' ## subdirectory "example" of the package "RGMQL" and opens such folder |
|
83 |
+#' ## as a GMQL dataset named "join_data" |
|
84 |
+#' |
|
85 |
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
86 |
+#' join_data <- read_dataset(test_path2) |
|
87 |
+#' |
|
88 |
+#' ## This statement creates a new dataset called 'jun_tf' by selecting those |
|
89 |
+#' ## samples and their regions from the existing 'data' dataset such that: |
|
76 | 90 |
#' ## Each output sample has a metadata attribute called antibody_target |
77 | 91 |
#' ## with value JUN. |
78 | 92 |
#' ## Each output sample also has not a metadata attribute called "cell" |
79 | 93 |
#' ## that has the same value of at least one of the values that a metadata |
80 | 94 |
#' ## attribute equally called cell has in at least one sample |
81 | 95 |
#' ## of the 'join_data' dataset. |
82 |
-#' ## For each sample satisfying previous condition,only its regions that |
|
96 |
+#' ## For each sample satisfying previous conditions, only its regions that |
|
83 | 97 |
#' ## have a region attribute called pValue with the associated value |
84 | 98 |
#' ## less than 0.01 are conserved in output |
85 | 99 |
#' |
86 |
-#' |
|
87 |
-#' init_gmql() |
|
88 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
89 |
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
90 |
-#' data <- read_dataset(test_path) |
|
91 |
-#' join_data <- read_dataset(test_path2) |
|
92 | 100 |
#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
93 | 101 |
#' semijoin(join_data, TRUE, list(DF("cell")))) |
94 | 102 |
#' |
95 |
-#' } |
|
103 |
+#' |
|
96 | 104 |
#' @name filter |
97 | 105 |
#' @rdname filter |
98 | 106 |
#' @aliases filter,GMQLDataset-method |
... | ... |
@@ -124,30 +132,45 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
124 | 132 |
|
125 | 133 |
} |
126 | 134 |
|
127 |
-#' Semijoin Condtion |
|
135 |
+#' Semijoin condtion |
|
128 | 136 |
#' |
129 |
-#' This function is use as support to filter method to define |
|
137 |
+#' This function is used as support to the filter method to define |
|
130 | 138 |
#' semijoin conditions on metadata |
131 | 139 |
#' |
132 | 140 |
#' @param data GMQLDataset class object |
133 | 141 |
#' |
134 |
-#' @param not_in logical value: T => semijoin is perfomed |
|
135 |
-#' considering semi_join NOT IN semi_join_dataset, F => semijoin is performed |
|
136 |
-#' considering semi_join IN semi_join_dataset |
|
142 |
+#' @param not_in logical value: TRUE => for a given sample of input dataset |
|
143 |
+#' ".data" in \code{\link{filter}} method if and only if there exists at |
|
144 |
+#' least one sample in dataset 'data' with metadata attributes defined |
|
145 |
+#' in groupBy and these attributes of 'data' have at least one value in |
|
146 |
+#' common with the same attributes defined in one sample of '.data' |
|
147 |
+#' FALSE => semijoin condition is evaluated accordingly. |
|
137 | 148 |
#' |
138 |
-#' @param groupBy it define condition evaluation on metadata. |
|
149 |
+#' @param groupBy list of evalation functions to define evaluation on metadata: |
|
139 | 150 |
#' \itemize{ |
140 |
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match |
|
141 |
-#' if they both end with value and, if they have a further prefixes, |
|
142 |
-#' the two prefix sequence are identical} |
|
143 |
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly |
|
144 |
-#' as value will match; no further prefixes are allowed. } |
|
145 |
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match |
|
146 |
-#' if both end with value.} |
|
151 |
+#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
152 |
+#' if they both end with \emph{value} and, if they have further prefixes, |
|
153 |
+#' the two prefix sequence are identical.} |
|
154 |
+#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
155 |
+#' as \emph{value} match; no further prefixes are allowed.} |
|
156 |
+#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
157 |
+#' if both end with \emph{value}.} |
|
147 | 158 |
#' } |
148 | 159 |
#' |
149 | 160 |
#' @examples |
150 | 161 |
#' |
162 |
+#' ## These statements initializes and runs the GMQL server for local execution |
|
163 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
164 |
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory |
|
165 |
+#' ## "example" of the package "RGMQL" and opens such folders as a GMQL dataset |
|
166 |
+#' ## named "data" and "join_data" respectively |
|
167 |
+#' |
|
168 |
+#' init_gmql() |
|
169 |
+#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
170 |
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
171 |
+#' data <- read_dataset(test_path) |
|
172 |
+#' join_data <- read_dataset(test_path2) |
|
173 |
+#' |
|
151 | 174 |
#' # It creates a new dataset called 'jun_tf' by selecting those samples and |
152 | 175 |
#' # their regions from the existing 'data' dataset such that: |
153 | 176 |
#' # Each output sample has a metadata attribute called antibody_target |
... | ... |
@@ -156,21 +179,16 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
156 | 179 |
#' # that has the same value of at least one of the values that a metadata |
157 | 180 |
#' # attribute equally called cell has in at least one sample |
158 | 181 |
#' # of the 'join_data' dataset. |
159 |
-#' # For each sample satisfying previous condition,only its regions that |
|
182 |
+#' # For each sample satisfying previous conditions, only its regions that |
|
160 | 183 |
#' # have a region attribute called pValue with the associated value |
161 | 184 |
#' # less than 0.01 are conserved in output |
162 | 185 |
#' |
163 |
-#' |
|
164 |
-#' init_gmql() |
|
165 |
-#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
166 |
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
167 |
-#' data <- read_dataset(test_path) |
|
168 |
-#' join_data <- read_dataset(test_path2) |
|
169 |
-#' jun_tf <- filter(data,NULL,NULL, semijoin(join_data, TRUE, |
|
170 |
-#' list(DF("cell")))) |
|
186 |
+#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
|
187 |
+#' semijoin(join_data, TRUE, list(DF("cell")))) |
|
171 | 188 |
#' |
172 | 189 |
#' @return semijoin condition as list |
173 | 190 |
#' @export |
191 |
+#' |
|
174 | 192 |
semijoin <- function(data, not_in = FALSE, groupBy = NULL) |
175 | 193 |
{ |
176 | 194 |
if(!is.list(groupBy)) |
... | ... |
@@ -1,25 +1,21 @@ |
1 | 1 |
#' Method union |
2 | 2 |
#' |
3 |
-#' @description Wrapper to GMQL union function |
|
3 |
+#' @description Wrapper to GMQL UNION operator |
|
4 | 4 |
#' |
5 | 5 |
#' @description It is used to integrate homogeneous or heterogeneous samples |
6 | 6 |
#' of two datasets within a single dataset; for each sample of either input |
7 | 7 |
#' dataset, a result sample is created as follows: |
8 | 8 |
#' \itemize{ |
9 | 9 |
#' \item {Metadata are the same as in the original sample.} |
10 |
-#' \item {Resulting schema is obtained by projecting the schema |
|
11 |
-#' of the right dataset over the schema of the left one |
|
12 |
-#' (more properly, it will be performed by adding to the schema of the |
|
13 |
-#' left dataset the region attributes of the right dataset which are not |
|
14 |
-#' identical to those of the left dataset)} |
|
10 |
+#' \item {Resulting schema is is the schema of the left input dataset. } |
|
15 | 11 |
#' \item {Regions are the same (in coordinates and attribute values) |
16 |
-#' as in the original sample. |
|
12 |
+#' as in the original sample if it is from the left input dataset; |
|
13 |
+#' if it is from the right input dataset, its regions are the same in |
|
14 |
+#' coordinates, but only region attributes identical (in name and type) to |
|
15 |
+#' those of the first input dataset are retained, with the same values. |
|
17 | 16 |
#' Region attributes which are missing in an input dataset sample |
18 | 17 |
#' w.r.t. the merged schema are set to null.} |
19 | 18 |
#' } |
20 |
-#' For what concerns metadata, attributes of samples from the left (right) |
|
21 |
-#' input dataset are prefixed with the strings LEFT (RIGHT), so as to trace |
|
22 |
-#' the dataset to which they originally belonged. |
|
23 | 19 |
#' |
24 | 20 |
#' @importFrom rJava J |
25 | 21 |
#' @importFrom BiocGenerics union |
... | ... |
@@ -32,10 +28,11 @@ |
32 | 28 |
#' |
33 | 29 |
#' @examples |
34 | 30 |
#' |
35 |
-#' ## It creates a dataset called full which contains all samples from the |
|
36 |
-#' ## datasets data1 and data2 whose schema is defined by merging the two |
|
37 |
-#' ## dataset schemas. |
|
38 |
-#' ## (union of all the attributes present in the two input datasets). |
|
31 |
+#' ## Thi statement initializes and runs the GMQL server for local execution |
|
32 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
33 |
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory |
|
34 |
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL |
|
35 |
+#' ## dataset named "data1" and "data2" respectively using customParser |
|
39 | 36 |
#' |
40 | 37 |
#' init_gmql() |
41 | 38 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -43,6 +40,9 @@ |
43 | 40 |
#' data1 <- read_dataset(test_path) |
44 | 41 |
#' data2 <- read_dataset(test_path2) |
45 | 42 |
#' |
43 |
+#' ## This statement creates a dataset called 'full' which contains all samples |
|
44 |
+#' ## from the datasets 'data1' and 'data2' |
|
45 |
+#' |
|
46 | 46 |
#' res <- union(data1, data2) |
47 | 47 |
#' |
48 | 48 |
#' |
... | ... |
@@ -13,7 +13,7 @@ if(getRversion() >= "3.1.0") |
13 | 13 |
#' Login to GMQL |
14 | 14 |
#' |
15 | 15 |
#' Login to GMQL REST services suite as a registered user, specifying username |
16 |
-#' and password, or as guest using the proper GMQL web service available |
|
16 |
+#' and password, or as guest, using the proper GMQL web service available |
|
17 | 17 |
#' on a remote server |
18 | 18 |
#' |
19 | 19 |
#' @import httr |
... | ... |
@@ -25,19 +25,20 @@ if(getRversion() >= "3.1.0") |
25 | 25 |
#' @param password string password used during signup |
26 | 26 |
#' |
27 | 27 |
#' @details |
28 |
-#' if both username and password are NULL you will be logged as guest |
|
28 |
+#' If both username and password are NULL you will be logged as guest. |
|
29 | 29 |
#' After login you will receive an authentication token. |
30 | 30 |
#' As token remains vaild on server (until the next login / registration) |